In this paper, we present a solution for the Dialogue Emotion Recognition Challenge, EmotionX-2019, based on Bidirectional Encoder Representations from Transformers (BERT), which is the state of the art for Natural Language Processing, with fine-tuning on dialogue utterance classification. We use cascade classification to tackle the dominance of a majority class present in the data. Cascading the classifiers allowed us to improve our reported accuracy measures for emotion prediction in text.
EmotionX-AlexU: Cascade BERT-based Emotion Classifier by Meena Alfons, Marwan Torki, and Nagwa El-Makky
Submitted to EmotionX-2019, made it to the leaderboard, and presented at SocialNLP @ IJCAI 2019.
Use the following commands to install dependencies
git clone
cd pytorch-pretrained-BERT
git checkout master
python install
pip install ./
cd ..
pip install emoji
git clone
cd apex
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
cd ..
If you are using Colab, use the following:
!git clone
import os
!git checkout master
!python install
!pip install ./
import os
!git clone
import os
!pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
import os
!pip install emoji
git clone
cd EmotionX-2019
Or using Colab:
!git clone
import os
Extract the EmotionPush and Friends datasets in the following hierarchy:
_ . (current directory)
|_ dataset
|_ EmotionPush
| |_ emotionpush.augmented.json
| |_ emotionpush.json
|_ Friends
|_ friends.augmented.json
|_ friends.json
You may use the following commands:
unzip -o ./path/to/ -d ./dataset
unzip -o ./path/to/ -d ./dataset
python EmotionX-2019/ ./dataset
Train Friends Majority Classifier
from processor import Majority_OneSentence_Processor
from Trainer import Trainer
class Args(object):
args = Args()
args.bert_model = 'bert-base-uncased'
args.do_lower_case = True
args.warmup_proportion = 0.1
args.cache_dir = "./cache"
args.no_cuda = False
args.local_rank = -1
args.fp16 = False
args.loss_scale = 0
args.gradient_accumulation_steps = 1
args.server_ip = ''
args.server_port = ''
args.output_mode = "classification"
args.save_model_steps = 2000
args.resume_epochs = 0
args.resume_steps = 0
# Important configurations
args.data_dir = './dataset/preprocessed/'
args.train_file = 'train_friends.augmented.json'
args.dev_file = 'dev_friends.augmented.json'
args.train_batch_size = 32
args.eval_batch_size = 32
args.do_train = True
args.do_eval = False
args.do_run = False
args.num_train_epochs = 8.0
args.max_seq_length = 256
args.processor = Majority_OneSentence_Processor
args.output_dir = os.path.join(model_dir, 'friends_majority')
args.resume_dir = None
args.learning_rate = 1e-5
args.seed = 1991
trainer = Trainer(args)
Train EmotionPush Majority Classifier
from processor import Majority_OneSentence_Processor
from Trainer import Trainer
class Args(object):
args = Args()
args.bert_model = 'bert-base-uncased'
args.do_lower_case = True
args.warmup_proportion = 0.1
args.cache_dir = "./cache"
args.no_cuda = False
args.local_rank = -1
args.fp16 = False
args.loss_scale = 0
args.gradient_accumulation_steps = 1
args.server_ip = ''
args.server_port = ''
args.output_mode = "classification"
args.save_model_steps = 2000
args.resume_epochs = 0
args.resume_steps = 0
# Important configurations
args.data_dir = './dataset/preprocessed/'
args.train_file = 'train_emotionpush.augmented.json'
args.dev_file = 'dev_emotionpush.augmented.json'
args.train_batch_size = 32
args.eval_batch_size = 32
args.do_train = True
args.do_eval = False
args.do_run = False
args.num_train_epochs = 8.0
args.max_seq_length = 256
args.processor = Majority_OneSentence_Processor
args.output_dir = os.path.join(model_dir, 'emotionpush_majority')
args.resume_dir = None
args.learning_rate = 1e-5
args.seed = 1991
trainer = Trainer(args)
Train Friends Others Classifier
args = Args()
args.bert_model = 'bert-base-uncased'
args.do_lower_case = True
args.warmup_proportion = 0.1
args.cache_dir = "./cache"
args.no_cuda = False
args.local_rank = -1
args.fp16 = False
args.loss_scale = 0
args.gradient_accumulation_steps = 1
args.server_ip = ''
args.server_port = ''
args.output_mode = "classification"
args.data_dir = './dataset/preprocessed/'
args.save_model_steps = 2000
args.resume_epochs = 0
args.resume_steps = 0
# Important configurations
args.data_dir = './dataset/preprocessed/'
args.train_file = 'train_friends.augmented.json'
args.dev_file = 'dev_friends.augmented.json'
args.train_batch_size = 32
args.eval_batch_size = 32
args.do_train = True
args.do_eval = False
args.do_run = False
args.num_train_epochs = 8.0
args.max_seq_length = 256
args.processor = Others_OneSentence_Processor
args.output_dir = os.path.join(model_dir, 'friends_others')
args.resume_dir = None
args.learning_rate = 1e-5
args.seed = 1991
trainer = Trainer(args)
Train EmotionPush Others Classifier
args = Args()
args.bert_model = 'bert-base-uncased'
args.do_lower_case = True
args.warmup_proportion = 0.1
args.cache_dir = "./cache"
args.no_cuda = False
args.local_rank = -1
args.fp16 = False
args.loss_scale = 0
args.gradient_accumulation_steps = 1
args.server_ip = ''
args.server_port = ''
args.output_mode = "classification"
args.data_dir = './dataset/preprocessed/'
args.save_model_steps = 2000
args.resume_epochs = 0
args.resume_steps = 0
# Important configurations
args.data_dir = './dataset/preprocessed/'
args.train_file = 'train_emotionpush.augmented.json'
args.dev_file = 'dev_emotionpush.augmented.json'
args.train_batch_size = 32
args.eval_batch_size = 32
args.do_train = True
args.do_eval = False
args.do_run = False
args.num_train_epochs = 8.0
args.max_seq_length = 256
args.processor = Others_OneSentence_Processor
args.output_dir = os.path.join(model_dir, 'emotionpush_others')
args.resume_dir = None
args.learning_rate = 1e-5
args.seed = 1991
trainer = Trainer(args)
Run Friends Majority Classifier
args = Args()
args.bert_model = 'bert-base-uncased'
args.do_lower_case = True
args.warmup_proportion = 0.1
args.cache_dir = "./cache"
args.no_cuda = False
args.local_rank = -1
args.fp16 = False
args.loss_scale = 0
args.gradient_accumulation_steps = 1
args.server_ip = ''
args.server_port = ''
args.output_mode = "classification"
args.save_model_steps = 2000
args.resume_epochs = 0
args.resume_steps = 0
# Important configurations
args.data_dir = './eval/'
args.train_file = None
args.dev_file = 'friends_eval.json'
args.result_file = 'friends_majority_result.json'
args.train_batch_size = 32
args.eval_batch_size = 32
args.do_train = False
args.do_eval = False
args.do_run = True
args.num_train_epochs = 1.0
args.max_seq_length = 256
args.processor = Majority_OneSentence_Processor
args.output_dir = None
args.resume_dir = os.path.join(model_dir, 'friends_majority/epoch_0')
args.learning_rate = 1e-5
args.seed = 69847
trainer = Trainer(args)
Run Friends Others Classifier
args = Args()
args.bert_model = 'bert-base-uncased'
args.do_lower_case = True
args.warmup_proportion = 0.1
args.cache_dir = "./cache"
args.no_cuda = False
args.local_rank = -1
args.fp16 = False
args.loss_scale = 0
args.gradient_accumulation_steps = 1
args.server_ip = ''
args.server_port = ''
args.output_mode = "classification"
args.data_dir = './dataset/preprocessed/'
args.save_model_steps = 2000
args.resume_epochs = 0
args.resume_steps = 0
# Important configurations
args.data_dir = './eval/'
args.train_file = None
args.dev_file = 'friends_majority_result.json'
args.result_file = 'friends_result.json'
args.train_batch_size = 32
args.eval_batch_size = 32
args.do_train = False
args.do_eval = False
args.do_run = True
args.num_train_epochs = 1.0
args.max_seq_length = 256
args.processor = Others_OneSentence_Processor
args.output_dir = None
args.resume_dir = os.path.join(model_dir, 'friends_others/epoch_1')
args.learning_rate = 1e-5
args.seed = 69847
# include first 3 labels (joy, sadness, anger)
trainer = Trainer(args)
Run EmotionPush Majority Classifier
args = Args()
args.bert_model = 'bert-base-uncased'
args.do_lower_case = True
args.warmup_proportion = 0.1
args.cache_dir = "./cache"
args.no_cuda = False
args.local_rank = -1
args.fp16 = False
args.loss_scale = 0
args.gradient_accumulation_steps = 1
args.server_ip = ''
args.server_port = ''
args.output_mode = "classification"
args.save_model_steps = 2000
args.resume_epochs = 0
args.resume_steps = 0
# Important configurations
args.data_dir = './eval/'
args.train_file = None
args.dev_file = 'emotionpush_eval.json'
args.result_file = 'emotionpush_majority_result.json'
args.train_batch_size = 32
args.eval_batch_size = 32
args.do_train = False
args.do_eval = False
args.do_run = True
args.num_train_epochs = 1.0
args.max_seq_length = 256
args.processor = Majority_OneSentence_Processor
args.output_dir = None
args.resume_dir = os.path.join(model_dir, 'emotionpush_majority/epoch_0')
args.learning_rate = 1e-5
args.seed = 69847
# labels are (yes, no)
trainer = Trainer(args)
Run EmotionPush Others Classifier
args = Args()
args.bert_model = 'bert-base-uncased'
args.do_lower_case = True
args.warmup_proportion = 0.1
args.cache_dir = "./cache"
args.no_cuda = False
args.local_rank = -1
args.fp16 = False
args.loss_scale = 0
args.gradient_accumulation_steps = 1
args.server_ip = ''
args.server_port = ''
args.output_mode = "classification"
args.data_dir = './dataset/preprocessed/'
args.save_model_steps = 2000
args.resume_epochs = 0
args.resume_steps = 0
# Important configurations
args.data_dir = './eval/'
args.train_file = None
args.dev_file = 'emotionpush_majority_result.json'
args.result_file = 'emotionpush_result.json'
args.train_batch_size = 32
args.eval_batch_size = 32
args.do_train = False
args.do_eval = False
args.do_run = True
args.num_train_epochs = 1.0
args.max_seq_length = 256
args.processor = Others_OneSentence_Processor
args.output_dir = None
args.resume_dir = os.path.join(model_dir, 'emotionpush_others/epoch_1')
args.learning_rate = 1e-5
args.seed = 69847
# include first 3 labels (joy, sadness, anger)
trainer = Trainer(args)