Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minimum lr 1e-4 to 1e-8 #156

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions synthesize.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@ def prepare_run(args):
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

run_name = args.name or args.tacotron_name or args.model
taco_checkpoint = os.path.join('logs-' + run_name, 'taco_' + args.checkpoint)
taco_checkpoint = os.path.join(args.base_dir, 'logs-' + run_name, 'taco_' + args.checkpoint)

run_name = args.name or args.wavenet_name or args.model
wave_checkpoint = os.path.join('logs-' + run_name, 'wave_' + args.checkpoint)
wave_checkpoint = os.path.join(args.base_dir, 'logs-' + run_name, 'wave_' + args.checkpoint)
return taco_checkpoint, wave_checkpoint, modified_hp

def get_sentences(args):
if args.text_list != '':
with open(args.text_list, 'rb') as f:
sentences = list(map(lambda l: l.decode("utf-8")[:-1], f.readlines()))
sentences = list(map(lambda l: l.decode("utf-8").strip(), f.readlines()))
else:
sentences = hparams.sentences
return sentences
Expand All @@ -33,6 +33,7 @@ def synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences):
log('Running End-to-End TTS Evaluation. Model: {}'.format(args.name or args.model))
log('Synthesizing mel-spectrograms from text..')
wavenet_in_dir = tacotron_synthesize(args, hparams, taco_checkpoint, sentences)

#Delete Tacotron model from graph
tf.reset_default_graph()
log('Synthesizing audio from mel-spectrograms.. (This may take a while)')
Expand All @@ -44,6 +45,7 @@ def synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences):
def main():
accepted_modes = ['eval', 'synthesis', 'live']
parser = argparse.ArgumentParser()
parser.add_argument('--base_dir', default='')
parser.add_argument('--checkpoint', default='pretrained/', help='Path to model checkpoint')
parser.add_argument('--hparams', default='',
help='Hyperparameter overrides as a comma-separated list of name=value pairs')
Expand All @@ -52,7 +54,7 @@ def main():
parser.add_argument('--wavenet_name', help='Name of logging directory of WaveNet. If trained separately')
parser.add_argument('--model', default='Tacotron-2')
parser.add_argument('--input_dir', default='training_data/', help='folder to contain inputs sentences/targets')
parser.add_argument('--mels_dir', default='tacotron_output/eval/', help='folder to contain mels to synthesize audio from using the Wavenet')
#parser.add_argument('--mels_dir', default='tacotron_output/eval/', help='folder to contain mels to synthesize audio from using the Wavenet')
parser.add_argument('--output_dir', default='output/', help='folder to contain synthesized mel spectrograms')
parser.add_argument('--mode', default='eval', help='mode of run: can be one of {}'.format(accepted_modes))
parser.add_argument('--GTA', default='True', help='Ground truth aligned synthesis, defaults to True, only considered in synthesis mode')
Expand Down
18 changes: 9 additions & 9 deletions tacotron/synthesize.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
eval_dir = os.path.join(output_dir, 'eval')
log_dir = os.path.join(output_dir, 'logs-eval')

if args.model == 'Tacotron-2':
assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir) #mels_dir = wavenet_input_dir
# if args.model == 'Tacotron-2':
# assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir) #mels_dir = wavenet_input_dir

#Create output path if it doesn't exist
os.makedirs(eval_dir, exist_ok=True)
Expand All @@ -66,7 +66,7 @@ def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
log('synthesized mel spectrograms at {}'.format(eval_dir))
return eval_dir

def run_synthesis(args, checkpoint_path, output_dir, hparams):
def run_synthesis(args, checkpoint_path, input_dir, output_dir, hparams):
GTA = (args.GTA == 'True')
if GTA:
synth_dir = os.path.join(output_dir, 'gta')
Expand All @@ -80,7 +80,7 @@ def run_synthesis(args, checkpoint_path, output_dir, hparams):
os.makedirs(synth_dir, exist_ok=True)


metadata_filename = os.path.join(args.input_dir, 'train.txt')
metadata_filename = os.path.join(input_dir, 'train.txt')
log(hparams_debug_string())
synth = Synthesizer()
synth.load(checkpoint_path, hparams, gta=GTA)
Expand All @@ -93,8 +93,8 @@ def run_synthesis(args, checkpoint_path, output_dir, hparams):
metadata = [metadata[i: i+hparams.tacotron_synthesis_batch_size] for i in range(0, len(metadata), hparams.tacotron_synthesis_batch_size)]

log('starting synthesis')
mel_dir = os.path.join(args.input_dir, 'mels')
wav_dir = os.path.join(args.input_dir, 'audio')
mel_dir = os.path.join(input_dir, 'mels')
wav_dir = os.path.join(input_dir, 'audio')
with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
for i, meta in enumerate(tqdm(metadata)):
texts = [m[5] for m in meta]
Expand All @@ -109,8 +109,8 @@ def run_synthesis(args, checkpoint_path, output_dir, hparams):
return os.path.join(synth_dir, 'map.txt')

def tacotron_synthesize(args, hparams, checkpoint, sentences=None):
output_dir = 'tacotron_' + args.output_dir

input_dir = os.path.join(args.base_dir, args.input_dir)
output_dir = os.path.join(args.base_dir, 'tacotron_' + args.output_dir)
try:
checkpoint_path = tf.train.get_checkpoint_state(checkpoint).model_checkpoint_path
log('loaded model at {}'.format(checkpoint_path))
Expand All @@ -120,6 +120,6 @@ def tacotron_synthesize(args, hparams, checkpoint, sentences=None):
if args.mode == 'eval':
return run_eval(args, checkpoint_path, output_dir, hparams, sentences)
elif args.mode == 'synthesis':
return run_synthesis(args, checkpoint_path, output_dir, hparams)
return run_synthesis(args, checkpoint_path, input_dir, output_dir, hparams)
else:
run_live(args, checkpoint_path, hparams)
4 changes: 2 additions & 2 deletions wavenet_vocoder/models/wavenet.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def set_mode(self, is_training):
except AttributeError:
pass

def initialize(self, y, c, g, input_lengths, x=None, synthesis_length=None):
def initialize(self, y, c, g, input_lengths, split_infos=None, x=None, synthesis_length=None):
'''Initialize wavenet graph for train, eval and test cases.
'''
hparams = self._hparams
Expand Down Expand Up @@ -415,7 +415,7 @@ def _noam_learning_rate_decay(self, init_lr, global_step):
# Noam scheme from tensor2tensor:
warmup_steps = 4000.0
step = tf.cast(global_step + 1, dtype=tf.float32)
return tf.maximum(init_lr * warmup_steps**0.5 * tf.minimum(step * warmup_steps**-1.5, step**-0.5), 1e-4)
return tf.maximum(init_lr * warmup_steps**0.5 * tf.minimum(step * warmup_steps**-1.5, step**-0.5), 1e-8) # 1e-4 to 1e-8


def get_mask(self, input_lengths, maxlen=None):
Expand Down
12 changes: 6 additions & 6 deletions wavenet_vocoder/synthesize.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from wavenet_vocoder.synthesizer import Synthesizer


def run_synthesis(args, checkpoint_path, output_dir, hparams):
def run_synthesis(args, checkpoint_path, input_dir, output_dir, hparams):
log_dir = os.path.join(output_dir, 'plots')
wav_dir = os.path.join(output_dir, 'wavs')

Expand All @@ -20,7 +20,7 @@ def run_synthesis(args, checkpoint_path, output_dir, hparams):

if args.model == 'Tacotron-2':
#If running all Tacotron-2, synthesize audio from evaluated mels
metadata_filename = os.path.join(args.mels_dir, 'map.txt')
metadata_filename = os.path.join(input_dir, 'map.txt')
with open(metadata_filename, encoding='utf-8') as f:
metadata = np.array([line.strip().split('|') for line in f])

Expand All @@ -31,7 +31,7 @@ def run_synthesis(args, checkpoint_path, output_dir, hparams):
speaker_ids = None if (speaker_ids == '<no_g>').all() else speaker_ids
else:
#else Get all npy files in input_dir (supposing they are mels)
mel_files = [os.path.join(args.mels_dir, f) for f in os.listdir(args.mels_dir) if f.split('.')[-1] == 'npy']
mel_files = [os.path.join(input_dir, f) for f in os.listdir(input_dir) if f.split('.')[-1] == 'npy']
speaker_ids = None if args.speaker_id is None else args.speaker_id.replace(' ', '').split(',')

if speaker_ids is not None:
Expand Down Expand Up @@ -68,12 +68,12 @@ def run_synthesis(args, checkpoint_path, output_dir, hparams):


def wavenet_synthesize(args, hparams, checkpoint):
output_dir = 'wavenet_' + args.output_dir

input_dir = os.path.join(args.base_dir, 'tacotron_' + args.output_dir, 'eval')
output_dir = os.path.join(args.base_dir, 'wavenet_' + args.output_dir)
try:
checkpoint_path = tf.train.get_checkpoint_state(checkpoint).model_checkpoint_path
log('loaded model at {}'.format(checkpoint_path))
except:
raise RuntimeError('Failed to load checkpoint at {}'.format(checkpoint))

run_synthesis(args, checkpoint_path, output_dir, hparams)
run_synthesis(args, checkpoint_path, input_dir, output_dir, hparams)