import os
import shutil
import subprocess
import re
from tqdm import tqdm
from ..helper import thirdparty_binary, make_path_safe
from ..multiprocessing import (align, mono_align_equal, compile_train_graphs,
acc_stats, tree_stats, convert_alignments,
convert_ali_to_textgrids, calc_fmllr)
from ..exceptions import NoSuccessfulAlignments
from .base import BaseAligner
from ..models import AcousticModel
[docs]class TrainableAligner(BaseAligner):
'''
Aligner that aligns and trains acoustic models on a large dataset
Parameters
----------
corpus : :class:`~aligner.corpus.Corpus`
Corpus object for the dataset
dictionary : :class:`~aligner.dictionary.Dictionary`
Dictionary object for the pronunciation dictionary
output_directory : str
Path to export aligned TextGrids
temp_directory : str, optional
Specifies the temporary directory root to save files needed for Kaldi.
If not specified, it will be set to ``~/Documents/MFA``
num_jobs : int, optional
Number of processes to use, defaults to 3
call_back : callable, optional
Specifies a call back function for alignment
mono_params : :class:`~aligner.config.MonophoneConfig`, optional
Monophone training parameters to use, if different from defaults
tri_params : :class:`~aligner.config.TriphoneConfig`, optional
Triphone training parameters to use, if different from defaults
tri_fmllr_params : :class:`~aligner.config.TriphoneFmllrConfig`, optional
Speaker-adapted triphone training parameters to use, if different from defaults
'''
def save(self, path):
    '''
    Output an acoustic model and dictionary to the specified path

    The model is created with the file name of ``path`` (extension
    removed) as its name, receives this aligner's meta information and
    the contents of ``self.tri_fmllr_directory``, and is then dumped to
    ``path`` with its extension removed.

    Parameters
    ----------
    path : str
        Path to save acoustic model and dictionary
    '''
    directory, filename = os.path.split(path)
    model_name, _ = os.path.splitext(filename)
    acoustic_model = AcousticModel.empty(model_name)
    acoustic_model.add_meta_file(self)
    acoustic_model.add_triphone_fmllr_model(self.tri_fmllr_directory)
    # A bare file name yields directory == '', and os.makedirs('') raises
    # FileNotFoundError; only create the parent when there is one.
    if directory:
        os.makedirs(directory, exist_ok=True)
    # dump() is handed the full path minus its extension, not just the name.
    dump_target, _ = os.path.splitext(path)
    acoustic_model.dump(dump_target)
    print('Saved model to {}'.format(path))
def _do_tri_training(self):
    '''
    Report the start of triphone training through the call back, then
    run ``_do_training`` on the triphone directory with the triphone
    configuration
    '''
    announcement = 'Beginning triphone training...'
    self.call_back(announcement)
    work_dir, settings = self.tri_directory, self.tri_config
    self._do_training(work_dir, settings)
def train_tri(self):
    '''
    Perform triphone training

    Returns immediately when the final triphone model already exists.
    Otherwise ``_align_si`` is run first if the monophone alignment
    directory is missing, the triphone log directory is created, and
    triphone initialization (without fMLLR) and training are run.
    '''
    already_trained = os.path.exists(self.tri_final_model_path)
    if already_trained:
        print('Triphone training already done, using previous final.mdl')
        return

    # Triphone initialization is preceded by the alignment pass whenever
    # its output directory is not there yet.
    mono_alignments_present = os.path.exists(self.mono_ali_directory)
    if not mono_alignments_present:
        self._align_si()

    tri_log_dir = os.path.join(self.tri_directory, 'log')
    os.makedirs(tri_log_dir, exist_ok=True)
    self._init_tri(fmllr=False)
    self._do_tri_training()
def _init_mono(self):
    '''
    Initialize monophone training

    Runs ``gmm-init-mono`` on the ``cmvndeltafeats.0_sub`` features of
    the corpus split directory (fed through stdin) with ``0.mdl`` and
    ``tree`` in the monophone directory as its model and tree arguments,
    compiles the training graphs, creates equal alignments, and finally
    runs ``gmm-est`` over the summed ``0.<job>.acc`` accumulators to
    produce ``1.mdl``.

    NOTE(review): the exit statuses of both external tools are discarded,
    so a failure is only visible in the log files under ``log``.
    '''
    mono_dir = self.mono_directory
    log_dir = os.path.join(mono_dir, 'log')
    os.makedirs(log_dir, exist_ok=True)

    tree_path = os.path.join(mono_dir, 'tree')
    mdl_path = os.path.join(mono_dir, '0.mdl')

    split_dir = self.corpus.split_directory
    feat_dim = self.corpus.get_feat_dim()
    feats_subset_path = os.path.join(split_dir, 'cmvndeltafeats.0_sub')
    shared_phones_opt = "--shared-phones=" + os.path.join(self.dictionary.phones_dir, 'sets.int')
    topo_path = os.path.join(self.dictionary.output_directory, 'topo')

    # "--train-feats=ark:-" makes the tool take the feature archive from
    # stdin, which is why the subset file is opened in binary mode and
    # attached as the child's stdin.
    init_log_path = os.path.join(log_dir, 'log')
    with open(feats_subset_path, 'rb') as feats, open(init_log_path, 'w') as logf:
        subprocess.call([thirdparty_binary('gmm-init-mono'),
                         shared_phones_opt,
                         "--train-feats=ark:-",
                         topo_path,
                         feat_dim,
                         mdl_path,
                         tree_path],
                        stdin=feats,
                        stderr=logf)

    # Queried only after gmm-init-mono has run, as in the original ordering.
    num_gauss = self.get_num_gauss_mono()

    compile_train_graphs(mono_dir, self.dictionary.output_directory,
                         self.corpus.split_directory, self.num_jobs)
    mono_align_equal(mono_dir,
                     self.corpus.split_directory, self.num_jobs)

    acc_files = [os.path.join(mono_dir, '0.{}.acc'.format(job))
                 for job in range(self.num_jobs)]
    # The trailing "|" turns this argument into a piped command for the
    # tool to read from: gmm-sum-accs writes the summed per-job
    # accumulators to "-" and gmm-est consumes that as its stats input.
    summed_accs = "{} - {}|".format(thirdparty_binary('gmm-sum-accs'),
                                    ' '.join(map(make_path_safe, acc_files)))
    update_log_path = os.path.join(log_dir, 'update.0.log')
    with open(update_log_path, 'w') as logf:
        # No pipes are attached, so call() (spawn and wait) is equivalent
        # to the former Popen(...).communicate().
        subprocess.call([thirdparty_binary('gmm-est'),
                         '--min-gaussian-occupancy=3',
                         '--mix-up={}'.format(num_gauss),
                         '--power={}'.format(self.mono_config.power),
                         mdl_path,
                         summed_accs,
                         os.path.join(mono_dir, '1.mdl')],
                        stderr=logf)
def _do_mono_training(self):
    '''
    Store the value of ``get_num_gauss_mono`` as the monophone
    configuration's initial Gaussian count, report the start of
    monophone training through the call back, then run ``_do_training``
    on the monophone directory with the monophone configuration
    '''
    gauss_count = self.get_num_gauss_mono()
    self.mono_config.initial_gauss_count = gauss_count
    announcement = 'Beginning monophone training...'
    self.call_back(announcement)
    work_dir, settings = self.mono_directory, self.mono_config
    self._do_training(work_dir, settings)
def train_mono(self):
    '''
    Perform monophone training

    Returns immediately when ``final.mdl`` already exists in the
    monophone directory. Otherwise the monophone log directory is
    created and monophone initialization and training are run.
    '''
    if os.path.exists(os.path.join(self.mono_directory, 'final.mdl')):
        print('Monophone training already done, using previous final.mdl')
        return

    mono_log_dir = os.path.join(self.mono_directory, 'log')
    os.makedirs(mono_log_dir, exist_ok=True)
    self._init_mono()
    self._do_mono_training()