Source code for aligner.config

import os

TEMP_DIR = os.path.expanduser('~/Documents/MFA')


def make_safe(value):
    if isinstance(value, bool):
        return str(value).lower()
    return str(value)


[docs]class MonophoneConfig(object): ''' Configuration class for monophone training Scale options defaults to:: ['--transition-scale=1.0', '--acoustic-scale=0.1', '--self-loop-scale=0.1'] If ``align_often`` is True in the keyword arguments, ``realign_iters`` will be:: [1, 5, 10, 15, 20, 25, 30, 35, 38] Otherwise, ``realign_iters`` will be:: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 38] Attributes ---------- num_iters : int Number of training iterations to perform, defaults to 40 scale_opts : list Options for specifying scaling in alignment beam : int Default beam width for alignment, defaults = 10 retry_beam : int Beam width to fall back on if no alignment is produced, defaults to 40 max_iter_inc : int Last iter to increase #Gauss on, defaults to 30 totgauss : int Total number of gaussians, defaults to 1000 boost_silence : float Factor by which to boost silence likelihoods in alignment, defaults to 1.0 realign_iters : list List of iterations to perform alignment stage : int Not used power : float Exponent for number of gaussians according to occurrence counts, defaults to 0.25 do_fmllr : bool Specifies whether to do speaker adaptation, defaults to False ''' def __init__(self, **kwargs): self.num_iters = 40 self.scale_opts = ['--transition-scale=1.0', '--acoustic-scale=0.1', '--self-loop-scale=0.1'] self.beam = 10 self.retry_beam = 40 self.max_gauss_count = 1000 self.boost_silence = 1.0 if kwargs.get('align_often', False): self.realign_iters = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 38] else: self.realign_iters = [1, 5, 10, 15, 20, 25, 30, 35, 38] self.stage = -4 self.power = 0.25 self.do_fmllr = False for k, v in kwargs.items(): setattr(self, k, v) @property def max_iter_inc(self): return self.num_iters - 10 @property def inc_gauss_count(self): return int((self.max_gauss_count - self.initial_gauss_count) / self.max_iter_inc)
[docs]class TriphoneConfig(MonophoneConfig): ''' Configuration class for triphone training Scale options defaults to:: ['--transition-scale=1.0', '--acoustic-scale=0.1', '--self-loop-scale=0.1'] If ``align_often`` is True in the keyword arguments, ``realign_iters`` will be:: [1, 5, 10, 15, 20, 25, 30, 35, 38] Otherwise, ``realign_iters`` will be:: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 38] Attributes ---------- num_iters : int Number of training iterations to perform, defaults to 35 scale_opts : list Options for specifying scaling in alignment beam : int Default beam width for alignment, defaults = 10 retry_beam : int Beam width to fall back on if no alignment is produced, defaults to 40 max_iter_inc : int Last iter to increase #Gauss on, defaults to 30 totgauss : int Total number of gaussians, defaults to 1000 boost_silence : float Factor by which to boost silence likelihoods in alignment, defaults to 1.0 realign_iters : list List of iterations to perform alignment stage : int Not used power : float Exponent for number of gaussians according to occurrence counts, defaults to 0.25 do_fmllr : bool Specifies whether to do speaker adaptation, defaults to False num_states : int Number of states in the decision tree, defaults to 3100 num_gauss : int Number of gaussians in the decision tree, defaults to 50000 cluster_threshold : int For build-tree control final bottom-up clustering of leaves, defaults to 100 ''' def __init__(self, **kwargs): defaults = {'num_iters': 35, 'initial_gauss_count': 3100, 'max_gauss_count': 50000, 'cluster_threshold': 100} defaults.update(kwargs) super(TriphoneConfig, self).__init__(**defaults)
[docs]class TriphoneFmllrConfig(TriphoneConfig): ''' Configuration class for speaker-adapted triphone training Scale options defaults to:: ['--transition-scale=1.0', '--acoustic-scale=0.1', '--self-loop-scale=0.1'] If ``align_often`` is True in the keyword arguments, ``realign_iters`` will be:: [1, 5, 10, 15, 20, 25, 30, 35, 38] Otherwise, ``realign_iters`` will be:: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 38] ``fmllr_iters`` defaults to:: [2, 4, 6, 12] Attributes ---------- num_iters : int Number of training iterations to perform, defaults to 35 scale_opts : list Options for specifying scaling in alignment beam : int Default beam width for alignment, defaults = 10 retry_beam : int Beam width to fall back on if no alignment is produced, defaults to 40 max_iter_inc : int Last iter to increase #Gauss on, defaults to 30 totgauss : int Total number of gaussians, defaults to 1000 boost_silence : float Factor by which to boost silence likelihoods in alignment, defaults to 1.0 realign_iters : list List of iterations to perform alignment stage : int Not used power : float Exponent for number of gaussians according to occurrence counts, defaults to 0.25 do_fmllr : bool Specifies whether to do speaker adaptation, defaults to True num_states : int Number of states in the decision tree, defaults to 3100 num_gauss : int Number of gaussians in the decision tree, defaults to 50000 cluster_threshold : int For build-tree control final bottom-up clustering of leaves, defaults to 100 fmllr_update_type : str Type of fMLLR estimation, defaults to ``'full'`` fmllr_iters : list List of iterations to perform fMLLR estimation fmllr_power : float Defaults to 0.2 silence_weight : float Weight on silence in fMLLR estimation ''' def __init__(self, align_often=True, **kwargs): defaults = {'do_fmllr': True, 'fmllr_update_type': 'full', 'fmllr_iters': [2, 4, 6, 12], 'fmllr_power': 0.2, 'silence_weight': 0.0} defaults.update(kwargs) super(TriphoneFmllrConfig, self).__init__(**defaults)
[docs]class MfccConfig(object): ''' Class to store configuration information about MFCC generation The ``config_dict`` currently stores one key ``'use-energy'`` which defaults to False Parameters ---------- output_directory : str Path to directory to save configuration files for Kaldi kwargs : dict, optional If specified, updates ``config_dict`` with this dictionary Attributes ---------- config_dict : dict Dictionary of configuration parameters ''' def __init__(self, output_directory, job=None, kwargs=None): if kwargs is None: kwargs = {} self.job = job self.config_dict = {'use-energy': False, 'frame-shift': 10} self.config_dict.update(kwargs) self.output_directory = output_directory self.write() def update(self, kwargs): ''' Update configuration dictionary with new dictionary Parameters ---------- kwargs : dict Dictionary of new parameter values ''' self.config_dict.update(kwargs) self.write() @property def config_directory(self): path = os.path.join(self.output_directory, 'config') os.makedirs(path, exist_ok=True) return path @property def path(self): if self.job is None: f = 'mfcc.conf' else: f = 'mfcc.{}.conf'.format(self.job) return os.path.join(self.config_directory, f) def write(self): ''' Write configuration dictionary to a file for use in Kaldi binaries ''' with open(self.path, 'w', encoding='utf8') as f: for k, v in self.config_dict.items(): f.write('--{}={}\n'.format(k, make_safe(v)))