# Source code for montreal_forced_aligner.validation.dictionary_validator
"""Classes for validating dictionaries"""
import logging
import shutil
import typing
from pathlib import Path
from montreal_forced_aligner import config
from montreal_forced_aligner.data import WorkflowType
from montreal_forced_aligner.g2p.generator import PyniniValidator
from montreal_forced_aligner.g2p.trainer import PyniniTrainer
logger = logging.getLogger("mfa")
# [docs]
class DictionaryValidator(PyniniTrainer):
    """
    Validator that scores a pronunciation dictionary's entries with a G2P model

    If no pretrained G2P model is supplied, one is trained from the dictionary
    itself during :meth:`setup` and then used for the evaluation in
    :meth:`validate`.

    Parameters
    ----------
    g2p_model_path: :class:`~pathlib.Path`, optional
        Path to pretrained G2P model; when omitted, a model is trained from the
        dictionary and exported into the working log directory
    g2p_threshold: float, optional
        Threshold for pruning pronunciations, defaults to 1.5, which returns the
        optimal pronunciations and those with scores less than 1.5 times the
        optimal pronunciation's score. Increase to allow for more suboptimal
        pronunciations.
        NOTE(review): this class only stores the value; nothing in this class
        reads it afterwards -- confirm where it is consumed.
    **kwargs
        Forwarded to the parent initializer, with ``clean`` always forced to ``True``

    See Also
    --------
    :class:`~montreal_forced_aligner.g2p.trainer.PyniniTrainer`
        Parent class that receives the remaining keyword arguments

    Attributes
    ----------
    g2p_model_path: :class:`~pathlib.Path`, optional
        Model used for validation; filled in by :meth:`setup` when a model is trained
    g2p_threshold: float
        Pruning threshold as passed to the initializer
    """

    def __init__(
        self,
        g2p_model_path: typing.Optional[Path] = None,
        g2p_threshold: float = 1.5,
        **kwargs,
    ):
        # Any caller-supplied ``clean`` value is overridden on purpose.
        kwargs.update(clean=True)
        super().__init__(**kwargs)
        self.g2p_model_path = g2p_model_path
        self.g2p_threshold = g2p_threshold

    def setup(self) -> None:
        """
        Set up the dictionary validator

        Returns immediately if the validator is already initialized. Otherwise
        the inherited database, dictionary and lexicon steps are run, and then
        either the G2P workflow is started for a pretrained model, or a new
        model is trained, exported as ``g2p_model.zip`` under the working log
        directory, and the G2P workflow is started afterwards.
        """
        if self.initialized:
            return

        self.initialize_database()
        self.dictionary_setup()
        self.write_lexicon_information()

        if self.g2p_model_path is not None:
            # Pretrained model supplied: go straight to the G2P workflow.
            self.create_new_current_workflow(WorkflowType.g2p)
            self.initialize_training()
        else:
            self.create_new_current_workflow(WorkflowType.train_g2p)
            logger.info("Not using a pretrained G2P model, training from the dictionary...")
            self.initialize_training()
            self.train()
            trained_model_path = self.working_log_directory.joinpath("g2p_model.zip")
            # Record the path before exporting so validate() picks up the new model.
            self.g2p_model_path = trained_model_path
            self.export_model(trained_model_path)
            self.create_new_current_workflow(WorkflowType.g2p)

        self.initialized = True

    def validate(self, output_path: typing.Optional[Path] = None) -> None:
        """
        Validate the dictionary

        Runs :meth:`setup`, evaluates the G2P model against the training
        dictionary, and optionally copies the resulting evaluation CSV.

        Parameters
        ----------
        output_path: :class:`~pathlib.Path`, optional
            Path to save scored CSV; nothing is copied when omitted
        """
        self.setup()

        words = list(self.g2p_training_dictionary.keys())
        validation_directory = self.working_directory.joinpath("validation")
        validator = PyniniValidator(
            g2p_model_path=self.g2p_model_path,
            word_list=words,
            temporary_directory=validation_directory,
            num_jobs=config.NUM_JOBS,
            num_pronunciations=self.num_pronunciations,
        )
        validator.evaluate_g2p_model(self.g2p_training_dictionary)

        if output_path is None:
            return
        shutil.copyfile(validator.evaluation_csv_path, output_path)
        logger.info(f"Wrote scores to {output_path}")