"""
Exception classes
=================
"""
from __future__ import annotations
import datetime
import logging
import sys
import typing
from pathlib import Path
from typing import TYPE_CHECKING, Collection, Dict, List, Optional
import requests.structures
from montreal_forced_aligner import config
from montreal_forced_aligner.helper import comma_join
if TYPE_CHECKING:
from montreal_forced_aligner.data import CtmInterval
# Public names exported by a star-import of this module.
# NOTE(review): several classes defined in this file are not listed here
# (PlatformError, ModelSaveError, ModelsConnectionError, ArgumentError,
# IvectorError, PhonetisaurusSymbolError) -- confirm whether that is intentional.
__all__ = [
"MFAError",
"SoundFileError",
"SoxError",
"G2PError",
"CtmError",
"PyniniAlignmentError",
"PyniniGenerationError",
"NoAlignmentsError",
"SegmenterError",
"ConfigError",
"LMError",
"LanguageModelNotFoundError",
"FeatureGenerationError",
"ModelExtensionError",
"ThirdpartyError",
"IvectorTrainingError",
"MultiprocessingError",
"TrainerError",
"ModelError",
"CorpusError",
"ModelLoadError",
"CorpusReadError",
"AlignerError",
"AlignmentError",
"AlignmentCollectionError",
"AlignmentExportError",
"NoSuccessfulAlignments",
"KaldiProcessingError",
"TextParseError",
"TextGridParseError",
"DictionaryError",
"NoDefaultSpeakerDictionaryError",
"RemoteModelVersionNotFoundError",
"PhoneMismatchError",
"RemoteModelNotFoundError",
"DictionaryPathError",
"DictionaryFileError",
"FileArgumentNotFoundError",
"PretrainedModelNotFoundError",
"MultipleModelTypesFoundError",
"ModelTypeNotSupportedError",
"PronunciationAcousticMismatchError",
"RootDirectoryError",
"DatabaseError",
]
[docs]
class MFAError(Exception):
    """
    Root exception class for this module

    The message is stored as a list of lines in ``message_lines`` so that
    subclasses can extend or replace it after calling this initializer.

    Parameters
    ----------
    base_error_message: str
        First line of the error message
    """

    def __init__(self, base_error_message: str, *args, **kwargs):
        # Extra positional and keyword arguments are accepted but not used here
        self.message_lines: List[str] = [base_error_message]

    @property
    def message(self) -> str:
        """Message lines joined with newlines"""
        return "\n".join(self.message_lines)

    def __str__(self) -> str:
        """Render the class name as a header, a blank line, then the message"""
        header = type(self).__name__ + ":"
        return header + "\n\n" + self.message
class PlatformError(MFAError):
    """
    Error raised when functionality is unavailable on the running platform

    Parameters
    ----------
    functionality_name: str
        Functionality not available on the current platform
    """

    def __init__(self, functionality_name):
        super().__init__("")
        lines = [f"Functionality for {functionality_name} is not available on {sys.platform}."]
        if sys.platform == "win32":
            # On Windows, add a blank line and a pointer to the WSL instructions
            lines += [
                "",
                f" If you'd like to use {functionality_name} on Windows, please follow the MFA installation "
                f"instructions for the Windows Subsystem for Linux (WSL).",
            ]
        self.message_lines = lines
[docs]
class ThirdpartyError(MFAError):
    """
    Exception class for errors in third party binary (usually Kaldi or OpenFst)

    When ``error_text`` is truthy the message reports a failed invocation of the
    binary; otherwise it reports that the binary could not be found.

    Parameters
    ----------
    binary_name: str
        Name of third party binary
    open_fst: bool, optional
        Flag for the error having to do with OpenFst
    open_blas: bool, optional
        Flag for the error having to do with the BLAS library
    libc: bool, optional
        Flag for the error having to do with the system libraries
    sox: bool, optional
        Flag for the error having to do with SoX
    error_text: str, optional
        Error output to include in the message when invoking the binary failed
    """

    def __init__(
        self, binary_name, open_fst=False, open_blas=False, libc=False, sox=False, error_text=None
    ):
        super().__init__("")
        if error_text:
            # The binary was found but failed when invoked
            self.message_lines = [
                f"There was an error when invoking '{binary_name}':",
                error_text,
                "This likely indicates that MFA's dependencies were not correctly installed, or there is an issue with your Conda environment.",
                "If you are in the correct environment, please try re-creating the environment from scratch as a first step, i.e.:",
                "conda create -n aligner -c conda-forge montreal-forced-aligner",
            ]
        else:
            # NOTE(review): the nesting of the hint branches under this ``else`` was
            # reconstructed from a listing without indentation -- confirm against the
            # original file.
            self.message_lines = [f"Could not find '{binary_name}'."]
            self.message_lines.append(
                "Please ensure that you have installed MFA's conda dependencies and are in the correct environment."
            )
            # Only the first truthy flag contributes a hint
            if open_fst:
                self.message_lines.append(
                    f"Please ensure that you are in an environment that has the {'openfst'} conda package installed, "
                    f"or that the {'openfst'} binaries are on your path if you compiled them yourself."
                )
            elif open_blas:
                self.message_lines.append(
                    f"Try installing {'openblas'} via system package manager or verify it's on your system path?"
                )
            elif libc:
                self.message_lines.append(
                    f"You likely have a different version of {'glibc'} than the packages binaries use. "
                    f"Try compiling {'Kaldi'} on your machine and collecting the binaries via the "
                    f"{'mfa thirdparty kaldi'} command."
                )
            elif sox:
                # The SoX hint replaces the "could not find" lines instead of extending them
                self.message_lines = []
                self.message_lines.append(
                    f"Your version of {'sox'} does not support the file format in your corpus. "
                    f"Try installing another version of {'sox'} with support for {binary_name}."
                )
# Feature Generation Errors
class FeatureGenerationError(MFAError):
    """
    Error type for problems related to generating features
    """
# Database Errors
class DatabaseError(MFAError):
    """
    Error type for problems related to database servers

    Parameters
    ----------
    message: str, optional
        Message to report; when omitted, a default message naming the current
        profile and the server init/start commands is used
    """

    def __init__(self, message=None):
        if message is not None:
            super().__init__(message)
            return

        # No message supplied: build the default connection-failure text
        from montreal_forced_aligner import config

        message = (
            f"There was an error connecting to the {config.CURRENT_PROFILE_NAME} MFA database server. "
            "Please ensure the server is initialized (mfa server init) or running (mfa server start)"
        )
        super().__init__(message)
# Model Errors
[docs]
class ModelError(MFAError):
    """
    Error type for problems related to MFA model archives
    """
[docs]
class ModelLoadError(ModelError):
    """
    Error raised when a model archive cannot be loaded

    Parameters
    ----------
    path: :class:`~pathlib.Path`
        Path of the model archive
    """

    def __init__(self, path: typing.Union[str, Path]):
        # The base initializer stores its argument as the single message line
        super().__init__(f"The archive {path} could not be parsed as an MFA model.")
class ModelSaveError(ModelError):
    """
    Error raised when saving a model archive would clobber an existing file

    Parameters
    ----------
    path: :class:`~pathlib.Path`
        Path of the model archive
    """

    def __init__(self, path: Path):
        super().__init__(f"The archive {path} already exists.")
        self.message_lines.append("Please specify --overwrite if you would like to overwrite it.")
class ModelsConnectionError(ModelError):
    """
    Exception during connecting to online repo for downloading models

    A 403 response whose message mentions "API rate limit" produces a message
    describing the GitHub rate limit and its reset time; any other response is
    reported with its code and message.

    Parameters
    ----------
    response_code: int
        Response code for the request
    response: dict[str, Any]
        Response dictionary; its "message" entry is used when present
    headers: requests.structures.CaseInsensitiveDict
        Headers from which the ``x-ratelimit-limit`` and ``x-ratelimit-reset``
        values are read in the rate limit case
    """

    def __init__(
        self,
        response_code: int,
        response: typing.Dict[str, typing.Any],
        headers: requests.structures.CaseInsensitiveDict,
    ):
        super().__init__("")
        # Read the message defensively: a payload without a usable "message"
        # must not make this constructor raise and hide the original failure.
        detail = response.get("message") or ""
        if response_code == 403 and "API rate limit" in detail:
            rate_limit = headers["x-ratelimit-limit"]
            # The reset header is parsed as an integer timestamp
            rate_limit_reset = datetime.datetime.fromtimestamp(int(headers["x-ratelimit-reset"]))
            self.message_lines = [
                f"Current hourly rate limit ({rate_limit} per hour) has been exceeded for the GitHub API.",
                "You can increase it by providing a personal authentication token to via --github_token.",
                f"The rate limit will reset at {rate_limit_reset}",
            ]
        else:
            self.message_lines = [f"The response returned code {response_code}: {detail}"]
# Dictionary Errors
[docs]
class DictionaryError(MFAError):
    """
    Error type for problems in creating dictionary objects
    """
class PhoneMismatchError(DictionaryError):
    """
    Error raised when extra phones are found that were not in the dictionary

    Parameters
    ----------
    missing_phones: Collection[str]
        Phones to report; they are listed in sorted order
    """

    def __init__(self, missing_phones: Collection[str]):
        super().__init__("There were extra phones that were not in the dictionary: ")
        ordered_phones = sorted(missing_phones)
        self.message_lines.append(comma_join([f"{x}" for x in ordered_phones]))
[docs]
class NoDefaultSpeakerDictionaryError(DictionaryError):
    """
    Error raised when no "default" dictionary was found
    """

    def __init__(self):
        # The base initializer stores its argument as the single message line
        super().__init__(f'No "{"default"}" dictionary was found.')
[docs]
class DictionaryPathError(DictionaryError):
    """
    Error raised when the path given for a dictionary was not found

    Parameters
    ----------
    input_path: :class:`~pathlib.Path`
        Path of the pronunciation dictionary
    """

    def __init__(self, input_path: Path):
        super().__init__(f"The specified path for the dictionary ({input_path}) was not found.")
[docs]
class DictionaryFileError(DictionaryError):
    """
    Error raised when the path given for a dictionary is not a file

    Parameters
    ----------
    input_path: :class:`~pathlib.Path`
        Path of the pronunciation dictionary
    """

    def __init__(self, input_path: Path):
        super().__init__(f"The specified path for the dictionary ({input_path}) is not a file.")
# Corpus Errors
[docs]
class CorpusError(MFAError):
    """
    Error type for problems in creating Corpus objects
    """
[docs]
class CorpusReadError(CorpusError):
    """
    Error raised when a file could not be read

    Parameters
    ----------
    file_name: str
        File name that was not readable
    """

    def __init__(self, file_name: str):
        # The base initializer stores its argument as the single message line
        super().__init__(f"There was an error reading {file_name}.")
[docs]
class TextParseError(CorpusReadError):
    """
    Error raised when a lab or txt file could not be decoded

    Parameters
    ----------
    file_name: str
        File name that had the error
    """

    def __init__(self, file_name: str):
        super().__init__("")
        # Replace the parent's message with the decoding hint
        decoding_hint = f"There was an error decoding {file_name}, maybe try re-saving it as utf8?"
        self.message_lines = [decoding_hint]
[docs]
class TextGridParseError(CorpusReadError):
    """
    Class capturing TextGrid reading errors

    Parameters
    ----------
    file_name: str
        File name
    error: str
        Error in TextGrid file

    Attributes
    ----------
    file_name: str
        File name that had the error
    error: str
        Error text reported for the file
    """

    def __init__(self, file_name: str, error: str):
        # Pass the real file name to the parent; passing "" left a stray
        # "There was an error reading ." line at the top of the message.
        super().__init__(file_name)
        self.file_name = file_name
        self.error = error
        # Replace the parent's line so the file is only named once
        self.message_lines = [
            f"Reading {file_name} has the following error:",
            "",
            "",
            self.error,
        ]
[docs]
class SoxError(CorpusReadError):
    """
    Error type for problems in calling and finding Sox
    """
class SoundFileError(CorpusReadError):
    """
    Class for errors in sound files

    Parameters
    ----------
    file_name: str or :class:`~pathlib.Path`
        File name
    error: str
        Error in sound file

    Attributes
    ----------
    file_name: str or :class:`~pathlib.Path`
        File name that had the error
    error: str
        Error text reported for the file
    """

    def __init__(self, file_name: typing.Union[str, Path], error: str):
        # Pass the real file name to the parent; passing "" left a stray
        # "There was an error reading ." line at the top of the message.
        super().__init__(file_name)
        self.file_name = file_name
        self.error = error
        # Replace the parent's line so the file is only named once
        self.message_lines = [
            f"Reading {file_name} has the following error:",
            "",
            "",
            self.error,
        ]
# Aligner Errors
class AlignerError(MFAError):
    """
    Error type for problems during alignment
    """
class NoAlignmentsError(MFAError):
    """
    Error raised when there were no successful alignments

    Parameters
    ----------
    num_utterances
        Number of utterances reported in the message
    beam_size
        Beam that was used; the suggested beam is ten times this value
    retry_beam_size
        Retry beam that was used
    """

    def __init__(self, num_utterances, beam_size, retry_beam_size):
        super().__init__(f"There were no successful alignments for {num_utterances} utterances.")
        # Suggested retry beam is four times the suggested beam
        suggested_beam_size = beam_size * 10
        suggested_retry_beam_size = suggested_beam_size * 4
        self.message_lines.extend(
            [
                f"The current set up used a beam of {beam_size} and a retry beam of {retry_beam_size}.",
                f'You can try rerunning with a larger beam (i.e. "mfa align ... --beam {suggested_beam_size} --retry_beam {suggested_retry_beam_size}").',
                'If increasing the beam size does not help, then there are likely issues with the corpus, dictionary, or acoustic model, and can be further diagnosed with the "mfa validate" command',
            ]
        )
class AlignmentError(MFAError):
    """
    Error raised when alignment jobs finished with errors

    Parameters
    ----------
    error_logs: list[str]
        List of Kaldi log files with errors
    """

    def __init__(self, error_logs: List[str]):
        super().__init__("")
        summary = (
            f"There were {len(error_logs)} job(s) with errors. "
            f"For more information, please see:"
        )
        # Summary, two blank lines, then one line per log path
        self.message_lines = [summary, "", "", *error_logs]
class AlignmentCollectionError(MFAError):
    """
    Error raised for a failure tied to a specific utterance

    Parameters
    ----------
    sound_file_path: str or :class:`~pathlib.Path`
        Sound file associated with utterance that hit an error
    text_file_path: str or :class:`~pathlib.Path`
        Text file associated with utterance that hit an error
    utterance_begin: float
        Utterance beginning timestamp
    utterance_end: float
        Utterance end timestamp
    traceback: list[str]
        Traceback of the exception encountered
    log_path: str or :class:`~pathlib.Path`, optional
        Path to log file if saved
    """

    def __init__(
        self,
        sound_file_path: typing.Union[str, Path],
        text_file_path: typing.Union[str, Path],
        utterance_begin: float,
        utterance_end: float,
        traceback: typing.List[str],
        log_path: typing.Union[str, Path] = None,
    ):
        super().__init__("")
        # Traceback lines come first, followed by the utterance description
        self.message_lines += [
            *traceback,
            f"The above error was encountered for the utterance from {utterance_begin} to {utterance_end} for {sound_file_path} and {text_file_path}.",
        ]
        if log_path is None:
            return
        self.message_lines.append(f"This error has been logged to {log_path}.")
[docs]
class AlignmentExportError(AlignmentError):
    """
    Error raised when exporting alignments fails

    Parameters
    ----------
    path: :class:`pathlib.Path`
        Path for export
    error_lines: list[str]
        Lines in the error message
    """

    def __init__(self, path: Path, error_lines: List[str]):
        # Calls the root initializer directly rather than the parent's
        MFAError.__init__(self, f"Error was encountered in exporting {path}:")
        self.path = path
        # Two blank lines separate the header from the error lines
        self.message_lines.extend(["", "", *error_lines])
class CtmError(AlignmentError):
    """
    Error raised when processing a CTM interval fails

    Parameters
    ----------
    ctm: :class:`~montreal_forced_aligner.data.CtmInterval`
        CTM interval that was not parsed correctly
    """

    def __init__(self, ctm: CtmInterval):
        # Calls the root initializer directly rather than the parent's
        description = f"Error was encountered in processing CTM interval: {ctm}"
        MFAError.__init__(self, description)
[docs]
class NoSuccessfulAlignments(AlignerError):
    """
    Error type for the case where nothing could be aligned
    """
[docs]
class PronunciationAcousticMismatchError(AlignerError):
    """
    Error raised when dictionary phones do not have acoustic models

    Parameters
    ----------
    missing_phones: Collection[str]
        Phones that are not in the acoustic model; they are listed in sorted order
    """

    def __init__(self, missing_phones: Collection[str]):
        super().__init__("There were phones in the dictionary that do not have acoustic models: ")
        ordered_phones = sorted(missing_phones)
        self.message_lines.append(comma_join([f"{x}" for x in ordered_phones]))
# Command line exceptions
[docs]
class ArgumentError(MFAError):
    """
    Error type for problems parsing command line arguments
    """
[docs]
class FileArgumentNotFoundError(ArgumentError):
    """
    Error raised when a specified file could not be found

    Parameters
    ----------
    path: :class:`~pathlib.Path`
        Path not found
    """

    def __init__(self, path: Path):
        # The base initializer stores its argument as the single message line
        super().__init__(f'Could not find "{path}".')
[docs]
class PretrainedModelNotFoundError(ArgumentError):
    """
    Error raised when a named pretrained model could not be found

    Parameters
    ----------
    name: str
        Model name
    model_type: str, optional
        Model type searched
    available: list[str], optional
        List of models that were found
    """

    def __init__(
        self, name: str, model_type: Optional[str] = None, available: Optional[List[str]] = None
    ):
        # Mention the model type only when one was given
        extra = f" for {model_type}" if model_type else ""
        super().__init__(f'Could not find a model named "{name}"{extra}.')
        if not available:
            return
        available = [f"{x}" for x in available]
        self.message_lines.append(f"Available: {comma_join(available)}.")
class RemoteModelNotFoundError(ArgumentError):
    """
    Exception class for not finding a specified pretrained model

    Parameters
    ----------
    name: str
        Model name
    model_type: str, optional
        Model type searched; also used to build the link to the 1.0 archive release
    available: list[str], optional
        List of models that were found
    """

    def __init__(
        self, name: str, model_type: Optional[str] = None, available: Optional[List[str]] = None
    ):
        super().__init__("")
        # Mention the model type only when one was given
        extra = ""
        if model_type:
            extra += f" for {model_type}"
        self.message_lines = [f'Could not find a model named "{name}"{extra}.']
        if available:
            available = [f"{x}" for x in available]
            self.message_lines.append(f"Available: {comma_join(available)}.")
        # NOTE(review): block nesting was reconstructed from a listing without
        # indentation -- confirm the links below are added regardless of ``available``.
        self.message_lines.append(
            "You can see all available models either on https://mfa-models.readthedocs.io/en/latest/ or "
            "https://github.com/MontrealCorpusTools/mfa-models/releases."
        )
        if model_type:
            # Point at the archive release tag for this model type
            self.message_lines.append(
                f"If you're looking for a model from 1.0, "
                f"please see https://github.com/MontrealCorpusTools/mfa-models/releases/tag/{model_type}-archive-v1.0."
            )
class RemoteModelVersionNotFoundError(ArgumentError):
    """
    Exception class for not finding a specified version of a pretrained model

    Parameters
    ----------
    name: str
        Model name
    model_type: str
        Model type requested; also used to build the link to the 1.0 archive release
    version: str
        Model version requested
    available: list[str], optional
        List of versions that were found
    """

    def __init__(
        self, name: str, model_type: str, version: str, available: Optional[List[str]] = None
    ):
        super().__init__("")
        self.message_lines = [
            f'Could not find version "{version}" for {model_type} named "{name}".'
        ]
        if available:
            available = [f"{x}" for x in available]
            self.message_lines.append(f"Available versions: {comma_join(available)}.")
        # NOTE(review): block nesting was reconstructed from a listing without
        # indentation -- confirm the links below are added regardless of ``available``.
        self.message_lines.append(
            "You can see all available models and versions either on https://mfa-models.readthedocs.io/en/latest/ or "
            "https://github.com/MontrealCorpusTools/mfa-models/releases."
        )
        if model_type:
            # Point at the archive release tag for this model type
            self.message_lines.append(
                f"If you're looking for a model from 1.0, "
                f"please see https://github.com/MontrealCorpusTools/mfa-models/releases/tag/{model_type}-archive-v1.0."
            )
[docs]
class MultipleModelTypesFoundError(ArgumentError):
    """
    Error raised when a name matches models of more than one type

    Parameters
    ----------
    name: str
        Model name
    possible_model_types: list[str]
        List of model types that have a model with the given name
    """

    def __init__(self, name: str, possible_model_types: List[str]):
        super().__init__(f'Found multiple model types for "{name}":')
        type_listing = ", ".join([f"{x}" for x in possible_model_types])
        self.message_lines += [type_listing, "Please specify a model type to inspect."]
[docs]
class ModelExtensionError(ArgumentError):
    """
    Error raised when a model path does not have the correct extensions

    Parameters
    ----------
    name: str
        Model name
    model_type: str
        Model type
    extensions: list[str]
        Extensions that the model supports
    """

    def __init__(self, name: str, model_type: str, extensions: List[str]):
        # Mention the model type only when one was given
        extra = f" for {model_type}" if model_type else ""
        super().__init__(f'The path "{name}" does not have the correct extensions{extra}.')
        if not extensions:
            return
        available = [f"{x}" for x in extensions]
        self.message_lines.append(f" Possible extensions: {comma_join(available)}.")
[docs]
class ModelTypeNotSupportedError(ArgumentError):
    """
    Error raised when a model type is not supported

    Parameters
    ----------
    model_type: str
        Model type
    model_types: list[str]
        List of supported model types; they are listed in sorted order
    """

    def __init__(self, model_type, model_types):
        super().__init__(f'The model type "{model_type}" is not supported.')
        if not model_types:
            return
        model_types = [f"{x}" for x in sorted(model_types)]
        self.message_lines.append(f" Possible model types: {comma_join(model_types)}.")
[docs]
class ConfigError(MFAError):
    """
    Error type for problems in configuration
    """
[docs]
class RootDirectoryError(ConfigError):
    """
    Exception class for errors using the MFA_ROOT_DIR

    Parameters
    ----------
    temporary_directory
        Directory that could not be created
    variable
        Name of the environment variable the user can set to choose another directory
    """

    def __init__(self, temporary_directory, variable):
        super().__init__("")
        self.message_lines = [
            # Close the parenthesis around the attempted path and drop the trailing space
            f"Could not create a root MFA temporary directory (tried {temporary_directory}).",
            f"Please specify a write-able directory via the {variable} environment variable.",
        ]
[docs]
class TrainerError(MFAError):
    """
    Error type for problems in trainers
    """
class IvectorError(MFAError):
    """
    Error type for problems in ivector extraction
    """
class IvectorTrainingError(IvectorError):
    """
    Error type for problems in ivector extractor training
    """
[docs]
class G2PError(MFAError):
    """
    Error type for problems in G2P
    """
class PyniniAlignmentError(G2PError):
    """
    Error raised when Pynini alignment jobs encountered errors

    Parameters
    ----------
    error_dict: dict[str, Exception]
        Mapping whose keys and stringified values are listed in the message
    """

    def __init__(self, error_dict: Dict[str, Exception]):
        super().__init__("The following Pynini alignment jobs encountered errors:")
        self.message_lines += ["", ""]
        # Each entry contributes its key and then the text of its exception
        for job, failure in error_dict.items():
            self.message_lines += [job, str(failure)]
class PyniniGenerationError(G2PError):
    """
    Error raised when words had errors in running G2P

    Parameters
    ----------
    error_dict: dict[str, Exception]
        Mapping whose keys and stringified values are listed in the message
    """

    def __init__(self, error_dict: Dict[str, Exception]):
        super().__init__("The following words had errors in running G2P:")
        self.message_lines += ["", ""]
        # Each entry contributes its key and then the text of its exception
        for word, failure in error_dict.items():
            self.message_lines += [word, str(failure)]
class PhonetisaurusSymbolError(G2PError):
    """
    Error raised when a reserved symbol is found in the graphemes or phonemes of a dictionary

    Parameters
    ----------
    symbol
        Reserved symbol that was found
    variable
        Name the symbol is reserved for; also shown as the flag to use for choosing another symbol
    """

    def __init__(self, symbol, variable):
        super().__init__(
            f'The symbol "{symbol}" is reserved for "{variable}", but is found in the graphemes or phonemes of your dictionary.'
        )
        self.message_lines.append(
            f'Please re-run and specify another symbol that is not used in your dictionary with the "--{variable}" flag.'
        )
[docs]
class LMError(MFAError):
    """
    Error type for problems in language models
    """
[docs]
class LanguageModelNotFoundError(LMError):
    """
    Error raised when a suitable language model could not be found

    Parameters
    ----------
    path: :class:`~pathlib.Path`
        Path to missing language model
    """

    def __init__(self, path: Path):
        description = f"Could not find a suitable language model: {path}"
        super().__init__(description)
class MultiprocessingError(MFAError):
    """
    Error raised for exceptions in multiprocessing workers

    Parameters
    ----------
    job_name: int
        Job identifier
    error_text: str
        Traceback for exception in worker
    """

    def __init__(self, job_name: int, error_text: str):
        # The base initializer stores the header as the first message line
        super().__init__(f"Job {job_name} encountered an error:")
        self.job_name = job_name
        # One message line per line of the worker's traceback
        self.message_lines.extend(error_text.splitlines())
[docs]
class KaldiProcessingError(MFAError):
    """
    Error raised when Kaldi binaries finished with errors

    Parameters
    ----------
    error_logs: list[str]
        List of Kaldi logs that had errors
    log_file: str, optional
        Overall log file to find more information
    """

    def __init__(self, error_logs: List[typing.Union[Path, str]], log_file: Optional[Path] = None):
        super().__init__(
            f"There were {len(error_logs)} job(s) with errors when running Kaldi binaries."
        )
        self.job_name = None
        self.error_logs = error_logs
        self.log_file = log_file
        # Build the full message from the stored logs
        self.refresh_message()

    def refresh_message(self) -> None:
        """Rebuild the message lines from the current error logs and log file"""
        lines = self.message_lines = [
            f"There were {len(self.error_logs)} job(s) with errors when running Kaldi binaries.",
            "See the log files below for more information.",
        ]
        for log_path in self.error_logs:
            lines.append(str(log_path))
            if not config.VERBOSE:
                continue
            # In verbose mode, include the stripped contents of each log
            with open(log_path, "r", encoding="utf8") as log_handle:
                for raw_line in log_handle:
                    lines.append(raw_line.strip())
        if self.log_file:
            lines.append(f" For more details, please check {self.log_file}")

    def append_error_log(self, error_log: str) -> None:
        """
        Record another error log and rebuild the message

        Parameters
        ----------
        error_log: str
            Path to error log
        """
        self.error_logs.append(error_log)
        self.refresh_message()

    def update_log_file(self) -> None:
        """
        Set the log file from the first file handler of the "mfa" logger, if any, and rebuild the message
        """
        mfa_logger = logging.getLogger("mfa")
        file_handlers = (h for h in mfa_logger.handlers if isinstance(h, logging.FileHandler))
        first_file_handler = next(file_handlers, None)
        if first_file_handler is not None:
            self.log_file = first_file_handler.baseFilename
        self.refresh_message()
# Segmenter Errors
class SegmenterError(MFAError):
    """
    Error type for segmenter-related problems
    """