# Source code for montreal_forced_aligner.db

"""Database classes"""
from __future__ import annotations

import logging
import os
import re
import typing
from pathlib import Path

import librosa
import numpy as np
import pywrapfst
import sqlalchemy
import sqlalchemy.types as types
from kalpy.data import KaldiMapping, Segment
from kalpy.feat.data import FeatureArchive
from kalpy.fstext.lexicon import LexiconCompiler
from kalpy.utterance import Utterance as KalpyUtterance
from pgvector.sqlalchemy import Vector
from praatio import textgrid
from praatio.utilities.constants import Interval
from sqlalchemy import Boolean, Column, DateTime, Enum, Float, ForeignKey, Integer, String
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.ext.orderinglist import ordering_list
from sqlalchemy.orm import Bundle, declarative_base, joinedload, relationship

from montreal_forced_aligner import config
from montreal_forced_aligner.data import (
    CtmInterval,
    PhoneSetType,
    PhoneType,
    TextFileType,
    WordType,
    WorkflowType,
)
from montreal_forced_aligner.helper import mfa_open

if typing.TYPE_CHECKING:
    from montreal_forced_aligner.corpus.classes import UtteranceData

logger = logging.getLogger("mfa")

__all__ = [
    "Corpus",
    "CorpusWorkflow",
    "DictBundle",
    "Dictionary",
    "Word",
    "Phone",
    "Pronunciation",
    "PhonologicalRule",
    "RuleApplication",
    "File",
    "TextFile",
    "SoundFile",
    "Speaker",
    "SpeakerOrdering",
    "Utterance",
    "PhoneInterval",
    "WordInterval",
    "M2MSymbol",
    "Job",
    "Word2Job",
    "M2M2Job",
    "Dictionary2Job",
    "Grapheme",
    "MfaSqlBase",
    "bulk_update",
    "get_next_primary_key",
    "full_load_utterance",
]

MfaSqlBase = declarative_base()


class PathType(types.TypeDecorator):
    """
    Column type that stores :class:`~pathlib.Path` objects as strings

    Values are converted with :func:`str` when bound to a statement and wrapped in
    :class:`~pathlib.Path` when loaded; ``None`` passes through unchanged in both
    directions.
    """

    impl = types.String

    cache_ok = True

    def process_bind_param(self, value, dialect):
        """Convert a value to its string form for storage, keeping ``None`` as is"""
        return None if value is None else str(value)

    def process_result_value(self, value, dialect):
        """Wrap a stored string in :class:`~pathlib.Path`, keeping ``None`` as is"""
        return None if value is None else Path(value)


def get_next_primary_key(session: sqlalchemy.orm.Session, database_table: MfaSqlBase):
    """
    Get the next primary key value for a table

    Parameters
    ----------
    session: :class:`sqlalchemy.orm.Session`
        SqlAlchemy session to query with
    database_table: :class:`~montreal_forced_aligner.db.MfaSqlBase`
        Mapped class whose ``id`` column is inspected

    Returns
    -------
    int
        One more than the current maximum ``id``, or 1 when the query yields no value
    """
    current_max = session.query(sqlalchemy.func.max(database_table.id)).scalar()
    # An empty table yields None, which is treated as 0
    return (current_max or 0) + 1


def full_load_utterance(session: sqlalchemy.orm.Session, utterance_id: int):
    """
    Load a single utterance together with its speaker, file, and sound file

    Parameters
    ----------
    session: :class:`sqlalchemy.orm.Session`
        SqlAlchemy session to query with
    utterance_id: int
        Primary key of the utterance to load

    Returns
    -------
    :class:`~montreal_forced_aligner.db.Utterance`
        First matching utterance, or None if the query returns no rows
    """
    # Inner-joined eager loads so the related rows come back in the same query
    eager_options = (
        joinedload(Utterance.speaker, innerjoin=True),
        joinedload(Utterance.file, innerjoin=True).joinedload(File.sound_file, innerjoin=True),
    )
    query = session.query(Utterance).filter(Utterance.id == utterance_id)
    return query.options(*eager_options).first()


def bulk_update(
    session: sqlalchemy.orm.Session,
    table: MfaSqlBase,
    values: typing.List[typing.Dict[str, typing.Any]],
    id_field=None,
) -> None:
    """
    Perform a bulk update of a database table.

    The rows in ``values`` are inserted into a temporary table, and the target table
    is then updated from it with a single ``UPDATE ... FROM`` statement joined on
    ``id_field``.  The keys of the first dictionary determine which columns are updated.

    Parameters
    ----------
    session: :class:`sqlalchemy.orm.Session`
        SqlAlchemy session to use
    table: :class:`~montreal_forced_aligner.db.MfaSqlBase`
        Table to update
    values: list[dict[str, Any]]
        List of field-value dictionaries to update, each including the ``id_field`` key
    id_field: str, optional
        Optional specifier of the primary key field, defaults to ``"id"``
    """
    if not values:
        return
    if id_field is None:
        id_field = "id"

    # Only the non-key fields of the first row are updated
    column_names = [x for x in values[0] if x != id_field]
    if not column_names:
        # Nothing besides the key was supplied; an UPDATE with an empty SET clause
        # would be invalid SQL, so there is nothing to do
        return
    table_name = table.__tablename__
    temp_table_name = f"temp_{table_name}"
    columns = [getattr(table, x)._copy() for x in column_names]
    sql_column_names = [f'"{x}"' for x in column_names]
    if config.USE_POSTGRES:
        # NOTE(review): triggers are only re-enabled on the success path below;
        # if the update raises they stay disabled until re-enabled elsewhere
        session.execute(sqlalchemy.text(f"ALTER TABLE {table_name} DISABLE TRIGGER all"))
        session.commit()
    temp_table = None
    try:
        with session.begin_nested():
            temp_table = sqlalchemy.Table(
                temp_table_name,
                MfaSqlBase.metadata,
                sqlalchemy.Column(id_field, sqlalchemy.Integer, primary_key=True),
                *columns,
                prefixes=["TEMPORARY"],
                extend_existing=True,
            )
            create_statement = str(
                sqlalchemy.schema.CreateTable(temp_table).compile(session.get_bind())
            )
            session.execute(sqlalchemy.text(create_statement))
            session.execute(temp_table.insert(), values)

            set_statements = ",\n".join(f""" {c} = b.{c}""" for c in sql_column_names)
            sql = f"""
        UPDATE {table_name}
        SET
            {set_statements}
        FROM {temp_table_name} AS b
        WHERE {table_name}.{id_field}=b.{id_field};
        """
            session.execute(sqlalchemy.text(sql))

            # drop temp table
            session.execute(sqlalchemy.text(f"DROP TABLE {temp_table_name}"))
        if config.USE_POSTGRES:
            session.execute(sqlalchemy.text(f"ALTER TABLE {table_name} ENABLE TRIGGER all"))
            session.commit()
            session.execute(sqlalchemy.text("DISCARD TEMP"))
    finally:
        # Always unregister the temporary table: with extend_existing=True a stale
        # definition left behind by a failed call would leak its columns into the
        # next call for the same table
        if temp_table is not None:
            MfaSqlBase.metadata.remove(temp_table)


# Association table linking dictionaries to jobs (composite primary key of both ids);
# used as the ``secondary`` table of ``Dictionary.jobs``
Dictionary2Job = sqlalchemy.Table(
    "dictionary_job",
    MfaSqlBase.metadata,
    Column("dictionary_id", ForeignKey("dictionary.id"), primary_key=True),
    Column("job_id", ForeignKey("job.id"), primary_key=True),
)

# Association table linking speakers to files with an integer ``index``;
# all three columns together form the primary key
SpeakerOrdering = sqlalchemy.Table(
    "speaker_ordering",
    MfaSqlBase.metadata,
    Column("speaker_id", ForeignKey("speaker.id"), primary_key=True),
    Column("file_id", ForeignKey("file.id"), primary_key=True),
    Column("index", Integer, primary_key=True),
)


class DictBundle(Bundle):
    """
    SqlAlchemy custom Bundle class that returns each row as a dictionary, for loading
    variable column counts
    """

    def create_row_processor(self, query, procs, labels):
        """
        Override create_row_processor to return values as dictionaries

        Each label is paired with the value its column processor extracts from the row.
        """

        def row_to_dict(row):
            return {label: getter(row) for label, getter in zip(labels, procs)}

        return row_to_dict


class Corpus(MfaSqlBase):
    """
    Database class for storing information about a corpus

    Parameters
    ----------
    id: int
        Primary key
    name: str
        Corpus name
    path: :class:`~pathlib.Path`
        Path of the corpus
    imported: bool
        Flag for whether the corpus has been imported
    text_normalized: bool
        Flag for whether text has been normalized
    cutoffs_found: bool
        Flag for whether cutoffs have been found
    features_generated: bool
        Flag for whether features have been generated
    vad_calculated: bool
        Flag for whether VAD has been calculated
    ivectors_calculated: bool
        Flag for whether ivectors have been calculated
    plda_calculated: bool
        Flag for whether PLDA has been calculated
    xvectors_loaded: bool
        Flag for whether xvectors have been loaded; selects the xvector columns over
        the ivector columns
    alignment_done: bool
        Flag for whether alignment has successfully completed
    transcription_done: bool
        Flag for whether transcription has successfully completed
    alignment_evaluation_done: bool
        Flag for whether alignment evaluation has successfully completed
    has_reference_alignments: bool
        Flag for whether reference alignments have been imported
    has_sound_files: bool
        Flag for whether the corpus has sound files
    has_text_files: bool
        Flag for whether the corpus has text files
    num_jobs: int
        Number of jobs, used to name the split directory
    current_subset: int
        Current subset size, 0 when no subset is in use
    data_directory: :class:`~pathlib.Path`
        Root directory that holds the split and subset directories
    jobs: list[:class:`~montreal_forced_aligner.db.Job`]
        Jobs belonging to the corpus
    """

    __tablename__ = "corpus"

    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String(50), unique=True, nullable=False)
    path = Column(PathType, unique=True, nullable=False)
    imported = Column(Boolean, default=False)
    text_normalized = Column(Boolean, default=False)
    cutoffs_found = Column(Boolean, default=False)
    features_generated = Column(Boolean, default=False)
    vad_calculated = Column(Boolean, default=False)
    ivectors_calculated = Column(Boolean, default=False)
    plda_calculated = Column(Boolean, default=False)
    xvectors_loaded = Column(Boolean, default=False)
    alignment_done = Column(Boolean, default=False)
    transcription_done = Column(Boolean, default=False)
    alignment_evaluation_done = Column(Boolean, default=False)
    has_reference_alignments = Column(Boolean, default=False)
    has_sound_files = Column(Boolean, default=False)
    has_text_files = Column(Boolean, default=False)
    num_jobs = Column(Integer, default=0)

    current_subset = Column(Integer, default=0)
    data_directory = Column(PathType, nullable=False)

    jobs = relationship("Job", back_populates="corpus")

    @property
    def split_directory(self):
        """Directory ``split{num_jobs}`` inside the data directory"""
        directory_name = f"split{self.num_jobs}"
        return self.data_directory.joinpath(directory_name)

    @property
    def current_subset_directory(self):
        """Directory ``subset_{current_subset}``, or the split directory when no subset is set"""
        if self.current_subset:
            return self.data_directory.joinpath(f"subset_{self.current_subset}")
        return self.split_directory

    @property
    def speaker_ivector_column(self):
        """Speaker column to use for vectors: xvector if xvectors are loaded, else ivector"""
        return Speaker.xvector if self.xvectors_loaded else Speaker.ivector

    @property
    def utterance_ivector_column(self):
        """Utterance column to use for vectors: xvector if xvectors are loaded, else ivector"""
        return Utterance.xvector if self.xvectors_loaded else Utterance.ivector



[docs]
class Dialect(MfaSqlBase):
    """
    Database class for storing information about a dialect

    Parameters
    ----------
    id: int
        Primary key
    name: str
        Dialect name
    dictionaries: list[:class:`~montreal_forced_aligner.db.Dictionary`]
        Dictionaries associated with the dialect
    rules: list[:class:`~montreal_forced_aligner.db.PhonologicalRule`]
        Phonological rules associated with the dialect
    """

    __tablename__ = "dialect"

    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String(50), nullable=False)

    dictionaries = relationship("Dictionary", back_populates="dialect")
    rules = relationship("PhonologicalRule", back_populates="dialect")




[docs]
class Dictionary(MfaSqlBase):
    """
    Database class for storing information about a pronunciation dictionary

    Parameters
    ----------
    id: int
        Primary key
    name: str
        Dictionary name
    dialect_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.Dialect`
    dialect: :class:`~montreal_forced_aligner.db.Dialect`
        Dialect of dictionary if dictionary name is in MFA format
    path: :class:`~pathlib.Path`
        Path to the dictionary
    root_temp_directory: :class:`~pathlib.Path`
        Directory under which the phones directory and the dictionary's own
        temporary directory are created
    phone_set_type: :class:`~montreal_forced_aligner.data.PhoneSetType`
        Phone set
    bracket_regex: str
        Regular expression for detecting bracketed words
    laughter_regex: str
        Regular expression for detecting laughter words
    position_dependent_phones: bool
        Flag for whether phones have word-position flags
    default: bool
        Flag for whether this dictionary is the default one
    clitic_marker: str
        Character marking clitics
    silence_word: str
        Symbol for silence
    optional_silence_phone: str
        Symbol for silence phone
    oov_word: str
        Symbol for unknown words
    oov_phone: str
        Symbol for the phone of unknown words
    bracketed_word: str
        Symbol for bracketed words (cutoffs, hesitations, etc)
    laughter_word: str
        Symbol for laughter words
    max_disambiguation_symbol: int
        Highest disambiguation index required
    silence_probability: float
        Probability of inserting non-initial optional silence
    initial_silence_probability: float
        Probability of inserting initial silence
    final_silence_correction: float
        Correction factor on having final silence
    final_non_silence_correction: float
        Correction factor on having final non-silence
    words: list[:class:`~montreal_forced_aligner.db.Word`]
        Words in the dictionary, ordered by mapping ID
    speakers: list[:class:`~montreal_forced_aligner.db.Speaker`]
        Speakers that use the dictionary
    jobs: list[:class:`~montreal_forced_aligner.db.Job`]
        Jobs linked to the dictionary through the ``dictionary_job`` table
    """

    __tablename__ = "dictionary"

    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String(50), nullable=False)
    path = Column(PathType, unique=True)
    rules_applied = Column(Boolean, default=False)
    phone_set_type = Column(Enum(PhoneSetType), nullable=True)
    root_temp_directory = Column(PathType, nullable=True)
    clitic_cleanup_regex = Column(String, nullable=True)
    bracket_regex = Column(String, nullable=True)
    laughter_regex = Column(String, nullable=True)
    position_dependent_phones = Column(Boolean, nullable=True)
    default = Column(Boolean, default=False, nullable=False)
    clitic_marker = Column(String(1), nullable=True)
    silence_word = Column(String, nullable=True, default="<eps>")
    optional_silence_phone = Column(String, nullable=True, default="sil")
    oov_word = Column(String, nullable=True, default="<unk>")
    oov_phone = Column(String, nullable=True, default="spn")
    bracketed_word = Column(String, nullable=True)
    cutoff_word = Column(String, nullable=True)
    laughter_word = Column(String, nullable=True)

    use_g2p = Column(Boolean, nullable=False, default=False)
    max_disambiguation_symbol = Column(Integer, default=0, nullable=False)
    silence_probability = Column(Float, default=0.5, nullable=False)
    initial_silence_probability = Column(Float, default=0.5, nullable=False)
    final_silence_correction = Column(Float, nullable=True)
    final_non_silence_correction = Column(Float, nullable=True)

    dialect_id = Column(Integer, ForeignKey("dialect.id"), index=True, nullable=True)
    dialect = relationship("Dialect", back_populates="dictionaries")

    words = relationship(
        "Word",
        back_populates="dictionary",
        order_by="Word.mapping_id",
        collection_class=ordering_list("mapping_id"),
        cascade="all, delete",
    )
    speakers = relationship(
        "Speaker",
        back_populates="dictionary",
        cascade="all, delete-orphan",
    )

    jobs = relationship(
        "Job",
        secondary=Dictionary2Job,
        back_populates="dictionaries",
    )

    @property
    def word_mapping(self):
        """
        Mapping of word labels to their mapping IDs for included words of this dictionary

        Queried once through the object's session and cached on the instance.
        """
        if not hasattr(self, "_word_mapping"):
            session = sqlalchemy.orm.Session.object_session(self)
            query = (
                session.query(Word.word, Word.mapping_id)
                .filter(Word.dictionary_id == self.id)
                .filter(Word.included == True)  # noqa
                .order_by(Word.mapping_id)
            )
            self._word_mapping = {}
            for w, mapping_id in query:
                self._word_mapping[w] = mapping_id
        return self._word_mapping

    @property
    def word_table(self):
        """
        Word symbol table for the dictionary, cached on the instance

        Read from :attr:`words_symbol_path` when that file exists; otherwise built from
        the included words of this dictionary and written to that path.
        """
        if not hasattr(self, "_word_table"):
            if self.words_symbol_path.exists():
                self._word_table = pywrapfst.SymbolTable.read_text(self.words_symbol_path)
                return self._word_table
            # No symbol file yet: build the table from the database and save it
            self.temp_directory.mkdir(parents=True, exist_ok=True)
            session = sqlalchemy.orm.Session.object_session(self)
            query = (
                session.query(Word.word, Word.mapping_id)
                .filter(Word.dictionary_id == self.id)
                .filter(Word.included == True)  # noqa
                .order_by(Word.mapping_id)
            )
            self._word_table = pywrapfst.SymbolTable()
            for w, mapping_id in query:
                self._word_table.add_symbol(w, mapping_id)
            self._word_table.write_text(self.words_symbol_path)
        return self._word_table

    @property
    def phone_table(self):
        """
        Phone symbol table, cached on the instance

        Read from :attr:`phone_symbol_table_path` when that file exists, adding the
        symbols ``#0``, ``#1`` and ``#2`` if they are missing; otherwise built from all
        phones in the database (using their Kaldi labels) and written to that path.
        """
        if not hasattr(self, "_phone_table"):
            if self.phone_symbol_table_path.exists():
                self._phone_table = pywrapfst.SymbolTable.read_text(self.phone_symbol_table_path)
                for k in ["#0", "#1", "#2"]:
                    if not self._phone_table.member(k):
                        self._phone_table.add_symbol(k)
            else:
                self.phones_directory.mkdir(parents=True, exist_ok=True)
                session = sqlalchemy.orm.Session.object_session(self)
                query = session.query(Phone.kaldi_label, Phone.mapping_id).order_by(
                    Phone.mapping_id
                )
                self._phone_table = pywrapfst.SymbolTable()
                for p, mapping_id in query:
                    self._phone_table.add_symbol(p, mapping_id)
                self._phone_table.write_text(str(self.phone_symbol_table_path))
        return self._phone_table

    @property
    def word_pronunciations(self):
        """
        Mapping of word labels to the set of their pronunciation strings

        Covers included words of this dictionary and skips pronunciations equal to
        the OOV phone; cached on the instance.
        """
        if not hasattr(self, "_word_pronunciations"):
            session = sqlalchemy.orm.Session.object_session(self)
            query = (
                session.query(Word.word, Pronunciation.pronunciation)
                .join(Pronunciation.word)
                .filter(Word.dictionary_id == self.id)
                .filter(Word.included == True)  # noqa
                .filter(Pronunciation.pronunciation != self.oov_phone)
                .order_by(Word.mapping_id)
            )
            self._word_pronunciations = {}
            for w, pronunciation in query:
                if w not in self._word_pronunciations:
                    self._word_pronunciations[w] = set()
                self._word_pronunciations[w].add(pronunciation)
        return self._word_pronunciations

    @property
    def lexicon_compiler(self):
        """
        Lexicon compiler configured from this dictionary's settings

        A new compiler is constructed on every access; it loads the lexicon FSTs from
        :attr:`lexicon_fst_path` and :attr:`align_lexicon_path` and is given this
        dictionary's word and phone tables.
        """
        lexicon_compiler = LexiconCompiler(
            silence_probability=self.silence_probability,
            initial_silence_probability=self.initial_silence_probability,
            final_silence_correction=self.final_silence_correction,
            final_non_silence_correction=self.final_non_silence_correction,
            silence_word=self.silence_word,
            oov_word=self.oov_word,
            silence_phone=self.optional_silence_phone,
            oov_phone=self.oov_phone,
            position_dependent_phones=self.position_dependent_phones,
        )
        lexicon_compiler.load_l_from_file(self.lexicon_fst_path)
        lexicon_compiler.load_l_align_from_file(self.align_lexicon_path)
        lexicon_compiler.word_table = self.word_table
        lexicon_compiler.phone_table = self.phone_table
        return lexicon_compiler

    @property
    def special_set(self) -> typing.Set[str]:
        """Set of special symbols: sentence boundaries plus the silence, OOV, bracketed, and laughter words"""
        # NOTE(review): bracketed_word and laughter_word are nullable, so this set may
        # contain None; cutoff_word is not included -- confirm both are intended
        return {
            "<s>",
            "</s>",
            self.silence_word,
            self.oov_word,
            self.bracketed_word,
            self.laughter_word,
        }

    @property
    def clitic_set(self) -> typing.Set[str]:
        """Set of clitic words"""
        return {x.word for x in self.words if x.word_type is WordType.clitic}

    @property
    def word_boundary_int_path(self) -> Path:
        """Path to the word boundary integer IDs"""
        return self.phones_directory.joinpath("word_boundary.int")

    @property
    def disambiguation_symbols_int_path(self) -> Path:
        """Path to the disambiguation symbol integer IDs"""
        return self.phones_directory.joinpath("disambiguation_symbols.int")

    @property
    def phones_directory(self) -> Path:
        """
        Phones directory
        """
        return self.root_temp_directory.joinpath("phones")

    @property
    def phone_symbol_table_path(self) -> Path:
        """Path to file containing phone symbols and their integer IDs"""
        return self.phones_directory.joinpath("phones.txt")

    @property
    def grapheme_symbol_table_path(self) -> Path:
        """Path to file containing grapheme symbols and their integer IDs"""
        return self.phones_directory.joinpath("graphemes.txt")

    @property
    def phone_disambig_path(self) -> Path:
        """Path to the ``phone_disambig.txt`` file in the phones directory"""
        return self.phones_directory.joinpath("phone_disambig.txt")

    @property
    def temp_directory(self) -> Path:
        """
        Dictionary-specific directory under the root temporary directory, named ``{id}_{name}``
        """
        return self.root_temp_directory.joinpath(f"{self.id}_{self.name}")

    @property
    def lexicon_disambig_fst_path(self) -> Path:
        """
        Path of disambiguated lexicon fst (L.disambig_fst)
        """
        return self.temp_directory.joinpath("L.disambig_fst")

    @property
    def align_lexicon_path(self) -> Path:
        """
        Path of lexicon file to use for aligning lattices
        """
        return self.temp_directory.joinpath("align_lexicon.fst")

    @property
    def align_lexicon_disambig_path(self) -> Path:
        """
        Path of the ``align_lexicon.disambig_fst`` file in the dictionary's temporary directory
        """
        return self.temp_directory.joinpath("align_lexicon.disambig_fst")

    @property
    def align_lexicon_int_path(self) -> Path:
        """
        Path of the ``align_lexicon.int`` file in the dictionary's temporary directory
        """
        return self.temp_directory.joinpath("align_lexicon.int")

    @property
    def lexicon_fst_path(self) -> Path:
        """
        Path of lexicon fst (L.fst)
        """
        return self.temp_directory.joinpath("L.fst")

    @property
    def words_symbol_path(self) -> Path:
        """
        Path of word to int mapping file for the dictionary
        """
        return self.temp_directory.joinpath("words.txt")

    @property
    def data_source_identifier(self) -> str:
        """Identifier combining the dictionary's ID and name as ``{id}_{name}``"""
        return f"{self.id}_{self.name}"

    @property
    def identifier(self) -> str:
        """Same value as :attr:`data_source_identifier`"""
        return f"{self.data_source_identifier}"

    @property
    def silence_probability_info(self) -> typing.Dict[str, float]:
        """Dictionary of silence information"""
        return {
            "silence_probability": self.silence_probability,
            "initial_silence_probability": self.initial_silence_probability,
            "final_silence_correction": self.final_silence_correction,
            "final_non_silence_correction": self.final_non_silence_correction,
        }




[docs]
class Phone(MfaSqlBase):
    """
    Database class for storing phones and their integer IDs

    Parameters
    ----------
    id: int
        Primary key
    mapping_id: int
        Integer ID of the phone for Kaldi processing
    phone: str
        Phone label
    kaldi_label: str
        Unique label of the phone, used as the symbol in the phone symbol table
    position: str
        Optional position marker of the phone
    phone_type: :class:`~montreal_forced_aligner.data.PhoneType`
        Type of phone
    mean_duration: float
        Optional mean duration of the phone
    sd_duration: float
        Optional standard deviation of the phone's duration
    phone_intervals: list[:class:`~montreal_forced_aligner.db.PhoneInterval`]
        Intervals labelled with the phone, ordered by begin
    """

    __tablename__ = "phone"

    id = Column(Integer, primary_key=True, autoincrement=True)
    mapping_id = Column(Integer, nullable=False, unique=True)
    phone = Column(String(10), nullable=False)
    kaldi_label = Column(String(10), unique=True, nullable=False)
    position = Column(String(2), nullable=True)
    phone_type = Column(Enum(PhoneType), nullable=False, index=True)
    mean_duration = Column(Float, nullable=True)
    sd_duration = Column(Float, nullable=True)

    phone_intervals = relationship(
        "PhoneInterval",
        back_populates="phone",
        order_by="PhoneInterval.begin",
        collection_class=ordering_list("begin"),
        cascade="all, delete",
        passive_deletes=True,
    )




[docs]
class Grapheme(MfaSqlBase):
    """
    Database class for storing graphemes and their integer IDs

    Parameters
    ----------
    id: int
        Primary key
    mapping_id: int
        Integer ID of the grapheme
    grapheme: str
        Grapheme label
    """

    __tablename__ = "grapheme"

    id = Column(Integer, primary_key=True, autoincrement=True)
    mapping_id = Column(Integer, nullable=False, unique=True)
    grapheme = Column(String(25), unique=True, nullable=False)




[docs]
class Word(MfaSqlBase):
    """
    Database class for storing words, their integer IDs, and pronunciation information

    Parameters
    ----------
    id: int
        Primary key
    mapping_id: int
        Integer ID of the word for Kaldi processing
    word: str
        Word label
    count: int
        Count frequency of word in the corpus
    word_type: :class:`~montreal_forced_aligner.data.WordType`
        Type of word
    included: bool
        Flag for whether the word is included when building word mappings and tables
    initial_cost: float
        Optional initial cost of the word
    final_cost: float
        Optional final cost of the word
    dictionary_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.Dictionary`
    dictionary: :class:`~montreal_forced_aligner.db.Dictionary`
        Pronunciation dictionary that the word belongs to
    pronunciations: list[:class:`~montreal_forced_aligner.db.Pronunciation`]
        Pronunciations of the word
    job: :class:`~montreal_forced_aligner.db.Word2Job`
        Single job association for the word
    word_intervals: list[:class:`~montreal_forced_aligner.db.WordInterval`]
        Intervals labelled with the word, ordered by begin
    """

    __tablename__ = "word"

    id = Column(Integer, primary_key=True, autoincrement=True)
    mapping_id = Column(Integer, nullable=False, index=True)
    word = Column(String, nullable=False, index=True)
    count = Column(Integer, default=0, nullable=False, index=True)
    word_type = Column(Enum(WordType), nullable=False, index=True)
    included = Column(Boolean, nullable=False, default=True)
    initial_cost = Column(Float, nullable=True)
    final_cost = Column(Float, nullable=True)
    dictionary_id = Column(Integer, ForeignKey("dictionary.id"), nullable=False, index=True)
    dictionary = relationship("Dictionary", back_populates="words")
    pronunciations = relationship(
        "Pronunciation", back_populates="word", cascade="all, delete", passive_deletes=True
    )

    job = relationship(
        "Word2Job",
        back_populates="word",
        uselist=False,
        cascade="all, delete",
    )
    word_intervals = relationship(
        "WordInterval",
        back_populates="word",
        order_by="WordInterval.begin",
        collection_class=ordering_list("begin"),
        cascade="all, delete",
    )

    # Composite indexes for lookups by (dictionary, word type) and (word, dictionary)
    __table_args__ = (
        sqlalchemy.Index("dictionary_word_type_index", "dictionary_id", "word_type"),
        sqlalchemy.Index("word_dictionary_index", "word", "dictionary_id"),
    )




[docs]
class Pronunciation(MfaSqlBase):
    """
    Database class for storing information about a pronunciation

    Parameters
    ----------
    id: int
        Primary key
    pronunciation: str
        Space-delimited pronunciation
    probability: float
        Probability of the pronunciation
    disambiguation: int
        Optional disambiguation index of the pronunciation
    silence_after_probability: float
        Probability of silence following the pronunciation
    silence_before_correction: float
        Correction factor for silence before the pronunciation
    non_silence_before_correction: float
        Correction factor for non-silence before the pronunciation
    generated_by_rule: bool
        Flag for whether the pronunciation was generated by a rule
    count: int
        Count of the pronunciation
    silence_following_count: int
        Optional count of silence following the pronunciation
    non_silence_following_count: int
        Optional count of non-silence following the pronunciation
    word_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.Word`
    word: :class:`~montreal_forced_aligner.db.Word`
        Word for the pronunciation
    rules: list[:class:`~montreal_forced_aligner.db.RuleApplication`]
        Rule applications associated with the pronunciation
    word_intervals: list[:class:`~montreal_forced_aligner.db.WordInterval`]
        Word intervals that use the pronunciation, ordered by begin
    """

    __tablename__ = "pronunciation"

    id = Column(Integer, primary_key=True, autoincrement=True)
    pronunciation = Column(String, nullable=False)
    probability = Column(Float, nullable=True)
    disambiguation = Column(Integer, nullable=True)
    silence_after_probability = Column(Float, nullable=True)
    silence_before_correction = Column(Float, nullable=True)
    non_silence_before_correction = Column(Float, nullable=True)
    generated_by_rule = Column(Boolean, default=False, nullable=False, index=True)

    count = Column(Integer, nullable=False, default=0)
    silence_following_count = Column(Integer, nullable=True)
    non_silence_following_count = Column(Integer, nullable=True)

    word_id = Column(
        Integer, ForeignKey("word.id", ondelete="CASCADE"), nullable=False, index=True
    )
    word = relationship("Word", back_populates="pronunciations")

    rules = relationship(
        "RuleApplication",
        back_populates="pronunciation",
        cascade="all, delete",
    )

    word_intervals = relationship(
        "WordInterval",
        back_populates="pronunciation",
        order_by="WordInterval.begin",
        collection_class=ordering_list("begin"),
        cascade="all, delete",
    )




[docs]
class PhonologicalRule(MfaSqlBase):
    """
    Database class for storing information about a phonological rule

    A leading ``^`` in ``preceding_context`` anchors the rule to the start of the
    pronunciation and a trailing ``$`` in ``following_context`` anchors it to the end.

    Parameters
    ----------
    id: int
        Primary key
    segment: str
        Segment to replace
    preceding_context: str
        Context before segment to match
    following_context: str
        Context after segment to match
    replacement: str
        Replacement of segment
    probability: float
        Probability of the rule application
    silence_after_probability: float
        Probability of silence following forms with rule application
    silence_before_correction: float
        Correction factor for silence before forms with rule application
    non_silence_before_correction: float
        Correction factor for non-silence before forms with rule application
    dialect_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.Dialect`
    dialect: :class:`~montreal_forced_aligner.db.Dialect`
        Dialect that the rule belongs to
    pronunciations: list[:class:`~montreal_forced_aligner.db.RuleApplication`]
        List of rule applications
    """

    __tablename__ = "phonological_rule"

    id = Column(Integer, primary_key=True, autoincrement=True)

    segment = Column(String, nullable=False, index=True)
    preceding_context = Column(String, nullable=False, index=True)
    following_context = Column(String, nullable=False, index=True)
    replacement = Column(String, nullable=False)

    probability = Column(Float, nullable=True)
    silence_after_probability = Column(Float, nullable=True)
    silence_before_correction = Column(Float, nullable=True)
    non_silence_before_correction = Column(Float, nullable=True)

    dialect_id = Column(Integer, ForeignKey("dialect.id"), index=True, nullable=False)
    dialect = relationship("Dialect", back_populates="rules")

    pronunciations = relationship(
        "RuleApplication",
        back_populates="rule",
        cascade="all, delete",
    )

    def __hash__(self):
        # Hash on the rule's content (segment, contexts, replacement) rather than identity
        return hash(
            (self.segment, self.preceding_context, self.following_context, self.replacement)
        )

    def to_json(self) -> typing.Dict[str, typing.Any]:
        """
        Serializes the rule for export

        Returns
        -------
        dict[str, Any]
            Serialized rule
        """
        # NOTE(review): "dialect" holds the related Dialect object, not a plain
        # string -- confirm that callers convert it before JSON encoding
        return {
            "segment": self.segment,
            "dialect": self.dialect,
            "preceding_context": self.preceding_context,
            "following_context": self.following_context,
            "replacement": self.replacement,
            "probability": self.probability,
            "silence_after_probability": self.silence_after_probability,
            "silence_before_correction": self.silence_before_correction,
            "non_silence_before_correction": self.non_silence_before_correction,
        }

    @property
    def match_regex(self):
        """
        Regular expression of the rule

        The pattern joins the named groups ``preceding``, ``segment`` and ``following``
        (each only present when non-empty) with single spaces.  Unless anchored with
        ``^``/``$``, a match must start and end at a whitespace boundary or at the
        edge of the string.
        """
        components = []
        initial = False
        final = False
        preceding = self.preceding_context
        following = self.following_context
        if preceding.startswith("^"):
            initial = True
            preceding = preceding.replace("^", "").strip()
        if following.endswith("$"):
            final = True
            following = following.replace("$", "").strip()
        if preceding:
            components.append(rf"(?P<preceding>{preceding})")
        if self.segment:
            components.append(rf"(?P<segment>{self.segment})")
        if following:
            components.append(rf"(?P<following>{following})")
        pattern = " ".join(components)
        if initial:
            pattern = "^" + pattern
        else:
            pattern = r"(?:^|(?<=\s))" + pattern
        if final:
            pattern += "$"
        else:
            pattern += r"(?:$|(?=\s))"
        return re.compile(pattern, flags=re.UNICODE)

    def __str__(self):
        """Human-readable ``from -> to`` summary of the rule, keeping ``^``/``$`` anchors"""
        from_components = []
        to_components = []
        initial = False
        final = False
        preceding = self.preceding_context
        following = self.following_context
        if preceding.startswith("^"):
            initial = True
            preceding = preceding.replace("^", "").strip()
        if following.endswith("$"):
            final = True
            following = following.replace("$", "").strip()
        if preceding:
            from_components.append(preceding)
            to_components.append(preceding)
        if self.segment:
            from_components.append(self.segment)
        if self.replacement:
            to_components.append(self.replacement)
        if following:
            from_components.append(following)
            to_components.append(following)

        from_string = " ".join(from_components)
        to_string = " ".join(to_components)
        if initial:
            from_string = "^" + from_string
        if final:
            from_string += "$"
        return f"<PhonologicalRule {self.id} for Dialect {self.dialect_id}: {from_string} -> {to_string}>"

    def apply_rule(self, pronunciation: str) -> str:
        """
        Apply the rule on a pronunciation by replacing any matching segments with the replacement

        Parameters
        ----------
        pronunciation: str
            Pronunciation to apply rule

        Returns
        -------
        str
            Pronunciation with rule applied
        """
        preceding = self.preceding_context
        following = self.following_context
        # Strip the anchors exactly as match_regex does, so that a context group is
        # only referenced in the replacement when the compiled pattern defines it
        if preceding.startswith("^"):
            preceding = preceding.replace("^", "").strip()
        if following.endswith("$"):
            following = following.replace("$", "").strip()
        components = []
        if preceding:
            components.append(r"\g<preceding>")
        if self.replacement:
            components.append(self.replacement)
        if following:
            components.append(r"\g<following>")
        return self.match_regex.sub(" ".join(components), pronunciation).strip()





[docs]
class RuleApplication(MfaSqlBase):
    """
    Database class for mapping rules to generated pronunciations

    Each row links one pronunciation to one phonological rule; the pair of foreign keys forms
    the composite primary key, and both foreign keys are declared with ``ON DELETE CASCADE``.

    Parameters
    ----------
    pronunciation_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.Pronunciation`
    rule_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.PhonologicalRule`
    pronunciation: :class:`~montreal_forced_aligner.db.Pronunciation`
        Pronunciation
    rule: :class:`~montreal_forced_aligner.db.PhonologicalRule`
        Rule applied
    """

    __tablename__ = "rule_applications"
    # Composite primary key: a given (pronunciation, rule) pair can be stored only once
    pronunciation_id = Column(ForeignKey("pronunciation.id", ondelete="CASCADE"), primary_key=True)
    rule_id = Column(ForeignKey("phonological_rule.id", ondelete="CASCADE"), primary_key=True)

    # Counterpart of the ``rules`` relationship named in ``back_populates``
    pronunciation = relationship("Pronunciation", back_populates="rules")

    # Counterpart of the ``pronunciations`` relationship named in ``back_populates``
    rule = relationship("PhonologicalRule", back_populates="pronunciations")




[docs]
class Speaker(MfaSqlBase):
    """
    Database class for storing information about speakers

    Parameters
    ----------
    id: int
        Primary key
    name: str
        Name of the speaker, unique across the table
    cmvn: str
        File index for the speaker's CMVN stats
    fmllr: str
        Presumably the file index for the speaker's fMLLR transforms, mirroring ``cmvn``
        (TODO confirm against the code that populates it)
    min_f0: float, optional
        Presumably the lower bound of the speaker's pitch range (TODO confirm)
    max_f0: float, optional
        Presumably the upper bound of the speaker's pitch range (TODO confirm)
    ivector: vector, optional
        Vector column sized by ``config.IVECTOR_DIMENSION``
    plda_vector: vector, optional
        Vector column sized by ``config.PLDA_DIMENSION``
    xvector: vector, optional
        Vector column sized by ``config.XVECTOR_DIMENSION``
    num_utterances: int, optional
        Stored utterance count (indexed); how it is kept in sync with ``utterances`` is not
        visible in this class
    modified: bool
        Flag defaulting to False; presumably marks speakers changed in the database
        (TODO confirm)
    dictionary_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.Dictionary`
    dictionary: :class:`~montreal_forced_aligner.db.Dictionary`
        Pronunciation dictionary that the speaker uses
    utterances: list[:class:`~montreal_forced_aligner.db.Utterance`]
        Utterances for the speaker
    files: list[:class:`~montreal_forced_aligner.db.File`]
        Files that the speaker spoke in
    """

    __tablename__ = "speaker"

    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String, unique=True, nullable=False)
    cmvn = Column(String)
    fmllr = Column(String)
    min_f0 = Column(Float, nullable=True)
    max_f0 = Column(Float, nullable=True)
    # Embedding columns; their dimensions are fixed by the global config when the class is defined
    ivector = Column(Vector(config.IVECTOR_DIMENSION), nullable=True)
    plda_vector = Column(Vector(config.PLDA_DIMENSION), nullable=True)
    xvector = Column(Vector(config.XVECTOR_DIMENSION), nullable=True)
    num_utterances = Column(Integer, nullable=True, index=True)
    modified = Column(Boolean, nullable=False, default=False, index=True)
    # Nullable: a speaker can exist without an assigned dictionary
    dictionary_id = Column(Integer, ForeignKey("dictionary.id"), nullable=True, index=True)
    dictionary = relationship("Dictionary", back_populates="speakers")
    utterances = relationship("Utterance", back_populates="speaker")
    # Many-to-many with File through the SpeakerOrdering association table
    files = relationship("File", secondary=SpeakerOrdering, back_populates="speakers")




[docs]
class File(MfaSqlBase):
    """
    Database class for storing information about files in the corpus

    Parameters
    ----------
    id: int
        Primary key
    name: str
        Base name of the file
    relative_path: :class:`~pathlib.Path`
        Path of the file relative to the root corpus directory
    modified: bool
        Flag for whether the file has been changed in the database for exporting
    text_file: :class:`~montreal_forced_aligner.db.TextFile`
        TextFile object with information about the transcript of a file
    sound_file: :class:`~montreal_forced_aligner.db.SoundFile`
        SoundFile object with information about the audio of a file
    speakers: list[:class:`~montreal_forced_aligner.db.Speaker`]
        Speakers in the file ordered by their index
    utterances: list[:class:`~montreal_forced_aligner.db.Utterance`]
        Utterances in the file
    """

    __tablename__ = "file"

    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String, nullable=False, index=True)
    relative_path = Column(PathType, nullable=False)
    modified = Column(Boolean, nullable=False, default=False, index=True)
    speakers = relationship(
        "Speaker",
        secondary=SpeakerOrdering,
        back_populates="files",
        order_by=SpeakerOrdering.c.index,
    )
    text_file = relationship(
        "TextFile", back_populates="file", uselist=False, cascade="all, delete"
    )
    sound_file = relationship(
        "SoundFile", back_populates="file", uselist=False, cascade="all, delete"
    )
    utterances = relationship(
        "Utterance",
        back_populates="file",
        order_by="Utterance.begin",
        collection_class=ordering_list("begin"),
        cascade="all, delete",
        cascade_backrefs=False,
    )

    @property
    def num_speakers(self) -> int:
        """Number of speakers in the file"""
        return len(self.speakers)

    @property
    def num_utterances(self) -> int:
        """Number of utterances in the file"""
        return len(self.utterances)

    @property
    def duration(self) -> float:
        """Duration of the associated sound file"""
        return self.sound_file.duration

    @property
    def num_channels(self) -> int:
        """Number of channels of the associated sound file"""
        return self.sound_file.num_channels

    @property
    def sample_rate(self) -> int:
        """Sample rate of the associated sound file"""
        return self.sound_file.sample_rate

    def save(
        self,
        output_directory,
        output_format: typing.Optional[str] = None,
        save_transcription: bool = False,
        overwrite: bool = False,
    ) -> None:
        """
        Output File to TextGrid or lab.  If ``output_format`` is not specified, lab format is
        used when the file has exactly one utterance spanning the whole sound file, otherwise
        a TextGrid file is written.  Formats other than the lab and TextGrid values of
        :class:`~montreal_forced_aligner.data.TextFileType` are not written by this method.

        Parameters
        ----------
        output_directory: str
            Directory to output file, if None, then it will overwrite the original file
        output_format: str, optional
            Text type to save as, if not provided, the file type is guessed from the utterances
        save_transcription: bool
            Flag for whether the hypothesized transcription text should be saved instead of the default text
        overwrite: bool
            Flag for updating the file's :class:`~montreal_forced_aligner.db.TextFile` record to
            point at the output path and format (creating the record if the file has none); a
            previous transcript stored at a different path is removed from disk
        """
        from montreal_forced_aligner.alignment.multiprocessing import construct_output_path

        utterance_count = len(self.utterances)
        if output_format is None:  # Saving directly
            if (
                utterance_count == 1
                and self.utterances[0].begin == 0
                and self.utterances[0].end == self.duration
            ):
                output_format = TextFileType.LAB.value
            else:
                output_format = TextFileType.TEXTGRID.value
        output_path = construct_output_path(
            self.name, self.relative_path, output_directory, output_format=output_format
        )
        if overwrite:
            if self.text_file is None:
                self.text_file = TextFile(
                    file_id=self.id, text_file_path=output_path, file_type=output_format
                )
            # Drop the stale transcript when the output lands at a different path
            if output_path != self.text_file.text_file_path and os.path.exists(
                self.text_file.text_file_path
            ):
                os.remove(self.text_file.text_file_path)
            self.text_file.file_type = output_format
            self.text_file.text_file_path = output_path
        if output_format == TextFileType.LAB.value:
            if utterance_count == 0:
                # Nothing to write.  A file without a TextFile record has no transcript on
                # disk to clean up, so guard the attribute access instead of crashing.
                if (
                    self.text_file is not None
                    and not save_transcription
                    and os.path.exists(self.text_file.text_file_path)
                ):
                    os.remove(self.text_file.text_file_path)
                return
            # The file is reopened in "w" mode per utterance, so with several utterances only
            # the last one written remains in the lab file.
            for u in self.utterances:
                if save_transcription:
                    with mfa_open(output_path, "w") as f:
                        f.write(u.transcription_text if u.transcription_text else "")
                elif u.text:
                    with mfa_open(output_path, "w") as f:
                        f.write(u.text)
            return
        elif output_format == TextFileType.TEXTGRID.value:
            max_time = self.sound_file.duration
            # One tier per speaker; speakers registered on the file come first
            tiers = {}
            for speaker in self.speakers:
                tiers[speaker.name] = textgrid.IntervalTier(
                    speaker.name, [], minT=0, maxT=max_time
                )

            tg = textgrid.Textgrid()
            tg.maxTimestamp = max_time
            for utterance in self.utterances:
                speaker_name = utterance.speaker.name
                if speaker_name not in tiers:
                    tiers[speaker_name] = textgrid.IntervalTier(
                        speaker_name, [], minT=0, maxT=max_time
                    )
                tier = tiers[speaker_name]
                if save_transcription:
                    label = utterance.transcription_text or ""
                else:
                    # Clamp the utterance so it neither overlaps the previous interval on
                    # its tier nor runs past the end of the audio.  This updates the
                    # utterance object itself, not just the exported interval.
                    if tier.entries and tier.entries[-1].end > utterance.begin:
                        utterance.begin = tier.entries[-1].end
                    if utterance.end > self.duration:
                        utterance.end = self.duration
                    # ``text`` is a nullable column; export missing text as an empty label
                    label = (utterance.text or "").strip()
                tier.insertEntry(
                    Interval(start=utterance.begin, end=utterance.end, label=label)
                )
            for t in tiers.values():
                tg.addTier(t)
            tg.save(output_path, includeBlankSpaces=True, format=output_format)

    def construct_transcription_tiers(
        self, original_text=False
    ) -> typing.Dict[str, typing.Dict[str, typing.List[CtmInterval]]]:
        """
        Construct output transcription tiers for the file

        Parameters
        ----------
        original_text: bool
            Flag for using each utterance's input ``text`` (stored under the ``"text"`` key)
            instead of its ``transcription_text`` (stored under the ``"transcription"`` key)

        Returns
        -------
        dict[str, dict[str, list[:class:`~montreal_forced_aligner.data.CtmInterval`]]]
            Tier dictionary of utterance transcriptions, keyed by speaker name and then tier key
        """
        key = "text" if original_text else "transcription"
        data = {}
        for u in self.utterances:
            label = u.text if original_text else u.transcription_text
            if not label:
                label = ""
            speaker_tiers = data.setdefault(u.speaker_name, {})
            speaker_tiers.setdefault(key, []).append(CtmInterval(u.begin, u.end, label))
        return data





[docs]
class SoundFile(MfaSqlBase):
    """
    Database class for storing information about sound files

    Parameters
    ----------
    file_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.File`
    file: :class:`~montreal_forced_aligner.db.File`
        Root file
    sound_file_path: :class:`~pathlib.Path`
        Path to the audio file
    format: str
        Format of the audio file (flac, wav, mp3, etc)
    sample_rate: int
        Sample rate of the audio file
    duration: float
        Duration of audio file
    num_channels: int
        Number of channels in the audio file
    sox_string: str
        String that Kaldi will use to process the sound file
    """

    __tablename__ = "sound_file"

    # Primary key doubles as the foreign key, so there is at most one sound file per file
    file_id = Column(ForeignKey("file.id"), primary_key=True)
    file = relationship("File", back_populates="sound_file")
    sound_file_path = Column(PathType, nullable=False)
    format = Column(String, nullable=False)
    sample_rate = Column(Integer, nullable=False)
    duration = Column(Float, nullable=False)
    num_channels = Column(Integer, nullable=False)
    sox_string = Column(String)

    def normalized_waveform(
        self, begin: float = 0, end: typing.Optional[float] = None
    ) -> typing.Tuple[np.ndarray, np.ndarray]:
        """
        Load a normalized waveform for acoustic processing/visualization

        The samples are scaled by the single largest absolute value across all channels, so
        the relative levels of the channels are preserved.  Silent audio and empty slices are
        returned as-is (all zeros / empty) rather than raising.

        Parameters
        ----------
        begin: float, optional
            Starting time point to return, defaults to 0
        end: float, optional
            Ending time point to return, defaults to the end of the file; values past the
            end of the file are clamped to the file's duration

        Returns
        -------
        numpy.ndarray
            Time points, evenly spaced from ``begin`` to ``end`` with one point per sample
        numpy.ndarray
            Sample values, with the sample axis last
        """
        if end is None or end > self.duration:
            end = self.duration

        # sr=None keeps the native sample rate; mono=False keeps the channels separate
        y, _ = librosa.load(
            self.sound_file_path, sr=None, mono=False, offset=begin, duration=end - begin
        )
        if y.size:
            # A fully silent signal divides 0 by 0; the resulting NaNs are zeroed right
            # below, so the floating point warnings are noise.
            with np.errstate(divide="ignore", invalid="ignore"):
                y /= np.max(np.abs(y))
            y[np.isnan(y)] = 0
        # The sample axis is always the last one, whatever the channel count.  Previously
        # only exactly two channels were treated as multi-channel; other 2-D shapes were
        # normalized per sample and reported the channel count as the number of steps.
        num_steps = y.shape[-1]
        x = np.linspace(start=begin, stop=end, num=num_steps)
        return x, y





[docs]
class TextFile(MfaSqlBase):
    """
    Database class for storing information about transcription files

    Parameters
    ----------
    file_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.File`, also the primary key
    file: :class:`~montreal_forced_aligner.db.File`
        Root file
    text_file_path: :class:`~pathlib.Path`
        Path to the transcription file
    file_type: str
        Type of the transcription file (lab, TextGrid, etc)
    """

    __tablename__ = "text_file"

    # Primary key doubles as the foreign key, so there is at most one text file per file
    file_id = Column(ForeignKey("file.id"), primary_key=True)
    file = relationship("File", back_populates="text_file")
    text_file_path = Column(PathType, nullable=False)
    file_type = Column(String, nullable=False)




[docs]
class Utterance(MfaSqlBase):
    """
    Database class for storing information about utterances

    Parameters
    ----------
    id: int
        Primary key
    begin: float
        Beginning timestamp of the utterance
    end: float
        Ending timestamp of the utterance, -1 if there is no audio file
    duration: float
        Duration of the utterance
    channel: int
        Channel of the utterance in the audio file
    num_frames: int
        Number of feature frames extracted
    text: str
        Input text for the utterance
    oovs: str
        Space-delimited list of items that were not found in the speaker's pronunciation dictionary
    normalized_text: str
        Normalized text for the utterance, after removing case and punctuation, and splitting up compounds and clitics if the whole word is not
        found in the speaker's pronunciation dictionary
    normalized_character_text: str
        Presumably a character-level counterpart of ``normalized_text`` (TODO confirm)
    transcription_text: str
        Hypothesized transcription text for the utterance
    features: str
        File index for generated features
    ivector_ark: str
        Presumably the file index for extracted ivectors (TODO confirm)
    vad_ark: str
        Presumably the file index for voice activity detection output (TODO confirm)
    in_subset: bool
        Flag for whether to use this utterance in the current training subset
    ignored: bool
        Flag for if the utterance is ignored due to lacking features
    alignment_log_likelihood: float
        Log likelihood for the alignment of the utterance, taking both speech and silence phones into consideration
    speech_log_likelihood: float
        Log likelihood for the alignment of the utterance, taking only the speech phones into consideration
    duration_deviation: float
        Average of absolute z-score of speech phone duration
    phone_error_rate: float
        Phone error rate for alignment evaluation
    alignment_score: float
        Alignment score from alignment evaluation
    word_error_rate: float
        Word error rate for transcription evaluation
    character_error_rate: float
        Character error rate for transcription evaluation
    ivector: vector, optional
        Vector column sized by ``config.IVECTOR_DIMENSION``
    plda_vector: vector, optional
        Vector column sized by ``config.PLDA_DIMENSION``
    xvector: vector, optional
        Vector column sized by ``config.XVECTOR_DIMENSION``
    file_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.File`
    speaker_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.Speaker`
    file: :class:`~montreal_forced_aligner.db.File`
        File object that the utterance is from
    speaker: :class:`~montreal_forced_aligner.db.Speaker`
        Speaker object of the utterance
    phone_intervals: list[:class:`~montreal_forced_aligner.db.PhoneInterval`]
        Phone intervals of the utterance across all workflows, ordered by begin time
    word_intervals: list[:class:`~montreal_forced_aligner.db.WordInterval`]
        Word intervals of the utterance across all workflows, ordered by begin time
    job_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.Job`
    job: :class:`~montreal_forced_aligner.db.Job`
        Job that processes the utterance
    """

    __tablename__ = "utterance"

    id = Column(Integer, primary_key=True, autoincrement=True)
    begin = Column(Float, nullable=False, index=True)
    end = Column(Float, nullable=False)
    # Database-side computed column; only used for SQL expressions (see ``duration``)
    _duration = sqlalchemy.orm.deferred(
        Column("duration", Float, sqlalchemy.Computed('"end" - "begin"'), index=True)
    )
    channel = Column(Integer, nullable=False)
    num_frames = Column(Integer)
    text = Column(String)
    oovs = Column(String)
    normalized_text = Column(String)
    normalized_character_text = Column(String)
    transcription_text = Column(String)
    features = Column(String)
    ivector_ark = Column(String)
    vad_ark = Column(String)
    in_subset = Column(Boolean, nullable=False, default=False, index=True)
    ignored = Column(Boolean, nullable=False, default=False, index=True)
    alignment_log_likelihood = Column(Float)
    speech_log_likelihood = Column(Float)
    duration_deviation = Column(Float)
    phone_error_rate = Column(Float)
    alignment_score = Column(Float)
    word_error_rate = Column(Float)
    character_error_rate = Column(Float)
    ivector = Column(Vector(config.IVECTOR_DIMENSION), nullable=True)
    plda_vector = Column(Vector(config.PLDA_DIMENSION), nullable=True)
    xvector = Column(Vector(config.XVECTOR_DIMENSION), nullable=True)
    file_id = Column(Integer, ForeignKey("file.id"), index=True, nullable=False)
    speaker_id = Column(Integer, ForeignKey("speaker.id"), index=True, nullable=False)
    # Database-side computed "<speaker_id>-<id>" identifier (see ``kaldi_id``)
    _kaldi_id = sqlalchemy.orm.deferred(
        Column(
            "kaldi_id",
            String,
            sqlalchemy.Computed("CAST(speaker_id AS text)|| '-' ||CAST(id AS text)"),
            unique=True,
        )
    )
    job_id = Column(Integer, ForeignKey("job.id"), index=True, nullable=True)
    file = relationship("File", back_populates="utterances", cascade_backrefs=False)
    speaker = relationship("Speaker", back_populates="utterances", cascade_backrefs=False)
    job = relationship("Job", back_populates="utterances")
    phone_intervals = relationship(
        "PhoneInterval",
        back_populates="utterance",
        order_by="PhoneInterval.begin",
        collection_class=ordering_list("begin"),
        cascade="all, delete",
    )
    word_intervals = relationship(
        "WordInterval",
        back_populates="utterance",
        order_by="WordInterval.begin",
        collection_class=ordering_list("begin"),
        cascade="all, delete",
    )

    __table_args__ = (
        sqlalchemy.Index(
            "utterance_position_index", "file_id", "speaker_id", "begin", "end", "channel"
        ),
    )

    @hybrid_property
    def duration(self) -> float:
        """Duration of the utterance, computed in Python on instances"""
        return self.end - self.begin

    @duration.expression
    def duration(cls):
        """SQL expression for the duration, backed by the computed ``duration`` column"""
        return cls._duration

    @hybrid_property
    def kaldi_id(self) -> str:
        """Identifier of the form ``<speaker_id>-<id>``, computed in Python on instances"""
        return f"{self.speaker_id}-{self.id}"

    @kaldi_id.expression
    def kaldi_id(cls):
        """SQL expression for the identifier, backed by the computed ``kaldi_id`` column"""
        return cls._kaldi_id

    def __repr__(self) -> str:
        """String representation of the utterance object"""
        return f"<Utterance in {self.file_name} by {self.speaker_name} from {self.begin} to {self.end}>"

    @staticmethod
    def _ctms_for_workflow_types(intervals, *workflow_types) -> typing.List[CtmInterval]:
        """Convert the intervals generated by any of the given workflow types to CtmIntervals"""
        return [x.as_ctm() for x in intervals if x.workflow.workflow_type in workflow_types]

    def phone_intervals_for_workflow(self, workflow_id: int) -> typing.List[CtmInterval]:
        """
        Extract phone intervals for a given :class:`~montreal_forced_aligner.db.CorpusWorkflow`

        Parameters
        ----------
        workflow_id: int
            Integer ID for :class:`~montreal_forced_aligner.db.CorpusWorkflow`

        Returns
        -------
        list[:class:`~montreal_forced_aligner.data.CtmInterval`]
            List of phone intervals
        """
        return [x.as_ctm() for x in self.phone_intervals if x.workflow_id == workflow_id]

    def word_intervals_for_workflow(self, workflow_id: int) -> typing.List[CtmInterval]:
        """
        Extract word intervals for a given :class:`~montreal_forced_aligner.db.CorpusWorkflow`

        Parameters
        ----------
        workflow_id: int
            Integer ID for :class:`~montreal_forced_aligner.db.CorpusWorkflow`

        Returns
        -------
        list[:class:`~montreal_forced_aligner.data.CtmInterval`]
            List of word intervals
        """
        return [x.as_ctm() for x in self.word_intervals if x.workflow_id == workflow_id]

    @property
    def reference_phone_intervals(self) -> typing.List[CtmInterval]:
        """
        Phone intervals from :attr:`montreal_forced_aligner.data.WorkflowType.reference`
        """
        return self._ctms_for_workflow_types(self.phone_intervals, WorkflowType.reference)

    @property
    def aligned_phone_intervals(self) -> typing.List[CtmInterval]:
        """
        Phone intervals from :attr:`montreal_forced_aligner.data.WorkflowType.alignment`
        or :attr:`montreal_forced_aligner.data.WorkflowType.online_alignment`
        """
        return self._ctms_for_workflow_types(
            self.phone_intervals, WorkflowType.alignment, WorkflowType.online_alignment
        )

    @property
    def aligned_word_intervals(self) -> typing.List[CtmInterval]:
        """
        Word intervals from :attr:`montreal_forced_aligner.data.WorkflowType.alignment`
        or :attr:`montreal_forced_aligner.data.WorkflowType.online_alignment`
        """
        # Filtered the same way as ``aligned_phone_intervals``; returning every word interval
        # would mix in the output of transcription workflows.
        return self._ctms_for_workflow_types(
            self.word_intervals, WorkflowType.alignment, WorkflowType.online_alignment
        )

    @property
    def transcribed_phone_intervals(self) -> typing.List[CtmInterval]:
        """
        Phone intervals from :attr:`montreal_forced_aligner.data.WorkflowType.transcription`
        """
        return self._ctms_for_workflow_types(self.phone_intervals, WorkflowType.transcription)

    @property
    def transcribed_word_intervals(self) -> typing.List[CtmInterval]:
        """
        Word intervals from :attr:`montreal_forced_aligner.data.WorkflowType.transcription`
        """
        return self._ctms_for_workflow_types(self.word_intervals, WorkflowType.transcription)

    @property
    def per_speaker_transcribed_phone_intervals(self) -> typing.List[CtmInterval]:
        """
        Phone intervals from :attr:`montreal_forced_aligner.data.WorkflowType.per_speaker_transcription`
        """
        return self._ctms_for_workflow_types(
            self.phone_intervals, WorkflowType.per_speaker_transcription
        )

    @property
    def per_speaker_transcribed_word_intervals(self) -> typing.List[CtmInterval]:
        """
        Word intervals from :attr:`montreal_forced_aligner.data.WorkflowType.per_speaker_transcription`
        """
        return self._ctms_for_workflow_types(
            self.word_intervals, WorkflowType.per_speaker_transcription
        )

    @property
    def phone_transcribed_phone_intervals(self) -> typing.List[CtmInterval]:
        """
        Phone intervals from :attr:`montreal_forced_aligner.data.WorkflowType.phone_transcription`
        """
        return self._ctms_for_workflow_types(
            self.phone_intervals, WorkflowType.phone_transcription
        )

    @property
    def file_name(self) -> str:
        """Name of the utterance's file"""
        return self.file.name

    @property
    def speaker_name(self) -> str:
        """Name of the utterance's speaker"""
        return self.speaker.name

    def to_data(self) -> UtteranceData:
        """
        Construct an UtteranceData object that can be used in multiprocessing

        A missing ``normalized_text`` is replaced by an empty string on the utterance itself,
        and a missing ``oovs`` value is treated as an empty set.

        Returns
        -------
        :class:`~montreal_forced_aligner.corpus.classes.UtteranceData`
            Data for the utterance
        """
        from montreal_forced_aligner.corpus.classes import UtteranceData

        if self.normalized_text is None:
            self.normalized_text = ""
        return UtteranceData(
            self.speaker_name,
            self.file_name,
            self.begin,
            self.end,
            self.channel,
            self.text,
            self.normalized_text.split(),
            # ``oovs`` is a nullable column, same as ``normalized_text``
            set((self.oovs or "").split()),
        )

    def to_kalpy(self) -> KalpyUtterance:
        """
        Construct a kalpy utterance from the database object

        The utterance is built from a segment of the file's sound file (path, begin, end and
        channel), the normalized text and the speaker's ``cmvn`` and ``fmllr`` values.

        Returns
        -------
        :class:`kalpy.utterance.Utterance`
            Kalpy utterance object
        """
        seg = Segment(self.file.sound_file.sound_file_path, self.begin, self.end, self.channel)
        return KalpyUtterance(seg, self.normalized_text, self.speaker.cmvn, self.speaker.fmllr)

    @classmethod
    def from_data(
        cls,
        data: UtteranceData,
        file: File,
        speaker: typing.Union[int, Speaker],
        frame_shift: typing.Optional[int] = None,
    ) -> Utterance:
        """
        Generate an utterance object from :class:`~montreal_forced_aligner.corpus.classes.UtteranceData`

        Parameters
        ----------
        data: :class:`~montreal_forced_aligner.corpus.classes.UtteranceData`
            Data for the utterance
        file: :class:`~montreal_forced_aligner.db.File`
            File database object for the utterance
        speaker: :class:`~montreal_forced_aligner.db.Speaker` or int
            Speaker database object for the utterance, or its integer primary key
        frame_shift: int, optional
            Frame shift in ms to use for calculating the number of frames in the utterance

        Returns
        -------
        :class:`~montreal_forced_aligner.db.Utterance`
            Utterance object
        """
        if not isinstance(speaker, int):
            speaker = speaker.id
        num_frames = None
        if frame_shift is not None:
            # Frame shift is given in ms; convert to seconds before dividing the duration
            num_frames = int((data.end - data.begin) / round(frame_shift / 1000, 4))

        return cls(
            begin=data.begin,
            end=data.end,
            channel=data.channel,
            # Sorted so that the stored string is deterministic for a given set of OOVs
            oovs=" ".join(sorted(data.oovs)),
            normalized_text=" ".join(data.normalized_text),
            text=data.text,
            num_frames=num_frames,
            file_id=file.id,
            speaker_id=speaker,
        )





[docs]
class CorpusWorkflow(MfaSqlBase):
    """

    Database class for storing information about a particular workflow (alignment, transcription, etc)

    Parameters
    ----------
    id: int
        Primary key
    name: str
        Name of the workflow, unique across the table
    workflow_type: :class:`~montreal_forced_aligner.data.WorkflowType`
        Workflow type
    working_directory: :class:`~pathlib.Path`
        Directory associated with the workflow; ``lda_mat_path`` is resolved inside it
    time_stamp: :class:`datetime.datetime`
        Time stamp for the workflow run, filled in by the database server on insert
    current: bool
        Flag defaulting to False; presumably marks the currently active workflow (TODO confirm)
    done: bool
        Flag defaulting to False; presumably marks a finished workflow (TODO confirm)
    dirty: bool
        Flag defaulting to False; presumably marks a workflow that ended in a bad state
        (TODO confirm)
    alignments_collected: bool
        Flag defaulting to False; presumably set once alignments have been loaded into the
        database (TODO confirm)
    score: float
        Log likelihood or other score for the workflow run
    phone_intervals: list[:class:`~montreal_forced_aligner.db.PhoneInterval`]
        Phone intervals linked to the workflow
    word_intervals: list[:class:`~montreal_forced_aligner.db.WordInterval`]
        Word intervals linked to the workflow
    """

    __tablename__ = "corpus_workflow"

    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String, unique=True, index=True)
    workflow_type = Column(Enum(WorkflowType), nullable=False, index=True)
    working_directory = Column(PathType, nullable=False)
    time_stamp = Column(DateTime, nullable=False, server_default=sqlalchemy.func.now(), index=True)
    current = Column(Boolean, nullable=False, default=False, index=True)
    done = Column(Boolean, nullable=False, default=False, index=True)
    dirty = Column(Boolean, nullable=False, default=False, index=True)
    alignments_collected = Column(Boolean, nullable=False, default=False, index=True)
    score = Column(Float, nullable=True)

    # Intervals are removed together with the workflow that generated them
    phone_intervals = relationship(
        "PhoneInterval",
        back_populates="workflow",
        order_by="PhoneInterval.begin",
        collection_class=ordering_list("begin"),
        cascade="all, delete",
    )

    word_intervals = relationship(
        "WordInterval",
        back_populates="workflow",
        order_by="WordInterval.begin",
        collection_class=ordering_list("begin"),
        cascade="all, delete",
    )

    @property
    def lda_mat_path(self) -> Path:
        """Path to ``lda.mat`` inside the workflow's working directory"""
        return self.working_directory.joinpath("lda.mat")




[docs]
class PhoneInterval(MfaSqlBase):
    """
    Database class for storing information about aligned phone intervals

    Parameters
    ----------
    id: int
        Primary key
    begin: float
        Beginning timestamp of the interval
    end: float
        Ending timestamp of the interval
    duration: float
        Calculated duration of the interval
    phone_goodness: float
        Confidence score, log-likelihood, etc for the phone interval
    phone_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.Phone`
    phone: :class:`~montreal_forced_aligner.db.Phone`
        Phone of the interval
    utterance_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.Utterance`
    utterance: :class:`~montreal_forced_aligner.db.Utterance`
        Utterance of the interval
    word_interval_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.WordInterval`
    word_interval: :class:`~montreal_forced_aligner.db.WordInterval`
        Word interval that is associated with the phone interval
    workflow_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.CorpusWorkflow`
    workflow: :class:`~montreal_forced_aligner.db.CorpusWorkflow`
        Workflow that generated the phone interval
    """

    __tablename__ = "phone_interval"

    id = Column(Integer, primary_key=True, autoincrement=True)
    begin = Column(Float, nullable=False, index=True)
    end = Column(Float, nullable=False)
    phone_goodness = Column(Float, nullable=True)
    # Database-side computed column; only used for SQL expressions (see ``duration``)
    _duration = sqlalchemy.orm.deferred(
        Column("duration", Float, sqlalchemy.Computed('"end" - "begin"'))
    )

    phone_id = Column(
        Integer, ForeignKey("phone.id", ondelete="CASCADE"), index=True, nullable=False
    )
    phone = relationship("Phone", back_populates="phone_intervals")

    # Nullable: a phone interval does not have to belong to a word interval
    word_interval_id = Column(
        Integer, ForeignKey("word_interval.id", ondelete="CASCADE"), index=True, nullable=True
    )
    word_interval = relationship("WordInterval", back_populates="phone_intervals")

    utterance_id = Column(
        Integer, ForeignKey("utterance.id", ondelete="CASCADE"), index=True, nullable=False
    )
    utterance = relationship("Utterance", back_populates="phone_intervals")

    workflow_id = Column(
        Integer, ForeignKey("corpus_workflow.id", ondelete="CASCADE"), index=True, nullable=False
    )
    workflow = relationship("CorpusWorkflow", back_populates="phone_intervals")

    __table_args__ = (
        sqlalchemy.Index("phone_utterance_workflow_index", "utterance_id", "workflow_id"),
    )

    @hybrid_property
    def duration(self) -> float:
        """Duration of the interval, computed in Python on instances"""
        return self.end - self.begin

    @duration.expression
    def duration(cls):
        """SQL expression for the duration, backed by the computed ``duration`` column"""
        return cls._duration

    def __repr__(self):
        """String representation of the phone interval object"""
        return f"<PhoneInterval {self.phone.kaldi_label} ({self.workflow.workflow_type}) from {self.begin}-{self.end} for utterance {self.utterance_id}>"

    @classmethod
    def from_ctm(
        cls,
        interval: CtmInterval,
        utterance: Utterance,
        workflow_id: int,
        phone_id: typing.Optional[int] = None,
    ) -> PhoneInterval:
        """
        Construct a PhoneInterval from a CtmInterval object

        The interval's label is not stored: this class maps no ``label`` attribute, and the
        declarative constructor raises ``TypeError`` for keyword arguments that are not mapped
        attributes.  The phone is identified through ``phone_id`` instead.

        Parameters
        ----------
        interval: :class:`~montreal_forced_aligner.data.CtmInterval`
            CtmInterval containing data for the phone interval
        utterance: :class:`~montreal_forced_aligner.db.Utterance`
            Utterance object that the phone interval belongs to
        workflow_id: int
            Integer id for the workflow that generated the phone interval
        phone_id: int, optional
            Integer id of the :class:`~montreal_forced_aligner.db.Phone` for the interval;
            ``phone_id`` is not nullable, so when omitted the caller must set ``phone`` or
            ``phone_id`` on the returned object before it is flushed

        Returns
        -------
        :class:`~montreal_forced_aligner.db.PhoneInterval`
            Phone interval object
        """
        return cls(
            begin=interval.begin,
            end=interval.end,
            phone_id=phone_id,
            utterance=utterance,
            workflow_id=workflow_id,
        )

    def as_ctm(self) -> CtmInterval:
        """
        Generate a CtmInterval from the database object

        Returns
        -------
        :class:`~montreal_forced_aligner.data.CtmInterval`
            CTM interval object labelled with the phone's symbol and carrying
            ``phone_goodness`` as its confidence
        """
        return CtmInterval(self.begin, self.end, self.phone.phone, confidence=self.phone_goodness)





[docs]
class WordInterval(MfaSqlBase):
    """
    Database class for storing information about aligned word intervals

    Parameters
    ----------
    id: int
        Primary key
    begin: float
        Beginning timestamp of the interval
    end: float
        Ending timestamp of the interval
    duration: float
        ``end - begin``; computed in Python on instances and read from the computed
        ``duration`` column in queries
    word_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.Word`
    word: :class:`~montreal_forced_aligner.db.Word`
        Word of the interval
    pronunciation_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.Pronunciation`
    pronunciation: :class:`~montreal_forced_aligner.db.Pronunciation`
        Pronunciation of the word
    utterance_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.Utterance`
    utterance: :class:`~montreal_forced_aligner.db.Utterance`
        Utterance of the interval
    workflow_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.CorpusWorkflow`
    workflow: :class:`~montreal_forced_aligner.db.CorpusWorkflow`
        Workflow that generated the interval
    phone_intervals: list[:class:`~montreal_forced_aligner.db.PhoneInterval`]
        Phone intervals for the word interval
    """

    __tablename__ = "word_interval"

    id = Column(Integer, primary_key=True, autoincrement=True)
    begin = Column(Float, nullable=False, index=True)
    end = Column(Float, nullable=False)
    # Stored as the ``duration`` column, computed by the database as "end" - "begin";
    # wrapped in ``deferred`` and exposed through the ``duration`` hybrid property below.
    _duration = sqlalchemy.orm.deferred(
        Column("duration", Float, sqlalchemy.Computed('"end" - "begin"'))
    )

    utterance_id = Column(
        Integer, ForeignKey("utterance.id", ondelete="CASCADE"), index=True, nullable=False
    )
    utterance = relationship("Utterance", back_populates="word_intervals")

    word_id = Column(
        Integer, ForeignKey("word.id", ondelete="CASCADE"), index=True, nullable=False
    )
    word = relationship("Word", back_populates="word_intervals")

    # Unlike the other foreign keys, the pronunciation is optional (nullable)
    pronunciation_id = Column(Integer, ForeignKey("pronunciation.id"), index=True, nullable=True)
    pronunciation = relationship("Pronunciation", back_populates="word_intervals")

    workflow_id = Column(
        Integer, ForeignKey("corpus_workflow.id", ondelete="CASCADE"), index=True, nullable=False
    )
    workflow = relationship("CorpusWorkflow", back_populates="word_intervals")

    # NOTE(review): ``ordering_list`` maintains integer positions in the named attribute
    # when the collection is reordered (insert/remove/reorder); here that attribute is the
    # float ``begin`` timestamp -- confirm that mutating this collection cannot overwrite
    # timestamps with list indices.
    phone_intervals = relationship(
        "PhoneInterval",
        back_populates="word_interval",
        order_by="PhoneInterval.begin",
        collection_class=ordering_list("begin"),
        cascade="all, delete",
    )

    # Composite index over (utterance_id, workflow_id)
    __table_args__ = (
        sqlalchemy.Index("word_utterance_workflow_index", "utterance_id", "workflow_id"),
    )

    @hybrid_property
    def duration(self) -> float:
        """Length of the interval on a loaded instance, i.e. ``end`` minus ``begin``"""
        return self.end - self.begin

    @duration.expression
    def duration(cls):
        """Class-level (query) form of ``duration``: the computed ``duration`` column"""
        return cls._duration

    @classmethod
    def from_ctm(
        cls, interval: CtmInterval, utterance: Utterance, workflow_id: int
    ) -> WordInterval:
        """
        Construct a WordInterval from a CtmInterval object

        The object is only constructed here; it is not added to any session.

        Parameters
        ----------
        interval: :class:`~montreal_forced_aligner.data.CtmInterval`
            CtmInterval containing data for the word interval
        utterance: :class:`~montreal_forced_aligner.db.Utterance`
            Utterance object that the word interval belongs to
        workflow_id: int
            Integer id for the workflow that generated the word interval

        Returns
        -------
        :class:`~montreal_forced_aligner.db.WordInterval`
            Word interval object
        """
        # NOTE(review): this class declares no ``label`` attribute, so with SQLAlchemy's
        # default declarative constructor the ``label`` keyword below is expected to raise
        # TypeError; the non-nullable ``word_id`` is also never populated.  How the CTM
        # label should be resolved to a Word row cannot be determined from this class
        # alone -- confirm the intended mapping before relying on this constructor.
        # ``cls`` (rather than the hard-coded class name) keeps this alternate constructor
        # correct for subclasses.
        return cls(
            begin=interval.begin,
            end=interval.end,
            label=interval.label,
            utterance=utterance,
            workflow_id=workflow_id,
        )

    def as_ctm(self) -> CtmInterval:
        """
        Generate a CtmInterval from the database object

        The interval is labelled with the ``word`` value of the related word.

        Returns
        -------
        :class:`~montreal_forced_aligner.data.CtmInterval`
            CTM interval object
        """
        return CtmInterval(self.begin, self.end, self.word.word)





[docs]
class Job(MfaSqlBase):
    """
    Database class for storing information about multiprocessing jobs

    Besides the mapped columns and relationships, the class offers helpers that build the
    per-job (and per-dictionary) file paths, all of which embed the job's ``id``.

    Parameters
    ----------
    id: int
        Primary key
    corpus_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.Corpus`
    corpus: :class:`~montreal_forced_aligner.db.Corpus`
        Corpus
    utterances: list[:class:`~montreal_forced_aligner.db.Utterance`]
        Utterances associated with the job
    symbols: list[:class:`~montreal_forced_aligner.db.M2M2Job`]
        Symbols associated with the job in training phonetisaurus models
    words: list[:class:`~montreal_forced_aligner.db.Word2Job`]
        Words associated with the job in training phonetisaurus models
    dictionaries: list[:class:`~montreal_forced_aligner.db.Dictionary`]
        Dictionaries associated with the job
    """

    __tablename__ = "job"

    id = Column(Integer, primary_key=True, autoincrement=True)

    corpus_id = Column(Integer, ForeignKey("corpus.id"), index=True, nullable=True)
    corpus = relationship("Corpus", back_populates="jobs")
    utterances = relationship("Utterance", back_populates="job")

    symbols = relationship("M2M2Job", back_populates="job")

    words = relationship("Word2Job", back_populates="job")

    dictionaries = relationship("Dictionary", secondary=Dictionary2Job, back_populates="jobs")

    def __str__(self):
        """Short human-readable tag containing the job id"""
        return f"<Job {self.id}>"

    @property
    def has_dictionaries(self) -> bool:
        """Whether at least one dictionary is associated with the job"""
        return bool(self.dictionaries)

    @property
    def dictionary_ids(self) -> typing.List[int]:
        """Ids of the dictionaries associated with the job"""
        ids = []
        for dictionary in self.dictionaries:
            ids.append(dictionary.id)
        return ids

    def construct_feature_archive(
        self, working_directory: Path, dictionary_id: typing.Optional[int] = None, **kwargs
    ):
        """
        Build a FeatureArchive over the job's "feats" scp file

        Optional inputs are looked up on disk and passed as ``None`` when missing: the
        "trans" and "vad" scp files in the corpus's current subset directory, and
        ``lda.mat`` in ``working_directory``.  The utt2spk mapping is loaded only when the
        "trans" scp file exists.  ``deltas=True`` is always passed to the archive.

        Parameters
        ----------
        working_directory: :class:`~pathlib.Path`
            Directory checked for ``lda.mat``
        dictionary_id: int, optional
            Dictionary ID used when constructing the paths
        **kwargs
            Extra keyword arguments forwarded to the FeatureArchive constructor

        Returns
        -------
        FeatureArchive
            Archive for the job's features
        """
        transform_path = self.construct_path(
            self.corpus.current_subset_directory, "trans", "scp", dictionary_id
        )
        if transform_path.exists():
            mapping_path = self.construct_path(
                self.corpus.current_subset_directory, "utt2spk", "scp", dictionary_id
            )
            utt2spk = KaldiMapping()
            utt2spk.load(mapping_path)
        else:
            # Without a transform file there is no use for the speaker mapping either
            transform_path = None
            utt2spk = None

        lda_mat_path = working_directory / "lda.mat"
        lda_mat_path = lda_mat_path if lda_mat_path.exists() else None

        feat_path = self.construct_path(
            self.corpus.current_subset_directory, "feats", "scp", dictionary_id=dictionary_id
        )

        vad_path = self.construct_path(
            self.corpus.current_subset_directory, "vad", "scp", dictionary_id=dictionary_id
        )
        vad_path = vad_path if vad_path.exists() else None

        return FeatureArchive(
            feat_path,
            utt2spk=utt2spk,
            lda_mat_file_name=lda_mat_path,
            transform_file_name=transform_path,
            vad_file_name=vad_path,
            deltas=True,
            **kwargs,
        )

    @property
    def wav_scp_path(self) -> Path:
        """Path of the job's "wav" scp file in the corpus split directory"""
        split_directory = self.corpus.split_directory
        return self.construct_path(split_directory, "wav", "scp")

    @property
    def segments_scp_path(self) -> Path:
        """Path of the job's "segments" scp file in the corpus split directory"""
        split_directory = self.corpus.split_directory
        return self.construct_path(split_directory, "segments", "scp")

    @property
    def utt2spk_scp_path(self) -> Path:
        """Path of the job's "utt2spk" scp file in the corpus split directory"""
        split_directory = self.corpus.split_directory
        return self.construct_path(split_directory, "utt2spk", "scp")

    @property
    def feats_scp_path(self) -> Path:
        """Path of the job's "feats" scp file in the corpus split directory"""
        split_directory = self.corpus.split_directory
        return self.construct_path(split_directory, "feats", "scp")

    @property
    def feats_ark_path(self) -> Path:
        """Path of the job's "feats" ark file in the corpus split directory"""
        split_directory = self.corpus.split_directory
        return self.construct_path(split_directory, "feats", "ark")

    @property
    def per_dictionary_feats_scp_paths(self) -> typing.Dict[int, Path]:
        """Per-dictionary "feats" scp paths in the current subset directory, keyed by dictionary id"""
        return {
            dictionary.id: self.construct_path(
                self.corpus.current_subset_directory, "feats", "scp", dictionary.id
            )
            for dictionary in self.dictionaries
        }

    @property
    def per_dictionary_utt2spk_scp_paths(self) -> typing.Dict[int, Path]:
        """Per-dictionary "utt2spk" scp paths in the current subset directory, keyed by dictionary id"""
        return {
            dictionary.id: self.construct_path(
                self.corpus.current_subset_directory, "utt2spk", "scp", dictionary.id
            )
            for dictionary in self.dictionaries
        }

    @property
    def per_dictionary_spk2utt_scp_paths(self) -> typing.Dict[int, Path]:
        """Per-dictionary "spk2utt" scp paths in the current subset directory, keyed by dictionary id"""
        return {
            dictionary.id: self.construct_path(
                self.corpus.current_subset_directory, "spk2utt", "scp", dictionary.id
            )
            for dictionary in self.dictionaries
        }

    @property
    def per_dictionary_cmvn_scp_paths(self) -> typing.Dict[int, Path]:
        """Per-dictionary "cmvn" scp paths in the current subset directory, keyed by dictionary id"""
        return {
            dictionary.id: self.construct_path(
                self.corpus.current_subset_directory, "cmvn", "scp", dictionary.id
            )
            for dictionary in self.dictionaries
        }

    @property
    def per_dictionary_trans_scp_paths(self) -> typing.Dict[int, Path]:
        """Per-dictionary "trans" scp paths in the current subset directory, keyed by dictionary id"""
        return {
            dictionary.id: self.construct_path(
                self.corpus.current_subset_directory, "trans", "scp", dictionary.id
            )
            for dictionary in self.dictionaries
        }

    @property
    def per_dictionary_text_int_scp_paths(self) -> typing.Dict[int, Path]:
        """Per-dictionary "text" int.scp paths in the current subset directory, keyed by dictionary id"""
        return {
            dictionary.id: self.construct_path(
                self.corpus.current_subset_directory, "text", "int.scp", dictionary.id
            )
            for dictionary in self.dictionaries
        }

    def construct_path(
        self, directory: Path, identifier: str, extension: str, dictionary_id: int = None
    ) -> Path:
        """
        Helper function for constructing job-specific (optionally dictionary-specific) paths

        The file name is ``identifier.job_id.extension``, or
        ``identifier.dictionary_id.job_id.extension`` when a dictionary ID is given.

        Parameters
        ----------
        directory: :class:`~pathlib.Path`
            Directory to use as the root
        identifier: str
            Identifier for the path name, like ali or acc
        extension: str
            Extension of the path without the leading dot, like scp or ark
        dictionary_id: int, optional
            Dictionary ID to construct path for

        Returns
        -------
        Path
            Path
        """
        if dictionary_id is not None:
            file_name = f"{identifier}.{dictionary_id}.{self.id}.{extension}"
        else:
            file_name = f"{identifier}.{self.id}.{extension}"
        return directory.joinpath(file_name)

    def construct_path_dictionary(self, directory: Path, identifier: str, extension: str):
        """
        Build the job's path for every associated dictionary

        Parameters
        ----------
        directory: :class:`~pathlib.Path`
            Directory to use as the root
        identifier: str
            Identifier for the path name, like ali or acc
        extension: str
            Extension of the path without the leading dot, like scp or ark

        Returns
        -------
        dict[int, Path]
            Result of :meth:`construct_path` for each dictionary ID of the job
        """
        return {
            dictionary_id: self.construct_path(directory, identifier, extension, dictionary_id)
            for dictionary_id in self.dictionary_ids
        }

    def construct_dictionary_dependent_paths(
        self, directory: Path, identifier: str, extension: str
    ) -> typing.Dict[int, Path]:
        """
        Helper function for constructing paths that depend only on the dictionaries of the job, and not the job name itself.
        These paths should be merged with all other jobs to get a full set of dictionary paths.

        Parameters
        ----------
        directory: :class:`~pathlib.Path`
            Directory to use as the root
        identifier: str
            Identifier for the path name, like ali or acc
        extension: str
            Extension of the path without the leading dot, like scp or ark

        Returns
        -------
        dict[int, Path]
            Path for each dictionary, named ``identifier.dictionary_id.extension``
        """
        return {
            dictionary_id: directory.joinpath(f"{identifier}.{dictionary_id}.{extension}")
            for dictionary_id in self.dictionary_ids
        }





[docs]
class M2MSymbol(MfaSqlBase):
    """
    Database class for storing many-to-many G2P training information

    Parameters
    ----------
    id: int
        Primary key
    symbol: str
        Symbol
    total_order: int
        Summed order of graphemes and phones
    max_order: int
        Maximum order between graphemes and phones
    grapheme_order: int
        Grapheme order
    phone_order: int
        Phone order
    weight: float
        Weight of arcs
    jobs: list[:class:`~montreal_forced_aligner.db.M2M2Job`]
        Jobs that use this symbol
    """

    __tablename__ = "m2m_symbol"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # All descriptive columns below are required (non-null)
    symbol = Column(String, nullable=False)
    total_order = Column(Integer, nullable=False)
    max_order = Column(Integer, nullable=False)
    grapheme_order = Column(Integer, nullable=False)
    phone_order = Column(Integer, nullable=False)
    weight = Column(Float, nullable=False)

    # Association rows linking this symbol to jobs, paired with ``M2M2Job.m2m_symbol``
    jobs = relationship(
        "M2M2Job",
        back_populates="m2m_symbol",
    )




[docs]
class M2M2Job(MfaSqlBase):
    """
    Mapping class between :class:`~montreal_forced_aligner.db.M2MSymbol`
    and :class:`~montreal_forced_aligner.db.Job`

    Parameters
    ----------
    m2m_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.M2MSymbol`
    job_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.Job`
    m2m_symbol: :class:`~montreal_forced_aligner.db.M2MSymbol`
        M2MSymbol object
    job: :class:`~montreal_forced_aligner.db.Job`
        Job object
    """

    __tablename__ = "m2m_job"
    # (m2m_id, job_id) together form the composite primary key of the association row
    m2m_id = Column(ForeignKey("m2m_symbol.id"), primary_key=True)
    job_id = Column(ForeignKey("job.id"), primary_key=True)
    # ORM sides of the two foreign keys, paired with ``M2MSymbol.jobs`` and ``Job.symbols``
    m2m_symbol = relationship("M2MSymbol", back_populates="jobs")
    job = relationship("Job", back_populates="symbols")




[docs]
class Word2Job(MfaSqlBase):
    """
    Mapping class between :class:`~montreal_forced_aligner.db.Word`
    and :class:`~montreal_forced_aligner.db.Job`

    Parameters
    ----------
    word_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.Word`
    job_id: int
        Foreign key to :class:`~montreal_forced_aligner.db.Job`
    training: bool
        Indexed boolean flag stored on the association (presumably marks whether the word
        is part of the training data -- confirm against callers)
    word: :class:`~montreal_forced_aligner.db.Word`
        Word object
    job: :class:`~montreal_forced_aligner.db.Job`
        Job object
    """

    __tablename__ = "word_job"

    # (word_id, job_id) together form the composite primary key of the association row
    word_id = Column(ForeignKey("word.id"), primary_key=True)
    job_id = Column(ForeignKey("job.id"), primary_key=True)
    training = Column(Boolean, index=True)
    # ORM sides of the two foreign keys, paired with ``Word.job`` and ``Job.words``
    word = relationship("Word", back_populates="job")
    job = relationship("Job", back_populates="words")