From bbb7e353842f4d1a40f1044638d0f5575bd6334a Mon Sep 17 00:00:00 2001 From: LiamSchilling Date: Tue, 24 Feb 2026 00:53:05 -0500 Subject: [PATCH 01/18] feat(FST)!: implement FST trim, inversion, expanding input strings, prefix closure, union --- utility/fst_object.py | 296 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 294 insertions(+), 2 deletions(-) diff --git a/utility/fst_object.py b/utility/fst_object.py index 5d60123..e1dc3d2 100644 --- a/utility/fst_object.py +++ b/utility/fst_object.py @@ -1,13 +1,23 @@ -"""A class defining the Finite State Transducer. Copyright (C) 2019 Alena -Aksenova. +"""A class defining the Finite State Transducer. +Copyright (C) 2019 Alena Aksenova. +Copyright (C) 2026 William (Liam) Schilling. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. + +Modified by William (Liam) Schilling (Feb. 2026): + +Implemented fundemental finite-state operations, +taking `FST` to be a general (potentially nondeterministic) finite-state transducer +that accepts a relation over strings. +Specifically, added `fresh_state`, `encode_state`, `trim_inaccessible`, `trim_useless`, `trim`, +`invert`, `expand`, `prefix_closure`, and `union`. """ from copy import deepcopy +from queue import Queue class FST: """A class representing finite state transducers. @@ -81,3 +91,285 @@ def copy_fst(self): T.stout = deepcopy(self.stout) return T + + def fresh_state(self, name_prefix): + """Finds a name that is not the name of a state already in the FST. + Specifically, returns the first available name of the form `f"{name_prefix}.{i}"` + where `i` is an integer. + + Args: + name_prefix (str): guaranteed to be a prefix of the return value. + + Returns: + str: a name that is guaranteed not to be the name of a state in the FST. 
+ """ + i = 0 + while True: + name = f"{name_prefix}.{i}" + if name not in self.Q: + return name + i += 1 + + @staticmethod + def encode_state(*args): + """Returns a name that encodes the values of all the passed arguments. + The encoding is guaranteed to be one-to-one as long as + the string representations of the arguments do not contain `;`, + except for from previous invokations of this function. + + Args: + *args: information to be encoded as a string name. + + Return: + str: a name that encodes the values of the arguments. + """ + return f"<{args.map(str).join(";")}>" + +def trim_inaccessible(F): + """Removes states and transitions from the FST + that are not accessible from the initial state. + + Args: + F (FST): the original FST. + + Returns: + FST: the trimmed FST. + """ + # initialize the new FST + G = FST(deepcopy(F.Sigma), deepcopy(F.Gamma)) + G.Q, G.E, G.qe, G.stout = [], [], F.qe, {} + + # perform a breadth-first traversal of the original FST from the initial state + worklist = Queue() + worklist.put(F.qe) + while not worklist.empty(): + curr_q = worklist.pop() + if curr_q not in G.Q: + G.Q.append(curr_q) + for [q, u, v, q_] in F.E: + if curr_q == q: + G.E.insert([q, u, v, q_]) + worklist.put(q_) + + # copy over the final outputs of the states that remain + for q, w in F.stout: + if q in G.Q: + G.stoud[q] = w + + return G + +def trim_useless(F): + """Removes states and transitions from the FST + from which no accepting state is accessible, + except for the initial state, which is not allowed to be removed. + + Args: + F (FST): the original FST. + + Returns: + FST: the trimmed FST. 
+ """ + # initialize the new FST + G = FST(deepcopy(F.Sigma), deepcopy(F.Gamma)) + G.Q, G.E, G.qe, G.stout = [], [], F.qe, {} + + # perform a breadth-first traversal of the original FST from the accepting states + worklist = Queue() + for q in F.stout.keys(): + worklist.put(q) + while not worklist.empty(): + curr_q_ = worklist.pop() + if curr_q_ not in G.Q: + G.Q.append(curr_q_) + for [q, u, v, q_] in F.E: + if curr_q_ == q_: + G.E.insert([q, u, v, q_]) + worklist.put(q) + + # add back the initial state if it was not traversed already + if G.qe not in G.Q: + G.Q.append(G.qe) + + # copy over the final outputs of the states that remain + for q, w in F.stout: + if q in G.Q: + G.stoud[q] = w + + return G + +def trim(F): + """Removes states and transitions from the FST + that are never traversed by an accepting run. + + Args: + F (FST): the original FST. + + Returns: + FST: the trimmed FST. + """ + return trim_useless(trim_inaccessible(F)) + +def invert(F): + """Given an FST that accepts the relation `R`, + returns an FST that accepts the relation `{ (u, v) | (v, u) ∈ R }`. + + Args: + F (FST): the original FST. + + Returns: + FST: the inverted FST. + """ + # initialize the new FST + G = FST(deepcopy(F.Sigma), deepcopy(F.Gamma)) + G.Q, G.E, G.qe, G.stout = deepcopy(F.Q), [], F.qe, {} + + # copy over the transitions with swapped input an output strings + for [q, u, v, q_] in G.E: + G.E.append([q, v, u, q_]) + + # account for final outputs by turning them into transitions to new accepting states + for q, w in F.stout: + G.Q.append(q_ := G.fresh_state_name(q)) + G.E.append([q, w, "", q_]) + G.stout[q_] = "" + + return G + +def expand(F): + """Expands transitions with multi-character input strings + into non-accepting chains of transitions with single-character input strings. + + Args: + F (FST): the original FST. + + Returns: + FST: an FST that accepts the same relation but has no multi-character input strings. 
+ """ + # initialize the new FST + G = FST(deepcopy(F.Sigma), deepcopy(F.Gamma)) + Q_set, E_set, G.qe, G.stout = set(), set(), FST.encode_state(F.qe, ""), {} + + # Construct the new set of states and transitions. + # Every state `q` is mapped to many new states ``, + # where `u` is every prefix of the outgoing input strings from `q`. + # Every transition `[q, u, v, q']` is mapped to a series of new transitions + # of the form `[, c, "", ]`, where `wc` is a prefix of `u`, + # along with the outputting transition `[, "", v, ]`. + for q in F.Q: + Q_set.insert(FST.encode_state(q, "")) + for [q, u, v, q_] in F.E: + for i in range(len(u)): + Q_set.insert(FST.encode_state(q, u[:i+1])) + E_set.insert([FST.encode_state(q, u[:i]), u[i], "", FST.encode_state(q, u[:i+1])]) + E_set.insert([FST.encode_state(q, u), "", v, FST.encode_state(q_, "")]) + + # copy over the final outputs + for q, w in F.stout: + G.stout[FST.encode_state(q, "")] = w + + G.Q, G.E = list(Q_set), list(E_set) + return G + +def prefix_closure(F): + """Given an FST whose domain is the language `L`, + returns an FST whose domain is the language `prefixes(L)`. + How the function treats FST outputs is currently underspecified, though we guarantee that + the relation accepted by the original FST is a subset of the relation accepted by the new FST. + + Args: + F (FST): the original FST. + + Returns: + FST: the new FST. + """ + # construct an FST where exactly the prefix closure of the original domain has valid runs + F = expand(trim(F)) + + # mark every state as accepting + for q in F.Q: + if q not in F.stout: + F.stout[q] = "" + + return F + +def union(F, G): + """Given FSTs that accept the relations `RF` and `RG`, respectively, + returns an FST that accepts the relation `RF ∪ RG`. + + Args: + F (FST): the left-hand original FST. + G (FST): the right-hand original FST. + + Returns: + FST: the union FST. 
+ """ + # initialize the new FST + H = FST(list(set(F.Sigma).union(G.Sigma)), list(set(F.Gamma).union(G.Gamma))) + H.Q, H.E, H.qe, H.stout = [], [], "q0", {} + + # create epsilon transitions to nondeterministically choose between running `F` and `G` + H.E.append(["q0", "", "", FST.encode_state("LEFT", F.qe)]) + H.E.append(["q0", "", "", FST.encode_state("RIGHT", G.qe)]) + + # copy over the states from both `F` and `G` + for qf in F.Q: + H.Q.append(FST.encode_state("LEFT", qf)) + for qg in G.Q: + H.Q.append(FST.encode_state("RIGHT", qg)) + + # copy over the transitions from both `F` and `G` + for [qf, u, v, qf_] in F.E: + H.E.append([FST.encode_state("LEFT", qf), u, v, FST.encode_state("LEFT", qf_)]) + for [qg, u, v, qg_] in G.E: + H.E.append([FST.encode_state("RIGHT", qg), u, v, FST.encode_state("RIGHT", qg_)]) + + # copy over the final outputs from both `F` and `G` + for qf, w in F.stout: + H.stout[FST.encode_state("LEFT", qf)] = w + for qg, w in G.stout: + H.stout[FST.encode_state("RIGHT", qg)] = w + + return H + +def intersect(F, G): + """Given FSTs that accept the relations `RF` and `RG`, respectively, + returns an FST that accepts the relation `RF ∩ RG`. + + Args: + F (FST): the left-hand original FST. + G (FST): the right-hand original FST. + + Returns: + FST: the intersection FST. + """ + raise NotImplementedError + +def compose(F, G): + """Given FSTs that accept the relations `RF` and `RG`, respectively, + returns an FST that accepts the relation `{ (u, v) | ∃ w, (u, w) ∈ RG ∧ (w, v) ∈ RF }`. + In the special case that `RF` and `RG` are both subsequential functions + (which is guaranteed if both original FSTs are deterministic), + this has the effect of typical function composition. + + Args: + F (FST): the left-hand (second applied) original FST. + G (FST): the right-hand (first applied) original FST. + + Returns: + FST: the composition FST. 
+ """ + raise NotImplementedError + +def determinize(F): + """Turns a nondeterministic FST that recognizes a subsequential function + into a deterministic FST that recognizes the same function. + A deterministic FST has only single-character input strings, + and at most one outgoing transition with each input string from each state. + + Args: + F (FST): the original functional FST. + + Returns: + FST: an FST that accepts the same function but is deterministic. + """ + raise NotImplementedError From d6be6732c35c1f4cc945acd983ef674273f8df9c Mon Sep 17 00:00:00 2001 From: LiamSchilling Date: Tue, 24 Feb 2026 01:38:42 -0500 Subject: [PATCH 02/18] docs --- utility/fst_object.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utility/fst_object.py b/utility/fst_object.py index e1dc3d2..39e698f 100644 --- a/utility/fst_object.py +++ b/utility/fst_object.py @@ -9,7 +9,7 @@ Modified by William (Liam) Schilling (Feb. 2026): -Implemented fundemental finite-state operations, +Implemented core finite-state operations, taking `FST` to be a general (potentially nondeterministic) finite-state transducer that accepts a relation over strings. Specifically, added `fresh_state`, `encode_state`, `trim_inaccessible`, `trim_useless`, `trim`, From cb47b6151102f820a87d60e021ebe7562a373c86 Mon Sep 17 00:00:00 2001 From: LiamSchilling Date: Tue, 24 Feb 2026 01:40:54 -0500 Subject: [PATCH 03/18] docs --- utility/fst_object.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utility/fst_object.py b/utility/fst_object.py index 39e698f..2506571 100644 --- a/utility/fst_object.py +++ b/utility/fst_object.py @@ -118,7 +118,7 @@ def encode_state(*args): except for from previous invokations of this function. Args: - *args: information to be encoded as a string name. + *args: information to be encoded as a state name. Return: str: a name that encodes the values of the arguments. 
From a28ee969393323512bc0f019908efe4717aec27e Mon Sep 17 00:00:00 2001 From: LiamSchilling Date: Tue, 24 Feb 2026 22:56:23 -0500 Subject: [PATCH 04/18] feat: make invariants explicit and improve implementations to adhere to better invariants --- utility/fst_object.py | 149 +++++++++++++++++++++++++++++++++++------- 1 file changed, 125 insertions(+), 24 deletions(-) diff --git a/utility/fst_object.py b/utility/fst_object.py index 2506571..9be322a 100644 --- a/utility/fst_object.py +++ b/utility/fst_object.py @@ -13,7 +13,34 @@ taking `FST` to be a general (potentially nondeterministic) finite-state transducer that accepts a relation over strings. Specifically, added `fresh_state`, `encode_state`, `trim_inaccessible`, `trim_useless`, `trim`, -`invert`, `expand`, `prefix_closure`, and `union`. +`invert`, `expand`, `prefix_closure`, `union`, `intersect`, `compose`, and `determinize`. + +These functions assume the following `FST` representation invariants, stated as type annotations: + Q (Annotated[list[str], "no duplicates"]) + Sigma (Annotated[list[Annotated[str, "length==1"]], "no duplicates"]) + Gamma (Annotated[list[Annotated[str, "length==1"]], "no duplicates"]) + qe (Annotated[str, "in Q"]) + E (Annotated[list[ + Annotated[str, "in Q"], + Annotated[str, "alphabet is Sigma"], + Annotated[str, "alphabet is Gamma"], + Annotated[str, "in Q"] + ], "no duplicates"]) + stout (dict[ + Annotated[str, "in Q"], + Annotated[str, "alphabet is Gamma"] + ]) +As an additional note, we take states that are not keys of `stout` to be the rejecting states. +This is different from the `DFA` class in `dfa_object.py`, which marks accepting states separately. + +The functions explicitly track the following useful FST properties. +The docstrings declare which are ensured by and which are invariants of the implementations. 
+- Trimmedness: Every state and transition is traversed by some accepting run + (except in the edge case of the empty FST, which may only have one state and no transitions). +- Final-output emptiness: Every final output (values of `stout`) is the empty string. +- Input-string expansion: The input string of every transition is either a character or empty. +- Determinism: The input string of every transition is a character, + and for each state, every character is the input string of no more than one outgoing transition. """ from copy import deepcopy @@ -95,7 +122,7 @@ def copy_fst(self): def fresh_state(self, name_prefix): """Finds a name that is not the name of a state already in the FST. Specifically, returns the first available name of the form `f"{name_prefix}.{i}"` - where `i` is an integer. + where `i` is a nonnegative integer. Args: name_prefix (str): guaranteed to be a prefix of the return value. @@ -114,9 +141,13 @@ def fresh_state(self, name_prefix): def encode_state(*args): """Returns a name that encodes the values of all the passed arguments. The encoding is guaranteed to be one-to-one as long as - the string representations of the arguments do not contain `;`, + the string representations of the arguments do not contain `;`, `<`, or `>`, except for from previous invokations of this function. + It is important that the client adhere to this condition in their FSTs, + or else the state encodings produced by functions in our library + could violate the "no duplicates" invariant of the state set. + Args: *args: information to be encoded as a state name. @@ -129,6 +160,12 @@ def trim_inaccessible(F): """Removes states and transitions from the FST that are not accessible from the initial state. + Invariants: + trimmedness + final-output emptiness + input-string expansion + determinism + Args: F (FST): the original FST. 
@@ -163,6 +200,12 @@ def trim_useless(F): from which no accepting state is accessible, except for the initial state, which is not allowed to be removed. + Invariants: + trimmedness + final-output emptiness + input-string expansion + determinism + Args: F (FST): the original FST. @@ -193,7 +236,7 @@ def trim_useless(F): # copy over the final outputs of the states that remain for q, w in F.stout: if q in G.Q: - G.stoud[q] = w + G.stout[q] = w return G @@ -201,6 +244,14 @@ def trim(F): """Removes states and transitions from the FST that are never traversed by an accepting run. + Ensures: + trimmedness + + Invariants: + final-output emptiness + input-string expansion + determinism + Args: F (FST): the original FST. @@ -213,6 +264,12 @@ def invert(F): """Given an FST that accepts the relation `R`, returns an FST that accepts the relation `{ (u, v) | (v, u) ∈ R }`. + Ensures: + final-output emptiness + + Invariants: + trimmedness + Args: F (FST): the original FST. @@ -227,11 +284,15 @@ def invert(F): for [q, u, v, q_] in G.E: G.E.append([q, v, u, q_]) - # account for final outputs by turning them into transitions to new accepting states + # account for nonempty final outputs by turning them into transitions to new accepting states, + # if empty then just copy the final output verbatim for q, w in F.stout: - G.Q.append(q_ := G.fresh_state_name(q)) - G.E.append([q, w, "", q_]) - G.stout[q_] = "" + if w == "": + G.stout[q] = "" + else: + G.Q.append(q_ := G.fresh_state_name(q)) + G.E.append([q, w, "", q_]) + G.stout[q_] = "" return G @@ -239,6 +300,14 @@ def expand(F): """Expands transitions with multi-character input strings into non-accepting chains of transitions with single-character input strings. + Ensures: + input-string expansion + + Invariants: + trimmedness + final-output emptiness + determinism + Args: F (FST): the original FST. 
@@ -253,15 +322,15 @@ def expand(F): # Every state `q` is mapped to many new states ``, # where `u` is every prefix of the outgoing input strings from `q`. # Every transition `[q, u, v, q']` is mapped to a series of new transitions - # of the form `[, c, "", ]`, where `wc` is a prefix of `u`, - # along with the outputting transition `[, "", v, ]`. + # of the form `[, c, "", ]`, where `wc` is an incomplete prefix of `u`, + # along with the completing transition `[, c, v, ]`, where `wc` equals `u`. for q in F.Q: Q_set.insert(FST.encode_state(q, "")) for [q, u, v, q_] in F.E: - for i in range(len(u)): - Q_set.insert(FST.encode_state(q, u[:i+1])) - E_set.insert([FST.encode_state(q, u[:i]), u[i], "", FST.encode_state(q, u[:i+1])]) - E_set.insert([FST.encode_state(q, u), "", v, FST.encode_state(q_, "")]) + for i in range(1, len(u)): + Q_set.insert(FST.encode_state(q, u[:i])) + E_set.insert([FST.encode_state(q, u[:i-1]), u[i-1], "", FST.encode_state(q, u[:i])]) + E_set.insert([FST.encode_state(q, u[:-1]), u[-1:], v, FST.encode_state(q_, "")]) # copy over the final outputs for q, w in F.stout: @@ -276,19 +345,29 @@ def prefix_closure(F): How the function treats FST outputs is currently underspecified, though we guarantee that the relation accepted by the original FST is a subset of the relation accepted by the new FST. + Ensures: + trimmedness + input-string expansion + + Invariants: + final-output emptiness + determinism + Args: F (FST): the original FST. Returns: - FST: the new FST. + FST: the prefix-closure FST. 
""" # construct an FST where exactly the prefix closure of the original domain has valid runs F = expand(trim(F)) - # mark every state as accepting - for q in F.Q: - if q not in F.stout: - F.stout[q] = "" + # mark every state as accepting, + # unless the FST is just one rejecting state, which is the empty FST edge case + if not (len(F.Q) == 1 and len(F.stout) == 0): + for q in F.Q: + if q not in F.stout: + F.stout[q] = "" return F @@ -296,6 +375,11 @@ def union(F, G): """Given FSTs that accept the relations `RF` and `RG`, respectively, returns an FST that accepts the relation `RF ∪ RG`. + Invariants: + trimmedness + final-output emptiness + input-string expansion + Args: F (FST): the left-hand original FST. G (FST): the right-hand original FST. @@ -305,11 +389,10 @@ def union(F, G): """ # initialize the new FST H = FST(list(set(F.Sigma).union(G.Sigma)), list(set(F.Gamma).union(G.Gamma))) - H.Q, H.E, H.qe, H.stout = [], [], "q0", {} + H.Q, H.E, H.qe, H.stout = [], [], FST.encode_state("LEFT", F.qe), {} - # create epsilon transitions to nondeterministically choose between running `F` and `G` - H.E.append(["q0", "", "", FST.encode_state("LEFT", F.qe)]) - H.E.append(["q0", "", "", FST.encode_state("RIGHT", G.qe)]) + # create an epsilon transition to nondeterministically choose between running `F` and `G` + H.E.append([FST.encode_state("LEFT", F.qe), "", "", FST.encode_state("RIGHT", G.qe)]) # copy over the states from both `F` and `G` for qf in F.Q: @@ -335,6 +418,12 @@ def intersect(F, G): """Given FSTs that accept the relations `RF` and `RG`, respectively, returns an FST that accepts the relation `RF ∩ RG`. + Ensures: + ? + + Invariants: + ? + Args: F (FST): the left-hand original FST. G (FST): the right-hand original FST. @@ -351,6 +440,12 @@ def compose(F, G): (which is guaranteed if both original FSTs are deterministic), this has the effect of typical function composition. + Ensures: + ? + + Invariants: + ? 
+ Args: F (FST): the left-hand (second applied) original FST. G (FST): the right-hand (first applied) original FST. @@ -364,7 +459,13 @@ def determinize(F): """Turns a nondeterministic FST that recognizes a subsequential function into a deterministic FST that recognizes the same function. A deterministic FST has only single-character input strings, - and at most one outgoing transition with each input string from each state. + and for each state, at most one outgoing transition with every input string. + + Ensures: + ? + + Invariants: + ? Args: F (FST): the original functional FST. From 552414b7b5646ac132e1c52304ac21d31ba05dfd Mon Sep 17 00:00:00 2001 From: LiamSchilling Date: Mon, 2 Mar 2026 01:41:09 -0500 Subject: [PATCH 05/18] feat: separate `expand_final` implementation in `invert` into its own function --- utility/fst_object.py | 133 +++++++++++++++++++++++++----------------- 1 file changed, 81 insertions(+), 52 deletions(-) diff --git a/utility/fst_object.py b/utility/fst_object.py index 9be322a..844f7c3 100644 --- a/utility/fst_object.py +++ b/utility/fst_object.py @@ -13,7 +13,8 @@ taking `FST` to be a general (potentially nondeterministic) finite-state transducer that accepts a relation over strings. Specifically, added `fresh_state`, `encode_state`, `trim_inaccessible`, `trim_useless`, `trim`, -`invert`, `expand`, `prefix_closure`, `union`, `intersect`, `compose`, and `determinize`. +`expand_inputs`, `expand_final`, `invert`, `prefix_closure`, `union`, `intersect`, `compose`, +and `determinize`. 
These functions assume the following `FST` representation invariants, stated as type annotations: Q (Annotated[list[str], "no duplicates"]) @@ -174,25 +175,26 @@ def trim_inaccessible(F): """ # initialize the new FST G = FST(deepcopy(F.Sigma), deepcopy(F.Gamma)) - G.Q, G.E, G.qe, G.stout = [], [], F.qe, {} + Q_set, E_set, G.qe, G.stout = set(), set(), F.qe, {} # perform a breadth-first traversal of the original FST from the initial state worklist = Queue() worklist.put(F.qe) while not worklist.empty(): curr_q = worklist.pop() - if curr_q not in G.Q: - G.Q.append(curr_q) + if curr_q not in Q_set: + Q_set.insert(curr_q) for [q, u, v, q_] in F.E: if curr_q == q: - G.E.insert([q, u, v, q_]) + E_set.insert([q, u, v, q_]) worklist.put(q_) # copy over the final outputs of the states that remain for q, w in F.stout: - if q in G.Q: - G.stoud[q] = w + if q in Q_set: + G.stout[q] = w + G.Q, G.E = list(Q_set), list(E_set) return G def trim_useless(F): @@ -214,7 +216,7 @@ def trim_useless(F): """ # initialize the new FST G = FST(deepcopy(F.Sigma), deepcopy(F.Gamma)) - G.Q, G.E, G.qe, G.stout = [], [], F.qe, {} + Q_set, E_set, G.qe, G.stout = set(), set(), F.qe, {} # perform a breadth-first traversal of the original FST from the accepting states worklist = Queue() @@ -222,22 +224,23 @@ def trim_useless(F): worklist.put(q) while not worklist.empty(): curr_q_ = worklist.pop() - if curr_q_ not in G.Q: - G.Q.append(curr_q_) + if curr_q_ not in Q_set: + Q_set.insert(curr_q_) for [q, u, v, q_] in F.E: if curr_q_ == q_: - G.E.insert([q, u, v, q_]) + E_set.insert([q, u, v, q_]) worklist.put(q) # add back the initial state if it was not traversed already - if G.qe not in G.Q: - G.Q.append(G.qe) + if G.qe not in Q_set: + Q_set.insert(G.qe) # copy over the final outputs of the states that remain for q, w in F.stout: - if q in G.Q: + if q in Q_set: G.stout[q] = w + G.Q, G.E = list(Q_set), list(E_set) return G def trim(F): @@ -260,43 +263,7 @@ def trim(F): """ return 
trim_useless(trim_inaccessible(F)) -def invert(F): - """Given an FST that accepts the relation `R`, - returns an FST that accepts the relation `{ (u, v) | (v, u) ∈ R }`. - - Ensures: - final-output emptiness - - Invariants: - trimmedness - - Args: - F (FST): the original FST. - - Returns: - FST: the inverted FST. - """ - # initialize the new FST - G = FST(deepcopy(F.Sigma), deepcopy(F.Gamma)) - G.Q, G.E, G.qe, G.stout = deepcopy(F.Q), [], F.qe, {} - - # copy over the transitions with swapped input an output strings - for [q, u, v, q_] in G.E: - G.E.append([q, v, u, q_]) - - # account for nonempty final outputs by turning them into transitions to new accepting states, - # if empty then just copy the final output verbatim - for q, w in F.stout: - if w == "": - G.stout[q] = "" - else: - G.Q.append(q_ := G.fresh_state_name(q)) - G.E.append([q, w, "", q_]) - G.stout[q_] = "" - - return G - -def expand(F): +def expand_inputs(F): """Expands transitions with multi-character input strings into non-accepting chains of transitions with single-character input strings. @@ -339,6 +306,68 @@ def expand(F): G.Q, G.E = list(Q_set), list(E_set) return G +def expand_final(F): + """Expands final states with nonempty output strings + into a non-accepting state with a transition to a new accepting state. + + Ensures: + final-output emptiness + + Invariants: + trimmedness + input-string expansion + + Args: + F (FST): the original FST. + + Returns: + FST: an FST that accepts the same relation but has no nonempty final outputs. 
+ """ + # initialize the new FST + G = FST(deepcopy(F.Sigma), deepcopy(F.Gamma)) + G.Q, G.E, G.qe, G.stout = deepcopy(F.Q), deepcopy(F.E), F.qe, {} + + # account for nonempty final outputs by turning them into transitions to new accepting states, + # if empty then just copy the final output verbatim + for q, w in F.stout: + if w == "": + G.stout[q] = "" + else: + G.Q.append(q_ := G.fresh_state_name(q)) + G.E.append([q, "", w, q_]) + G.stout[q_] = "" + + return G + +def invert(F): + """Given an FST that accepts the relation `R`, + returns an FST that accepts the relation `{ (u, v) | (v, u) ∈ R }`. + + Ensures: + final-output emptiness + + Invariants: + trimmedness + + Args: + F (FST): the original FST. + + Returns: + FST: the inverted FST. + """ + # we need final outputs to be empty because the FST class does not support final inputs + F = expand_final(F) + + # initialize the new FST + G = FST(deepcopy(F.Sigma), deepcopy(F.Gamma)) + G.Q, G.E, G.qe, G.stout = deepcopy(F.Q), [], F.qe, deepcopy(F.stout) + + # copy over the transitions with swapped input an output strings + for [q, u, v, q_] in F.E: + G.E.append([q, v, u, q_]) + + return G + def prefix_closure(F): """Given an FST whose domain is the language `L`, returns an FST whose domain is the language `prefixes(L)`. @@ -360,7 +389,7 @@ def prefix_closure(F): FST: the prefix-closure FST. """ # construct an FST where exactly the prefix closure of the original domain has valid runs - F = expand(trim(F)) + F = expand_inputs(trim(F)) # mark every state as accepting, # unless the FST is just one rejecting state, which is the empty FST edge case From e1595b1312a2ffb85ea6fd2385a9fc92a399e52c Mon Sep 17 00:00:00 2001 From: LiamSchilling Date: Mon, 2 Mar 2026 02:15:14 -0500 Subject: [PATCH 06/18] feat: implment the acceptor (universal) FST, the rejector (empty) FST, concatenation, and kleene star. 
--- utility/fst_object.py | 154 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 151 insertions(+), 3 deletions(-) diff --git a/utility/fst_object.py b/utility/fst_object.py index 844f7c3..390530d 100644 --- a/utility/fst_object.py +++ b/utility/fst_object.py @@ -13,8 +13,8 @@ taking `FST` to be a general (potentially nondeterministic) finite-state transducer that accepts a relation over strings. Specifically, added `fresh_state`, `encode_state`, `trim_inaccessible`, `trim_useless`, `trim`, -`expand_inputs`, `expand_final`, `invert`, `prefix_closure`, `union`, `intersect`, `compose`, -and `determinize`. +`expand_inputs`, `expand_final`, `invert`, `concatenate`, `kleene_closure`, `prefix_closure`, +`union`, `intersect`, `compose`, and `determinize`. These functions assume the following `FST` representation invariants, stated as type annotations: Q (Annotated[list[str], "no duplicates"]) @@ -38,6 +38,7 @@ The docstrings declare which are ensured by and which are invariants of the implementations. - Trimmedness: Every state and transition is traversed by some accepting run (except in the edge case of the empty FST, which may only have one state and no transitions). + Because of this edge case, some functions must take special care to be trimmedness-preserving. - Final-output emptiness: Every final output (values of `stout`) is the empty string. - Input-string expansion: The input string of every transition is either a character or empty. - Determinism: The input string of every transition is a character, @@ -156,6 +157,68 @@ def encode_state(*args): str: a name that encodes the values of the arguments. """ return f"<{args.map(str).join(";")}>" + + def is_trim_but_empty(F): + """A helper function that checks + whether an FST is the edge case of the trimmedness condition. + That is, the empty FST with one rejecting state and no transitions. + + Args: + F (FST): the FST to be checked. 
+ + Returns: + bool: whether the FST has one rejecting state and no transitions. + """ + return len(F.Q) == 1 and len(F.E) == 0 and len(F.stout) == 0 + +def new_rejector(Sigma, Gamma): + """Creates an FST that rejects every string pair. + + Ensures: + trimmedness + final-output emptiness + input-string expansion + determinism + + Args: + Sigma (list): the input alphabet. + Gamma (list): the output alphabet. + + Returns: + FST: the rejector FST. + """ + # initialize the new FST + F = FST(Sigma, Gamma) + F.Q, F.E, F.qe, F.stout = ["q"], [], "q", {} + + return F + +def new_acceptor(Sigma, Gamma): + """Creates an FST that accepts every string pair. + + Ensures: + trimmedness + final-output emptiness + input-string expansion + + Args: + Sigma (list): the input alphabet. + Gamma (list): the output alphabet. + + Returns: + FST: the acceptor FST. + """ + # initialize the new FST + F = FST(Sigma, Gamma) + F.Q, F.E, F.qe, F.stout = ["q"], [], "q", {"q" : ""} + + # add transitions that allow writing any character to input or output + for c in Sigma: + F.E.append(["q", c, "", "q"]) + for c in Gamma: + F.E.append(["q", "", c, "q"]) + + return F def trim_inaccessible(F): """Removes states and transitions from the FST @@ -368,6 +431,91 @@ def invert(F): return G +def concatenate(F, G): + """Given FSTs that accept the relations `RF` and `RG`, respectively, + returns an FST that accepts the relation `RF · RG`. + That is, the relation of all string pairs that are the result of + concatenating a string pair from `RF` to a string pair from `RG`. + + Invariants: + trimmedness + input-string expansion + final-output emptiness + + Args: + F (FST): the left-hand original FST. + G (FST): the right-hand original FST. + + Returns: + FST: the concatenation FST. 
+ """ + # we need final outputs of the first machine to be empty + # so that we do not miss output upon traversal to the next machine + F = expand_final(F) + + # initialize the new FST + H = FST(list(set(F.Sigma).union(G.Sigma)), list(set(F.Gamma).union(G.Gamma))) + H.Q, H.E, H.qe, H.stout = [], [], FST.encode_state("LEFT", F.qe), {} + + # copy over the states from both `F` and `G` + for qf in F.Q: + H.Q.append(FST.encode_state("LEFT", qf)) + for qg in G.Q: + H.Q.append(FST.encode_state("RIGHT", qg)) + + # copy over the transitions from both `F` and `G` + for [qf, u, v, qf_] in F.E: + H.E.append([FST.encode_state("LEFT", qf), u, v, FST.encode_state("LEFT", qf_)]) + for [qg, u, v, qg_] in G.E: + H.E.append([FST.encode_state("RIGHT", qg), u, v, FST.encode_state("RIGHT", qg_)]) + + # for every final state, + # create an epsilon transition to nondeterministically begin running the next machine + for qf in F.stout.keys(): + H.E.append([FST.encode_state("LEFT", qf), "", "", FST.encode_state("RIGHT", G.qe)]) + + # cope over the final transitions of the other machine + for qg, w in G.stout: + H.stout[FST.encode_state("RIGHT", qg)] = w + + return H + +def kleene_closue(F): + """Given an FST that accepts the relation `R`, + returns an FST that accepts the relation `R*`. + That is, the relation of all string pairs that are the result of + concatenating many string pairs from `R`. + + Ensures: + final-output emptiness + + Invariants: + trimmedness + input-string expansion + + Args: + F (FST): the original FST. + + Returns: + FST: the kleene-closure FST. 
+ """ + # we need final outputs to be empty + # so that we do not miss output upon traversal back to the initial state + F = expand_final(F) + + # initialize the new FST + G = FST(deepcopy(F.Sigma), deepcopy(F.Gamma)) + G.Q, G.E, G.qe, G.stout = deepcopy(F.Q), deepcopy(F.E), F.qe, deepcopy(F.stout) + + # for every final state, + # create an epsilon transition to nondeterministically return to the initial state + for q in G.stout.keys(): + tr = [q, "", "", G.qe] + if tr not in G.E: + G.E.append(tr) + + return G + def prefix_closure(F): """Given an FST whose domain is the language `L`, returns an FST whose domain is the language `prefixes(L)`. @@ -393,7 +541,7 @@ def prefix_closure(F): # mark every state as accepting, # unless the FST is just one rejecting state, which is the empty FST edge case - if not (len(F.Q) == 1 and len(F.stout) == 0): + if not F.is_trim_but_empty(): for q in F.Q: if q not in F.stout: F.stout[q] = "" From 3c185b165083d58ed232812ae7438ec2cc5fc957 Mon Sep 17 00:00:00 2001 From: LiamSchilling Date: Mon, 2 Mar 2026 03:07:34 -0500 Subject: [PATCH 07/18] feat: draft implementation of intersect --- utility/fst_object.py | 58 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 6 deletions(-) diff --git a/utility/fst_object.py b/utility/fst_object.py index 390530d..06e1062 100644 --- a/utility/fst_object.py +++ b/utility/fst_object.py @@ -439,8 +439,8 @@ def concatenate(F, G): Invariants: trimmedness - input-string expansion final-output emptiness + input-string expansion Args: F (FST): the left-hand original FST. @@ -595,11 +595,9 @@ def intersect(F, G): """Given FSTs that accept the relations `RF` and `RG`, respectively, returns an FST that accepts the relation `RF ∩ RG`. - Ensures: - ? - Invariants: - ? + final-output emptiness + input-string expansion Args: F (FST): the left-hand original FST. @@ -608,7 +606,55 @@ def intersect(F, G): Returns: FST: the intersection FST. 
""" - raise NotImplementedError + # expanding final outputs beforehand makes this construction far easier, + # but it also means that determinism is not an invariant, + # whereas it would be in a more robust implementation + F, G = expand_final(F), expand_final(G) + + # initialize the new FST + H = FST(list(set(F.Sigma).union(G.Sigma)), list(set(F.Gamma).union(G.Gamma))) + Q_set, E_set, H.stout = set(), set(), {} + G.qe = FST.encode_state(F.qe, G.qe, ("", ""), ("", "")) + + # perform a breadth-first traversal of the original FSTs from the accepting states + worklist = Queue() + worklist.put(F.qe, G.qe, ("", ""), ("", "")) + while not worklist.empty(): + (curr_qf, curr_qg, (curr_uf, curr_vf), (curr_ug, curr_vg)) = worklist.pop() + new_q = FST.encode_state(curr_qf, curr_qg, (curr_uf, curr_vf), (curr_ug, curr_vg)) + if new_q not in Q_set: + Q_set.insert(new_q) + for [qf, uf, vf, qf_] in F.E: + for [qg, ug, vg, qg_] in G.E: + states_match = curr_qf == qf and curr_qg == qg + f_buffers_match = uf.startswith(curr_uf) and vf.startswith(curr_vf) + g_buffers_match = ug.stateswith(curr_ug) and vg.startswith(curr_vg) + if states_match and f_buffers_match and g_buffers_match: + uf_suff, vf_suff = uf[len(curr_uf):], vf[len(curr_vf):] + ug_suff, vg_suff = ug[len(curr_ug):], vg[len(curr_vg):] + if uf_suff == ug_suff and vf_suff == vg_suff: + new_q_ = FST.encode_state(qf_, qg_, ("", ""), ("", "")) + new_u, new_v = uf_suff, vf_suff + elif uf_suff.startswith(ug_suff) and vf_suff.startswith(vg_suff): + uf_buff, vf_buff = uf_suff[len(ug_suff):], vf_suff[len(vg_suff):] + new_q_ = FST.encode_state(qf, qg_, (uf_buff, vf_buff), ("", "")) + new_u, new_v = ug_suff, vg_suff + elif ug_suff.startswith(uf_suff) and vg_suff.startswith(vf_suff): + ug_buff, vg_buff = ug_suff[len(uf_suff):], vg_suff[len(vf_suff):] + new_q_ = FST.encode_state(qf_, qg, ("", ""), (uf_buff, vf_buff)) + new_u, new_v = uf_suff, vf_suff + E_set.insert([new_q, new_u, new_v, new_q_]) + worklist.put(new_q_) + + # copy over 
the shared final states + for qf, "" in F.stout: + for qg, "" in G.stout: + new_q = FST.encode_state(qf, qg, ("", ""), ("", "")) + if new_q in Q_set: + H.stout[new_q] = "" + + H.Q, H.E = list(Q_set), list(E_set) + return G def compose(F, G): """Given FSTs that accept the relations `RF` and `RG`, respectively, From 93a49de22c2dceb270dff04824b136a22e3e2b10 Mon Sep 17 00:00:00 2001 From: LiamSchilling Date: Sat, 28 Mar 2026 22:31:54 -0400 Subject: [PATCH 08/18] feat: implement extended `transition` function. --- utility/fst_object.py | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/utility/fst_object.py b/utility/fst_object.py index 06e1062..69bd033 100644 --- a/utility/fst_object.py +++ b/utility/fst_object.py @@ -1,20 +1,19 @@ """A class defining the Finite State Transducer. Copyright (C) 2019 Alena Aksenova. -Copyright (C) 2026 William (Liam) Schilling. +Copyright (C) 2026 William Schilling. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. -Modified by William (Liam) Schilling (Feb. 2026): +Modified by William (Liam) Schilling: Implemented core finite-state operations, taking `FST` to be a general (potentially nondeterministic) finite-state transducer -that accepts a relation over strings. -Specifically, added `fresh_state`, `encode_state`, `trim_inaccessible`, `trim_useless`, `trim`, -`expand_inputs`, `expand_final`, `invert`, `concatenate`, `kleene_closure`, `prefix_closure`, -`union`, `intersect`, `compose`, and `determinize`. +that accepts a relation over strings. Most notably, added +`trim`, `expand_inputs`, `expand_final`, `invert`, `concatenate`, `kleene_closure`, +`prefix_closure`, `union`, `intersect`, `compose`, and `determinize`. 
These functions assume the following `FST` representation invariants, stated as type annotations: Q (Annotated[list[str], "no duplicates"]) @@ -105,6 +104,32 @@ def rewrite(self, w): result = tuple(result) return result + + def transition(self, q, w): + """Traverses from the state `q` according to the string `w`, + and returns the resulting state. + Returns `None` if a missing transition is encountered. + + Args: + q: the start state. + w (str): a string to read. + + Returns: + the state reached by `w` from `q`, or `None`. + """ + if self.Q == None: + raise ValueError("The transducer needs to be constructed.") + + for c in w: + moved = False + for tr in self.E: + if tr[0] == q and tr[1] == c: + q = tr[3] + break + if not moved: + return None + + return q def copy_fst(self): """Produces a deep copy of the current FST. @@ -116,6 +141,7 @@ def copy_fst(self): T.Q = deepcopy(self.Q) T.Sigma = deepcopy(self.Sigma) T.Gamma = deepcopy(self.Gamma) + T.qe = deepcopy(self.qe) T.E = deepcopy(self.E) T.stout = deepcopy(self.stout) From 9666a54bfbc0c55940a24587b6fb22044c69c83a Mon Sep 17 00:00:00 2001 From: LiamSchilling Date: Sun, 29 Mar 2026 04:17:07 -0400 Subject: [PATCH 09/18] feat: draft implementation of composition --- utility/fst_object.py | 71 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 9 deletions(-) diff --git a/utility/fst_object.py b/utility/fst_object.py index 69bd033..d7b9c8c 100644 --- a/utility/fst_object.py +++ b/utility/fst_object.py @@ -621,8 +621,10 @@ def intersect(F, G): """Given FSTs that accept the relations `RF` and `RG`, respectively, returns an FST that accepts the relation `RF ∩ RG`. 
- Invariants: + Ensures: final-output emptiness + + Invariants: input-string expansion Args: @@ -640,7 +642,7 @@ def intersect(F, G): # initialize the new FST H = FST(list(set(F.Sigma).union(G.Sigma)), list(set(F.Gamma).union(G.Gamma))) Q_set, E_set, H.stout = set(), set(), {} - G.qe = FST.encode_state(F.qe, G.qe, ("", ""), ("", "")) + H.qe = FST.encode_state(F.qe, G.qe, ("", ""), ("", "")) # perform a breadth-first traversal of the original FSTs from the accepting states worklist = Queue() @@ -667,20 +669,20 @@ def intersect(F, G): new_u, new_v = ug_suff, vg_suff elif ug_suff.startswith(uf_suff) and vg_suff.startswith(vf_suff): ug_buff, vg_buff = ug_suff[len(uf_suff):], vg_suff[len(vf_suff):] - new_q_ = FST.encode_state(qf_, qg, ("", ""), (uf_buff, vf_buff)) + new_q_ = FST.encode_state(qf_, qg, ("", ""), (ug_buff, vg_buff)) new_u, new_v = uf_suff, vf_suff E_set.insert([new_q, new_u, new_v, new_q_]) worklist.put(new_q_) # copy over the shared final states - for qf, "" in F.stout: - for qg, "" in G.stout: + for qf, _ in F.stout: + for qg, _ in G.stout: new_q = FST.encode_state(qf, qg, ("", ""), ("", "")) if new_q in Q_set: H.stout[new_q] = "" H.Q, H.E = list(Q_set), list(E_set) - return G + return H def compose(F, G): """Given FSTs that accept the relations `RF` and `RG`, respectively, @@ -690,10 +692,10 @@ def compose(F, G): this has the effect of typical function composition. Ensures: - ? + final-output emptiness Invariants: - ? + input-string expansion Args: F (FST): the left-hand (second applied) original FST. @@ -702,7 +704,58 @@ def compose(F, G): Returns: FST: the composition FST. 
""" - raise NotImplementedError + # expanding final outputs beforehand makes this construction far easier, + # but it also means that determinism is not an invariant, + # whereas it would be in a more robust implementation + F, G = expand_final(F), expand_final(G) + + # initialize the new FST + H = FST(F.Sigma, G.Gamma) + Q_set, E_set, H.stout = set(), set(), {} + G.qe = FST.encode_state(F.qe, G.qe, "") + + # perform a breadth-first traversal of the original FSTs from the accepting states + worklist = Queue() + worklist.put(F.qe, G.qe, "") + while not worklist.empty(): + (curr_qf, curr_qg, curr_uf) = worklist.pop() + new_q = FST.encode_state(curr_qf, curr_qg, curr_uf) + if new_q not in Q_set: + Q_set.insert(new_q) + for [qg, ug, vg, qg_] in G.E: + if curr_qg == qg: + + # perform an inner breadth-first traversal of states reached by `curr_uf + vg` + visited = set() + inner_worklist = Queue() + inner_worklist.put(curr_qf, curr_uf + vg, "") + while not inner_worklist.empty(): + (curr_qf, curr_uf, curr_vf) = inner_worklist.pop() + if (curr_qf, curr_uf) not in visited: + visited.insert((curr_qf, curr_uf)) + for [qf, uf, vf, qf_] in F.E: + if curr_qf == qf: + if uf.startswith(curr_uf): + if uf == curr_uf: + new_q_ = FST.encode_state(qf_, qg_, "") + new_v = curr_vf + vf + else: + new_q_ = FST.encode_state(qf, qg_, curr_uf) + new_v = curr_vf + E_set.insert([new_q, ug, new_v, new_q_]) + worklist.put(new_q_) + if curr_uf.startswith(uf): + inner_worklist.put(qf_, curr_uf[len(uf):], curr_vf + vf) + + # copy over the shared final states + for qf, _ in F.stout: + for qg, _ in G.stout: + new_q = FST.encode_state(qf, qg, "") + if new_q in Q_set: + H.stout[new_q] = "" + + H.Q, H.E = list(Q_set), list(E_set) + return H def determinize(F): """Turns a nondeterministic FST that recognizes a subsequential function From fa2332aef688d641b4426882a58f3f18f6516910 Mon Sep 17 00:00:00 2001 From: LiamSchilling Date: Sun, 29 Mar 2026 04:23:23 -0400 Subject: [PATCH 10/18] docs --- 
utility/fst_object.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utility/fst_object.py b/utility/fst_object.py index d7b9c8c..f8b1d51 100644 --- a/utility/fst_object.py +++ b/utility/fst_object.py @@ -11,9 +11,9 @@ Implemented core finite-state operations, taking `FST` to be a general (potentially nondeterministic) finite-state transducer -that accepts a relation over strings. Most notably, added -`trim`, `expand_inputs`, `expand_final`, `invert`, `concatenate`, `kleene_closure`, -`prefix_closure`, `union`, `intersect`, `compose`, and `determinize`. +that accepts a relation over strings. +Added trimming, inversion, concatenation, Kleene closure, prefix closure, union, intersection, +composition, and determinization. These functions assume the following `FST` representation invariants, stated as type annotations: Q (Annotated[list[str], "no duplicates"]) From 6d10334f2ccf2eb00f477c5ce2a1ccd534e9c8a7 Mon Sep 17 00:00:00 2001 From: LiamSchilling Date: Sun, 29 Mar 2026 17:23:33 -0400 Subject: [PATCH 11/18] debug --- utility/fst_object.py | 169 ++++++++++++++++++++++++------------------ 1 file changed, 95 insertions(+), 74 deletions(-) diff --git a/utility/fst_object.py b/utility/fst_object.py index f8b1d51..7bccc88 100644 --- a/utility/fst_object.py +++ b/utility/fst_object.py @@ -68,6 +68,14 @@ def __init__(self, Sigma=None, Gamma=None): self.E = None self.stout = None + def __str__(self): + return f'''FST( + (Sigma, Gamma) = ({self.Sigma}, {self.Gamma}), + Q = {self.Q}, + qe = '{self.qe}', + E = {self.E}, + stout = {self.stout} )''' + def rewrite(self, w): """Rewrites the given string with respect to the rules represented in the current FST. @@ -117,14 +125,12 @@ def transition(self, q, w): Returns: the state reached by `w` from `q`, or `None`. 
""" - if self.Q == None: - raise ValueError("The transducer needs to be constructed.") - for c in w: moved = False for tr in self.E: if tr[0] == q and tr[1] == c: q = tr[3] + moved = True break if not moved: return None @@ -141,7 +147,7 @@ def copy_fst(self): T.Q = deepcopy(self.Q) T.Sigma = deepcopy(self.Sigma) T.Gamma = deepcopy(self.Gamma) - T.qe = deepcopy(self.qe) + T.qe = self.qe T.E = deepcopy(self.E) T.stout = deepcopy(self.stout) @@ -170,7 +176,7 @@ def encode_state(*args): """Returns a name that encodes the values of all the passed arguments. The encoding is guaranteed to be one-to-one as long as the string representations of the arguments do not contain `;`, `<`, or `>`, - except for from previous invokations of this function. + except for from previous invocations of this function. It is important that the client adhere to this condition in their FSTs, or else the state encodings produced by functions in our library @@ -182,9 +188,9 @@ def encode_state(*args): Return: str: a name that encodes the values of the arguments. """ - return f"<{args.map(str).join(";")}>" + return f"<{"; ".join(map(str, args))}>" - def is_trim_but_empty(F): + def is_trim_but_empty(self): """A helper function that checks whether an FST is the edge case of the trimmedness condition. That is, the empty FST with one rejecting state and no transitions. @@ -195,7 +201,7 @@ def is_trim_but_empty(F): Returns: bool: whether the FST has one rejecting state and no transitions. """ - return len(F.Q) == 1 and len(F.E) == 0 and len(F.stout) == 0 + return len(self.Q) == 1 and len(self.E) == 0 and len(self.stout) == 0 def new_rejector(Sigma, Gamma): """Creates an FST that rejects every string pair. 
@@ -270,20 +276,20 @@ def trim_inaccessible(F): worklist = Queue() worklist.put(F.qe) while not worklist.empty(): - curr_q = worklist.pop() + curr_q = worklist.get() if curr_q not in Q_set: - Q_set.insert(curr_q) + Q_set.add(curr_q) for [q, u, v, q_] in F.E: if curr_q == q: - E_set.insert([q, u, v, q_]) + E_set.add((q, u, v, q_)) worklist.put(q_) # copy over the final outputs of the states that remain - for q, w in F.stout: + for q, w in F.stout.items(): if q in Q_set: G.stout[q] = w - G.Q, G.E = list(Q_set), list(E_set) + G.Q, G.E = list(Q_set), list(map(list, list(E_set))) return G def trim_useless(F): @@ -312,24 +318,23 @@ def trim_useless(F): for q in F.stout.keys(): worklist.put(q) while not worklist.empty(): - curr_q_ = worklist.pop() + curr_q_ = worklist.get() if curr_q_ not in Q_set: - Q_set.insert(curr_q_) + Q_set.add(curr_q_) for [q, u, v, q_] in F.E: if curr_q_ == q_: - E_set.insert([q, u, v, q_]) + E_set.add((q, u, v, q_)) worklist.put(q) # add back the initial state if it was not traversed already - if G.qe not in Q_set: - Q_set.insert(G.qe) + Q_set.add(G.qe) # copy over the final outputs of the states that remain - for q, w in F.stout: + for q, w in F.stout.items(): if q in Q_set: G.stout[q] = w - G.Q, G.E = list(Q_set), list(E_set) + G.Q, G.E = list(Q_set), list(map(list, list(E_set))) return G def trim(F): @@ -352,6 +357,17 @@ def trim(F): """ return trim_useless(trim_inaccessible(F)) +def is_empty(F): + """Checks that the FST accepts no string pairs. + + Args: + F (FST): the target FST. + + Returns: + bool: `True` iff there is no string pair that `F` accepts. + """ + return trim(F).is_trim_but_empty() + def expand_inputs(F): """Expands transitions with multi-character input strings into non-accepting chains of transitions with single-character input strings. @@ -381,18 +397,18 @@ def expand_inputs(F): # of the form `[, c, "", ]`, where `wc` is an incomplete prefix of `u`, # along with the completing transition `[, c, v, ]`, where `wc` equals `u`. 
for q in F.Q: - Q_set.insert(FST.encode_state(q, "")) + Q_set.add(FST.encode_state(q, "")) for [q, u, v, q_] in F.E: for i in range(1, len(u)): - Q_set.insert(FST.encode_state(q, u[:i])) - E_set.insert([FST.encode_state(q, u[:i-1]), u[i-1], "", FST.encode_state(q, u[:i])]) - E_set.insert([FST.encode_state(q, u[:-1]), u[-1:], v, FST.encode_state(q_, "")]) + Q_set.add(FST.encode_state(q, u[:i])) + E_set.add((FST.encode_state(q, u[:i-1]), u[i-1], "", FST.encode_state(q, u[:i]))) + E_set.add((FST.encode_state(q, u[:-1]), u[-1:], v, FST.encode_state(q_, ""))) # copy over the final outputs - for q, w in F.stout: + for q, w in F.stout.items(): G.stout[FST.encode_state(q, "")] = w - G.Q, G.E = list(Q_set), list(E_set) + G.Q, G.E = list(Q_set), list(map(list, list(E_set))) return G def expand_final(F): @@ -405,6 +421,7 @@ def expand_final(F): Invariants: trimmedness input-string expansion + determinism IF already final-output empty Args: F (FST): the original FST. @@ -418,11 +435,11 @@ def expand_final(F): # account for nonempty final outputs by turning them into transitions to new accepting states, # if empty then just copy the final output verbatim - for q, w in F.stout: + for q, w in F.stout.items(): if w == "": G.stout[q] = "" else: - G.Q.append(q_ := G.fresh_state_name(q)) + G.Q.append(q_ := G.fresh_state(q)) G.E.append([q, "", w, q_]) G.stout[q_] = "" @@ -464,7 +481,7 @@ def concatenate(F, G): concatenating a string pair from `RF` to a string pair from `RG`. 
Invariants: - trimmedness + trimmedness IF both nonempty final-output emptiness input-string expansion @@ -480,7 +497,7 @@ def concatenate(F, G): F = expand_final(F) # initialize the new FST - H = FST(list(set(F.Sigma).union(G.Sigma)), list(set(F.Gamma).union(G.Gamma))) + H = FST(list(set(F.Sigma) | set(G.Sigma)), list(set(F.Gamma) | set(G.Gamma))) H.Q, H.E, H.qe, H.stout = [], [], FST.encode_state("LEFT", F.qe), {} # copy over the states from both `F` and `G` @@ -500,13 +517,13 @@ def concatenate(F, G): for qf in F.stout.keys(): H.E.append([FST.encode_state("LEFT", qf), "", "", FST.encode_state("RIGHT", G.qe)]) - # cope over the final transitions of the other machine - for qg, w in G.stout: + # copy over the final transitions of the other machine + for qg, w in G.stout.items(): H.stout[FST.encode_state("RIGHT", qg)] = w return H -def kleene_closue(F): +def kleene_closure(F): """Given an FST that accepts the relation `R`, returns an FST that accepts the relation `R*`. That is, the relation of all string pairs that are the result of @@ -545,7 +562,7 @@ def kleene_closue(F): def prefix_closure(F): """Given an FST whose domain is the language `L`, returns an FST whose domain is the language `prefixes(L)`. - How the function treats FST outputs is currently underspecified, though we guarantee that + How the function treats FST outputs is underspecified, though we guarantee that the relation accepted by the original FST is a subset of the relation accepted by the new FST. Ensures: @@ -569,7 +586,7 @@ def prefix_closure(F): # unless the FST is just one rejecting state, which is the empty FST edge case if not F.is_trim_but_empty(): for q in F.Q: - if q not in F.stout: + if q not in F.stout.keys(): F.stout[q] = "" return F @@ -579,7 +596,7 @@ def union(F, G): returns an FST that accepts the relation `RF ∪ RG`. Invariants: - trimmedness + trimmedness IF both nonempty final-output emptiness input-string expansion @@ -591,7 +608,7 @@ def union(F, G): FST: the union FST. 
""" # initialize the new FST - H = FST(list(set(F.Sigma).union(G.Sigma)), list(set(F.Gamma).union(G.Gamma))) + H = FST(list(set(F.Sigma) | set(G.Sigma)), list(set(F.Gamma) | set(G.Gamma))) H.Q, H.E, H.qe, H.stout = [], [], FST.encode_state("LEFT", F.qe), {} # create an epsilon transition to nondeterministically choose between running `F` and `G` @@ -610,9 +627,9 @@ def union(F, G): H.E.append([FST.encode_state("RIGHT", qg), u, v, FST.encode_state("RIGHT", qg_)]) # copy over the final outputs from both `F` and `G` - for qf, w in F.stout: + for qf, w in F.stout.items(): H.stout[FST.encode_state("LEFT", qf)] = w - for qg, w in G.stout: + for qg, w in G.stout.items(): H.stout[FST.encode_state("RIGHT", qg)] = w return H @@ -626,6 +643,7 @@ def intersect(F, G): Invariants: input-string expansion + determinism IF already final-output empty Args: F (FST): the left-hand original FST. @@ -640,48 +658,48 @@ def intersect(F, G): F, G = expand_final(F), expand_final(G) # initialize the new FST - H = FST(list(set(F.Sigma).union(G.Sigma)), list(set(F.Gamma).union(G.Gamma))) + H = FST(list(set(F.Sigma) | set(G.Sigma)), list(set(F.Gamma) | set(G.Gamma))) Q_set, E_set, H.stout = set(), set(), {} H.qe = FST.encode_state(F.qe, G.qe, ("", ""), ("", "")) # perform a breadth-first traversal of the original FSTs from the accepting states worklist = Queue() - worklist.put(F.qe, G.qe, ("", ""), ("", "")) + worklist.put((F.qe, G.qe, ("", ""), ("", ""))) while not worklist.empty(): - (curr_qf, curr_qg, (curr_uf, curr_vf), (curr_ug, curr_vg)) = worklist.pop() + (curr_qf, curr_qg, (curr_uf, curr_vf), (curr_ug, curr_vg)) = worklist.get() new_q = FST.encode_state(curr_qf, curr_qg, (curr_uf, curr_vf), (curr_ug, curr_vg)) if new_q not in Q_set: - Q_set.insert(new_q) + Q_set.add(new_q) for [qf, uf, vf, qf_] in F.E: for [qg, ug, vg, qg_] in G.E: states_match = curr_qf == qf and curr_qg == qg f_buffers_match = uf.startswith(curr_uf) and vf.startswith(curr_vf) - g_buffers_match = ug.stateswith(curr_ug) 
and vg.startswith(curr_vg) + g_buffers_match = ug.startswith(curr_ug) and vg.startswith(curr_vg) if states_match and f_buffers_match and g_buffers_match: uf_suff, vf_suff = uf[len(curr_uf):], vf[len(curr_vf):] ug_suff, vg_suff = ug[len(curr_ug):], vg[len(curr_vg):] if uf_suff == ug_suff and vf_suff == vg_suff: - new_q_ = FST.encode_state(qf_, qg_, ("", ""), ("", "")) + new_q_ = (qf_, qg_, ("", ""), ("", "")) new_u, new_v = uf_suff, vf_suff elif uf_suff.startswith(ug_suff) and vf_suff.startswith(vg_suff): uf_buff, vf_buff = uf_suff[len(ug_suff):], vf_suff[len(vg_suff):] - new_q_ = FST.encode_state(qf, qg_, (uf_buff, vf_buff), ("", "")) + new_q_ = (qf, qg_, (uf_buff, vf_buff), ("", "")) new_u, new_v = ug_suff, vg_suff elif ug_suff.startswith(uf_suff) and vg_suff.startswith(vf_suff): ug_buff, vg_buff = ug_suff[len(uf_suff):], vg_suff[len(vf_suff):] - new_q_ = FST.encode_state(qf_, qg, ("", ""), (ug_buff, vg_buff)) + new_q_ = (qf_, qg, ("", ""), (ug_buff, vg_buff)) new_u, new_v = uf_suff, vf_suff - E_set.insert([new_q, new_u, new_v, new_q_]) + E_set.add((new_q, new_u, new_v, FST.encode_state(*new_q_))) worklist.put(new_q_) # copy over the shared final states - for qf, _ in F.stout: - for qg, _ in G.stout: + for qf in F.stout.keys(): + for qg in G.stout.keys(): new_q = FST.encode_state(qf, qg, ("", ""), ("", "")) if new_q in Q_set: H.stout[new_q] = "" - H.Q, H.E = list(Q_set), list(E_set) + H.Q, H.E = list(Q_set), list(map(list, list(E_set))) return H def compose(F, G): @@ -696,6 +714,7 @@ def compose(F, G): Invariants: input-string expansion + determinism IF already final-output empty Args: F (FST): the left-hand (second applied) original FST. 
@@ -710,51 +729,53 @@ def compose(F, G): F, G = expand_final(F), expand_final(G) # initialize the new FST - H = FST(F.Sigma, G.Gamma) + H = FST(deepcopy(G.Sigma), deepcopy(F.Gamma)) Q_set, E_set, H.stout = set(), set(), {} - G.qe = FST.encode_state(F.qe, G.qe, "") + H.qe = FST.encode_state(F.qe, G.qe, "") # perform a breadth-first traversal of the original FSTs from the accepting states worklist = Queue() - worklist.put(F.qe, G.qe, "") + worklist.put((F.qe, G.qe, "")) while not worklist.empty(): - (curr_qf, curr_qg, curr_uf) = worklist.pop() + (curr_qf, curr_qg, curr_uf) = worklist.get() new_q = FST.encode_state(curr_qf, curr_qg, curr_uf) if new_q not in Q_set: - Q_set.insert(new_q) + Q_set.add(new_q) for [qg, ug, vg, qg_] in G.E: if curr_qg == qg: # perform an inner breadth-first traversal of states reached by `curr_uf + vg` - visited = set() + inner_visited = set() inner_worklist = Queue() - inner_worklist.put(curr_qf, curr_uf + vg, "") + inner_worklist.put((curr_qf, curr_uf + vg, "")) while not inner_worklist.empty(): - (curr_qf, curr_uf, curr_vf) = inner_worklist.pop() - if (curr_qf, curr_uf) not in visited: - visited.insert((curr_qf, curr_uf)) + (inner_curr_qf, inner_curr_uf, inner_curr_vf) = inner_worklist.get() + if (inner_curr_qf, inner_curr_uf) not in inner_visited: + inner_visited.add((inner_curr_qf, inner_curr_uf)) for [qf, uf, vf, qf_] in F.E: - if curr_qf == qf: - if uf.startswith(curr_uf): - if uf == curr_uf: - new_q_ = FST.encode_state(qf_, qg_, "") - new_v = curr_vf + vf + if inner_curr_qf == qf: + if uf.startswith(inner_curr_uf): + if uf == inner_curr_uf: + new_q_ = (qf_, qg_, "") + new_v = inner_curr_vf + vf else: - new_q_ = FST.encode_state(qf, qg_, curr_uf) - new_v = curr_vf - E_set.insert([new_q, ug, new_v, new_q_]) + new_q_ = (qf, qg_, inner_curr_uf) + new_v = inner_curr_vf + E_set.add((new_q, ug, new_v, FST.encode_state(*new_q_))) worklist.put(new_q_) - if curr_uf.startswith(uf): - inner_worklist.put(qf_, curr_uf[len(uf):], curr_vf + vf) + if 
inner_curr_uf.startswith(uf): + uf_suff = inner_curr_uf[len(uf):] + new_q_ = (qf_, uf_suff, inner_curr_vf + vf) + inner_worklist.put(new_q_) # copy over the shared final states - for qf, _ in F.stout: - for qg, _ in G.stout: + for qf in F.stout.keys(): + for qg in G.stout.keys(): new_q = FST.encode_state(qf, qg, "") if new_q in Q_set: H.stout[new_q] = "" - H.Q, H.E = list(Q_set), list(E_set) + H.Q, H.E = list(Q_set), list(map(list, list(E_set))) return H def determinize(F): From 1184516b4f885601e39d7a626d75ad517e652668 Mon Sep 17 00:00:00 2001 From: LiamSchilling Date: Tue, 31 Mar 2026 09:25:07 -0400 Subject: [PATCH 12/18] feat: new `rewrite` variants --- utility/fst_object.py | 101 ++++++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 47 deletions(-) diff --git a/utility/fst_object.py b/utility/fst_object.py index 7bccc88..bfd8831 100644 --- a/utility/fst_object.py +++ b/utility/fst_object.py @@ -12,8 +12,12 @@ Implemented core finite-state operations, taking `FST` to be a general (potentially nondeterministic) finite-state transducer that accepts a relation over strings. -Added trimming, inversion, concatenation, Kleene closure, prefix closure, union, intersection, -composition, and determinization. +Added trimming, inversion, concatenation, Kleene closure, +union, intersection, composition, and determinization. + +Exposed additional variations of rewrite and transition functions to the user. +The previous implementation of `rewrite` has been divided into some smaller functions, +but the resulting `rewrite` function has the same behavior as before. 
These functions assume the following `FST` representation invariants, stated as type annotations: Q (Annotated[list[str], "no duplicates"]) @@ -70,27 +74,30 @@ def __init__(self, Sigma=None, Gamma=None): def __str__(self): return f'''FST( - (Sigma, Gamma) = ({self.Sigma}, {self.Gamma}), - Q = {self.Q}, - qe = '{self.qe}', - E = {self.E}, - stout = {self.stout} )''' + (Sigma, Gamma)=({self.Sigma}, {self.Gamma}), + Q={self.Q}, + qe='{self.qe}', + E={self.E}, + stout={self.stout} +)''' - def rewrite(self, w): - """Rewrites the given string with respect to the rules represented in - the current FST. + def rewrite_from(self, q, w): + """Performs a partial rewrite of the given string with respect to the rules represented in + the current FST. That is, a rewrite starting from some intermediate state `q`. Arguments: + q: the start state. w (str): a string that needs to be rewritten. + Outputs: - str: the translation of the input string. + str: the translation of the input string starting from `q`. """ if self.Q == None: raise ValueError("The transducer needs to be constructed.") # move through the transducer and write the output result = [] - current_state = self.qe + current_state = q moved = False for i in range(len(w)): for tr in self.E: @@ -113,7 +120,27 @@ def rewrite(self, w): result = tuple(result) return result - def transition(self, q, w): + def rewrite(self, w): + """Rewrites the given string with respect to the rules represented in + the current FST. + + Arguments: + w (str): a string that needs to be rewritten. + + Outputs: + str: the translation of the input string. 
+ """ + return self.rewrite_from(self.qe, w) + + def flat_rewrite_from(self, q, w): + """A version of `rewrite_from` that flattens the list of output strings into one string.""" + return "".join(self.rewrite_from(q, w)) + + def flat_rewrite(self, w): + """A version of `rewrite` that flattens the list of output strings into one string.""" + return "".join(self.rewrite(w)) + + def transition_from(self, q, w): """Traverses from the state `q` according to the string `w`, and returns the resulting state. Returns `None` if a missing transition is encountered. @@ -134,9 +161,21 @@ def transition(self, q, w): break if not moved: return None - return q + def in_domain_from(self, q, w): + """Traverses from the state `q` according to the string `w`, + and returns `True` iff the traversal is a success. + + Args: + q: the start state. + w (str): a string to read. + + Returns: + `True` iff `w` is in the domain of the FST starting from `q`. + """ + return self.transition_from(q, w) is not None + def copy_fst(self): """Produces a deep copy of the current FST. @@ -147,7 +186,7 @@ def copy_fst(self): T.Q = deepcopy(self.Q) T.Sigma = deepcopy(self.Sigma) T.Gamma = deepcopy(self.Gamma) - T.qe = self.qe + T.qe = deepcopy(self.qe) T.E = deepcopy(self.E) T.stout = deepcopy(self.stout) @@ -559,38 +598,6 @@ def kleene_closure(F): return G -def prefix_closure(F): - """Given an FST whose domain is the language `L`, - returns an FST whose domain is the language `prefixes(L)`. - How the function treats FST outputs is underspecified, though we guarantee that - the relation accepted by the original FST is a subset of the relation accepted by the new FST. - - Ensures: - trimmedness - input-string expansion - - Invariants: - final-output emptiness - determinism - - Args: - F (FST): the original FST. - - Returns: - FST: the prefix-closure FST. 
- """ - # construct an FST where exactly the prefix closure of the original domain has valid runs - F = expand_inputs(trim(F)) - - # mark every state as accepting, - # unless the FST is just one rejecting state, which is the empty FST edge case - if not F.is_trim_but_empty(): - for q in F.Q: - if q not in F.stout.keys(): - F.stout[q] = "" - - return F - def union(F, G): """Given FSTs that accept the relations `RF` and `RG`, respectively, returns an FST that accepts the relation `RF ∪ RG`. From d84fceca5fe949dafdd38f7194d4cb5a04495e29 Mon Sep 17 00:00:00 2001 From: LiamSchilling Date: Tue, 31 Mar 2026 10:36:49 -0400 Subject: [PATCH 13/18] feat: give FSTs the option to have a descriptive string `name` --- utility/fst_object.py | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/utility/fst_object.py b/utility/fst_object.py index bfd8831..8be0d33 100644 --- a/utility/fst_object.py +++ b/utility/fst_object.py @@ -55,16 +55,18 @@ class FST: """A class representing finite state transducers. Attributes: - Q (list): a list of states; - Sigma (list): a list of symbols of the input alphabet; - Gamma (list): a list of symbols of the output alphabet; - qe (str): name of the unique initial state; - E (list): a list of transitions; + name (str): a descriptive name for this FST. + Q (list): a list of states. + Sigma (list): a list of symbols of the input alphabet. + Gamma (list): a list of symbols of the output alphabet. + qe (str): name of the unique initial state. + E (list): a list of transitions. stout (dict): a collection of state outputs. 
""" def __init__(self, Sigma=None, Gamma=None): """Initializes the FST object.""" + self.name = "unnamed-FST" self.Q = None self.Sigma = Sigma self.Gamma = Gamma @@ -73,7 +75,11 @@ def __init__(self, Sigma=None, Gamma=None): self.stout = None def __str__(self): + return self.name + + def __repr__(self): return f'''FST( + name='{self.name}', (Sigma, Gamma)=({self.Sigma}, {self.Gamma}), Q={self.Q}, qe='{self.qe}', @@ -183,13 +189,13 @@ def copy_fst(self): T (FST): a copy of the current FST. """ T = FST() + T.name = deepcopy(self.name) T.Q = deepcopy(self.Q) T.Sigma = deepcopy(self.Sigma) T.Gamma = deepcopy(self.Gamma) T.qe = deepcopy(self.qe) T.E = deepcopy(self.E) T.stout = deepcopy(self.stout) - return T def fresh_state(self, name_prefix): @@ -260,6 +266,7 @@ def new_rejector(Sigma, Gamma): """ # initialize the new FST F = FST(Sigma, Gamma) + F.name = "rejector-FST" F.Q, F.E, F.qe, F.stout = ["q"], [], "q", {} return F @@ -281,6 +288,7 @@ def new_acceptor(Sigma, Gamma): """ # initialize the new FST F = FST(Sigma, Gamma) + F.name = "acceptor-FST" F.Q, F.E, F.qe, F.stout = ["q"], [], "q", {"q" : ""} # add transitions that allow writing any character to input or output @@ -309,6 +317,7 @@ def trim_inaccessible(F): """ # initialize the new FST G = FST(deepcopy(F.Sigma), deepcopy(F.Gamma)) + G.name = f"trim_inaccessible({F})" Q_set, E_set, G.qe, G.stout = set(), set(), F.qe, {} # perform a breadth-first traversal of the original FST from the initial state @@ -350,6 +359,7 @@ def trim_useless(F): """ # initialize the new FST G = FST(deepcopy(F.Sigma), deepcopy(F.Gamma)) + G.name = f"trim_useless({F})" Q_set, E_set, G.qe, G.stout = set(), set(), F.qe, {} # perform a breadth-first traversal of the original FST from the accepting states @@ -394,7 +404,10 @@ def trim(F): Returns: FST: the trimmed FST. 
""" - return trim_useless(trim_inaccessible(F)) + G = trim_useless(trim_inaccessible(F)) + G.name = f"trim({F})" + + return G def is_empty(F): """Checks that the FST accepts no string pairs. @@ -427,6 +440,7 @@ def expand_inputs(F): """ # initialize the new FST G = FST(deepcopy(F.Sigma), deepcopy(F.Gamma)) + G.name = f"expand_inputs({F})" Q_set, E_set, G.qe, G.stout = set(), set(), FST.encode_state(F.qe, ""), {} # Construct the new set of states and transitions. @@ -470,6 +484,7 @@ def expand_final(F): """ # initialize the new FST G = FST(deepcopy(F.Sigma), deepcopy(F.Gamma)) + G.name = f"expand_final({F})" G.Q, G.E, G.qe, G.stout = deepcopy(F.Q), deepcopy(F.E), F.qe, {} # account for nonempty final outputs by turning them into transitions to new accepting states, @@ -501,10 +516,12 @@ def invert(F): FST: the inverted FST. """ # we need final outputs to be empty because the FST class does not support final inputs + F_name = F.name F = expand_final(F) # initialize the new FST G = FST(deepcopy(F.Sigma), deepcopy(F.Gamma)) + G.name = f"{F_name}⁻¹" G.Q, G.E, G.qe, G.stout = deepcopy(F.Q), [], F.qe, deepcopy(F.stout) # copy over the transitions with swapped input an output strings @@ -533,10 +550,12 @@ def concatenate(F, G): """ # we need final outputs of the first machine to be empty # so that we do not miss output upon traversal to the next machine + F_name = F.name F = expand_final(F) # initialize the new FST H = FST(list(set(F.Sigma) | set(G.Sigma)), list(set(F.Gamma) | set(G.Gamma))) + H.name = f"({F_name} · {G})" H.Q, H.E, H.qe, H.stout = [], [], FST.encode_state("LEFT", F.qe), {} # copy over the states from both `F` and `G` @@ -583,10 +602,12 @@ def kleene_closure(F): """ # we need final outputs to be empty # so that we do not miss output upon traversal back to the initial state + F_name = F.name F = expand_final(F) # initialize the new FST G = FST(deepcopy(F.Sigma), deepcopy(F.Gamma)) + G.name = f"{F_name}*" G.Q, G.E, G.qe, G.stout = deepcopy(F.Q), 
deepcopy(F.E), F.qe, deepcopy(F.stout) # for every final state, @@ -616,6 +637,7 @@ def union(F, G): """ # initialize the new FST H = FST(list(set(F.Sigma) | set(G.Sigma)), list(set(F.Gamma) | set(G.Gamma))) + H.name = f"({F} ∪ {G})" H.Q, H.E, H.qe, H.stout = [], [], FST.encode_state("LEFT", F.qe), {} # create an epsilon transition to nondeterministically choose between running `F` and `G` @@ -662,10 +684,12 @@ def intersect(F, G): # expanding final outputs beforehand makes this construction far easier, # but it also means that determinism is not an invariant, # whereas it would be in a more robust implementation + F_name, G_name = F.name, G.name F, G = expand_final(F), expand_final(G) # initialize the new FST H = FST(list(set(F.Sigma) | set(G.Sigma)), list(set(F.Gamma) | set(G.Gamma))) + H.name = f"({F_name} ∩ {G_name})" Q_set, E_set, H.stout = set(), set(), {} H.qe = FST.encode_state(F.qe, G.qe, ("", ""), ("", "")) @@ -733,10 +757,12 @@ def compose(F, G): # expanding final outputs beforehand makes this construction far easier, # but it also means that determinism is not an invariant, # whereas it would be in a more robust implementation + F_name, G_name = F.name, G.name F, G = expand_final(F), expand_final(G) # initialize the new FST H = FST(deepcopy(G.Sigma), deepcopy(F.Gamma)) + H.name = f"({F_name} ∘ {G_name})" Q_set, E_set, H.stout = set(), set(), {} H.qe = FST.encode_state(F.qe, G.qe, "") From c9cdc38dfa1f853cc472aa3008c1d858bdf54618 Mon Sep 17 00:00:00 2001 From: LiamSchilling Date: Fri, 10 Apr 2026 02:51:06 -0400 Subject: [PATCH 14/18] fix composition --- utility/fst_object.py | 163 +++++++++++------------------------------- 1 file changed, 43 insertions(+), 120 deletions(-) diff --git a/utility/fst_object.py b/utility/fst_object.py index 8be0d33..527e714 100644 --- a/utility/fst_object.py +++ b/utility/fst_object.py @@ -12,8 +12,7 @@ Implemented core finite-state operations, taking `FST` to be a general (potentially nondeterministic) finite-state 
transducer that accepts a relation over strings. -Added trimming, inversion, concatenation, Kleene closure, -union, intersection, composition, and determinization. +Added trimming, inversion, concatenation, Kleene closure, union, composition, and determinization. Exposed additional variations of rewrite and transition functions to the user. The previous implementation of `rewrite` has been divided into some smaller functions, @@ -337,7 +336,7 @@ def trim_inaccessible(F): if q in Q_set: G.stout[q] = w - G.Q, G.E = list(Q_set), list(map(list, list(E_set))) + G.Q, G.E = list(Q_set), list(map(list, E_set)) return G def trim_useless(F): @@ -383,7 +382,7 @@ def trim_useless(F): if q in Q_set: G.stout[q] = w - G.Q, G.E = list(Q_set), list(map(list, list(E_set))) + G.Q, G.E = list(Q_set), list(map(list, E_set)) return G def trim(F): @@ -445,7 +444,7 @@ def expand_inputs(F): # Construct the new set of states and transitions. # Every state `q` is mapped to many new states ``, - # where `u` is every prefix of the outgoing input strings from `q`. + # where `u` ranges over prefixes of the outgoing input strings from `q`. # Every transition `[q, u, v, q']` is mapped to a series of new transitions # of the form `[, c, "", ]`, where `wc` is an incomplete prefix of `u`, # along with the completing transition `[, c, v, ]`, where `wc` equals `u`. @@ -461,7 +460,7 @@ def expand_inputs(F): for q, w in F.stout.items(): G.stout[FST.encode_state(q, "")] = w - G.Q, G.E = list(Q_set), list(map(list, list(E_set))) + G.Q, G.E = list(Q_set), list(map(list, E_set)) return G def expand_final(F): @@ -591,7 +590,7 @@ def kleene_closure(F): final-output emptiness Invariants: - trimmedness + trimmedness IF nonempty input-string expansion Args: @@ -663,152 +662,79 @@ def union(F, G): return H -def intersect(F, G): - """Given FSTs that accept the relations `RF` and `RG`, respectively, - returns an FST that accepts the relation `RF ∩ RG`. 
- - Ensures: - final-output emptiness - - Invariants: - input-string expansion - determinism IF already final-output empty - - Args: - F (FST): the left-hand original FST. - G (FST): the right-hand original FST. - - Returns: - FST: the intersection FST. - """ - # expanding final outputs beforehand makes this construction far easier, - # but it also means that determinism is not an invariant, - # whereas it would be in a more robust implementation - F_name, G_name = F.name, G.name - F, G = expand_final(F), expand_final(G) - - # initialize the new FST - H = FST(list(set(F.Sigma) | set(G.Sigma)), list(set(F.Gamma) | set(G.Gamma))) - H.name = f"({F_name} ∩ {G_name})" - Q_set, E_set, H.stout = set(), set(), {} - H.qe = FST.encode_state(F.qe, G.qe, ("", ""), ("", "")) - - # perform a breadth-first traversal of the original FSTs from the accepting states - worklist = Queue() - worklist.put((F.qe, G.qe, ("", ""), ("", ""))) - while not worklist.empty(): - (curr_qf, curr_qg, (curr_uf, curr_vf), (curr_ug, curr_vg)) = worklist.get() - new_q = FST.encode_state(curr_qf, curr_qg, (curr_uf, curr_vf), (curr_ug, curr_vg)) - if new_q not in Q_set: - Q_set.add(new_q) - for [qf, uf, vf, qf_] in F.E: - for [qg, ug, vg, qg_] in G.E: - states_match = curr_qf == qf and curr_qg == qg - f_buffers_match = uf.startswith(curr_uf) and vf.startswith(curr_vf) - g_buffers_match = ug.startswith(curr_ug) and vg.startswith(curr_vg) - if states_match and f_buffers_match and g_buffers_match: - uf_suff, vf_suff = uf[len(curr_uf):], vf[len(curr_vf):] - ug_suff, vg_suff = ug[len(curr_ug):], vg[len(curr_vg):] - if uf_suff == ug_suff and vf_suff == vg_suff: - new_q_ = (qf_, qg_, ("", ""), ("", "")) - new_u, new_v = uf_suff, vf_suff - elif uf_suff.startswith(ug_suff) and vf_suff.startswith(vg_suff): - uf_buff, vf_buff = uf_suff[len(ug_suff):], vf_suff[len(vg_suff):] - new_q_ = (qf, qg_, (uf_buff, vf_buff), ("", "")) - new_u, new_v = ug_suff, vg_suff - elif ug_suff.startswith(uf_suff) and 
vg_suff.startswith(vf_suff): - ug_buff, vg_buff = ug_suff[len(uf_suff):], vg_suff[len(vf_suff):] - new_q_ = (qf_, qg, ("", ""), (ug_buff, vg_buff)) - new_u, new_v = uf_suff, vf_suff - E_set.add((new_q, new_u, new_v, FST.encode_state(*new_q_))) - worklist.put(new_q_) - - # copy over the shared final states - for qf in F.stout.keys(): - for qg in G.stout.keys(): - new_q = FST.encode_state(qf, qg, ("", ""), ("", "")) - if new_q in Q_set: - H.stout[new_q] = "" - - H.Q, H.E = list(Q_set), list(map(list, list(E_set))) - return H - -def compose(F, G): +def left_epsilon_free_compose(F, G): """Given FSTs that accept the relations `RF` and `RG`, respectively, returns an FST that accepts the relation `{ (u, v) | ∃ w, (u, w) ∈ RG ∧ (w, v) ∈ RF }`. In the special case that `RF` and `RG` are both subsequential functions (which is guaranteed if both original FSTs are deterministic), this has the effect of typical function composition. - Ensures: - final-output emptiness + Requiring the left-hand FST to have no transitions with empty input strings + allows us to design this implementation to have determinism as an invariant. + Additionally, this condition always holds for deterministic FSTs, + so it will be useful for subsequential applications. Invariants: + final-output emptiness input-string expansion - determinism IF already final-output empty + determinism IF `G` already final-output empty Args: - F (FST): the left-hand (second applied) original FST. + F (FST): the left-hand (second applied) original FST, MUST have no empty input strings. G (FST): the right-hand (first applied) original FST. Returns: FST: the composition FST. 
""" - # expanding final outputs beforehand makes this construction far easier, - # but it also means that determinism is not an invariant, + # we have to expand inputs in `F` so that it never over-consumes output of `G` + F_name = F.name + F = expand_inputs(F) + + # expanding final outputs of the right-hand machine makes this construction far easier, + # but it also means that determinism is not always an invariant, # whereas it would be in a more robust implementation - F_name, G_name = F.name, G.name - F, G = expand_final(F), expand_final(G) + G_name = G.name + G = expand_final(G) # initialize the new FST H = FST(deepcopy(G.Sigma), deepcopy(F.Gamma)) H.name = f"({F_name} ∘ {G_name})" - Q_set, E_set, H.stout = set(), set(), {} - H.qe = FST.encode_state(F.qe, G.qe, "") + Q_set, E_set, H.qe, H.stout = set(), set(), FST.encode_state(F.qe, G.qe), {} - # perform a breadth-first traversal of the original FSTs from the accepting states + # perform a breadth-first traversal of the new FST + # by traversing transitions in `G` while simulating both `G` and `F` worklist = Queue() - worklist.put((F.qe, G.qe, "")) + worklist.put((F.qe, G.qe)) while not worklist.empty(): - (curr_qf, curr_qg, curr_uf) = worklist.get() - new_q = FST.encode_state(curr_qf, curr_qg, curr_uf) + (curr_qf, curr_qg) = worklist.get() + new_q = FST.encode_state(curr_qf, curr_qg) if new_q not in Q_set: Q_set.add(new_q) + + # for every possible move in `G`, perform a breadth-first traversal of `F` on its output for [qg, ug, vg, qg_] in G.E: if curr_qg == qg: - - # perform an inner breadth-first traversal of states reached by `curr_uf + vg` - inner_visited = set() inner_worklist = Queue() - inner_worklist.put((curr_qf, curr_uf + vg, "")) + inner_worklist.put((curr_qf, vg, "")) while not inner_worklist.empty(): - (inner_curr_qf, inner_curr_uf, inner_curr_vf) = inner_worklist.get() - if (inner_curr_qf, inner_curr_uf) not in inner_visited: - inner_visited.add((inner_curr_qf, inner_curr_uf)) + (inner_qf, 
curr_vg, curr_vf) = inner_worklist.get() + if curr_vg == "": + new_q_ = FST.encode_state(inner_qf, qg_) + E_set.add((new_q, ug, curr_vf, new_q_)) + worklist.put((inner_qf, qg_)) + else: for [qf, uf, vf, qf_] in F.E: - if inner_curr_qf == qf: - if uf.startswith(inner_curr_uf): - if uf == inner_curr_uf: - new_q_ = (qf_, qg_, "") - new_v = inner_curr_vf + vf - else: - new_q_ = (qf, qg_, inner_curr_uf) - new_v = inner_curr_vf - E_set.add((new_q, ug, new_v, FST.encode_state(*new_q_))) - worklist.put(new_q_) - if inner_curr_uf.startswith(uf): - uf_suff = inner_curr_uf[len(uf):] - new_q_ = (qf_, uf_suff, inner_curr_vf + vf) - inner_worklist.put(new_q_) + if inner_qf == qf and curr_vg.startswith(uf): + inner_worklist.put((qf_, curr_vg[len(uf):], curr_vf + vf)) # copy over the shared final states - for qf in F.stout.keys(): + for qf, w in F.stout.items(): for qg in G.stout.keys(): - new_q = FST.encode_state(qf, qg, "") + new_q = FST.encode_state(qf, qg) if new_q in Q_set: - H.stout[new_q] = "" + H.stout[new_q] = w - H.Q, H.E = list(Q_set), list(map(list, list(E_set))) + H.Q, H.E = list(Q_set), list(map(list, E_set)) return H def determinize(F): @@ -817,9 +743,6 @@ def determinize(F): A deterministic FST has only single-character input strings, and for each state, at most one outgoing transition with every input string. - Ensures: - ? - Invariants: ? From 55e5b9969f4276da779da7ea081266c0d36716f0 Mon Sep 17 00:00:00 2001 From: LiamSchilling Date: Sat, 11 Apr 2026 00:16:06 -0400 Subject: [PATCH 15/18] feat_determinization --- utility/fst_object.py | 100 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 82 insertions(+), 18 deletions(-) diff --git a/utility/fst_object.py b/utility/fst_object.py index 527e714..0f98858 100644 --- a/utility/fst_object.py +++ b/utility/fst_object.py @@ -49,6 +49,7 @@ from copy import deepcopy from queue import Queue +from .helper import lcp class FST: """A class representing finite state transducers. 
@@ -662,6 +663,39 @@ def union(F, G): return H +def consume_input_from(F, u, qs): + """Given a list of state-string pairs and an input string, + runs a nondeterministic FST on the input string + from every given state and for every possible nondeterministic choice, + accumulating output onto the original given strings. + + To guarantee termination, transition cycles whose input strings are empty + are not traversed for more than one consecutive loop, + as enforced by the `visited` tracker only considering + the state in the FST and the amount of input consumed. + + Args: + F (FST): The target FST. + u (str): The input string. + qs (list[Tuple[str, str]]): A list of start state-string pairs from which to run the FST. + + Returns: + list[Tuple[str, str]]: All possible results of running the FST. + """ + visited = {} + worklist = Queue() + for q, v in qs: + worklist.put((q, u, v)) + while not worklist.empty(): + (curr_q, curr_u, curr_v) = worklist.get() + if (curr_q, curr_u) not in visited: + visited[(curr_q, curr_u)] = curr_v + for [q, u, v, q_] in F.E: + if curr_q == q and curr_u.startswith(u): + worklist.put((q_, curr_u[len(u):], curr_v + v)) + + return [(q, v) for ((q, u), v) in visited.items() if u == ""] + def left_epsilon_free_compose(F, G): """Given FSTs that accept the relations `RF` and `RG`, respectively, returns an FST that accepts the relation `{ (u, v) | ∃ w, (u, w) ∈ RG ∧ (w, v) ∈ RF }`. 
@@ -710,22 +744,11 @@ def left_epsilon_free_compose(F, G): new_q = FST.encode_state(curr_qf, curr_qg) if new_q not in Q_set: Q_set.add(new_q) - - # for every possible move in `G`, perform a breadth-first traversal of `F` on its output for [qg, ug, vg, qg_] in G.E: if curr_qg == qg: - inner_worklist = Queue() - inner_worklist.put((curr_qf, vg, "")) - while not inner_worklist.empty(): - (inner_qf, curr_vg, curr_vf) = inner_worklist.get() - if curr_vg == "": - new_q_ = FST.encode_state(inner_qf, qg_) - E_set.add((new_q, ug, curr_vf, new_q_)) - worklist.put((inner_qf, qg_)) - else: - for [qf, uf, vf, qf_] in F.E: - if inner_qf == qf and curr_vg.startswith(uf): - inner_worklist.put((qf_, curr_vg[len(uf):], curr_vf + vf)) + for (qf_, vf) in consume_input_from(F, vg, [(curr_qf, "")]): + E_set.add((new_q, ug, vf, FST.encode_state(qf_, qg_))) + worklist.put((qf_, qg_)) # copy over the shared final states for qf, w in F.stout.items(): @@ -743,13 +766,54 @@ def determinize(F): A deterministic FST has only single-character input strings, and for each state, at most one outgoing transition with every input string. - Invariants: - ? + Ensures: + trimmedness + input-string expansion + determinism Args: - F (FST): the original functional FST. + F (FST): the original functional FST.s Returns: FST: an FST that accepts the same function but is deterministic. 
""" - raise NotImplementedError + # expanding inputs in `F` means that we never have to partially traverse a transition + # we have to trim `F` so that we get the subsequential guarantees for the whole machine + F_name = F.name + F = expand_inputs(trim(F)) + + # the initial state of the new machine is the set of states reachable + # from the initial state of the old machine on the empty string + qes = sorted(consume_input_from(F, "", [(F.qe, "")])) + + # initialize the new FST + G = FST(deepcopy(F.Sigma), deepcopy(F.Gamma)) + G.name = f"determinize({F_name})" + Q_set, E_set, G.qe, G.stout = set(), set(), FST.encode_state(*qes), {} + + # perform a breadth-first traversal of the new FST + # by tracking every possible nondeterministic behavior of `F` on consuming each character + worklist = Queue() + worklist.put(qes) + while not worklist.empty(): + curr_qs = sorted(worklist.get()) + new_q = FST.encode_state(*curr_qs) + if (tuple(curr_qs), new_q) not in Q_set: + Q_set.add((tuple(curr_qs), new_q)) + for c in F.Sigma: + qs = sorted(consume_input_from(F, c, curr_qs)) + if qs != []: + common_output = lcp(*[v for (_, v) in qs]) + qs_ = [(q, v[len(common_output):]) for (q, v) in qs] + E_set.add((new_q, c, common_output, FST.encode_state(*qs_))) + worklist.put(qs_) + + # mark any state containing a final state of the original machine as a new final state + for (qs, new_q) in Q_set: + for (q, v) in qs: + if q in F.stout: + G.stout[new_q] = v + break + + G.Q, G.E = list(map(lambda x : x[1], Q_set)), list(map(list, E_set)) + return G From 95f3f058c62348876c8cade988f1bbfa0bacc8c2 Mon Sep 17 00:00:00 2001 From: LiamSchilling Date: Sat, 11 Apr 2026 14:12:05 -0400 Subject: [PATCH 16/18] feat: onwardization and minimization --- utility/fst_object.py | 134 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 133 insertions(+), 1 deletion(-) diff --git a/utility/fst_object.py b/utility/fst_object.py index 0f98858..ac5c7c5 100644 --- a/utility/fst_object.py +++ 
b/utility/fst_object.py @@ -772,7 +772,7 @@ def determinize(F): determinism Args: - F (FST): the original functional FST.s + F (FST): the original functional FST. Returns: FST: an FST that accepts the same function but is deterministic. @@ -817,3 +817,135 @@ def determinize(F): G.Q, G.E = list(map(lambda x : x[1], Q_set)), list(map(list, E_set)) return G + +def onwardize(F): + """Pushes forward common prefixes of output strings + so that the resulting FST produces output as early as possible. + + Ensures: + trimmedness + + Invariants: + final-output emptiness + input-string expansion + determinism + + Args: + F (FST): the original FST. + + Returns: + FST: an FST that recognizes the same relation but is onward. + """ + # initialize the new FST, which we must trim to ensure termination + G = trim(F) + G.name = f"onwardize({F})" + + # push outputs forward until fixedpoint + while True: + + outputs_per_state, common_output_per_state = {}, {} + + # collect all transition outputs indexed by state + for [q, _, v , _] in G.E: + if q not in outputs_per_state: + outputs_per_state[q] = [] + outputs_per_state[q].append(v) + + # collect all final outputs indexed by state + for q, v in G.stout.items(): + outputs_per_state[q].append(v) + + # compute the common prefixes of all output sets + for (q, outputs) in outputs_per_state.items(): + common_output_per_state[q] = lcp(*outputs) + + # if there are no more nonempty common prefixes, then we are done + if all([v == "" for (_, v) in common_output_per_state.items()]): + return G + + # for every transition, push common output forward and receive pushed common output + for tr in G.E: + tr[2] = tr[2][len(common_output_per_state[tr[0]]):] + common_output_per_state[tr[3]] + + # for every final output, push common prefix forward + for q in G.stout.keys(): + G.stout[q] = G.stout[q][len(common_output_per_state[q]):] + +def minimize(F): + """Given a deterministic FST, + returns the canonical FST that recognizes the same subsequential 
function. + It will be trim, onward, and minimal in the sense that all states are observably distinct. + + The minimization algorithm uses Hopcroft's partition-refinement strategy + (https://en.wikipedia.org/wiki/DFA_minimization). + + Ensures: + trimmedness + input-string expansion + determinism + + Invariants: + final-output expansion + + Args: + F (FST): the original deterministic FST. + + Returns: + FST: an FST that accepts the same function but is minimal. + """ + # we must onwardize the original FST + F_name = F.name + F = onwardize(F) + + # initialize an overly coarse partition of the states into indexed equivalence classes + partition = None + class_of = { q : 0 for q in F.Q } + + # refine the partition using local inferences until fixedpoint + while True: + + # for every state and final transition, + # collect a signature including its output and destination class + signature_per_state_per_tr = { q: { u: None for u in [""] + F.Sigma } for q in F.Q } + for [q, u, v, q_] in F.E: + signature_per_state_per_tr[q][u] = (v, class_of[q_]) + for q, v in F.stout.items(): + signature_per_state_per_tr[q][""] = v + + # construct a signature of observable information per state + signature_per_state = { q: ( + class_of[q], + tuple(signature_per_state_per_tr[q][u] for u in [""] + F.Sigma) + ) for q in F.Q } + + old_partition = partition + + # "transpose" the state signatures by keying sets of nondistinct states by their signature + partition = {} + for (q, sig) in signature_per_state.items(): + if sig not in partition: + partition[sig] = set() + partition[sig].add(q) + + class_of = { q: i for (i, (_, s)) in enumerate(partition.items()) for q in s } + + # if there was no change, then we are done with the refinement + if partition == old_partition: + break + + # initialize the new FST + G = FST(deepcopy(F.Sigma), deepcopy(F.Gamma)) + G.name = f"minimize({F_name})" + G.E, G.stout = [], {} + G.Q = [str(sig[0]) for sig in partition.keys()] + G.qe = str(next(sig for (sig, s) in 
partition.items() if F.qe in s)[0]) + + # populate the state and final transitions by reading from the signatures of the partition + for sig in partition.keys(): + if sig[1][0] != None: + G.stout[str(sig[0])] = str(sig[1][0]) + for (i, u) in zip(range(1, len(F.Sigma) + 1), F.Sigma): + if sig[1][i] != None: + G.E.append([str(sig[0]), u, sig[1][i][0], str(sig[1][i][1])]) + + return G From 3fc3bfda2d5f627caa4857dfae223b80832fe069 Mon Sep 17 00:00:00 2001 From: LiamSchilling Date: Mon, 13 Apr 2026 00:19:12 -0400 Subject: [PATCH 17/18] feat: identity, image, domain --- utility/fst_object.py | 83 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/utility/fst_object.py b/utility/fst_object.py index ac5c7c5..15bcf6b 100644 --- a/utility/fst_object.py +++ b/utility/fst_object.py @@ -299,6 +299,89 @@ def new_acceptor(Sigma, Gamma): return F +def new_identity(Sigma): + """Creates an FST that recognizes the identity function. + + Ensures: + trimmedness + final-output emptiness + input-string expansion + determinism + + Args: + Sigma (list): the input alphabet. + Gamma (list): the output alphabet. + + Returns: + FST: the identity FST. + """ + # initialize the new FST + F = FST(Sigma, Sigma) + F.name = "identity-FST" + F.Q, F.E, F.qe, F.stout = ["q"], [], "q", {"q" : ""} + + # add transitions that allow copying any character from input to output + for c in Sigma: + F.E.append(["q", c, c, "q"]) + + return F + +def domain(F): + """Given an FST recognizing a relation `R`, + returns an FST recognizing the relation `{ (u, u) | ∃ w, (u, w) ∈ R }`. + + Invariants: + trimmedness + input-string expansion + determinism + + Args: + F (FST): the original FST. + + Returns: + FST: an FST that recognizes its domain. 
+ """ + # initialize the new FST + G = F.copy_fst() + G.name = f"dom({F.name})" + + # copy the input string of every transition to its output + for tr in G.E: + tr[2] = tr[1] + + # empty the output strings on final transitions + for q in G.stout.keys(): + G.stout[q] = "" + + return G + +def image(F): + """Given an FST recognizing a relation `R`, + returns an FST recognizing the relation `{ (u, u) | ∃ w, (w, u) ∈ R }`. + + Ensures: + final-output emptiness + + Invariants: + trimmedness + + Args: + F (FST): the original FST. + + Returns: + FST: an FST that recognizes its image. + """ + # initialize the new FST + # we need final outputs to be empty because the FST class does not support final inputs + G = expand_final(F) + G.name = f"img({F.name})" + + # copy the output string of every transition to its input + for tr in G.E: + tr[1] = tr[2] + + return G + def trim_inaccessible(F): """Removes states and transitions from the FST that are not accessible from the initial state. From 844ae33f7b8344d74ddfd4cc9400a02406d4b0eb Mon Sep 17 00:00:00 2001 From: LiamSchilling Date: Mon, 13 Apr 2026 00:19:50 -0400 Subject: [PATCH 18/18] fix docs --- utility/fst_object.py | 1 - 1 file changed, 1 deletion(-) diff --git a/utility/fst_object.py b/utility/fst_object.py index 15bcf6b..8cf7777 100644 --- a/utility/fst_object.py +++ b/utility/fst_object.py @@ -310,7 +310,6 @@ def new_identity(Sigma): Args: Sigma (list): the input alphabet. - Gamma (list): the output alphabet. Returns: FST: the identity FST.