From 18dc689f96504458fb405056767c51df46929073 Mon Sep 17 00:00:00 2001 From: breandan Date: Thu, 24 Oct 2024 17:33:09 -0400 Subject: [PATCH] chase down bug with MAX_TOKENS --- .../hypergraph/kaliningraph/automata/FSA.kt | 2 ++ .../kaliningraph/parsing/Levenshtein.kt | 1 + .../hypergraph/kaliningraph/parsing/Parikh.kt | 2 +- .../hypergraph/kaliningraph/automata/JFSA.kt | 7 +++++- .../kaliningraph/parsing/JVMBarHillel.kt | 7 +++--- .../kaliningraph/automata/WFSATest.kt | 22 +++++++++++++++++++ 6 files changed, 36 insertions(+), 5 deletions(-) diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/automata/FSA.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/automata/FSA.kt index 7986cf0e..800b1169 100644 --- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/automata/FSA.kt +++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/automata/FSA.kt @@ -37,6 +37,8 @@ open class FSA(open val Q: TSA, open val init: Set<Σᐩ>, open val final: Set< } val stateCoords: Sequence by lazy { states.map { it.coords().let { (i, j) -> Triple(stateMap[it]!!, i, j) } }.asSequence() } + var height = 0 + var width = 0 val validTriples by lazy { stateCoords.let { it * it * it }.filter { it.isValidStateTriple() }.toList() } val validPairs by lazy { stateCoords.let { it * it }.filter { it.isValidStatePair() }.toSet() } diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Levenshtein.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Levenshtein.kt index 44bfd578..b817a317 100644 --- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Levenshtein.kt +++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Levenshtein.kt @@ -76,6 +76,7 @@ fun makeLevFSA( } FSA(Q, initialStates, finalStates) + .also { it.height = dist; it.width = str.size } // .nominalize() .also { println("Levenshtein-${str.size}x$dist automaton has ${Q.size} arcs!") } } diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Parikh.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Parikh.kt index 25604e5f..f365aefa 100644 --- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Parikh.kt +++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/Parikh.kt @@ -96,7 +96,7 @@ class ParikhMap(val cfg: CFG, val size: Int, reconstruct: Boolean = true) { } } - fun genRanges(delta: Int = 2 * MAX_RADIUS + 1, n: Int = MAX_TOKENS) = + fun genRanges(delta: Int = 2 * MAX_RADIUS + 1, n: Int = MAX_TOKENS + MAX_RADIUS) = (1..delta).map { margin -> val range = (0..n).toList() range.windowed(margin, 1).map { diff --git a/src/jvmMain/kotlin/ai/hypergraph/kaliningraph/automata/JFSA.kt b/src/jvmMain/kotlin/ai/hypergraph/kaliningraph/automata/JFSA.kt index 8aaacf99..16dd3fb4 100644 --- a/src/jvmMain/kotlin/ai/hypergraph/kaliningraph/automata/JFSA.kt +++ b/src/jvmMain/kotlin/ai/hypergraph/kaliningraph/automata/JFSA.kt @@ -150,4 +150,9 @@ fun BAutomaton.decodeDFA( println("Took ${startTime.elapsedNow()} to decode ${deduped.size} trajectories") return deduped -} \ No newline at end of file +} + +fun BAutomaton.decodeDFA( + dec: Map, // Maps unicode characters back to strings because BAutomata uses Unicode + take: Int = 1000, +) = getFiniteStrings(take).map { it.map { dec[it]!! }.joinToString(" ") } \ No newline at end of file diff --git a/src/jvmMain/kotlin/ai/hypergraph/kaliningraph/parsing/JVMBarHillel.kt b/src/jvmMain/kotlin/ai/hypergraph/kaliningraph/parsing/JVMBarHillel.kt index 18169604..855b1bf6 100644 --- a/src/jvmMain/kotlin/ai/hypergraph/kaliningraph/parsing/JVMBarHillel.kt +++ b/src/jvmMain/kotlin/ai/hypergraph/kaliningraph/parsing/JVMBarHillel.kt @@ -5,7 +5,6 @@ import ai.hypergraph.kaliningraph.automata.* import ai.hypergraph.kaliningraph.repair.minimizeFix import ai.hypergraph.kaliningraph.types.* import ai.hypergraph.kaliningraph.types.times -import java.util.concurrent.* import java.util.stream.* import kotlin.streams.* import kotlin.time.Duration.Companion.minutes @@ -165,6 +164,7 @@ fun CFG.jvmIntersectLevFSAP(fsa: FSA, lbc: List = this.lengthBoundsCache ): CFG { // if (fsa.Q.size < 650) throw Exception("FSA size was out of bounds") + if (parikhMap.size < fsa.width + fsa.height) throw Exception("WARNING: Parikh map size exceeded") var clock = TimeSource.Monotonic.markNow() val nts = ConcurrentHashMap.newKeySet<Σᐩ>().apply { add("START") } @@ -200,8 +200,9 @@ fun CFG.jvmIntersectLevFSAP(fsa: FSA, // This is a finer grained filter, but more expensive to compute, so we use the coarse filter first fsa.obeys(it.π1, it.π2, it.π3, parikhMap) }.toList().also { - val fraction = it.size.toDouble() / (fsa.states.size * nonterminals.size * fsa.states.size) - println("Fraction of valid triples: $fraction") + val candidates = (fsa.states.size * nonterminals.size * fsa.states.size) + val fraction = it.size.toDouble() / candidates + println("Fraction of valid triples: ${it.size}/$candidates ≈ $fraction") }.forEach { ct2[it.π1.π1][it.π3][it.π2.π1] = true } println("Precomputed LP constraints in ${ctClock.elapsedNow()}") diff --git a/src/jvmTest/kotlin/ai/hypergraph/kaliningraph/automata/WFSATest.kt b/src/jvmTest/kotlin/ai/hypergraph/kaliningraph/automata/WFSATest.kt index 0596e745..41afbdca 100644 --- a/src/jvmTest/kotlin/ai/hypergraph/kaliningraph/automata/WFSATest.kt +++ b/src/jvmTest/kotlin/ai/hypergraph/kaliningraph/automata/WFSATest.kt @@ -148,6 +148,28 @@ class WFSATest { }.also { println("Decoding ${it.value.size} repairs took ${it.duration}") } } +/* +./gradlew jvmTest --tests "ai.hypergraph.kaliningraph.automata.WFSATest.testRepairMembership" + */ + @Test + fun testRepairMembership() { + val toRepair = "if STRING in NAME : return [ NEWLINE" + val groundTr = "if STRING in NAME : return NUMBER NEWLINE" + println(groundTr in vanillaS2PCFG.language) + val radius = 1 + val fsa = makeLevFSA(toRepair, radius) + val gram = vanillaS2PCFG.run { jvmIntersectLevFSAP(fsa, parikhMap) } + val pt = gram.toPTree() + + pt.toDFA(true)!!.decodeDFA(pt.termDict).toSet() + .also { assertTrue(it.isNotEmpty()) }.onEach { + assertTrue(fsa.recognizes(it) && it in vanillaS2PCFG.language) + println(levenshteinAlign(toRepair, it).paintANSIColors()) + }.also { assertEquals(pt.sampleStrWithoutReplacement().toSet(), it) } + + assertTrue(groundTr in gram.language) + } + /* ./gradlew jvmTest --tests "ai.hypergraph.kaliningraph.automata.WFSATest.testBijection" */