diff --git a/.gitignore b/.gitignore index 2ca8af0..ddc55b7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,25 +1,25 @@ bin/ build/ -javadoc/ jars/* +javadoc/ java_pid* +jrte.code-workspace +regression.* **/.fuse_hidden* **/.~* **/*.bak -*.gz -*.out -*.log* -*.lck -*.dfa -*.pr -*.last .data/ +.doc/ .tmp/ .settings/ .vscode/ .classpath .project .sign-off -TRun.* -regression.* -jrte.code-workspace +*.dfa +*.gz +*.last +*.log* +*.lck +*.out +*.pr diff --git a/build.xml b/build.xml index 7eb7aa6..1989345 100755 --- a/build.xml +++ b/build.xml @@ -39,7 +39,7 @@ - + @@ -48,13 +48,15 @@ - + + + - + @@ -81,7 +83,7 @@ - + @@ -101,12 +103,12 @@ - + - + @@ -145,10 +147,10 @@ - + - + @@ -156,10 +158,10 @@ - - @@ -167,7 +169,7 @@ - + @@ -236,7 +238,7 @@ - + @@ -246,7 +248,7 @@ - + @@ -263,7 +265,7 @@ - + @@ -296,9 +298,9 @@ - + - + @@ -314,6 +316,7 @@ + diff --git a/etc/sh/bench b/etc/sh/bench index 11e8262..9e20f0d 100755 --- a/etc/sh/bench +++ b/etc/sh/bench @@ -20,26 +20,26 @@ while [[ ! "$1" =~ ^(--tsv|[0-9]+) ]]; do vmargs="$vmargs $1"; shift if (($#==0)); then - execHelp + echo "Usage: bench [--tsv] " + echo "Use --tsv for tab-delimited output per etc/benchmarks/ribose.ods" exit 0 fi done format=cat -if [[ "$1" == "--tsv" ]]; then +if [[ "$1" == "--tsv" ]]; then format=strip shift fi if (($#<4)); then - echo "Usage: bench [--tsv] " + echo "Usage: bench [--tsv] ..." 
echo "Use --tsv for tab-delimited output per etc/benchmarks/ribose.ods" exit 1 -fi +fi n=$1 model=$2 -transducer=$3 -input=$4 -shift 4 +input=$3 +shift 3 output="-Djrte.out.enabled=false -Dregex.out.enabled=false" -for i in $(seq $n); do - ${JAVA_HOME}/bin/java $vmargs -Dfile.encoding=UTF-8 $output -cp "$path" com.characterforming.jrte.test.FileRunner --nil "$model" $transducer "$input" "$@"|$format -done +for t in $@; do for i in $(seq $n); do + ${JAVA_HOME}/bin/java $vmargs -Dfile.encoding=UTF-8 $output -cp "$path" com.characterforming.jrte.test.FileRunner --nil "$model" $t "$input"|$format +done; done diff --git a/logging.properties b/logging.properties new file mode 100644 index 0000000..b3dd6a0 --- /dev/null +++ b/logging.properties @@ -0,0 +1 @@ +java.util.logging.SimpleFormatter.format=%1$tF %1$tT [%4$-7s][%2$s]%n%5$s %6$s%n \ No newline at end of file diff --git a/patterns/test/DateExtractor.inr b/patterns/test/DateExtractor.inr index 24feeb7..ac16b39 100644 --- a/patterns/test/DateExtractor.inr +++ b/patterns/test/DateExtractor.inr @@ -1,15 +1,30 @@ white = space | comma | tab | nl; delimiter = '/' | '-' | '.' | '\\'; -yyyy = ((digit-'0') (digit:3)) @@ PasteAny; -md = ('0'? (digit-'0') | '1' ('0'|'1'|'2')) @@ PasteAny; -dd = ('1' (digit-('0'|'1'|'2')) | '2' digit | '3' ('0'|'1')) @@ PasteAny; d0 = (delimiter, select[`~d0`] cut select); d1 = (delimiter, select[`~d1`] cut select); d2 = select[`~d2`] cut select; -qy = `!_dmy`; -yq = `!_ydm`; +yyy = ((digit-'0') (digit:2)) @@ PasteAny; +yyyy = ((digit-'0') (digit:3)) @@ PasteAny; +mdy = yyy (digit, paste signal[`!_mdy`]); +dmy = yyy (digit, paste signal[`!_dmy`]); + +dd = ('1' (digit-('0'|'1'|'2')) | '2' digit | '3' ('0'|'1')) @@ PasteAny; +ymd = ( + ( + ('1', paste) ((digit - ('0'|'1'|'2')), paste) + | ('2', paste) (digit, paste) + | ('3', paste) (('0'|'1'), paste) + ) + (byte, signal[`!_ymd`]) +); + +md = ('0'? 
(digit-'0') | '1' ('0'|'1'|'2')) @@ PasteAny; +ydm = ( + (('0', paste) ((digit-'0'), paste) | ('1', paste) (('0'|'1'|'2'), paste)) + (byte, signal[`!_ydm`]) +); numericDate = ( ( digit+ '/' digit+ '/' digit+ @@ -17,45 +32,46 @@ numericDate = ( | digit+ '.' digit+ '.' digit+ | digit+ '\\' digit+ '\\' digit+ ) - white @@ - ( md d0 dd d1 yyyy (white, d2 signal[`!_mdy`]) - | dd d0 md d1 yyyy (white, d2 signal[`!_dmy`]) - | md d0 md d1 yyyy (white, d2 signal[qy]) - | yyyy d0 md d1 dd (white, d2 signal[`!_ymd`]) - | yyyy d0 dd d1 md (white, d2 signal[`!_ydm`]) - | yyyy d0 md d1 md (white, d2 signal[yq]) + ( md d0 dd d1 mdy + | dd d0 md d1 dmy + | md d0 md d1 dmy + | yyyy d0 md d1 ymd + | yyyy d0 dd d1 ydm + | yyyy d0 md d1 ydm ) ); day = (dd | md) {'st','nd','rd','th'}?; month = ( - ( ({'J','j'} 'an', select[`~?`] paste[`#01`] select) 'uary'? - | ({'F','f'} 'eb', select[`~?`] paste[`#02`] select) 'ruary'? - | ({'M','m'} 'ar', select[`~?`] paste[`#03`] select) 'ch'? - | ({'A','a'} 'pr', select[`~?`] paste[`#04`] select) 'il'? - | ({'M','m'} 'ay', select[`~?`] paste[`#05`] select) - | ({'J','j'} 'un', select[`~?`] paste[`#06`] select) 'e'? - | ({'J','j'} 'ul', select[`~?`] paste[`#07`] select) 'y'? - | ({'A','a'} 'ug', select[`~?`] paste[`#08`] select) 'ust'? - | ({'S','s'} 'ep', select[`~?`] paste[`#09`] select) 'tember'? - | ({'O','o'} 'ct', select[`~?`] paste[`#10`] select) 'ober'? - | ({'N','n'} 'ov', select[`~?`] paste[`#11`] select) 'ember'? - | ({'D','d'} 'ec', select[`~?`] paste[`#12`] select) 'ember'? + ( ({'J','j'} 'an', select[`~?`] paste[`01`] select) 'uary'? + | ({'F','f'} 'eb', select[`~?`] paste[`02`] select) 'ruary'? + | ({'M','m'} 'ar', select[`~?`] paste[`03`] select) 'ch'? + | ({'A','a'} 'pr', select[`~?`] paste[`04`] select) 'il'? + | ({'M','m'} 'ay', select[`~?`] paste[`05`] select) + | ({'J','j'} 'un', select[`~?`] paste[`06`] select) 'e'? + | ({'J','j'} 'ul', select[`~?`] paste[`07`] select) 'y'? 
+ | ({'A','a'} 'ug', select[`~?`] paste[`08`] select) 'ust'? + | ({'S','s'} 'ep', select[`~?`] paste[`09`] select) 'tember'? + | ({'O','o'} 'ct', select[`~?`] paste[`10`] select) 'ober'? + | ({'N','n'} 'ov', select[`~?`] paste[`11`] select) 'ember'? + | ({'D','d'} 'ec', select[`~?`] paste[`12`] select) 'ember'? ) ); -m0 = ((((month$2:alph) - `~?`)$(0,0))* (`~?`, `~d0`)*)*; -m1 = ((((month$2:alph) - `~?`)$(0,0))* (`~?`, `~d1`)*)*; +qd0 = ((((month$2:alph) - `~?`)$(0,0))* (`~?`, `~d0`)*)*; +qd1 = ((((month$2:alph) - `~?`)$(0,0))* (`~?`, `~d1`)*)*; alphanumericDate = ( - day (white, select[`~d0`] cut) white* 'of'? white* - (month @ m1) white+ - yyyy (white, select[`~d2`] cut select signal[`!_dmy`]) - | (month @ m0) white+ - day (white, select[`~d1`] cut select) white* - yyyy (white, select[`~d2`] cut select signal[`!_mdy`]) + day (white, select[`~d0`] cut) white+ + (month @ qd1) white+ + yyy (digit, paste) + (byte, signal[`!_dmy`]) + | (month @ qd0) white+ + day (white, select[`~d1`] cut select) white+ + yyy (digit, paste) + (byte, signal[`!_mdy`]) ); date = (numericDate | alphanumericDate); @@ -68,67 +84,34 @@ conjoint = (a0 & a1) | (a0 & a2) | (a1 & a2); '*** DateExtractor tape alphabets must be disjoint' conjoint:enum; -SignalOrNul = { - ({nul, _mdy}, _mdy), - ({nul, _dmy}, _dmy), - ({nul, _ymd}, _ymd), - ({nul, _ydm}, _ydm), - ({nul, eol}, eol) -}; +prefix = (date$0:pref) & a0; +noise = byte - prefix; -nullify = (AnyOrNul* SignalOrNul*)* @ date; -prefix = utf8 & (date$0:pref); null = ( ( - (nullify$(0 1 2)) - @ ((a0$(0,0))* (a1$(0,,0))* (a2$(0,,,0))*)* - (nul, nul, clear[`~*`] select) - (nul* a0* a1* a2*)* + ((AnyOrNul* @ date)$(0 1 2)) + @ ((a0$(0,0)) ((a1$(0,,0)) (a2$(0,,,0))*)*)* + (nul, nul, signal[`!nil`]) (nul* a0* a1* a2*)* ) ); -noise = (utf8 - (date$0:pref)) - white; -dates = ( +DateExtractor = ( ( - white* noise* + (nil, clear[`~*`] select) noise* ( (numericDate | alphanumericDate) - ( (`_mdy`, out[`~d2` SLASH `~d0` SLASH `~d1` SPACE] clear[`~*`] select) 
- | (`_dmy`, out[`~d2` SLASH `~d1` SLASH `~d0` SPACE] clear[`~*`] select) - | (`_ymd`, out[`~d0` SLASH `~d1` SLASH `~d2` SPACE] clear[`~*`] select) - | (`_ydm`, out[`~d0` SLASH `~d2` SLASH `~d1` SPACE] clear[`~*`] select) + ( + (`_dmy`, d2 out[`~d2` solidus `~d1` solidus `~d0` space] signal[`!nil`]) + | (`_mdy`, d2 out[`~d2` solidus `~d0` solidus `~d1` space] signal[`!nil`]) + | (`_ymd`, d2 out[`~d0` solidus `~d1` solidus `~d2` space] signal[`!nil`]) + | (`_ydm`, d2 out[`~d0` solidus `~d2` solidus `~d1` space] signal[`!nil`]) ) | null - )* + ) )* - (eos, out[NL]) -); - -map = { - (NL, `\n`), - (SLASH, `/`), - (SPACE, ` `), - (`#0`, `0`), - (`#01`, `01`), - (`#02`, `02`), - (`#03`, `03`), - (`#04`, `04`), - (`#05`, `05`), - (`#06`, `06`), - (`#07`, `07`), - (`#08`, `08`), - (`#09`, `09`), - (`#10`, `10`), - (`#11`, `11`), - (`#12`, `12`) -}; - -identity = ((dates$2:alph) - (map$0))$(0,0); - -DateExtractor = (nil, clear[`~*`] select)? ( - dates @ (identity* map*)* + (eos, out[nl]) ):dfamin; -DateExtractor$(0,1 2):prsseq `build/automata/DateExtractor.pr`; DateExtractor:save `build/automata/DateExtractor.dfa`; +DateExtractor$(0,1 2):prsseq `build/automata/DateExtractor.pr`; diff --git a/reference/ASCII.PNG b/reference/ASCII.PNG new file mode 100644 index 0000000..7d5485c Binary files /dev/null and b/reference/ASCII.PNG differ diff --git a/ribose b/ribose index 10d2760..4f3fff3 100755 --- a/ribose +++ b/ribose @@ -21,7 +21,7 @@ execCompile() { fi fi if (($#==4)); then - $JAVA -ea -cp $path $vmargs com.characterforming.ribose.Ribose compile "$@" + $JAVA -ea -cp $path $vmargs com.characterforming.ribose.Ribose compile "$@" elif (($#>0)); then $JAVA -ea -cp $path $vmargs com.characterforming.ribose.Ribose compile --target com.characterforming.ribose.base.SimpleTarget "$@" else diff --git a/src/com/characterforming/jrte/engine/Assembler.java b/src/com/characterforming/jrte/engine/Assembler.java new file mode 100644 index 0000000..153063a --- /dev/null +++ 
b/src/com/characterforming/jrte/engine/Assembler.java @@ -0,0 +1,360 @@ +/*** + * Ribose is a recursive transduction engine for Java + * + * Copyright (C) 2011,2022 Kim Briggs + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received copies of the GNU General Public License + * and GNU Lesser Public License along with this program. See + * LICENSE-gpl-3.0. If not, see + * . + */ + +package com.characterforming.jrte.engine; + +import java.nio.charset.CharacterCodingException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map.Entry; + +import com.characterforming.jrte.engine.Model.Argument; +import com.characterforming.ribose.base.Codec; +import com.characterforming.ribose.base.Signal; + +final class Assembler { + static final int NUL = 0; + static final int NIL = 1; + + record Assembly(int[] inputEquivalents, int[][][] transitions, int[] effects) { + @Override + public int hashCode() { return 0; } + + @Override + public boolean equals(Object other) { return false; } + + @Override + public String toString() { return ""; } + } + + record Fst(int[] inputEquivalenceIndex, HashSet[] inputEquivalenceSets, State[] states, int[][][] matrix) { + @Override + public int hashCode() { return 0; } + + @Override + public boolean equals(Object other) { return false; } + + @Override + public String toString() { return ""; } + } + + private final ModelCompiler compiler; + private final int msumOrdinal; + private final 
int mproductOrdinal; + private final int mscanOrdinal; + private boolean instrument; + private HashSet[] inputEquivalenceSets; + private HashMap effectVectorMap; + private ArrayList effectVectors; + + Assembler(ModelCompiler compiler) + throws CharacterCodingException { + this.compiler = compiler; + this.msumOrdinal = this.compiler.getEffectorOrdinal(Codec.encode("msum")); + this.mproductOrdinal = this.compiler.getEffectorOrdinal(Codec.encode("mproduct")); + this.mscanOrdinal = this.compiler.getEffectorOrdinal(Codec.encode("mscan")); + this.instrument = ModelCompiler.MIN_SUM_SIZE >= 0 && ModelCompiler.MIN_PRODUCT_LENGTH >= 0; + this.reset(); + } + + private void reset() { + this.inputEquivalenceSets = null; + this.effectVectorMap = null; + this.effectVectors = null; + } + + Assembly assemble(final int[][][] transitionMatrix, HashMap effectorVectorMap) { + this.reset(); + + // compute byte|signal input equivalence classes and index + Fst fst = this.reduceEquivalentInputs(transitionMatrix); + this.inputEquivalenceSets = fst.inputEquivalenceSets(); + int nInputs = fst.inputEquivalenceSets.length; + int nStates = fst.states().length; + + // construct incoming effect vector enumeration + this.effectVectors = new ArrayList<>(effectorVectorMap.size()); + for (int i = 0; i < effectorVectorMap.size(); i++) + this.effectVectors.add(null); + for (Entry entry : effectorVectorMap.entrySet()) + this.effectVectors.set(entry.getValue(), entry.getKey().getData()); + final boolean[] markedEffects = new boolean[nStates * nInputs]; + Arrays.fill(markedEffects, !this.instrument); + this.effectVectorMap = effectorVectorMap; + markedEffects[0] = true; + +// inject msum & mscan effectors + if (this.instrument) { + final boolean[] walkedStates = new boolean[nStates]; + ArrayList walkResult = new ArrayList<>(32); + for (State state : fst.states()) { + for (int eq = 0; eq < nInputs; eq++) { + int[] transition = fst.matrix()[state.ordinal][eq]; + if (state.isNotInstrumented()) { + State 
nextState = fst.states()[transition[0]]; + if (nextState.isScanState()) { + assert nextState.outboundByte >= 0 && nextState.outboundByte< Signal.NUL.signal(); + transition[1] = this.injectScanEffector(nextState.outboundByte, + fst.matrix(), state, eq); + } else if (nextState.isSumState()) { + transition[1] = this.injectSumEffector(nextState.idempotentBytes, + fst.matrix(), state, eq); + } else if (nextState.isProductState() + && this.walk(nextState, walkedStates, fst, walkResult).get(0) >= ModelCompiler.MIN_PRODUCT_LENGTH) { + assert nextState.idempotentCount >= 255; + transition[1] = this.injectProductEffector(this.product(walkResult), walkResult.get(1), + fst.matrix(), state, eq); + } + } + if (transition[1] < 0) + markedEffects[-1 * transition[1]] = true; + } + } + + // eliminate transitional states mapped to product vectors + int[] retainedStateMap = new int[nStates]; + Arrays.fill(retainedStateMap, -1); + boolean[] markedStates = new boolean[nStates]; + final int marked = this.mark(fst.matrix(), markedStates); + int retainedState = -1; + for (int state = 0; state < fst.matrix().length; state++) + if (markedStates[state]) { + retainedStateMap[state] = ++retainedState; + fst.matrix()[retainedState] = fst.matrix()[state]; + fst.states()[retainedState] = fst.states()[state]; + } + nStates = ++retainedState; + assert marked == nStates; + for (int state = 0; state < nStates; state++) { + int[][] transitions = fst.matrix()[state]; + for (int eq = 0; eq < nInputs; eq++) { + assert retainedStateMap[transitions[eq][0]] >= 0 + && retainedStateMap[transitions[eq][0]] < nStates; + transitions[eq][0] = retainedStateMap[transitions[eq][0]]; + } + } + } + + // lay out effect vectors and construct vector offset index + int size = 0, offset = 0; + for (int v = 0; v < this.effectVectors.size(); v++) + if (markedEffects[v]) + size += this.effectVectors.get(v).length; + int[] effectVectorArray = new int[size]; + int[] vectorOffsetMap = new int[this.effectVectors.size()]; + for 
(int v = 0; v < this.effectVectors.size(); v++) + if (markedEffects[v]) { + final int length = this.effectVectors.get(v).length; + System.arraycopy(effectVectors.get(v), 0, + effectVectorArray, offset, length); + vectorOffsetMap[v] = offset; + offset += length; + } + assert effectVectorArray.length > 0 && effectVectorArray[0] == NUL; + assert effectVectorArray[effectVectorArray.length - 1] == NUL; + + // rewrite effect vector ordinals in kernel matrix with offsets + for (int state = 0; state < nStates; state++) + for (int eq = 0; eq < nInputs; eq++) + if (fst.matrix()[state][eq][1] < 0) { + assert -1 * fst.matrix()[state][eq][1] < vectorOffsetMap.length + : String.format("state:%d; eq:%d; action:%d; length:%d", + state, eq, fst.matrix()[state][eq][1], vectorOffsetMap.length); + fst.matrix()[state][eq][1] = + -1 * vectorOffsetMap[-1 * fst.matrix()[state][eq][1]]; + } + + // reduce kernel matrix and input equivalence modulo input product vectorization + int[] finalEquivalents = null; + int[][][] finalMatrix = null; + if (this.instrument) { + int[][][] transposedMatrix = new int[nInputs][nStates][2]; + for (int eq = 0; eq < nInputs; eq++) + for (int state = 0; state < nStates; state++) + transposedMatrix[eq][state] = fst.matrix()[state][eq]; + Fst finalFst = this.reduceEquivalentInputs(transposedMatrix); + HashSet[] equivalentInputs = this.allocateHashSetArray(finalFst.inputEquivalenceSets().length); + finalEquivalents = new int[this.compiler.getSignalLimit()]; + for (int eq = 0; eq < finalFst.inputEquivalenceSets().length; eq++) { + equivalentInputs[eq] = new HashSet<>(); + for (int e : finalFst.inputEquivalenceSets()[eq]) + equivalentInputs[eq].addAll(this.inputEquivalenceSets[e]); + for (int token : equivalentInputs[eq]) + finalEquivalents[token] = eq; + } + finalMatrix = new int[nStates][nInputs][2]; + for (int state = 0; state < nStates; state++) + finalMatrix[state] = Arrays.copyOf(finalFst.matrix()[state], finalFst.matrix()[state].length); + } else { + 
finalEquivalents = fst.inputEquivalenceIndex(); + finalMatrix = fst.matrix(); + } + + return new Assembly(finalEquivalents, finalMatrix, effectVectorArray); + } + + private Fst reduceEquivalentInputs(int[][][] transitionMatrix) { + // factor matrix modulo input equivalence + int[] index = new int[transitionMatrix.length]; + final HashMap> equivalenceSets = new HashMap<>((5 * index.length) >> 2); + for (int token = 0; token < transitionMatrix.length; token++) { + assert transitionMatrix[token].length == transitionMatrix[0].length; + final IntsArray transitions = new IntsArray(transitionMatrix[token]); + HashSet equivalentInputOrdinals = equivalenceSets.computeIfAbsent( + transitions, absent -> new HashSet<>(10)); + if (equivalentInputOrdinals.isEmpty()) { + equivalenceSets.put(transitions, equivalentInputOrdinals); + } + equivalentInputOrdinals.add(token); + } + + // group equivalent inputs + int equivalenceIndex = 0; + final int nInputs = equivalenceSets.size(); + HashSet[] equiv = this.allocateHashSetArray(nInputs); + for (HashSet equivalents : equivalenceSets.values()) { + for (int token : equivalents) + index[token] = equivalenceIndex; + equiv[equivalenceIndex++] = equivalents; + } + + // construct transposed states x input groups transition matrix + final int nStates = transitionMatrix[0].length; + int[][][] matrix = new int[nStates][nInputs][2]; + State[] states = new State[nStates]; + for (int state = 0; state < nStates; state++) { + for (int eq = 0; eq < nInputs; eq++) + matrix[state][eq] = transitionMatrix[equiv[eq].iterator().next().intValue()][state]; + states[state] = new State(state, matrix[state], + equiv, this.compiler.getSignalLimit()); + } + + return new Fst(index, equiv, states, matrix); + } + + private int mark(int[][][] matrix, boolean[] markedStates) { + Arrays.fill(markedStates, false); + StateStack stack = new StateStack(matrix.length); + stack.push(0); + int marked = 0; + while (!stack.isEmpty()) { + int state = stack.pop(); + 
markedStates[state] = true; + for (int[] transition : matrix[state]) + if (!markedStates[transition[0]]) + stack.push(transition[0]); + ++marked; + } + return marked; + } + + private int injectEffector(int action, int effector, int parameter) { + if (action == NIL) + return Transducer.action(effector, parameter); + int[] key = null; + if (action > 0x10000) { + key = new int[] { + -1 * Transducer.action(action), + Transducer.parameter(action), + -1 * effector, parameter, 0 }; + } else if (action > NUL) { + key = new int[] { + action, -1 * effector, parameter, 0 }; + } else if (action < NUL) { + key = this.effectVectors.get(-1 * action); + key = Arrays.copyOf(key, key.length + 2); + key[key.length - 3] = -1 * effector; + key[key.length - 2] = parameter; + key[key.length - 1] = 0; + } else key = null; + if (key != null) { + action = -1 * this.effectVectorMap.computeIfAbsent( + new Ints(key), absent -> this.effectVectorMap.size()); + if (this.effectVectors.size() < this.effectVectorMap.size()) + this.effectVectors.add(key); + } + return action; + } + + private int injectScanEffector(int idempotentByte, int[][][] matrix, State state, int eq) { + assert idempotentByte >= 0 && idempotentByte < 256; + byte scanByte = (byte)(idempotentByte & 0xff); + Argument argument = new Argument(-1, + new BytesArray(new byte[][] { { scanByte } })); + return this.injectEffector(matrix[state.ordinal][eq][1], mscanOrdinal, + this.compiler.compileParameters(mscanOrdinal, argument)); + } + + private int injectSumEffector(long[] idempotentBitmap, int[][][] matrix, State state, int eq) { + int selfCount = 0; + byte[] selfBytes = new byte[256]; + for (int word = 0; word < idempotentBitmap.length; word++) + for (int bit = 0; bit < 64; bit++) + if (0 != ((1L << bit) & idempotentBitmap[word])) + selfBytes[selfCount++] = (byte)(64 * word + bit); + Argument argument = new Argument(-1, + new BytesArray(new byte[][] { Arrays.copyOf(selfBytes, selfCount) })); + return 
this.injectEffector(matrix[state.ordinal][eq][1], msumOrdinal, + this.compiler.compileParameters(msumOrdinal, argument)); + } + + private int injectProductEffector(byte[] product, int endpoint, int[][][] matrix, State state, int eq) { + Argument argument = new Argument(-1, + new BytesArray(new byte[][] { Arrays.copyOf(product, product.length) })); + matrix[state.ordinal][eq][0] = endpoint; + return this.injectEffector(matrix[state.ordinal][eq][1], mproductOrdinal, + this.compiler.compileParameters(mproductOrdinal, argument)); + } + + private ArrayList walk(State nextState, boolean[] walkedStates, Fst fst, ArrayList walkResult) { + walkResult.clear(); + walkResult.add(0); walkResult.add(-1); + ArrayList walkStates = new ArrayList<>(16); + Arrays.fill(walkedStates, false); + while (nextState.isProductState() && !walkedStates[nextState.ordinal]) { + assert nextState.outboundByte == (nextState.outboundByte & 0xff); + walkResult.add(nextState.outboundByte); + walkedStates[nextState.ordinal] = true; + walkStates.add(nextState.ordinal); + int eq = fst.inputEquivalenceIndex()[nextState.outboundByte]; + nextState = fst.states[fst.matrix[nextState.ordinal][eq][0]]; + } + if (walkStates.size() > 1) { + walkResult.set(0, walkResult.size() - 2); + walkResult.set(1, walkStates.get(walkStates.size() - 2)); + } + return walkResult; + } + + private byte[] product(ArrayList walkResult) { + byte[] p = new byte[walkResult.size() - 3]; + for (int i = 0; i < p.length; i++) + p[i] = (byte)(walkResult.get(i + 2).intValue() & 0xff); + return p; + } + + @SuppressWarnings("unchecked") + private HashSet[] allocateHashSetArray(int size) { + return (HashSet[])new HashSet[size]; + } +} diff --git a/src/com/characterforming/jrte/engine/Chain.java b/src/com/characterforming/jrte/engine/Chain.java index 1a0a611..09d3ab8 100644 --- a/src/com/characterforming/jrte/engine/Chain.java +++ b/src/com/characterforming/jrte/engine/Chain.java @@ -1,18 +1,18 @@ /*** * Ribose is a recursive transduction engine 
for Java - * + * * Copyright (C) 2011,2022 Kim Briggs - * + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program (LICENSE-gpl-3.0). If not, see * . @@ -38,7 +38,7 @@ int getOutS() { } boolean isEmpty() { - return this.effectVector.length == 0; + return this.effectVector.length <= 1; } boolean isEffector() { diff --git a/src/com/characterforming/jrte/engine/Model.java b/src/com/characterforming/jrte/engine/Model.java index 9df5199..c2244e9 100644 --- a/src/com/characterforming/jrte/engine/Model.java +++ b/src/com/characterforming/jrte/engine/Model.java @@ -98,7 +98,7 @@ public record Argument(int transducerOrdinal, BytesArray tokens) {} protected boolean deleteOnClose; protected RandomAccessFile io; protected File modelPath; - + private ArrayList> effectorParametersMaps; private Transductor proxyTransductor; @@ -111,7 +111,7 @@ public Model() { this.targetClass = this.getClass(); this.deleteOnClose = false; } - + /** Live compiler model loading to compile new target model */ protected Model(final File modelPath, Class targetClass, TargetMode targetMode) throws ModelException { this.targetClass = targetClass; @@ -241,7 +241,7 @@ public ITransduction transduction(ITransductor trex) { /** * Commit model to persistent store - * + * * @param effectorParameters the union of all effector parameters * @return true if model saved */ @@ -395,7 +395,7 @@ public boolean map(PrintStream mapWriter) { signalIndex[m.getValue() - 
Base.RTE_SIGNAL_BASE] = m.getKey(); } for (int i = 0; i < signalIndex.length; i++) { - mapWriter.printf("%1$6d signal %2$s%n", i + Base.RTE_SIGNAL_BASE, + mapWriter.printf("%1$6d signal %2$s%n", i + Base.RTE_SIGNAL_BASE, signalIndex[i].asString()); } Bytes[] fieldIndex = new Bytes[this.fieldOrdinalMap.size()]; @@ -407,7 +407,7 @@ public boolean map(PrintStream mapWriter) { transducerIndex[m.getValue()] = m.getKey(); } for (int transducerOrdinal = 0; transducerOrdinal < transducerIndex.length; transducerOrdinal++) { - mapWriter.printf("%1$6d transducer %2$s%n", transducerOrdinal, + mapWriter.printf("%1$6d transducer %2$s%n", transducerOrdinal, transducerIndex[transducerOrdinal].asString()); Map fieldMap = this.transducerFieldMaps.get(transducerOrdinal); Bytes[] fields = new Bytes[fieldMap.size()]; @@ -455,7 +455,7 @@ protected boolean checkTargetEffectors(ITarget target, IEffector[] boundFx) { boolean checked = true; if (boundFx.length != this.proxyEffectors.length) { this.rtcLogger.log(Level.SEVERE, () -> String.format( - "Proxy effector count (%1$d) does not match target %3$s effector count (%2$d)", + "Proxy effector count (%1$d) does not match target %3$s effector count (%2$d)", this.proxyEffectors.length, boundFx.length, target.getName())); checked = false; } else { @@ -480,7 +480,7 @@ protected Argument[][] compileModelParameters(List errors) throws Effect this.proxyEffectors[effectorOrdinal].setOutput(this.proxyTransductor); if (this.proxyEffectors[effectorOrdinal] instanceof BaseParameterizedEffector parameterizedEffector) { if (parametersMap != null) { - assert parametersMap != null: String.format("Effector parameters map is null for %1$s effector", + assert parametersMap != null: String.format("Effector parameters map is null for %1$s effector", parameterizedEffector.getName()); Argument[] arguments = new Argument[parametersMap.size()]; IToken[][] tokens = new IToken[arguments.length][]; @@ -604,7 +604,7 @@ protected int getLocalField(int transducerOrdinal, 
int fieldOrdinal) { protected int addSignal(Bytes signalName) { final int mapSize = this.signalOrdinalMap.size(); - return this.signalOrdinalMap.computeIfAbsent(signalName, + return this.signalOrdinalMap.computeIfAbsent(signalName, absent -> Base.RTE_SIGNAL_BASE + mapSize); } @@ -649,7 +649,7 @@ protected int readInt() throws ModelException { return this.io.readInt(); } catch (final IOException e) { throw new ModelException(String.format( - "Model.readInt() IOException reading int at file position %1$d", + "Model.readInt() IOException reading int at file position %1$d", this.getSafeFilePosition()), e); } } @@ -659,7 +659,7 @@ protected long readLong() throws ModelException { return this.io.readLong(); } catch (final IOException e) { throw new ModelException(String.format( - "Model.readLong() IOException reading long at file position %1$d", + "Model.readLong() IOException reading long at file position %1$d", this.getSafeFilePosition()), e); } } @@ -697,7 +697,7 @@ protected byte[] readBytes() throws ModelException { } if (read >= 0 && read != bytes.length) { throw new ModelException(String.format( - "Model.readBytes expected %1$d bytes at file position %2$d but read only %3$d", + "Model.readBytes expected %1$d bytes at file position %2$d but read only %3$d", bytes.length, position, read)); } return bytes; @@ -1014,23 +1014,23 @@ protected void writeString(final String s) throws ModelException, CharacterCodin protected void writeTransitionMatrix(final int[][][] matrix) throws ModelException { final long position = this.getSafeFilePosition(); try { - final int nInputs = matrix.length; - final int nStates = nInputs > 0 ? matrix[0].length : 0; + final int nStates = matrix.length; + final int nInputs = nStates > 0 ? 
matrix[0].length : 0; this.io.writeInt(nStates); this.io.writeInt(nInputs); for (int state = 0; state < nStates; state++) { int transitions = 0; for (int input = 0; input < nInputs; input++) { - if (matrix[input][state][1] != 0) { + if (matrix[state][input][1] != 0) { transitions++; } } this.io.writeInt(transitions); for (int input = 0; input < nInputs; input++) { - if (matrix[input][state][1] != 0) { + if (matrix[state][input][1] != 0) { this.io.writeInt(input); - this.io.writeInt(matrix[input][state][0]); - this.io.writeInt(matrix[input][state][1]); + this.io.writeInt(matrix[state][input][0]); + this.io.writeInt(matrix[state][input][1]); } } } diff --git a/src/com/characterforming/jrte/engine/ModelCompiler.java b/src/com/characterforming/jrte/engine/ModelCompiler.java index 4f69410..d299126 100755 --- a/src/com/characterforming/jrte/engine/ModelCompiler.java +++ b/src/com/characterforming/jrte/engine/ModelCompiler.java @@ -35,7 +35,6 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Map.Entry; import java.util.logging.Level; import java.util.logging.Logger; @@ -62,11 +61,14 @@ public final class ModelCompiler extends Model implements ITarget, AutoCloseable private static final long VERSION = 210; private static final String AUTOMATON = "Automaton"; private static final String AMBIGUOUS_STATE_MESSAGE = "%1$s: Ambiguous state %2$d"; - private static final int MIN_PRODUCT_LENGTH = Integer.parseInt(System.getProperty("ribose.product.threshold", "12")); - private static final int MIN_SUM_SIZE = Integer.parseInt(System.getProperty("ribose.sum.threshold", "128")); - + + static final int MIN_PRODUCT_LENGTH = Integer.parseInt(System.getProperty("ribose.product.threshold", "10")); + static final int MIN_SUM_SIZE = Integer.parseInt(System.getProperty("ribose.sum.threshold", "128")); + static final int MIN_SCAN_SIZE = 255; + private Bytes transducerName; - private int transducerOrdinal; + int 
transducerOrdinal; + private final Assembler assembler; private ITransductor transductor; private HashMap inputStateMap; private HashMap> stateTransitionMap; @@ -79,7 +81,7 @@ public final class ModelCompiler extends Model implements ITarget, AutoCloseable private int[] inputEquivalenceIndex; private int[][][] kernelMatrix; private int transition; - + record Transition (int from, int to, int tape, Bytes symbol, boolean isFinal) {} @@ -174,13 +176,15 @@ public int invoke() throws EffectorException { public ModelCompiler() { super(); + this.assembler = null; } private ModelCompiler(final File modelPath, Class targetClass, TargetMode targetMode) - throws ModelException { + throws ModelException, CharacterCodingException { super(modelPath, targetClass, targetMode); assert super.modelPath.exists(); assert super.targetMode.isLive(); + this.assembler = new Assembler(this); this.transductor = null; this.inputStateMap = null; this.stateTransitionMap = null; @@ -354,7 +358,7 @@ private boolean createModelFile(File riboseModelFile, Logger rtcLogger) { } catch (IOException e) { rtcLogger.log(Level.SEVERE, e, () -> String.format("Exception caught creating model file : %1$s", riboseModelFile.getPath())); - } + } return false; } @@ -390,8 +394,10 @@ private boolean compileTransducer(File inrFile) { assert !this.transductor.status().isRunnable(); this.saveTransducer(); } catch (Exception e) { - this.addError(String.format("%1$s: Failed to compile '%2$s'; %3$s", - this.transducerName, inrFile.getPath(), e.getMessage())); + String msg = String.format("%1$s: Failed to compile '%2$s'", + this.transducerName, inrFile.getPath()); + this.rtcLogger.log(Level.SEVERE, msg, e); + this.addError(msg); } assert this.transductor.status().isStopped(); return true; @@ -438,7 +444,7 @@ private boolean validate() { e.getKey().asString())); } } - + if (super.transducerOrdinalMap.isEmpty()) { this.addError("Error: The model is empty"); } @@ -452,12 +458,12 @@ void saveTransducer() throws 
ModelException, CharacterCodingException { fields[e.getValue()] = e.getKey(); } super.writeTransducer(this.transducerName, this.transducerOrdinal, fields, this.inputEquivalenceIndex, this.kernelMatrix, this.effectorVectors); - int nInputs = this.kernelMatrix.length; - int nStates = this.kernelMatrix[0].length; + int nStates = this.kernelMatrix.length; + int nInputs = this.kernelMatrix[0].length; int nTransitions = 0; - for (int input = 0; input < nInputs; input++) - for (int state = 0; state < nStates; state++) - if (this.kernelMatrix[input][state][1] != 0) + for (int input = 0; input < nInputs; input++) + for (int state = 0; state < nStates; state++) + if (this.kernelMatrix[state][input][1] != 0) nTransitions++; final int transitionCount = nTransitions; final int fieldCount = super.getFieldCount(this.transducerOrdinal); @@ -469,7 +475,7 @@ void saveTransducer() throws ModelException, CharacterCodingException { System.out.println(info); } - private String getTransducerName() { + String getTransducerName() { return this.transducerName.asString(); } @@ -585,7 +591,10 @@ void putAutomaton() throws CharacterCodingException { } } - this.factor(transitionMatrix); + Assembler.Assembly assembly = this.assembler.assemble(transitionMatrix, effectorVectorMap); + this.inputEquivalenceIndex = assembly.inputEquivalents(); + this.kernelMatrix = assembly.transitions(); + this.effectorVectors = assembly.effects(); } private boolean hasErrors() { @@ -602,437 +611,6 @@ private List getErrors() { return this.errors; } - private void factor(final int[][][] transitionMatrix) throws CharacterCodingException { - // factor matrix modulo input equivalence - final HashMap> rowEquivalenceMap = - new HashMap<>((5 * transitionMatrix.length) >> 2); - for (int token = 0; token < transitionMatrix.length; token++) { - assert transitionMatrix[token].length == transitionMatrix[0].length; - final IntsArray row = new IntsArray(transitionMatrix[token]); - HashSet equivalentInputOrdinals = 
rowEquivalenceMap.get(row); - if (equivalentInputOrdinals == null) { - equivalentInputOrdinals = new HashSet<>(16); - rowEquivalenceMap.put(row, equivalentInputOrdinals); - } - equivalentInputOrdinals.add(token); - } - // construct kernel matrix from input equivalents - this.inputEquivalenceIndex = new int[transitionMatrix.length]; - this.kernelMatrix = new int[rowEquivalenceMap.size()][][]; - int equivalenceIndex = 0; - for (final Map.Entry> entry : rowEquivalenceMap.entrySet()) { - final IntsArray row = entry.getKey(); - for (final int inputOrdinal : entry.getValue()) { - this.inputEquivalenceIndex[inputOrdinal] = equivalenceIndex; - } - this.kernelMatrix[equivalenceIndex++] = row.getInts(); - } - // instrument sum and product traps, compress kernel matrix and extract effect vectors - final int nInputs = equivalenceIndex; - final int nStates = transitionMatrix[0].length; - final int nulSignal = Signal.NUL.signal(); - final int nulEquivalent = this.inputEquivalenceIndex[nulSignal]; - final int msumOrdinal = super.getEffectorOrdinal(Codec.encode("msum")); - final int mproductOrdinal = super.getEffectorOrdinal(Codec.encode("mproduct")); - final int mscanOrdinal = super.getEffectorOrdinal(Codec.encode("mscan")); - final int[][] msumStateEffects = new int[nStates][]; - final int[][] mproductStateEffects = new int[nStates][]; - final int[][] mproductEndpoints = new int[nStates][2]; - final int[][] mscanStateEffects = new int[nStates][]; - Arrays.fill(msumStateEffects, null); - Arrays.fill(mproductStateEffects, null); - Arrays.fill(mscanStateEffects, null); - // msum instrumentation - byte[][] equivalentInputs = new byte[nInputs][transitionMatrix.length]; - int[] equivalenceLengths = new int[nInputs]; - Arrays.fill(equivalenceLengths, 0); - for (int token = 0; token < nulSignal; token++) { - int input = this.inputEquivalenceIndex[token]; - equivalentInputs[input][equivalenceLengths[input]++] = (byte)token; - } - for (int state = 0; state < nStates; state++) { - int 
selfIndex = 0, selfCount = 0; - byte[] selfBytes = new byte[nulSignal]; - Arrays.fill(selfBytes, (byte)0); - boolean[] allBytes = new boolean[nulSignal]; - Arrays.fill(allBytes, false); - for (int input = 0; input < nInputs; input++) { - int[] cell = this.kernelMatrix[input][state]; - if (cell[0] == state && cell[1] == 1) { - for (int index = 0; index < equivalenceLengths[input]; index++) { - selfBytes[selfIndex] = equivalentInputs[input][index]; - allBytes[Byte.toUnsignedInt(selfBytes[selfIndex])] = true; - selfIndex++; - } - selfCount += equivalenceLengths[input]; - } - } - if (selfCount >= 255) { - assert mscanStateEffects[state] == null; - for (int token = 0; token < nulSignal; token++) { - if (!allBytes[token]) { - Argument argument = new Argument(-1, new BytesArray(new byte[][] { { (byte) token } })); - int mscanParameterIndex = super.compileParameters(mscanOrdinal, argument); - mscanStateEffects[state] = new int[] { -1 * mscanOrdinal, mscanParameterIndex, 0 }; - break; - } - } - } else if (selfCount > ModelCompiler.MIN_SUM_SIZE) { - assert msumStateEffects[state] == null; - Argument argument = new Argument(-1, new BytesArray(new byte[][] { Arrays.copyOf(selfBytes, selfCount) })); - int msumParameterIndex = super.compileParameters(msumOrdinal, argument); - msumStateEffects[state] = new int[] { -1 * msumOrdinal, msumParameterIndex, 0 }; - } - } - // mproduct instrumentation - int[] inputEquivalentCardinality = new int[nInputs]; - int[] inputEquivalenceToken = new int[nInputs]; - Arrays.fill(inputEquivalenceToken, -1); - for (int token = 0; token < this.inputEquivalenceIndex.length; token++) { - int equivalent = this.inputEquivalenceIndex[token]; - if (++inputEquivalentCardinality[equivalent] == 1 - && inputEquivalenceToken[equivalent] == -1 - && token < nulSignal) { - inputEquivalenceToken[equivalent] = token; - } else { - inputEquivalenceToken[equivalent] = -2; - } - assert (token >= nulSignal) - || (inputEquivalentCardinality[equivalent] == 1) == 
(inputEquivalenceToken[equivalent] == token); - assert (token < nulSignal) || (inputEquivalenceToken[equivalent] < 0); - } - int[] exitEquivalent = new int[nStates]; - for (int state = 0; state < nStates; state++) { - exitEquivalent[state] = -1; - for (int input = 0; input < nInputs; input++) { - if (input != nulEquivalent) { - int[] cell = this.kernelMatrix[input][state]; - if (cell[0] != state) { - if (cell[1] == 1 && exitEquivalent[state] < 0 - && inputEquivalenceToken[input] >= 0) { - assert exitEquivalent[state] == -1; - exitEquivalent[state] = input; - } else { - exitEquivalent[state] = -1; - break; - } - } else if (cell[1] != 0) { - exitEquivalent[state] = -1; - break; - } - } - } - assert (exitEquivalent[state] < 0) - || (this.kernelMatrix[exitEquivalent[state]][state][0] != state - && this.kernelMatrix[exitEquivalent[state]][state][1] == 1); - } - assertKernelSanity(); - boolean[] walkedStates = new boolean[nStates]; - Arrays.fill(walkedStates, false); - StateStack walkStack = new StateStack(nStates); - byte[] walkedBytes = new byte[nStates]; - int[] walkResult = new int[] { 0, 0, 0 }; - walkedStates[0] = true; - walkStack.push(0); - while (walkStack.size() > 0) { - int fromState = walkStack.pop(); - for (int input = 0; input < nInputs; input++) { - int toState = this.kernelMatrix[input][fromState][0]; - if (exitEquivalent[toState] >= 0) { - assert inputEquivalenceToken[exitEquivalent[toState]] >= 0; - assert inputEquivalenceToken[exitEquivalent[toState]] < nulSignal; - int nextState = this.walk(toState, walkedBytes, walkResult, exitEquivalent, inputEquivalenceToken); - if (walkResult[0] > ModelCompiler.MIN_PRODUCT_LENGTH) { - assert this.inputEquivalenceIndex[walkedBytes[0]] == exitEquivalent[toState]; - if (mproductStateEffects[toState] == null) { - byte[][] product = new byte[][] { Arrays.copyOfRange(walkedBytes, 0, walkResult[0]) }; - int effects = super.compileParameters(mproductOrdinal, new Argument(-1, new BytesArray(product))); - 
mproductStateEffects[toState] = new int[] { -1 * mproductOrdinal, effects, 0 }; - mproductEndpoints[toState][0] = nextState; - mproductEndpoints[toState][1] = walkResult[2]; - } else { - assert mproductEndpoints[toState][0] == nextState; - assert mproductEndpoints[toState][1] == walkResult[2]; - } - assert this.inputEquivalenceIndex[walkedBytes[walkResult[0] - 1]] == walkResult[2]; - toState = nextState; - } - if (walkResult[0] > 0) { - int state = this.kernelMatrix[input][fromState][0]; - for (int i = 0; i < walkResult[0] - 1; i++) { - assert exitEquivalent[state] >= 0; - assert walkedBytes[i] == inputEquivalenceToken[exitEquivalent[state]]; - assert this.inputEquivalenceIndex[walkedBytes[i]] == exitEquivalent[state]; - int[] cell = this.kernelMatrix[exitEquivalent[state]][state]; - assert cell[1] == 1; - state = cell[0]; - } - assert exitEquivalent[state] == walkResult[2]; - assert this.inputEquivalenceIndex[walkedBytes[walkResult[0] - 1]] == exitEquivalent[state]; - assert this.kernelMatrix[exitEquivalent[state]][state][1] == 1; - assert nextState == this.kernelMatrix[exitEquivalent[state]][state][0]; - } - } - if (!walkedStates[toState]) { - walkedStates[toState] = true; - walkStack.push(toState); - } - } - } - // effect vector construction - assertKernelSanity(); - int vectorCount = this.effectorVectorMap.size(); - int[][] effectVectors = new int[vectorCount > 4 ? 
(vectorCount * 3) >> 1 : 5][]; - for (Entry entry : this.effectorVectorMap.entrySet()) { - int[] vector = entry.getKey().getData(); - effectVectors[entry.getValue()] = vector; - } - effectVectors = instrumentSumVectors(msumOrdinal, effectVectors, msumStateEffects); - assertKernelSanity(); - effectVectors = instrumentSumVectors(mscanOrdinal, effectVectors, mscanStateEffects); - assertKernelSanity(); - effectVectors = instrumentProductVectors(mproductOrdinal, effectVectors, mproductStateEffects, mproductEndpoints); - assertKernelSanity(); - int[] effectorVectorPosition = new int[this.effectorVectorMap.size()]; - Arrays.fill(effectorVectorPosition, -1); - int position = 1; int size = 0; - for (int input = 0; input < nInputs; input++) { - for (int state = 0; state < nStates; state++) { - int effectOrdinal = this.kernelMatrix[input][state][1]; - if (effectOrdinal < 0) { - size += effectVectors[-1 * effectOrdinal].length; - } - } - } - this.effectorVectors = new int[size + 1]; - this.effectorVectors[0] = 0; - for (int input = 0; input < nInputs; input++) { - for (int state = 0; state < nStates; state++) { - int[] cell = this.kernelMatrix[input][state]; - int effectOrdinal = cell[1]; - if (effectOrdinal < 0) { - effectOrdinal *= -1; - if (effectorVectorPosition[effectOrdinal] < 0) { - System.arraycopy(effectVectors[effectOrdinal], 0, this.effectorVectors, position, effectVectors[effectOrdinal].length); - effectorVectorPosition[effectOrdinal] = position; - position += effectVectors[effectOrdinal].length; - } - cell[1] = -1 * effectorVectorPosition[effectOrdinal]; - } - } - } - if (position < this.effectorVectors.length) { - this.effectorVectors = Arrays.copyOf(this.effectorVectors, position); - } - assertKernelSanity(); - // redundant state elimination - for (int state = 0; state < nStates; state++) { - assert (mproductEndpoints[state] != null) || (mproductStateEffects[state] == null); - assert (exitEquivalent[state] >= 0) || (mproductStateEffects[state] == null); - if 
(mproductStateEffects[state] != null - && mproductEndpoints[state][1] != exitEquivalent[state]) { - int[] cell = this.kernelMatrix[exitEquivalent[state]][state]; - if (cell[1] > 0) { - cell[0] = state; - cell[1] = 0; - } - } - } - assertKernelSanity(); - int[] stateMap = new int[nStates]; - Arrays.fill(stateMap, -1); - Arrays.fill(walkedStates, false); - assert walkStack.size() == 0; - int mStates = 0; - walkedStates[0] = true; - walkStack.push(0); - while (walkStack.size() > 0) { - int state = walkStack.pop(); - stateMap[state] = mStates++; - for (int input = 0; input < nInputs; input++) { - int nextState = this.kernelMatrix[input][state][0]; - if (nextState != state && !walkedStates[nextState]) { - walkedStates[nextState] = true; - walkStack.push(nextState); - } - } - } - int[][][] finalMatrix = new int[rowEquivalenceMap.size()][mStates][2]; - for (int input = 0; input < nInputs; input++) { - int[][] row = this.kernelMatrix[input]; - for (int state = 0; state < nStates; state++) { - if (walkedStates[state]) { - int[] cell = finalMatrix[input][stateMap[state]]; - cell[0] = stateMap[row[state][0]]; - cell[1] = row[state][1]; - } - } - } - this.kernelMatrix = finalMatrix; - assertKernelSanity(); - // Coalesce equivalence classes - rowEquivalenceMap.clear(); - for (int input = 0; input < this.kernelMatrix.length; input++) { - assert this.kernelMatrix[input].length == this.kernelMatrix[0].length; - final IntsArray row = new IntsArray(this.kernelMatrix[input]); - HashSet equivalentClassOrdinals = rowEquivalenceMap.computeIfAbsent( - row, absent -> new HashSet<>(16)); - if (equivalentClassOrdinals.isEmpty()) { - rowEquivalenceMap.put(row, equivalentClassOrdinals); - } - equivalentClassOrdinals.add(input); - } - int[] classEquivalenceIndex = new int[this.kernelMatrix.length]; - int[][][] matrix = new int[rowEquivalenceMap.size()][][]; - int equivalenceClassIndex = 0; - for (final Map.Entry> entry : rowEquivalenceMap.entrySet()) { - final IntsArray row = entry.getKey(); - 
for (final int inputOrdinal : entry.getValue()) { - classEquivalenceIndex[inputOrdinal] = equivalenceClassIndex; - } - matrix[equivalenceClassIndex++] = row.getInts(); - } - for (int token = 0; token < this.inputEquivalenceIndex.length; token++) { - this.inputEquivalenceIndex[token] = classEquivalenceIndex[this.inputEquivalenceIndex[token]]; - } - this.kernelMatrix = matrix; - } - - private void assertKernelSanity() { - for (int input = 0; input < this.kernelMatrix.length; input++) { - assert this.kernelMatrix[0].length == this.kernelMatrix[input].length; - for (int state = 0; state < this.kernelMatrix[input].length; state++) { - assert (this.kernelMatrix[input][state][1] != 0) - || (this.kernelMatrix[input][state][0] == state) - : String.format("sanity: state[%d->%d] input[%d->%d]", - state, this.kernelMatrix[input][state][0], - input, this.kernelMatrix[input][state][1]); - } - } - } - - private int walk(int fromState, byte[] walkedBytes, int[] walkResult, int[] exitEquivalent, int[] singletonEquivalenceMap) { - int nulEquivalent = this.inputEquivalenceIndex[Signal.NUL.signal()]; - int[] nulTransition = this.kernelMatrix[nulEquivalent][fromState]; - int[] matchTransition = new int[] { - nulTransition[0] != fromState ? nulTransition[0] : Integer.MIN_VALUE, - nulTransition[1] - }; - int walkLength = 0; - int walkedInput = -1; - int walkState = fromState; - Arrays.fill(walkResult, 0); - assert exitEquivalent[walkState] >= 0; - int exitInput = exitEquivalent[walkState]; - while (exitInput >= 0 && walkLength < walkedBytes.length - && this.kernelMatrix[exitInput][walkState][1] == 1) { - assert singletonEquivalenceMap[exitInput] >= 0 - && singletonEquivalenceMap[exitInput] < Signal.NUL.signal(); - int[] errorTransition = this.kernelMatrix[nulEquivalent][walkState]; - int errorState = matchTransition[0] != Integer.MIN_VALUE ? 
matchTransition[0] : walkState; - if (errorTransition[1] == matchTransition[1] && errorTransition[0] == errorState) { - walkedInput = exitInput; - walkedBytes[walkLength] = (byte)singletonEquivalenceMap[walkedInput]; - walkState = this.kernelMatrix[walkedInput][walkState][0]; - exitInput = exitEquivalent[walkState]; - ++walkLength; - } else { - break; - } - } - if (walkLength > 0) { - walkResult[0] = walkLength; - walkResult[1] = walkState; - walkResult[2] = walkedInput; - return walkState; - } - return fromState; - } - - private int[][] instrumentSumVectors(int msumOrdinal, int[][] effectVectors, int[][] msumEffects) { - int nInputs = this.kernelMatrix.length; - int nStates = this.kernelMatrix[0].length; - for (int input = 0; input < nInputs; input++) { - for (int state = 0; state < nStates; state++) { - int[] cell = this.kernelMatrix[input][state]; - if ((cell[0] == state) || (cell[1] == 0) || (msumEffects[cell[0]] == null)) { - continue; - } - int vectorLength = msumEffects[cell[0]].length; - int vectorOrdinal = cell[1]; - if ((vectorOrdinal > 0) - || (effectVectors[-1 * vectorOrdinal][effectVectors[-1 * vectorOrdinal].length - vectorLength] != msumOrdinal)) { - int[] effect = msumEffects[cell[0]]; - int[] vector = vectorOrdinal > 0 - ? (vectorOrdinal > 1 ? new int[] { vectorOrdinal, 0 } : new int[] { vectorOrdinal }) - : effectVectors[-1 * vectorOrdinal]; - int[] vectorex = Arrays.copyOf(vector, vector.length + effect.length - 1); - System.arraycopy(effect, 0, vectorex, vector.length - 1, effect.length); - Ints vxkey = new Ints(vectorex); - if (this.effectorVectorMap.containsKey(vxkey)) { - vectorOrdinal = this.effectorVectorMap.get(vxkey); - } else { - vectorOrdinal = this.effectorVectorMap.size(); - this.effectorVectorMap.put(vxkey, vectorOrdinal); - if (vectorOrdinal >= effectVectors.length) { - int[][] newv = new int[vectorOrdinal > 4 ? 
(vectorOrdinal * 3) >> 1 : 5][]; - System.arraycopy(effectVectors, 0, newv, 0, effectVectors.length); - effectVectors = newv; - } - effectVectors[vectorOrdinal] = vectorex; - } - cell[1] = -1 * vectorOrdinal; - } - } - } - return effectVectors; - } - - private int[][] instrumentProductVectors(int mproductOrdinal, int[][] effectVectors, int[][] mproductEffects, int[][] mproductEndpoints) { - int nInputs = this.kernelMatrix.length; - int nStates = this.kernelMatrix[0].length; - for (int input = 0; input < nInputs; input++) { - for (int state = 0; state < nStates; state++) { - int[] cell = this.kernelMatrix[input][state]; - if ((cell[1] == 0) || (mproductEffects[cell[0]] == null)) { - continue; - } - int startState = cell[0]; - int vectorLength = mproductEffects[startState].length; - int vectorOrdinal = cell[1]; - if ((vectorOrdinal > 0) - || (effectVectors[-1 * vectorOrdinal][effectVectors[-1 * vectorOrdinal].length - vectorLength] != mproductOrdinal)) { - int endState = mproductEndpoints[startState][0]; - int endInput = mproductEndpoints[startState][1]; - this.kernelMatrix[endInput][startState][0] = endState; - this.kernelMatrix[endInput][startState][1] = 1; - int[] vector = vectorOrdinal > 0 - ? (vectorOrdinal > 1 ? new int[] { vectorOrdinal, 0 } : new int[] { vectorOrdinal }) - : effectVectors[-1 * vectorOrdinal]; - int[] mproductEffect = mproductEffects[startState]; - int[] vectorex = Arrays.copyOf(vector, vector.length + mproductEffect.length - 1); - System.arraycopy(mproductEffect, 0, vectorex, vector.length - 1, mproductEffect.length); - Ints vxkey = new Ints(vectorex); - if (this.effectorVectorMap.containsKey(vxkey)) { - vectorOrdinal = this.effectorVectorMap.get(vxkey); - } else { - vectorOrdinal = this.effectorVectorMap.size(); - this.effectorVectorMap.put(vxkey, vectorOrdinal); - if (vectorOrdinal >= effectVectors.length) { - int[][] newv = new int[vectorOrdinal > 4 ? 
(vectorOrdinal * 3) >> 1 : 5][]; - System.arraycopy(effectVectors, 0, newv, 0, effectVectors.length); - effectVectors = newv; - } - effectVectors[vectorOrdinal] = vectorex; - } - cell[1] = -1 * vectorOrdinal; - assert vectorOrdinal > 0; - } - } - } - return effectVectors; - } - private Chain chain(final Transition transition) { assert transition.tape != 1 && transition.tape != 2 : "Invalid tape number for chain(InrTransition) : " + transition.toString(); @@ -1041,103 +619,87 @@ private Chain chain(final Transition transition) { } boolean fail = false; int effectorOrdinal = -1; - int effectorPos = 0; - int[] effectorVector = new int[8]; - int parameterPos = 0; + int effectorPos = 0, parameterPos = 0; byte[][] parameterList = new byte[8][]; - ArrayList outT = this.getTransitions(transition.to); - while (outT != null && outT.size() == 1 && outT.get(0).tape > 0) { + int[] effectorVector = new int[8]; + ArrayList outT = null; + for ( + outT = this.getTransitions(transition.to); + outT != null && outT.size() == 1 && outT.get(0).tape > 0; + outT = this.getTransitions(outT.get(0).to) + ) { final Transition t = outT.get(0); switch (t.tape) { - case 1: - if ((effectorPos + 2) >= effectorVector.length) { - int newLength = effectorVector.length > 4 ? 
(effectorVector.length * 3) >> 1 : 5; - effectorVector = Arrays.copyOf(effectorVector, newLength); - } - if (effectorOrdinal >= 0 && parameterPos > 0) { - assert((effectorPos > 0) && (effectorOrdinal == effectorVector[effectorPos - 1])); - effectorVector[effectorPos - 1] *= -1; - final Argument argument = new Argument(this.transducerOrdinal, new BytesArray(Arrays.copyOf(parameterList, parameterPos))); - int parameterOrdinal = super.compileParameters(effectorOrdinal, argument); - effectorVector[effectorPos] = parameterOrdinal; - parameterList = new byte[8][]; - ++effectorPos; - } - Bytes effectorSymbol = t.symbol; - effectorOrdinal = super.getEffectorOrdinal(effectorSymbol); - if (effectorOrdinal >= 0) { - effectorVector[effectorPos] = effectorOrdinal; - ++effectorPos; - } else { - fail = true; - } - parameterPos = 0; - break; - case 2: - if (effectorOrdinal >= 0) { - if (parameterPos >= parameterList.length) { - int newLength = parameterList.length > 4 ? (parameterList.length * 3) >> 1 : 5; - parameterList = Arrays.copyOf(parameterList, newLength); - } - parameterList[parameterPos] = t.symbol.bytes(); - ++parameterPos; - } - break; - default: - this.addError(String.format("%1$s: Invalid tape number %2$d", - this.getTransducerName(), t.tape)); + case 1: + if ((effectorPos + 3) >= effectorVector.length) { + int newLength = effectorVector.length > 4 ? 
(effectorVector.length * 3) >> 1 : 5; + effectorVector = Arrays.copyOf(effectorVector, newLength); + } + if (effectorOrdinal >= 0 && parameterPos > 0) { + assert((effectorPos > 0) && (effectorOrdinal == effectorVector[effectorPos - 1])); + final Argument argument = new Argument(this.transducerOrdinal, new BytesArray(Arrays.copyOf(parameterList, parameterPos))); + int parameterOrdinal = super.compileParameters(effectorOrdinal, argument); + effectorVector[effectorPos] = parameterOrdinal; + effectorVector[effectorPos - 1] *= -1; + parameterList = new byte[8][]; + ++effectorPos; + } + Bytes effectorSymbol = t.symbol; + effectorOrdinal = super.getEffectorOrdinal(effectorSymbol); + if (effectorOrdinal >= 0) { + effectorVector[effectorPos++] = effectorOrdinal; + } else { + this.addError(String.format("%1$s: Unrecognized effector '%2$s'", + this.getTransducerName(), effectorSymbol.toString())); fail = true; - break; + } + parameterPos = 0; + break; + case 2: + if (effectorOrdinal >= 0) { + if (parameterPos >= parameterList.length) { + int newLength = parameterList.length > 4 ? 
(parameterList.length * 3) >> 1 : 5; + parameterList = Arrays.copyOf(parameterList, newLength); + } + parameterList[parameterPos] = t.symbol.bytes(); + ++parameterPos; + } + break; + default: + this.addError(String.format("%1$s: Invalid tape number %2$d (tape 1 or 2 expected)", + this.getTransducerName(), t.tape)); + fail = true; + break; } - outT = this.getTransitions(t.to); + } + int outS = -1; + if (outT == null || outT.isEmpty() || outT.get(0).isFinal) { + outS = 0; + } else if (outT.get(0).tape == 0) { + outS = outT.get(0).from; + } else { + assert outT.size() > 1; + this.addError(String.format(AMBIGUOUS_STATE_MESSAGE, + this.getTransducerName(), outT.get(0).from)); + fail = true; } if (!fail) { - assert effectorPos > 0 || parameterPos == 0; + assert effectorVector.length > (effectorPos + 2); + assert effectorPos == 0 || effectorOrdinal == effectorVector[effectorPos - 1]; assert parameterPos == 0 || effectorPos > 0; - int vectorLength = effectorPos + 1; - if (effectorPos > 0 && parameterPos > 0) { - ++vectorLength; - } - if (vectorLength != effectorVector.length) { - effectorVector = Arrays.copyOf(effectorVector, vectorLength); - } if (parameterPos > 0) { - assert((effectorPos > 0) && (effectorOrdinal == effectorVector[effectorPos - 1])); - final BytesArray parameters = new BytesArray(Arrays.copyOf(parameterList, parameterPos)); - final Argument argument = new Argument(this.transducerOrdinal, parameters); - int parameterOrdinal = super.compileParameters(effectorOrdinal, argument); - effectorVector[effectorPos] = parameterOrdinal; effectorVector[effectorPos - 1] *= -1; - ++effectorPos; + effectorVector[effectorPos++] = super.compileParameters( + effectorOrdinal, new Argument(this.transducerOrdinal, + new BytesArray(Arrays.copyOf(parameterList, parameterPos)))); } - if (effectorPos > 0) { - effectorVector[effectorPos] = 0; - ++effectorPos; - } - if (outT == null || outT.isEmpty() || outT.size() == 1 && outT.get(0).isFinal) { - return new 
Chain(Arrays.copyOf(effectorVector, effectorPos), 0); - } else if (outT.size() == 1 && outT.get(0).tape == 0) { - return new Chain(Arrays.copyOf(effectorVector, effectorPos), outT.get(0).from); - } else if (outT.get(0).isFinal || outT.get(0).tape == 0) { - int outS = -1; - for (final Transition t : outT) { - if (t.tape > 0) { - this.addError(String.format(AMBIGUOUS_STATE_MESSAGE, this.getTransducerName(), t.from)); - fail = true; - } else { - outS = t.from; - } - } - if (!fail) { - return new Chain(Arrays.copyOf(effectorVector, effectorPos), outS); - } - } else { - for (final Transition t : outT) { - this.addError(String.format(AMBIGUOUS_STATE_MESSAGE, this.getTransducerName(), t.from)); - } + effectorVector[effectorPos++] = 0; + assert effectorVector.length >= effectorPos; + if (effectorVector.length > effectorPos) { + effectorVector = Arrays.copyOf(effectorVector, effectorPos); } } - return null; + return fail? null : new Chain(effectorVector, outS); } private Integer[] getInrStates() { @@ -1147,7 +709,7 @@ private Integer[] getInrStates() { return inrStates; } - private ArrayList getTransitions(final int inrState) { + ArrayList getTransitions(final int inrState) { return this.stateTransitionMap.get(inrState); } diff --git a/src/com/characterforming/jrte/engine/ModelLoader.java b/src/com/characterforming/jrte/engine/ModelLoader.java index 13449f2..237f194 100644 --- a/src/com/characterforming/jrte/engine/ModelLoader.java +++ b/src/com/characterforming/jrte/engine/ModelLoader.java @@ -67,7 +67,7 @@ private ModelLoader(final File modelPath) this.transducerAccessIndex = new AtomicIntegerArray(size); this.transducerObjectIndex = new AtomicReferenceArray<>(size); } - + /** * Bind target instance to runtime model. 
* @@ -258,14 +258,14 @@ public void decompile(final String transducerName) for (int j = 0; j < inputEquivalenceIndex.length; j++) { if (inputEquivalenceIndex[j] != i) { if (startToken >= 0) { - if (startToken < (j - 2)) { + if (startToken < (j - 1)) { this.printStart(startToken); this.printEnd(j - 1); } else { this.printStart(startToken); } + startToken = -1; } - startToken = -1; } else if (startToken < 0) { startToken = j; } diff --git a/src/com/characterforming/jrte/engine/State.java b/src/com/characterforming/jrte/engine/State.java new file mode 100644 index 0000000..68220b9 --- /dev/null +++ b/src/com/characterforming/jrte/engine/State.java @@ -0,0 +1,120 @@ +/*** + * Ribose is a recursive transduction engine for Java + * + * Copyright (C) 2011,2022 Kim Briggs + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received copies of the GNU General Public License + * and GNU Lesser Public License along with this program. See + * LICENSE-gpl-3.0. If not, see + * . 
+ */ + +package com.characterforming.jrte.engine; + +import java.util.HashSet; + +import com.characterforming.ribose.base.Signal; + +class State { + final int ordinal; + final int signalLimit; + final int[][] transitions; + final long[] idempotentBytes; + final HashSet[] inputEquivalents; + int idempotentCount; + int idempotentNulBytes; + int idempotentByteCount; + int outboundCount; + int outboundByte; + int outboundEq; + + State(int state, int[][] transitions, HashSet[] inputEquivalents, int signalLimit) { + this.ordinal = state; + this.signalLimit = signalLimit; + this.transitions = transitions; + this.inputEquivalents = inputEquivalents; + this.idempotentNulBytes = 0; + this.idempotentBytes = new long[] { 0, 0, 0, 0 }; + this.idempotentByteCount = 0; + this.idempotentCount = 0; + this.outboundEq = -1; + this.outboundByte = -1; + this.outboundCount = 0; + this.setup(); + } + + private void setup() { + for (int eq = 0; eq < this.transitions.length; eq++) + if (this.transitions[eq][0] == this.ordinal) { + this.idempotentCount += this.inputEquivalents[eq].size(); + for (int token : this.inputEquivalents[eq]) + if (token < Signal.NUL.signal()) { + if (this.transitions[eq][1] == Assembler.NIL) { + this.idempotentBytes[token >> 6] |= (1L << (token & 0x3f)); + this.idempotentByteCount += 1; + } else if (this.transitions[eq][1] == Assembler.NUL) + this.idempotentNulBytes += 1; + } + } else { + final int size = this.inputEquivalents[eq].size(); + int token = this.inputEquivalents[eq].iterator().next().intValue(); + if (size == 1 && this.outboundCount == 0 + && this.transitions[eq][1] == Assembler.NIL + && token < Signal.NUL.signal()) { + this.outboundByte = token; + this.outboundCount = 1; + this.outboundEq = eq; + } else + for (int t : this.inputEquivalents[eq]) + if (t < Signal.NUL.signal()) + ++this.outboundCount; + } + if (this.isScanState()) + for (int word = 0; word < this.idempotentBytes.length; word++) + if (this.idempotentBytes[word] != 0xffffffffffffffffL) + 
for (int bit = 0; bit < 64; bit++) + if (0 == ((1L<= 0; + } + + private boolean isProduct() { + return this.outboundCount == 1 && this.idempotentNulBytes == 255 + && this.outboundEq >= 0 && this.transitions[this.outboundEq][1] == Assembler.NIL + && this.outboundByte >= 0 && this.outboundByte < Signal.NUL.signal(); + } + + boolean isScanState() { + assert this.idempotentByteCount != 255 + || (!this.isSumState() && !this.isProduct()); + return this.idempotentByteCount == 255; + } + + boolean isSumState() { + assert this.idempotentByteCount < ModelCompiler.MIN_SUM_SIZE || this.idempotentByteCount >= 255 + || (!this.isScanState() && !this.isProduct()); + return this.idempotentByteCount >= ModelCompiler.MIN_SUM_SIZE + && this.idempotentByteCount < 255; + } + + boolean isProductState() { + assert !isProduct() || (!this.isScanState() && !this.isSumState()); + return isProduct(); + } + + boolean isNotInstrumented() { + return !this.isScanState() && !this.isSumState() && !this.isProduct(); + } +} \ No newline at end of file diff --git a/src/com/characterforming/jrte/engine/StateStack.java b/src/com/characterforming/jrte/engine/StateStack.java index f17e949..c73bd71 100644 --- a/src/com/characterforming/jrte/engine/StateStack.java +++ b/src/com/characterforming/jrte/engine/StateStack.java @@ -1,18 +1,18 @@ /*** * Ribose is a recursive transduction engine for Java -* +* * Copyright (C) 2011,2022 Kim Briggs -* +* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. -* +* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
-* +* * You should have received a copy of the GNU General Public License * along with this program (LICENSE-gpl-3.0). If not, see * . @@ -46,4 +46,8 @@ int pop() { } return -1; } + + public boolean isEmpty() { + return this.tos < 0; + } } \ No newline at end of file diff --git a/src/com/characterforming/jrte/engine/Transductor.java b/src/com/characterforming/jrte/engine/Transductor.java index ed4de31..cd0fe41 100644 --- a/src/com/characterforming/jrte/engine/Transductor.java +++ b/src/com/characterforming/jrte/engine/Transductor.java @@ -97,7 +97,7 @@ public final class Transductor implements ITransductor, ITransduction, IOutput { private static final int MATCH_SUM = 1; private static final int MATCH_PRODUCT = 2; private static final int MATCH_SCAN = 3; - + private Model model; private final boolean isProxy; private final ModelLoader loader; @@ -121,7 +121,7 @@ public final class Transductor implements ITransductor, ITransduction, IOutput { private final ITransductor.Metrics metrics; /** - * Proxy constructor + * Proxy constructor */ Transductor() { this.model = null; @@ -234,7 +234,7 @@ public String asString(int fieldOrdinal) throws EffectorException, CharacterCodi Value v = this.transducerStack.value(fieldOrdinal); return Codec.decode(v.value(), v.length()); } else { - throw new EffectorException("Not valid for proxy transductor"); + throw new EffectorException("Not valid for proxy transductor"); } } @@ -436,9 +436,11 @@ public ITransductor run() throws EffectorException, DomainErrorException { this.selected = this.transducer.selected; state = this.transducer.state; I: do { - int post = input.position; // get next input token if (signal > 0) { + assert this.matchMode == MATCH_NONE || signal == SIGEOS + : String.format("mode=%d; signal=%d; expected mode=%d", + this.matchMode, signal, MATCH_NONE); this.matchMode = MATCH_NONE; token = signal; signal = 0; @@ -449,15 +451,13 @@ public ITransductor run() throws EffectorException, DomainErrorException { token = -1; 
break T; } - post = input.position; } token = input.array[input.position++] & 0xff; } - + int action = NIL; S: do { - final int trap = this.matchMode; - switch (trap) { + switch (this.matchMode) { // trap runs in (nil* paste*)* effector space case MATCH_NONE: do { @@ -466,14 +466,13 @@ public ITransductor run() throws EffectorException, DomainErrorException { action = Transducer.action(transition); if (action == PASTE) this.value.paste((byte)token); - else if (action != NIL) { - this.metrics.traps[MATCH_NONE][0] += 1; - this.metrics.traps[MATCH_NONE][1] += input.position - post; + else if (action != NIL) break S; - } - token = input.position < input.limit ? input.array[input.position++] & 0xff : -1; + if (input.position < input.limit) + token = input.array[input.position++] & 0xff; + else + continue I; } while (token >= 0); - this.metrics.traps[MATCH_NONE][1] += input.position - post; break; // absorb self-referencing (msum,mscan) or sequential (mproduct) transitions with nil effect case MATCH_SUM: @@ -489,37 +488,39 @@ else if (action != NIL) { assert false; break; } - if (token >= 0) { - this.metrics.traps[trap][1] += input.position - post; - this.metrics.traps[trap][0] += 1; - } else continue I; - assert this.matchMode == MATCH_NONE; + if (token < 0) + continue I; } while (true); // effect action and check for transducer or input stack adjustment - final int aftereffects = action < 0 - ? 
effect(action, token, effectorVector) - : effect(action, token); + int aftereffects = IEffector.RTX_NONE; + if (action >= 0x10000) + aftereffects = ((IParameterizedEffector) + this.effectors[Transducer.effector(action)]).invoke(Transducer.parameter(action)); + else if (action >= 0) + aftereffects = effect(action, token); + else + aftereffects = effect(action, token, effectorVector); if (aftereffects != IEffector.RTX_NONE) { - if (0 != (aftereffects & IEffector.RTX_INPUT)) { + if (0 != (aftereffects & IEffector.RTX_INPUT)) input = this.inputStack.peek(); - } if (0 != (aftereffects & IEffector.RTX_SIGNAL)) { signal = Transducer.signal(aftereffects); - if (signal < SIGNUL || signal >= this.signalLimit) { + if (signal < SIGNUL || signal >= this.signalLimit) signal = SIGNUL; - } } int stackeffect = aftereffects & (IEffector.RTX_START | IEffector.RTX_STOP); if (stackeffect == IEffector.RTX_START) { - assert this.transducerStack.tos() > 0 && this.transducer == this.transducerStack.get(this.transducerStack.tos() - 1); + assert this.transducerStack.tos() > 0 + && this.transducer == this.transducerStack.get(this.transducerStack.tos() - 1); this.transducer.selected = this.selected; this.transducer.state = state; - } - if (0 != (aftereffects & (IEffector.RTX_PAUSE | IEffector.RTX_STOPPED))) { + } else if (stackeffect != 0) + this.matchMode = MATCH_NONE; + if (0 != (aftereffects & (IEffector.RTX_PAUSE | IEffector.RTX_STOPPED))) break T; - } else if (stackeffect != 0) { + else if (stackeffect != 0) { break I; } } @@ -553,11 +554,6 @@ else if (action != NIL) { private int effect(int action, int token) throws EffectorException, IOException { - if (action >= 0x10000) { - return this.effectors[Transducer.effector(action)] instanceof IParameterizedEffector e - ? 
e.invoke(Transducer.parameter(action)) - : IEffector.RTX_NONE; - } switch (action) { case NUL: if ((token != SIGNUL && token != SIGEOS)) { @@ -609,11 +605,11 @@ private int effect(int action, int token) case PAUSE: return IEffector.RTX_PAUSE; case STOP: - return this.transducerStack.pop() == null ? IEffector.RTX_STOPPED : IEffector.RTX_STOP; + return this.transducerStack.pop() == null ? IEffector.RTX_STOPPED : IEffector.RTX_STOP; default: if (action < this.effectors.length) return this.effectors[action].invoke(); - throw new EffectorException(String.format("Effector ordinal %d is out of range (<%d)", + throw new EffectorException(String.format("Effector ordinal %d is out of range (<%d)", action, this.effectors.length)); } } @@ -627,10 +623,10 @@ private int effect(int action, int token, int[] effectorVector) E: do { action = effectorVector[index++]; if (action < 0 ) { - assert this.effectors[0 - action] instanceof IParameterizedEffector; if (this.effectors[0 - action] instanceof IParameterizedEffector e) aftereffects |= e.invoke(effectorVector[index++]); - } else if (action != NUL) + else assert false; + } else if (action != NUL) aftereffects |= this.effect(action, token); else break E; } while (true); @@ -639,13 +635,17 @@ private int effect(int action, int token, int[] effectorVector) private int sumTrap(Input input, int token) { final long[] matchMask = this.matchSum; + final int post = input.position; while (0 != (matchMask[token >> 6] & (1L << (token & 0x3f)))) { - if (input.position < input.limit) { + if (input.position < input.limit) token = 0xff & input.array[input.position++]; - } else { + else { + this.metrics.traps[MATCH_SUM][1] += (input.position - post); /* input exhausted mid-run: credit the bytes to the sum trap */ return -1; } } + this.metrics.traps[MATCH_SUM][0] += 1; + this.metrics.traps[MATCH_SUM][1] += (input.position - post); this.matchMode = MATCH_NONE; return token; } @@ -653,37 +653,46 @@ private int sumTrap(Input input, int token) { private int productTrap(Input input, int token) { final byte[] 
product = this.matchProduct; byte match = (byte)(0xff & token); + final int post = input.position; int mpos = this.matchPosition; assert mpos <= product.length; while (mpos < product.length) { if (match == product[mpos++]) { - if (mpos == product.length) { + if (mpos == product.length) break; - } else if (input.position < input.limit) { + else if (input.position < input.limit) match = input.array[input.position++]; - } else { + else { + this.metrics.traps[MATCH_PRODUCT][1] += (input.position - post); this.matchPosition = mpos; return -1; } } else { + this.metrics.traps[MATCH_PRODUCT][0] += 1; + this.metrics.traps[MATCH_PRODUCT][1] += (input.position - post); this.errorInput = 0xff & match; this.matchMode = MATCH_NONE; return SIGNUL; } } + this.metrics.traps[MATCH_PRODUCT][0] += 1; + this.metrics.traps[MATCH_PRODUCT][1] += (input.position - post) + 1; this.matchMode = MATCH_NONE; return 0xff & match; } private int scanTrap(Input input, int token) { final int matchToken = this.matchByte; - while (token != matchToken) { - if (input.position < input.limit) { + final int post = input.position; + while (token != matchToken) + if (input.position < input.limit) token = 0xff & input.array[input.position++]; - } else { + else { + this.metrics.traps[MATCH_SCAN][1] += (input.position - post); return -1; } - } + this.metrics.traps[MATCH_SCAN][0] += 1; + this.metrics.traps[MATCH_SCAN][1] += (input.position - post); this.matchMode = MATCH_NONE; return token; } @@ -694,7 +703,7 @@ private String getErrorInput(int last, int state) { top.state = state; state /= eqCount; last /= eqCount; StringBuilder message = new StringBuilder(256); message.append(String.format("Domain error on (%1$d~%2$d) in %3$s [%4$d]->[%5$d]%n,\tTransducer stack:%n", - this.errorInput, this.errorInput >= 0 ? top.get().getInputFilter()[this.errorInput] : this.errorInput, + this.errorInput, this.errorInput >= 0 ? 
top.get().getInputFilter()[this.errorInput] : this.errorInput, top.get().getName(), last, state)); for (int i = this.transducerStack.tos(); i >= 0; i--) { TransducerState t = this.transducerStack.get(i); @@ -775,7 +784,7 @@ public int invoke() throws EffectorException { @Override public int invoke(final int parameterIndex) throws EffectorException { for (IToken t : super.parameters[parameterIndex]) { - if (t instanceof Token token) { + if (t instanceof Token token) { if (token.getType() == IToken.Type.FIELD) { Value field = transducerStack.value(token.getOrdinal()); if (field != null) { @@ -785,7 +794,7 @@ public int invoke(final int parameterIndex) throws EffectorException { byte[] bytes = token.getLiteral().bytes(); value.paste(bytes, bytes.length); } else { - throw new EffectorException(String.format("Invalid token `%1$s` for effector '%2$s'", + throw new EffectorException(String.format("Invalid token `%1$s` for effector '%2$s'", token.asString(), super.getName())); } } @@ -809,7 +818,6 @@ public int invoke() throws EffectorException { @Override public int invoke(final int parameterIndex) throws EffectorException { selected = super.parameters[parameterIndex]; - value = transducerStack.value(selected); assert selected != Model.ALL_FIELDS_ORDINAL; if (selected != Model.ALL_FIELDS_ORDINAL) { value = transducerStack.value(selected); @@ -828,7 +836,7 @@ public int invoke() throws EffectorException { assert false; return IEffector.RTX_NONE; } - + @Override public int invoke(final int parameterIndex) throws EffectorException { int fieldOrdinal = super.parameters[parameterIndex]; @@ -902,7 +910,7 @@ public int invoke(final int parameterIndex) throws EffectorException { public Integer[] allocateParameters(int parameterCount) { return new Integer[parameterCount]; } - + @Override public Integer compileParameter(final IToken[] parameterList) throws TargetBindingException { if (parameterList.length != 1) { @@ -936,7 +944,7 @@ public Integer compileParameter(final IToken[] 
parameterList) throws TargetBindi private final class InEffector extends BaseInputOutputEffector { private InEffector(final Transductor transductor) throws CharacterCodingException { super(transductor, "in"); - + } @Override @@ -1010,7 +1018,7 @@ public int invoke() throws EffectorException { public int[][] allocateParameters(int parameterCount) { return new int[parameterCount][]; } - + @Override public int invoke(final int parameterIndex) throws EffectorException { assert (transducer == transducerStack.peek()) || (transducer == transducerStack.get(transducerStack.tos()-1)); @@ -1064,7 +1072,7 @@ public int invoke() throws EffectorException { public Integer[] allocateParameters(int parameterCount) { return new Integer[parameterCount]; } - + @Override public Integer compileParameter(final IToken[] parameterTokens) throws TargetBindingException { if (parameterTokens.length != 1) { @@ -1118,7 +1126,7 @@ public int invoke() throws EffectorException { public long[][] allocateParameters(int parameterCount) { return new long[parameterCount][]; } - + @Override public int invoke(final int parameterIndex) throws EffectorException { if (matchMode == MATCH_NONE) { @@ -1138,7 +1146,7 @@ public long[] compileParameter(final IToken[] parameterList) throws TargetBindin } long[] byteMap = new long[] {0, 0, 0, 0}; for (byte b : parameterList[0].getLiteral().bytes()) { - final int i = Byte.toUnsignedInt(b); + int i = b & 0xff; byteMap[i >> 6] |= 1L << (i & 0x3f); } return byteMap; @@ -1210,7 +1218,7 @@ public int invoke(final int parameterIndex) throws EffectorException { public byte[][] allocateParameters(int parameterCount) { return new byte[parameterCount][]; } - + @Override public byte[] compileParameter(final IToken[] parameterList) throws TargetBindingException { if (parameterList.length != 1) { @@ -1259,7 +1267,7 @@ public int invoke(final int parameterIndex) throws EffectorException { public Integer[] allocateParameters(int parameterCount) { return new 
Integer[parameterCount]; } - + @Override public Integer compileParameter(final IToken[] parameterList) throws TargetBindingException { if (parameterList.length != 1) { diff --git a/src/com/characterforming/jrte/test/FileRunner.java b/src/com/characterforming/jrte/test/FileRunner.java index caf9add..8fb3e5f 100644 --- a/src/com/characterforming/jrte/test/FileRunner.java +++ b/src/com/characterforming/jrte/test/FileRunner.java @@ -43,7 +43,7 @@ public class FileRunner { /** * Shell interface. - * + * * @param args arguments from the shell */ public static void main(final String[] args) { @@ -131,7 +131,7 @@ public static void main(final String[] args) { } assert trex.status().isStopped(); } - long bytes = metrics.traps[0][1] + metrics.traps[1][1] + metrics.traps[2][1] + metrics.traps[3][1]; + long bytes = 10 * byteLength; double mbps = (tjrte > 0) ? (double)(bytes*1000000000l) / (double)(tjrte*1024*1024) : -1; double mnone = (bytes > 0) ? ((double) (100 * metrics.traps[0][1]) / (double) bytes) : -1; double mps = (bytes > 0) ? 
((double) (100 * metrics.traps[1][1]) / (double) bytes) : -1; @@ -146,7 +146,7 @@ public static void main(final String[] args) { String ssum = String.format("(%d/%.2f%%):msum", sum, mps); String sproduct = String.format("(%d/%.2f%%):mproduct", product, mpr); String sscan = String.format("(%d/%.2f%%):mscan", scan, msc); - System.out.println(String.format("%8.3f mb/s %7.3f nul/kb %16s %16s %20s %17s", + System.out.println(String.format("%8.3f mb/s %7.3f nul/kb %16s %16s %20s %17s", mbps, ekb, snone, ssum, sproduct, sscan)); assert bytes == 0 || bytes >= 10*byteLength; } else { diff --git a/src/com/characterforming/ribose/IModel.java b/src/com/characterforming/ribose/IModel.java index eb326bd..8f36f2e 100644 --- a/src/com/characterforming/ribose/IModel.java +++ b/src/com/characterforming/ribose/IModel.java @@ -36,17 +36,17 @@ import com.characterforming.ribose.base.SimpleTarget; /** - * The {@code IModel} interface provides static methods for compiling and loading + * The {@code IModel} interface provides static methods for compiling and loading * ribose models into the Java runtime and encapsulates a runtime model instance. * The model compiler assembles ribose models from collections of ginr automata. - * The model loader implements threadsafe instantiation of {@link ITransductor} + * The model loader implements threadsafe instantiation of {@link ITransductor} * for fine-grained transduction workflows and more granular methods for * transducing input streams. A single transductor instance can be reused for * more that one transduction, and transductions can be wrapped in a {@code * try-with-transductor} statement using {@link #transduction(ITransductor)}, - * which returns an autocloseable {@link ITransduction} instance. + * which returns an autocloseable {@link ITransduction} instance. *

- * Model files are compiled atomically and support multiple concurrent loaders. + * Model files are compiled atomically and support multiple concurrent loaders. * Each model loader serializes one-time loading of transducers on first use in * multithreaded contexts. In all other respects, ribose objects are not threadsafe. * Runtime {@code IModel} instances are {@link AutoCloseable} but clients must @@ -66,9 +66,9 @@ public interface IModel extends AutoCloseable { * Compile a collection of ginr DFAs from an automata directory into a ribose model file * and bind them to an {@link ITarget} class. Ginr compiles ribose patterns (*.inr files) * to DFAs as the first step in building a ribose model. The ribose compiler cruches the - * DFAs to produce ribose transducers and asembles them in a into the model file for + * DFAs to produce ribose transducers and assembles them into the model file for * runtime use. - * + * * @param targetClassname the name of the Target implementation class will be instantiated as model target * @param ginrAutomataDirectory directory containing DFAs compiled by ginr to be included in the model * @param riboseModelFile path indicating where to create the model file @@ -86,9 +86,14 @@ public static boolean compileRiboseModel(String targetClassname, File ginrAutoma } /** - * Load a ribose runtime model from persistent store and bind it to a model - * target instance. A runtime model can be used to instantiate transductors. - * + * Load a ribose runtime model from persistent store. A runtime model can be used + * to instantiate {@link ITransductor} instances and bind them to instances of the + * model {@link ITarget}. A transductor can run serial {@link ITransduction}s + * whereby syntactic cues in an input stream select effectors to reduce and + * assimilate extracted data into the target. All of this is wrapped inside + * the IModel {@code stream()} methods, for one-off transductions that do + * not require fine-grained control. 
+ * * @param riboseModelFile path to the runtime model to load * @return a live ribose runtime model instance * @throws ModelException if the model could not be loaded @@ -96,7 +101,7 @@ public static boolean compileRiboseModel(String targetClassname, File ginrAutoma public static IModel loadRiboseModel(File riboseModelFile) throws ModelException { return ModelLoader.loadModel(riboseModelFile); - } + } /** * Instantiate a new transductor and bind it to a live target instance. Use @@ -120,9 +125,9 @@ ITransductor transductor(ITarget target) * instance may safely run consecutive transductions as long * as each transduction is closed. Each transduction should begin by * calling {@link ITransduction#reset()}, this will ensure that the - * transductor is in a readty state before the transduction begins. See - * {@link ITransductor} for an example. - * + * transductor is in a ready state before the transduction begins. See + * {@link ITransductor} for an example. + * * @param transductor The transductor that will run the transduction. * @return the transduction instance */ @@ -160,7 +165,7 @@ boolean stream(Bytes transducer, Signal prologue, InputStream in, OutputStream o * Bind a live {@link ITarget} to an transductor and transduce a byte input * stream onto it. The signal and input and output streams and I/O buffers are * treated as for {@link #stream(Bytes, Signal, InputStream, OutputStream)}. 
- * + * * @param transducer the UTF-8 encoded name of the transducer to start * @param target the transduction target instance * @param prologue start signal to send to {@code transducer} (if not @link Signal#NONE}) @@ -172,9 +177,9 @@ boolean stream(Bytes transducer, Signal prologue, InputStream in, OutputStream o boolean stream(Bytes transducer, ITarget target, Signal prologue, InputStream in, OutputStream out) throws RiboseException; - /** + /** * Decompile a transducer to System.out - * + * * @param transducerName the transducer name as aUnicode string * @throws ModelException if things don't work out * @throws CharacterCodingException if encoder fails @@ -182,9 +187,9 @@ boolean stream(Bytes transducer, ITarget target, Signal prologue, InputStream in void decompile(String transducerName) throws ModelException, CharacterCodingException; - /** + /** * Print the model map to an output stream - * + * * @param mapWriter the output sink * @return true unless an uncaught exception is thrown * @throws ModelException if things don't work out @@ -194,7 +199,7 @@ boolean map(PrintStream mapWriter) /** * Get the fully qualified name of the model target. - * + * * @return the fully qualified name of the model target */ String getTargetClassname(); @@ -209,7 +214,7 @@ void close() throws ModelException; /** - * Detach thread local variables from the calling thread. + * Detach thread local variables from the calling thread. */ static void detach() { Codec.detach();