diff --git a/design/mvp/Binary.md b/design/mvp/Binary.md index 8940de62..7bc38a24 100644 --- a/design/mvp/Binary.md +++ b/design/mvp/Binary.md @@ -12,7 +12,7 @@ rules, but rather merge the minimal need-to-know elements of both, with just enough detail to create a prototype. A complete definition of the binary format and validation will be present in the [formal specification](../../spec/). -See the [explainer introduction](Explainer.md) for an explanation of 🪙. +See [Gated Features](Explainer.md#gated-features) for an explanation of 🪙 and 🔧. ## Component Definitions @@ -184,6 +184,7 @@ defvaltype ::= pvt: => pvt | 0x72 lt*:vec() => (record (field lt)*) (if |lt*| > 0) | 0x71 case*:vec() => (variant case*) | 0x70 t: => (list t) + | 0x67 t: len: => (list t len) 🔧 | 0x6f t*:vec() => (tuple t+) (if |t*| > 0) | 0x6e l*:vec() => (flags l+) (if |l*| > 0) | 0x6d l*:vec() => (enum l*) @@ -373,6 +374,12 @@ appear once within a `name` section, for example component instances can only be named once. +## Binary Format Warts to Fix in a 1.0 Release + +* The two `list` type codes should be merged into one with an optional immediate. +* The `0x00` prefix byte of `importname'` and `exportname'` will be removed or repurposed. + + [`core:u32`]: https://webassembly.github.io/spec/core/binary/values.html#integers [`core:section`]: https://webassembly.github.io/spec/core/binary/modules.html#binary-section [`core:custom`]: https://webassembly.github.io/spec/core/binary/modules.html#custom-section diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md index b19525a9..1e961691 100644 --- a/design/mvp/CanonicalABI.md +++ b/design/mvp/CanonicalABI.md @@ -99,13 +99,23 @@ def alignment(t): case Float32() : return 4 case Float64() : return 8 case Char() : return 4 - case String() | List(_) : return 4 + case String() : return 4 + case List(t, l) : return alignment_list(t, l) case Record(fields) : return alignment_record(fields) case Variant(cases) : return alignment_variant(cases) case Flags(labels) : return alignment_flags(labels) case Own(_) | Borrow(_) : return 4 ``` +List alignment is the same as tuple alignment when the length is fixed and +otherwise uses the alignment of pointers. +```python +def alignment_list(elem_type, maybe_length): + if maybe_length is not None: + return alignment(elem_type) + return 4 +``` + Record alignment is tuple alignment, with the definitions split for reuse below: ```python def alignment_record(fields): @@ -172,12 +182,18 @@ def size(t): case Float32() : return 4 case Float64() : return 8 case Char() : return 4 - case String() | List(_) : return 8 + case String() : return 8 + case List(t, l) : return size_list(t, l) case Record(fields) : return size_record(fields) case Variant(cases) : return size_variant(cases) case Flags(labels) : return size_flags(labels) case Own(_) | Borrow(_) : return 4 +def size_list(elem_type, maybe_length): + if maybe_length is not None: + return maybe_length * size(elem_type) + return 8 + def size_record(fields): s = 0 for f in fields: @@ -445,7 +461,7 @@ def load(cx, ptr, t): case Float64() : return decode_i64_as_float(load_int(cx, ptr, 8)) case Char() : return convert_i32_to_char(cx, load_int(cx, ptr, 4)) case String() : return load_string(cx, ptr) - case List(t) : return load_list(cx, ptr, t) + case List(t, l) : return load_list(cx, ptr, t, l) case Record(fields) : return load_record(cx, ptr, fields) case Variant(cases) : return load_variant(cx, ptr, cases) case Flags(labels) : return load_flags(cx, ptr, labels) @@ -564,7 +580,9 @@ def load_string_from_range(cx, ptr, tagged_code_units): Lists and records are loaded by recursively loading their elements/fields: ```python -def load_list(cx, ptr, elem_type): +def load_list(cx, ptr, elem_type, maybe_length): + if maybe_length is not None: + return load_list_from_valid_range(cx, ptr, maybe_length, elem_type) begin = load_int(cx, ptr, 4) length = load_int(cx, ptr + 4, 4) return load_list_from_range(cx, begin, length, elem_type) @@ -572,6 +590,9 @@ def load_list(cx, ptr, elem_type): def load_list_from_range(cx, ptr, length, elem_type): trap_if(ptr != align_to(ptr, alignment(elem_type))) trap_if(ptr + length * size(elem_type) > len(cx.opts.memory)) + return load_list_from_valid_range(cx, ptr, length, elem_type) + +def load_list_from_valid_range(cx, ptr, length, elem_type): a = [] for i in range(length): a.append(load(cx, ptr + i * size(elem_type), elem_type)) @@ -697,7 +718,7 @@ def store(cx, v, t, ptr): case Float64() : store_int(cx, encode_float_as_i64(v), ptr, 8) case Char() : store_int(cx, char_to_i32(v), ptr, 4) case String() : store_string(cx, v, ptr) - case List(t) : store_list(cx, v, ptr, t) + case List(t, l) : store_list(cx, v, ptr, t, l) case Record(fields) : store_record(cx, v, ptr, fields) case Variant(cases) : store_variant(cx, v, ptr, cases) case Flags(labels) : store_flags(cx, v, ptr, labels) @@ -984,7 +1005,11 @@ are symmetric to the loading functions. Unlike strings, lists can simply allocate based on the up-front knowledge of length and static element size. ```python -def store_list(cx, v, ptr, elem_type): +def store_list(cx, v, ptr, elem_type, maybe_length): + if maybe_length is not None: + assert(maybe_length == len(v)) + store_list_into_valid_range(cx, v, ptr, elem_type) + return begin, length = store_list_into_range(cx, v, elem_type) store_int(cx, begin, ptr, 4) store_int(cx, length, ptr + 4, 4) @@ -995,9 +1020,12 @@ def store_list_into_range(cx, v, elem_type): ptr = cx.opts.realloc(0, 0, alignment(elem_type), byte_length) trap_if(ptr != align_to(ptr, alignment(elem_type))) trap_if(ptr + byte_length > len(cx.opts.memory)) + store_list_into_valid_range(cx, v, ptr, elem_type) + return (ptr, len(v)) + +def store_list_into_valid_range(cx, v, ptr, elem_type): for i,e in enumerate(v): store(cx, e, elem_type, ptr + i * size(elem_type)) - return (ptr, len(v)) def store_record(cx, v, ptr, fields): for f in fields: @@ -1137,13 +1165,23 @@ def flatten_type(t): case Float32() : return ['f32'] case Float64() : return ['f64'] case Char() : return ['i32'] - case String() | List(_) : return ['i32', 'i32'] + case String() : return ['i32', 'i32'] + case List(t, l) : return flatten_list(t, l) case Record(fields) : return flatten_record(fields) case Variant(cases) : return flatten_variant(cases) case Flags(labels) : return ['i32'] * num_i32_flags(labels) case Own(_) | Borrow(_) : return ['i32'] ``` +List flattening of a fixed-length list uses the same flattening as a tuple +(via `flatten_record` below). +```python +def flatten_list(elem_type, maybe_length): + if maybe_length is not None: + return flatten_type(elem_type) * maybe_length + return ['i32', 'i32'] +``` + Record flattening simply flattens each field in sequence. ```python def flatten_record(fields): @@ -1217,7 +1255,7 @@ def lift_flat(cx, vi, t): case Float64() : return canonicalize_nan64(vi.next('f64')) case Char() : return convert_i32_to_char(cx, vi.next('i32')) case String() : return lift_flat_string(cx, vi) - case List(t) : return lift_flat_list(cx, vi, t) + case List(t, l) : return lift_flat_list(cx, vi, t, l) case Record(fields) : return lift_flat_record(cx, vi, fields) case Variant(cases) : return lift_flat_variant(cx, vi, cases) case Flags(labels) : return lift_flat_flags(vi, labels) @@ -1246,17 +1284,23 @@ def lift_flat_signed(vi, core_width, t_width): return i ``` -The contents of strings and lists are always stored in memory so lifting these -types is essentially the same as loading them from memory; the only difference -is that the pointer and length come from `i32` values instead of from linear -memory: +The contents of strings and variable-length lists are stored in memory so +lifting these types is essentially the same as loading them from memory; the +only difference is that the pointer and length come from `i32` values instead +of from linear memory. Fixed-length lists are lifted the same way as a +tuple (via `lift_flat_record` below). ```python def lift_flat_string(cx, vi): ptr = vi.next('i32') packed_length = vi.next('i32') return load_string_from_range(cx, ptr, packed_length) -def lift_flat_list(cx, vi, elem_type): +def lift_flat_list(cx, vi, elem_type, maybe_length): + if maybe_length is not None: + a = [] + for i in range(maybe_length): + a.append(lift_flat(cx, vi, elem_type)) + return a ptr = vi.next('i32') length = vi.next('i32') return load_list_from_range(cx, ptr, length, elem_type) @@ -1341,7 +1385,7 @@ def lower_flat(cx, v, t): case Float64() : return [Value('f64', maybe_scramble_nan64(v))] case Char() : return [Value('i32', char_to_i32(v))] case String() : return lower_flat_string(cx, v) - case List(t) : return lower_flat_list(cx, v, t) + case List(t, l) : return lower_flat_list(cx, v, t, l) case Record(fields) : return lower_flat_record(cx, v, fields) case Variant(cases) : return lower_flat_variant(cx, v, cases) case Flags(labels) : return lower_flat_flags(v, labels) @@ -1361,15 +1405,23 @@ def lower_flat_signed(i, core_bits): return [Value('i' + str(core_bits), i)] ``` -Since strings and lists are stored in linear memory, lifting can reuse the -previous definitions; only the resulting pointers are returned differently -(as `i32` values instead of as a pair in linear memory): +Since strings and variable-length lists are stored in linear memory, lifting +can reuse the previous definitions; only the resulting pointers are returned +differently (as `i32` values instead of as a pair in linear memory). +Fixed-length lists are lowered the same way as tuples (via `lower_flat_record` +below). ```python def lower_flat_string(cx, v): ptr, packed_length = store_string_into_range(cx, v) return [Value('i32', ptr), Value('i32', packed_length)] -def lower_flat_list(cx, v, elem_type): +def lower_flat_list(cx, v, elem_type, maybe_length): + if maybe_length is not None: + assert(maybe_length == len(v)) + flat = [] + for e in v: + flat += lower_flat(cx, e, elem_type) + return flat (ptr, length) = store_list_into_range(cx, v, elem_type) return [Value('i32', ptr), Value('i32', length)] ``` diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md index f8cd5c34..fcd72800 100644 --- a/design/mvp/Explainer.md +++ b/design/mvp/Explainer.md @@ -42,6 +42,7 @@ emoji symbols listed below; these emojis will be removed once they are implemented, considered stable and included in a future milestone: * 🪙: value imports/exports and component-level start function * 🪺: nested namespaces and packages in import/export names +* 🔧: fixed-length lists (Based on the previous [scoping and layering] proposal to the WebAssembly CG, this repo merges and supersedes the [module-linking] and [interface-types] @@ -534,6 +535,7 @@ defvaltype ::= bool | (record (field "