Add optional fixed length to lists

Resolves #181
WebAssembly · Feb 19, 2024 · 91d669e · 91d669e
1 parent f0487b2
commit 91d669e
Show file tree

Hide file tree

Showing 6 changed files with 166 additions and 35 deletions.
diff --git a/design/mvp/Binary.md b/design/mvp/Binary.md
@@ -12,7 +12,7 @@ rules, but rather merge the minimal need-to-know elements of both, with just
 enough detail to create a prototype. A complete definition of the binary format
 and validation will be present in the [formal specification](../../spec/).
 
-See the [explainer introduction](Explainer.md) for an explanation of 🪙.
+See [Gated Features](Explainer.md#gated-features) for an explanation of 🪙 and 🔧.
 
 
 ## Component Definitions
@@ -184,6 +184,7 @@ defvaltype ::= pvt:<primvaltype> => pvt
  | 0x72 lt*:vec(<labelvaltype>) => (record (field lt)*) (if |lt*| > 0)
  | 0x71 case*:vec(<case>) => (variant case*)
  | 0x70 t:<valtype> => (list t)
+ | 0x67 t:<valtype> len:<u32> => (list t len) 🔧
  | 0x6f t*:vec(<valtype>) => (tuple t+) (if |t*| > 0)
  | 0x6e l*:vec(<label'>) => (flags l+) (if |l*| > 0)
  | 0x6d l*:vec(<label'>) => (enum l*)
@@ -373,6 +374,12 @@ appear once within a `name` section, for example component instances can only be
 named once.
 
 
+## Binary Format Warts to Fix in a 1.0 Release
+
+* The two `list` type codes should be merged into one with an optional immediate.
+* The `0x00` prefix byte of `importname'` and `exportname'` will be removed or repurposed.
+
+
 [`core:u32`]: https://webassembly.github.io/spec/core/binary/values.html#integers
 [`core:section`]: https://webassembly.github.io/spec/core/binary/modules.html#binary-section
 [`core:custom`]: https://webassembly.github.io/spec/core/binary/modules.html#custom-section

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
@@ -99,13 +99,23 @@ def alignment(t):
  case Float32() : return 4
  case Float64() : return 8
  case Char() : return 4
- case String() | List(_) : return 4
+ case String() : return 4
+ case List(t, l) : return alignment_list(t, l)
  case Record(fields) : return alignment_record(fields)
  case Variant(cases) : return alignment_variant(cases)
  case Flags(labels) : return alignment_flags(labels)
  case Own(_) | Borrow(_) : return 4
 ```
 
+List alignment is the same as tuple alignment when the length is fixed and
+otherwise uses the alignment of pointers.
+```python
+def alignment_list(elem_type, maybe_length):
+ if maybe_length is not None:
+ return alignment(elem_type)
+ return 4
+```
+
 Record alignment is tuple alignment, with the definitions split for reuse below:
 ```python
 def alignment_record(fields):
@@ -172,12 +182,18 @@ def size(t):
  case Float32() : return 4
  case Float64() : return 8
  case Char() : return 4
- case String() | List(_) : return 8
+ case String() : return 8
+ case List(t, l) : return size_list(t, l)
  case Record(fields) : return size_record(fields)
  case Variant(cases) : return size_variant(cases)
  case Flags(labels) : return size_flags(labels)
  case Own(_) | Borrow(_) : return 4
 
+def size_list(elem_type, maybe_length):
+ if maybe_length is not None:
+ return maybe_length * size(elem_type)
+ return 8
+
 def size_record(fields):
  s = 0
  for f in fields:
@@ -445,7 +461,7 @@ def load(cx, ptr, t):
  case Float64() : return decode_i64_as_float(load_int(cx, ptr, 8))
  case Char() : return convert_i32_to_char(cx, load_int(cx, ptr, 4))
  case String() : return load_string(cx, ptr)
- case List(t)  : return load_list(cx, ptr, t)
+ case List(t, l) : return load_list(cx, ptr, t, l)
  case Record(fields) : return load_record(cx, ptr, fields)
  case Variant(cases) : return load_variant(cx, ptr, cases)
  case Flags(labels) : return load_flags(cx, ptr, labels)
@@ -564,14 +580,19 @@ def load_string_from_range(cx, ptr, tagged_code_units):
 
 Lists and records are loaded by recursively loading their elements/fields:
 ```python
-def load_list(cx, ptr, elem_type):
+def load_list(cx, ptr, elem_type, maybe_length):
+ if maybe_length is not None:
+ return load_list_from_valid_range(cx, ptr, maybe_length, elem_type)
  begin = load_int(cx, ptr, 4)
  length = load_int(cx, ptr + 4, 4)
  return load_list_from_range(cx, begin, length, elem_type)
 
 def load_list_from_range(cx, ptr, length, elem_type):
  trap_if(ptr != align_to(ptr, alignment(elem_type)))
  trap_if(ptr + length * size(elem_type) > len(cx.opts.memory))
+ return load_list_from_valid_range(cx, ptr, length, elem_type)
+
+def load_list_from_valid_range(cx, ptr, length, elem_type):
  a = []
  for i in range(length):
  a.append(load(cx, ptr + i * size(elem_type), elem_type))
@@ -697,7 +718,7 @@ def store(cx, v, t, ptr):
  case Float64() : store_int(cx, encode_float_as_i64(v), ptr, 8)
  case Char() : store_int(cx, char_to_i32(v), ptr, 4)
  case String() : store_string(cx, v, ptr)
- case List(t)  : store_list(cx, v, ptr, t)
+ case List(t, l) : store_list(cx, v, ptr, t, l)
  case Record(fields) : store_record(cx, v, ptr, fields)
  case Variant(cases) : store_variant(cx, v, ptr, cases)
  case Flags(labels) : store_flags(cx, v, ptr, labels)
@@ -984,7 +1005,11 @@ are symmetric to the loading functions. Unlike strings, lists can
 simply allocate based on the up-front knowledge of length and static
 element size.
 ```python
-def store_list(cx, v, ptr, elem_type):
+def store_list(cx, v, ptr, elem_type, maybe_length):
+ if maybe_length is not None:
+ assert(maybe_length == len(v))
+ store_list_into_valid_range(cx, v, ptr, elem_type)
+ return
  begin, length = store_list_into_range(cx, v, elem_type)
  store_int(cx, begin, ptr, 4)
  store_int(cx, length, ptr + 4, 4)
@@ -995,9 +1020,12 @@ def store_list_into_range(cx, v, elem_type):
  ptr = cx.opts.realloc(0, 0, alignment(elem_type), byte_length)
  trap_if(ptr != align_to(ptr, alignment(elem_type)))
  trap_if(ptr + byte_length > len(cx.opts.memory))
+ store_list_into_valid_range(cx, v, ptr, elem_type)
+ return (ptr, len(v))
+
+def store_list_into_valid_range(cx, v, ptr, elem_type):
  for i,e in enumerate(v):
  store(cx, e, elem_type, ptr + i * size(elem_type))
- return (ptr, len(v))
 
 def store_record(cx, v, ptr, fields):
  for f in fields:
@@ -1137,13 +1165,23 @@ def flatten_type(t):
  case Float32() : return ['f32']
  case Float64() : return ['f64']
  case Char() : return ['i32']
- case String() | List(_) : return ['i32', 'i32']
+ case String() : return ['i32', 'i32']
+ case List(t, l) : return flatten_list(t, l)
  case Record(fields) : return flatten_record(fields)
  case Variant(cases) : return flatten_variant(cases)
  case Flags(labels) : return ['i32'] * num_i32_flags(labels)
  case Own(_) | Borrow(_) : return ['i32']
 ```
 
+List flattening of a fixed-length list uses the same flattening as a tuple
+(via `flatten_record` below).
+```python
+def flatten_list(elem_type, maybe_length):
+ if maybe_length is not None:
+ return flatten_type(elem_type) * maybe_length
+ return ['i32', 'i32']
+```
+
 Record flattening simply flattens each field in sequence.
 ```python
 def flatten_record(fields):
@@ -1217,7 +1255,7 @@ def lift_flat(cx, vi, t):
  case Float64() : return canonicalize_nan64(vi.next('f64'))
  case Char() : return convert_i32_to_char(cx, vi.next('i32'))
  case String() : return lift_flat_string(cx, vi)
- case List(t)  : return lift_flat_list(cx, vi, t)
+ case List(t, l) : return lift_flat_list(cx, vi, t, l)
  case Record(fields) : return lift_flat_record(cx, vi, fields)
  case Variant(cases) : return lift_flat_variant(cx, vi, cases)
  case Flags(labels) : return lift_flat_flags(vi, labels)
@@ -1246,17 +1284,23 @@ def lift_flat_signed(vi, core_width, t_width):
  return i
 ```
 
-The contents of strings and lists are always stored in memory so lifting these
-types is essentially the same as loading them from memory; the only difference
-is that the pointer and length come from `i32` values instead of from linear
-memory:
+The contents of strings and variable-length lists are stored in memory so
+lifting these types is essentially the same as loading them from memory; the
+only difference is that the pointer and length come from `i32` values instead
+of from linear memory. Fixed-length lists are lifted the same way as a
+tuple (via `lift_flat_record` below).
 ```python
 def lift_flat_string(cx, vi):
  ptr = vi.next('i32')
  packed_length = vi.next('i32')
  return load_string_from_range(cx, ptr, packed_length)
 
-def lift_flat_list(cx, vi, elem_type):
+def lift_flat_list(cx, vi, elem_type, maybe_length):
+ if maybe_length is not None:
+ a = []
+ for i in range(maybe_length):
+ a.append(lift_flat(cx, vi, elem_type))
+ return a
  ptr = vi.next('i32')
  length = vi.next('i32')
  return load_list_from_range(cx, ptr, length, elem_type)
@@ -1341,7 +1385,7 @@ def lower_flat(cx, v, t):
  case Float64() : return [Value('f64', maybe_scramble_nan64(v))]
  case Char() : return [Value('i32', char_to_i32(v))]
  case String() : return lower_flat_string(cx, v)
- case List(t)  : return lower_flat_list(cx, v, t)
+ case List(t, l) : return lower_flat_list(cx, v, t, l)
  case Record(fields) : return lower_flat_record(cx, v, fields)
  case Variant(cases) : return lower_flat_variant(cx, v, cases)
  case Flags(labels) : return lower_flat_flags(v, labels)
@@ -1361,15 +1405,23 @@ def lower_flat_signed(i, core_bits):
  return [Value('i' + str(core_bits), i)]
 ```
 
-Since strings and lists are stored in linear memory, lifting can reuse the
-previous definitions; only the resulting pointers are returned differently
-(as `i32` values instead of as a pair in linear memory):
+Since strings and variable-length lists are stored in linear memory, lifting
+can reuse the previous definitions; only the resulting pointers are returned
+differently (as `i32` values instead of as a pair in linear memory).
+Fixed-length lists are lowered the same way as tuples (via `lower_flat_record`
+below).
 ```python
 def lower_flat_string(cx, v):
  ptr, packed_length = store_string_into_range(cx, v)
  return [Value('i32', ptr), Value('i32', packed_length)]
 
-def lower_flat_list(cx, v, elem_type):
+def lower_flat_list(cx, v, elem_type, maybe_length):
+ if maybe_length is not None:
+ assert(maybe_length == len(v))
+ flat = []
+ for e in v:
+ flat += lower_flat(cx, e, elem_type)
+ return flat
  (ptr, length) = store_list_into_range(cx, v, elem_type)
  return [Value('i32', ptr), Value('i32', length)]
 ```

diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
@@ -42,6 +42,7 @@ emoji symbols listed below; these emojis will be removed once they are
 implemented, considered stable and included in a future milestone:
 * 🪙: value imports/exports and component-level start function
 * 🪺: nested namespaces and packages in import/export names
+* 🔧: fixed-length lists
 
 (Based on the previous [scoping and layering] proposal to the WebAssembly CG,
 this repo merges and supersedes the [module-linking] and [interface-types]
@@ -534,6 +535,7 @@ defvaltype ::= bool
  | (record (field "<label>" <valtype>)+)
  | (variant (case <id>? "<label>" <valtype>?)+)
  | (list <valtype>)
+ | (list <valtype> <u32>) 🔧
  | (tuple <valtype>+)
  | (flags "<label>"+)
  | (enum "<label>"+)
@@ -586,7 +588,7 @@ sets of abstract values:
 | `char` | [Unicode Scalar Values] |
 | `record` | heterogeneous [tuples] of named values |
 | `variant` | heterogeneous [tagged unions] of named values |
-| `list` | homogeneous, variable-length [sequences] of values |
+| `list` | homogeneous, variable- or fixed-length [sequences] of values |
 | `own` | a unique, opaque address of a resource that will be destroyed when this value is dropped |
 | `borrow` | an opaque address of a resource that must be dropped before the current export call returns |
 
@@ -618,6 +620,10 @@ component-level there is a `bool` type with `true` and `false` values.
 The `record`, `variant`, and `list` types allow for grouping, categorizing,
 and sequencing contained values.
 
+🔧 When the optional `<u32>` immediate of the `list` type constructor is present,
+the list has a fixed length and the representation of the list in memory is
+specialized to this length.
+
 ##### Handle types
 
 The `own` and `borrow` value types are both *handle types*. Handles logically

diff --git a/design/mvp/WIT.md b/design/mvp/WIT.md
@@ -24,8 +24,11 @@ document, a pseudo-formal [grammar specification][lexical-structure], and
 additionally a specification of the [package format][package-format] of a WIT
 package suitable for distribution.
 
+See [Gated Features] for an explanation of 🔧.
+
 [IDL]: https://en.wikipedia.org/wiki/Interface_description_language
 [components]: https://github.com/webassembly/component-model
+[Gated Features]: Explainer.md#gated-features
 
 ## Package Names
 
@@ -1225,6 +1228,9 @@ tuple-list ::= ty
  | ty ',' tuple-list?
 
 list ::= 'list' '<' ty '>'
+ | 'list' '<' ty ',' uint '>' 🔧
+
+uint ::= [1-9][0-9]*
 
 option ::= 'option' '<' ty '>'
 
@@ -1238,8 +1244,18 @@ The `tuple` type is semantically equivalent to a `record` with numerical fields,
 but it frequently can have language-specific meaning so it's provided as a
 first-class type.
 
-Similarly the `option` and `result` types are semantically equivalent to the
-variants:
+🔧 A `list` with a fixed length provides the low-level memory representation of a
+homogeneous `tuple` of the same length, but with the dynamic indexing of a
+list. E.g., the following two functions have the same low-level (Core
+WebAssembly) representation, but will naturally produce different source-level
+bindings:
+
+```wit
+get-ipv4-address1: func() -> list<u8, 4>;
+get-ipv4-address2: func() -> tuple<u8, u8, u8, u8>;
+```
+
+The `option` and `result` types are semantically equivalent to the variants:
 
 ```wit
 variant option {