From 7d852a0d903c5a7b0422ddf81844147c704e09b0 Mon Sep 17 00:00:00 2001
From: AlexWells <alex.wells@diamond.ac.uk>
Date: Thu, 25 Aug 2022 13:45:09 +0100
Subject: [PATCH 1/7] Allow arrays of strings to be used with Waveforms

Note the tests fail, largely due to now numpy defaults to
a floating point dtype, which we cannot coerce into an array of strings.

Also TODOs need completion...
---
 softioc/builder.py          |  7 +++++++
 softioc/device.py           |  1 +
 tests/test_record_values.py | 18 ++++++++++++++++++
 3 files changed, 26 insertions(+)

diff --git a/softioc/builder.py b/softioc/builder.py
index 6db5fa08..0403c2ef 100644
--- a/softioc/builder.py
+++ b/softioc/builder.py
@@ -148,6 +148,8 @@ def Action(name, **fields):
     'uint32':   'ULONG',
     'float32':  'FLOAT',
     'float64':  'DOUBLE',
+    'bytes32':  'STRING',
+    'bytes320': 'STRING',
 }
 
 # Coverts FTVL string to numpy type
@@ -160,6 +162,7 @@ def Action(name, **fields):
     'ULONG':    'uint32',
     'FLOAT':    'float32',
     'DOUBLE':   'float64',
+    'STRING':   'S40',
 }
 
 
@@ -211,11 +214,15 @@ def _waveform(value, fields):
 
         # Special case for [u]int64: if the initial value comes in as 64 bit
         # integers we cannot represent that, so recast it as [u]int32
+        # Special case for array of strings to correctly identify each element
+        # of the array as a string type.
         if datatype is None:
             if initial_value.dtype == numpy.int64:
                 initial_value = numpy.require(initial_value, numpy.int32)
             elif initial_value.dtype == numpy.uint64:
                 initial_value = numpy.require(initial_value, numpy.uint32)
+            elif initial_value.dtype.char == "S":
+                initial_value = numpy.require(initial_value, numpy.dtype("S40"))
     else:
         initial_value = numpy.array([], dtype = datatype)
         length = _get_length(fields)
diff --git a/softioc/device.py b/softioc/device.py
index 7a577c29..eed9e521 100644
--- a/softioc/device.py
+++ b/softioc/device.py
@@ -391,6 +391,7 @@ def _read_value(self, record):
         return result
 
     def _write_value(self, record, value):
+        value = _require_waveform(value, self._dtype)
         nord = len(value)
         memmove(
             record.BPTR, value.ctypes.data_as(c_void_p),
diff --git a/tests/test_record_values.py b/tests/test_record_values.py
index bf36ce12..16587002 100644
--- a/tests/test_record_values.py
+++ b/tests/test_record_values.py
@@ -189,6 +189,24 @@ def record_values_names(fixture_value):
         ),
         numpy.ndarray,
     ),
+    (
+        "wIn_byte_string_array",
+        builder.WaveformIn,
+        [b"AB", b"CD", b"EF"],
+        numpy.array(
+            [b"AB", b"CD", b"EF"], dtype=numpy.dtype("|S40")
+        ),
+        numpy.ndarray,
+    ),
+    (
+        "wOut_byte_string_array",
+        builder.WaveformOut,
+        [b"AB", b"CD", b"EF"],
+        numpy.array(
+            [b"AB", b"CD", b"EF"], dtype=numpy.dtype("|S40")
+        ),
+        numpy.ndarray,
+    ),
     (
         "longStringIn_str",
         builder.longStringIn,

From f127bf28f74322df03ae84913a4b7766fe9cbc12 Mon Sep 17 00:00:00 2001
From: AlexWells <alex.wells@diamond.ac.uk>
Date: Fri, 14 Oct 2022 14:30:35 +0100
Subject: [PATCH 2/7] Add support and tests for List[str]

Note that these tests don't currently pass!
---
 softioc/builder.py          |  2 +-
 tests/test_record_values.py | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/softioc/builder.py b/softioc/builder.py
index 0403c2ef..d06096d1 100644
--- a/softioc/builder.py
+++ b/softioc/builder.py
@@ -221,7 +221,7 @@ def _waveform(value, fields):
                 initial_value = numpy.require(initial_value, numpy.int32)
             elif initial_value.dtype == numpy.uint64:
                 initial_value = numpy.require(initial_value, numpy.uint32)
-            elif initial_value.dtype.char == "S":
+            elif initial_value.dtype.char in ("S", "U"):
                 initial_value = numpy.require(initial_value, numpy.dtype("S40"))
     else:
         initial_value = numpy.array([], dtype = datatype)
diff --git a/tests/test_record_values.py b/tests/test_record_values.py
index 16587002..13fd62d3 100644
--- a/tests/test_record_values.py
+++ b/tests/test_record_values.py
@@ -207,6 +207,24 @@ def record_values_names(fixture_value):
         ),
         numpy.ndarray,
     ),
+    (
+        "wIn_string_array",
+        builder.WaveformIn,
+        ["123", "456", "7890"],
+        numpy.array(
+            [b"123", b"456", b"7890"], dtype=numpy.dtype("|S40")
+        ),
+        numpy.ndarray,
+    ),
+    (
+        "wOut_string_array",
+        builder.WaveformOut,
+        ["123", "456", "7890"],
+        numpy.array(
+            [b"123", b"456", b"7890"], dtype=numpy.dtype("|S40")
+        ),
+        numpy.ndarray,
+    ),
     (
         "longStringIn_str",
         builder.longStringIn,

From 01163d22a13bde3eff6b9c9dda8d91646133c6cb Mon Sep 17 00:00:00 2001
From: AlexWells <alex.wells@diamond.ac.uk>
Date: Fri, 24 Mar 2023 09:39:15 +0000
Subject: [PATCH 3/7] Handle returning a numpy array of strings

Also make all the tests pass. Involved skipping
---
 softioc/builder.py          |  7 ++--
 softioc/device.py           | 11 +++++-
 tests/test_record_values.py | 70 ++++++++++++++++++++++++++++---------
 3 files changed, 67 insertions(+), 21 deletions(-)

diff --git a/softioc/builder.py b/softioc/builder.py
index d06096d1..8fa42ba6 100644
--- a/softioc/builder.py
+++ b/softioc/builder.py
@@ -148,8 +148,7 @@ def Action(name, **fields):
     'uint32':   'ULONG',
     'float32':  'FLOAT',
     'float64':  'DOUBLE',
-    'bytes32':  'STRING',
-    'bytes320': 'STRING',
+    'bytes320': 'STRING',  # Numpy's term for a 40-character string (40*8 bits)
 }
 
 # Coverts FTVL string to numpy type
@@ -214,8 +213,8 @@ def _waveform(value, fields):
 
         # Special case for [u]int64: if the initial value comes in as 64 bit
         # integers we cannot represent that, so recast it as [u]int32
-        # Special case for array of strings to correctly identify each element
-        # of the array as a string type.
+        # Special case for array of strings to mark each element as conforming
+        # to EPICS 40-character string limit
         if datatype is None:
             if initial_value.dtype == numpy.int64:
                 initial_value = numpy.require(initial_value, numpy.int32)
diff --git a/softioc/device.py b/softioc/device.py
index eed9e521..d092f72b 100644
--- a/softioc/device.py
+++ b/softioc/device.py
@@ -356,6 +356,16 @@ def _require_waveform(value, dtype):
     if isinstance(value, bytes):
         # Special case hack for byte arrays.  Surprisingly tricky:
         value = numpy.frombuffer(value, dtype = numpy.uint8)
+
+    if dtype and dtype.char == 'S':
+        result = numpy.empty(len(value), 'S40')
+        for n, s in enumerate(value):
+            if isinstance(s, str):
+                result[n] = s.encode('UTF-8')
+            else:
+                result[n] = s
+        return result
+
     value = numpy.require(value, dtype = dtype)
     if value.shape == ():
         value.shape = (1,)
@@ -391,7 +401,6 @@ def _read_value(self, record):
         return result
 
     def _write_value(self, record, value):
-        value = _require_waveform(value, self._dtype)
         nord = len(value)
         memmove(
             record.BPTR, value.ctypes.data_as(c_void_p),
diff --git a/tests/test_record_values.py b/tests/test_record_values.py
index 13fd62d3..84465d8d 100644
--- a/tests/test_record_values.py
+++ b/tests/test_record_values.py
@@ -31,6 +31,9 @@
     "might think to put into a record that can theoretically hold a huge " \
     "string and so lets test it and prove that shall we?"
 
+# The numpy dtype of all arrays of strings
+DTYPE_STRING = "S40"
+
 
 def record_func_names(fixture_value):
     """Provide a nice name for the record_func fixture"""
@@ -113,6 +116,8 @@ def record_values_names(fixture_value):
     ("strOut_39chars", builder.stringOut, MAX_LEN_STR, MAX_LEN_STR, str),
     ("strIn_empty", builder.stringIn, "", "", str),
     ("strOut_empty", builder.stringOut, "", "", str),
+    # TODO: Add Invalid-utf8 tests?
+    # TODO: Add tests for bytes-strings to stringIn/Out?
     ("strin_utf8", builder.stringIn, "%a€b", "%a€b", str),  # Valid UTF-8
     ("strOut_utf8", builder.stringOut, "%a€b", "%a€b", str),  # Valid UTF-8
     (
@@ -189,39 +194,42 @@ def record_values_names(fixture_value):
         ),
         numpy.ndarray,
     ),
+
+    # TODO: Unicode versions of below tests?
+
     (
         "wIn_byte_string_array",
         builder.WaveformIn,
-        [b"AB", b"CD", b"EF"],
+        [b"AB123", b"CD456", b"EF789"],
         numpy.array(
-            [b"AB", b"CD", b"EF"], dtype=numpy.dtype("|S40")
+            ["AB123", "CD456", "EF789"], dtype=DTYPE_STRING
         ),
         numpy.ndarray,
     ),
     (
         "wOut_byte_string_array",
         builder.WaveformOut,
-        [b"AB", b"CD", b"EF"],
+        [b"12AB", b"34CD", b"56EF"],
         numpy.array(
-            [b"AB", b"CD", b"EF"], dtype=numpy.dtype("|S40")
+            ["12AB", "34CD", "56EF"], dtype=DTYPE_STRING
         ),
         numpy.ndarray,
     ),
     (
         "wIn_string_array",
         builder.WaveformIn,
-        ["123", "456", "7890"],
+        ["123abc", "456def", "7890ghi"],
         numpy.array(
-            [b"123", b"456", b"7890"], dtype=numpy.dtype("|S40")
+            ["123abc", "456def", "7890ghi"], dtype=DTYPE_STRING
         ),
         numpy.ndarray,
     ),
     (
         "wOut_string_array",
         builder.WaveformOut,
-        ["123", "456", "7890"],
+        ["123abc", "456def", "7890ghi"],
         numpy.array(
-            [b"123", b"456", b"7890"], dtype=numpy.dtype("|S40")
+            ["123abc", "456def", "7890ghi"],  dtype=DTYPE_STRING
         ),
         numpy.ndarray,
     ),
@@ -275,6 +283,7 @@ def record_values(request):
     """A list of parameters for record value setting/getting tests.
 
     Fields are:
+    - Record name
     - Record builder function
     - Input value passed to .set()/initial_value/caput
     - Expected output value after doing .get()/caget
@@ -412,8 +421,31 @@ def run_test_function(
     expected value. set_enum and get_enum determine when the record's value is
     set and how the value is retrieved, respectively."""
 
-    ctx = get_multiprocessing_context()
+    def is_valid(configuration):
+        """Remove some cases that cannot succeed.
+        Waveforms of Strings must have the value specified as initial value."""
+        (
+            record_name,
+            creation_func,
+            initial_value,
+            expected_value,
+            expected_type,
+        ) = configuration
+
+        if creation_func in (builder.WaveformIn, builder.WaveformOut):
+            if isinstance(initial_value, list) and \
+                    all(isinstance(val, (str, bytes)) for val in initial_value):
+                if set_enum is not SetValueEnum.INITIAL_VALUE:
+                    print(f"Removing {configuration}")
+                    return False
+
+        return True
+
+    record_configurations = [
+        x for x in record_configurations if is_valid(x)
+    ]
 
+    ctx = get_multiprocessing_context()
     parent_conn, child_conn = ctx.Pipe()
 
     ioc_process = ctx.Process(
@@ -498,6 +530,7 @@ def run_test_function(
 
                 if (
                     creation_func in [builder.WaveformOut, builder.WaveformIn]
+                    and expected_value.dtype
                     and expected_value.dtype in [numpy.float64, numpy.int32]
                 ):
                     log(
@@ -506,6 +539,10 @@ def run_test_function(
                         "scalar. Therefore we skip this check.")
                     continue
 
+                if isinstance(rec_val, numpy.ndarray) and len(rec_val) > 1 \
+                        and rec_val.dtype.char in ["S", "U"]:
+                    # caget won't retrieve the full length 40 buffer
+                    rec_val = rec_val.astype(DTYPE_STRING)
 
             record_value_asserts(
                 creation_func, rec_val, expected_value, expected_type
@@ -521,13 +558,6 @@ def run_test_function(
             pytest.fail("Process did not terminate")
 
 
-def skip_long_strings(record_values):
-    if (
-        record_values[0] in [builder.stringIn, builder.stringOut]
-        and len(record_values[1]) > 40
-    ):
-        pytest.skip("CAPut blocks strings > 40 characters.")
-
 
 class TestGetValue:
     """Tests that use .get() to check whether values applied with .set(),
@@ -545,6 +575,14 @@ def test_value_pre_init_set(self, record_values):
             expected_type,
         ) = record_values
 
+        if (
+            creation_func in [builder.WaveformIn, builder.WaveformOut] and
+            isinstance(initial_value, list) and
+            all(isinstance(s, (str, bytes)) for s in initial_value)
+        ):
+            pytest.skip("Cannot .set() a list of strings to a waveform, must"
+                        "initially specify using initial_value or FTVL")
+
         kwarg = {}
         if creation_func in [builder.WaveformIn, builder.WaveformOut]:
             kwarg = {"length": WAVEFORM_LENGTH}  # Must specify when no value

From 01308e011d251c49a45e50e79804d71c6327074e Mon Sep 17 00:00:00 2001
From: AlexWells <alex.wells@diamond.ac.uk>
Date: Wed, 29 Mar 2023 10:26:36 +0100
Subject: [PATCH 4/7] Add exception on overlong string setting

Also add more tests
---
 .vscode/launch.json         |  3 +-
 CHANGELOG.rst               |  1 +
 docs/reference/api.rst      |  5 ++
 softioc/builder.py          |  8 ++--
 softioc/device.py           | 22 ++++++---
 tests/test_record_values.py | 93 ++++++++++++++++++++++++++++++++-----
 6 files changed, 109 insertions(+), 23 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index 21c0b227..39175105 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -9,7 +9,8 @@
             "type": "python",
             "request": "launch",
             "program": "${file}",
-            "console": "integratedTerminal"
+            "console": "integratedTerminal",
+            "justMyCode": false,
         },
         {
             "name": "Debug Unit Test",
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index db3d872b..66e83267 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -30,6 +30,7 @@ Added:
 
 - `Allow "status" and "severity" on In record init <../../pull/111>`_
 - `Allow users to reset the list of created records <../../pull/114>`_
+- `Allow arrays of strings to be used with Waveforms <../../pull/102>`_
 
 4.1.0_ - 2022-08-05
 -------------------
diff --git a/docs/reference/api.rst b/docs/reference/api.rst
index 22094b25..ae85f200 100644
--- a/docs/reference/api.rst
+++ b/docs/reference/api.rst
@@ -342,6 +342,11 @@ All functions return a wrapped `ProcessDeviceSupportIn` or
     field type name.  Otherwise the field type is taken from the initial value
     if given, or defaults to ``'FLOAT'``.
 
+    .. note::
+        When storing arrays of strings, it is possible to store Unicode characters.
+        However, as EPICS has no Unicode support the resultant values will be stored
+        as byte strings. Care must be taken when encoding/decoding the values.
+
 
 The following functions generates specialised records.
 
diff --git a/softioc/builder.py b/softioc/builder.py
index 8fa42ba6..70ae8a1c 100644
--- a/softioc/builder.py
+++ b/softioc/builder.py
@@ -11,7 +11,7 @@
 
 from . import device, pythonSoftIoc  # noqa
 # Re-export this so users only have to import the builder
-from .device import SetBlocking # noqa
+from .device import SetBlocking, to_epics_str_array # noqa
 
 PythonDevice = pythonSoftIoc.PythonDevice()
 
@@ -148,7 +148,7 @@ def Action(name, **fields):
     'uint32':   'ULONG',
     'float32':  'FLOAT',
     'float64':  'DOUBLE',
-    'bytes320': 'STRING',  # Numpy's term for a 40-character string (40*8 bits)
+    'bytes320': 'STRING',  # Numpy term for 40-character byte str (40*8 bits)
 }
 
 # Coverts FTVL string to numpy type
@@ -220,8 +220,8 @@ def _waveform(value, fields):
                 initial_value = numpy.require(initial_value, numpy.int32)
             elif initial_value.dtype == numpy.uint64:
                 initial_value = numpy.require(initial_value, numpy.uint32)
-            elif initial_value.dtype.char in ("S", "U"):
-                initial_value = numpy.require(initial_value, numpy.dtype("S40"))
+            elif initial_value.dtype.char in ('S', 'U'):
+                initial_value = to_epics_str_array(initial_value)
     else:
         initial_value = numpy.array([], dtype = datatype)
         length = _get_length(fields)
diff --git a/softioc/device.py b/softioc/device.py
index d092f72b..3055d889 100644
--- a/softioc/device.py
+++ b/softioc/device.py
@@ -351,6 +351,20 @@ class ao(ProcessDeviceSupportOut):
     _ctype_ = c_double
     _dbf_type_ = fields.DBF_DOUBLE
 
+def to_epics_str_array(value):
+    """Convert the given array of Python strings to an array of EPICS
+    nul-terminated strings"""
+    result = numpy.empty(len(value), 'S40')
+
+    for n, s in enumerate(value):
+        if isinstance(s, str):
+            val = EpicsString._ctype_()
+            val.value = s.encode() + b'\0'
+            result[n] = val.value
+        else:
+            result[n] = s
+    return result
+
 
 def _require_waveform(value, dtype):
     if isinstance(value, bytes):
@@ -358,13 +372,7 @@ def _require_waveform(value, dtype):
         value = numpy.frombuffer(value, dtype = numpy.uint8)
 
     if dtype and dtype.char == 'S':
-        result = numpy.empty(len(value), 'S40')
-        for n, s in enumerate(value):
-            if isinstance(s, str):
-                result[n] = s.encode('UTF-8')
-            else:
-                result[n] = s
-        return result
+        return to_epics_str_array(value)
 
     value = numpy.require(value, dtype = dtype)
     if value.shape == ():
diff --git a/tests/test_record_values.py b/tests/test_record_values.py
index 84465d8d..a637a310 100644
--- a/tests/test_record_values.py
+++ b/tests/test_record_values.py
@@ -32,7 +32,7 @@
     "string and so lets test it and prove that shall we?"
 
 # The numpy dtype of all arrays of strings
-DTYPE_STRING = "S40"
+NUMPY_DTYPE_STRING = "S40"
 
 
 def record_func_names(fixture_value):
@@ -116,8 +116,6 @@ def record_values_names(fixture_value):
     ("strOut_39chars", builder.stringOut, MAX_LEN_STR, MAX_LEN_STR, str),
     ("strIn_empty", builder.stringIn, "", "", str),
     ("strOut_empty", builder.stringOut, "", "", str),
-    # TODO: Add Invalid-utf8 tests?
-    # TODO: Add tests for bytes-strings to stringIn/Out?
     ("strin_utf8", builder.stringIn, "%a€b", "%a€b", str),  # Valid UTF-8
     ("strOut_utf8", builder.stringOut, "%a€b", "%a€b", str),  # Valid UTF-8
     (
@@ -194,15 +192,12 @@ def record_values_names(fixture_value):
         ),
         numpy.ndarray,
     ),
-
-    # TODO: Unicode versions of below tests?
-
     (
         "wIn_byte_string_array",
         builder.WaveformIn,
         [b"AB123", b"CD456", b"EF789"],
         numpy.array(
-            ["AB123", "CD456", "EF789"], dtype=DTYPE_STRING
+            ["AB123", "CD456", "EF789"], dtype=NUMPY_DTYPE_STRING
         ),
         numpy.ndarray,
     ),
@@ -211,7 +206,35 @@ def record_values_names(fixture_value):
         builder.WaveformOut,
         [b"12AB", b"34CD", b"56EF"],
         numpy.array(
-            ["12AB", "34CD", "56EF"], dtype=DTYPE_STRING
+            ["12AB", "34CD", "56EF"], dtype=NUMPY_DTYPE_STRING
+        ),
+        numpy.ndarray,
+    ),
+    (
+        "wIn_unicode_string_array",
+        builder.WaveformIn,
+        ["12€½", "34¾²", "56¹³"],
+        numpy.array(
+            [
+                b'12\xe2\x82\xac\xc2\xbd',
+                b'34\xc2\xbe\xc2\xb2',
+                b'56\xc2\xb9\xc2\xb3'
+            ],
+            dtype=NUMPY_DTYPE_STRING
+        ),
+        numpy.ndarray,
+    ),
+    (
+        "wOut_unicode_string_array",
+        builder.WaveformOut,
+        ["12€½", "34¾²", "56¹³"],
+        numpy.array(
+            [
+                b'12\xe2\x82\xac\xc2\xbd',
+                b'34\xc2\xbe\xc2\xb2',
+                b'56\xc2\xb9\xc2\xb3'
+            ],
+            dtype=NUMPY_DTYPE_STRING
         ),
         numpy.ndarray,
     ),
@@ -220,7 +243,7 @@ def record_values_names(fixture_value):
         builder.WaveformIn,
         ["123abc", "456def", "7890ghi"],
         numpy.array(
-            ["123abc", "456def", "7890ghi"], dtype=DTYPE_STRING
+            ["123abc", "456def", "7890ghi"], dtype=NUMPY_DTYPE_STRING
         ),
         numpy.ndarray,
     ),
@@ -229,7 +252,7 @@ def record_values_names(fixture_value):
         builder.WaveformOut,
         ["123abc", "456def", "7890ghi"],
         numpy.array(
-            ["123abc", "456def", "7890ghi"],  dtype=DTYPE_STRING
+            ["123abc", "456def", "7890ghi"],  dtype=NUMPY_DTYPE_STRING
         ),
         numpy.ndarray,
     ),
@@ -539,10 +562,19 @@ def is_valid(configuration):
                         "scalar. Therefore we skip this check.")
                     continue
 
+                # caget on a waveform of strings will return unicode. Have to
+                # convert it manually to binary.
                 if isinstance(rec_val, numpy.ndarray) and len(rec_val) > 1 \
                         and rec_val.dtype.char in ["S", "U"]:
+                    result = numpy.empty(len(rec_val), NUMPY_DTYPE_STRING)
+                    for n, s in enumerate(rec_val):
+                        if isinstance(s, str):
+                            result[n] = s.encode('UTF-8', errors= 'ignore')
+                        else:
+                            result[n] = s
+                    rec_val = result
                     # caget won't retrieve the full length 40 buffer
-                    rec_val = rec_val.astype(DTYPE_STRING)
+                    rec_val = rec_val.astype(NUMPY_DTYPE_STRING)
 
             record_value_asserts(
                 creation_func, rec_val, expected_value, expected_type
@@ -958,7 +990,46 @@ def test_waveform_rejects_overlong_values(self):
         w_in = builder.WaveformIn("W_IN", [1, 2, 3])
         w_out = builder.WaveformOut("W_OUT", [1, 2, 3])
 
+        w_in_str = builder.WaveformIn("W_IN_STR", ["ABC", "DEF"])
+        w_out_str = builder.WaveformOut("W_OUT_STR", ["ABC", "DEF"])
+
         with pytest.raises(AssertionError):
             w_in.set([1, 2, 3, 4])
         with pytest.raises(AssertionError):
             w_out.set([1, 2, 3, 4])
+        with pytest.raises(AssertionError):
+            w_in_str.set(["ABC", "DEF", "GHI"])
+        with pytest.raises(AssertionError):
+            w_out_str.set(["ABC", "DEF", "GHI"])
+
+    def test_waveform_rejects_late_strings(self):
+        """Test that a waveform won't allow a list of strings to be assigned
+        if no list was given in initial waveform construction"""
+        w_in = builder.WaveformIn("W_IN", length=10)
+        w_out = builder.WaveformOut("W_OUT", length=10)
+
+        with pytest.raises(ValueError):
+            w_in.set(["ABC", "DEF"])
+        with pytest.raises(ValueError):
+            w_out.set(["ABC", "DEF"])
+
+    def test_waveform_rejects_long_array_of_strings(self):
+        """Test that a waveform of strings won't accept too long strings"""
+        w_in = builder.WaveformIn(
+            "W_IN",
+            initial_value=["123abc", "456def", "7890ghi"]
+        )
+        w_out = builder.WaveformIn(
+            "W_OUT",
+            initial_value=["123abc", "456def", "7890ghi"]
+        )
+
+        with pytest.raises(AssertionError):
+            w_in.set(["1", "2", "3", "4"])
+        with pytest.raises(AssertionError):
+            w_out.set(["1", "2", "3", "4"])
+
+        with pytest.raises(ValueError):
+            w_in.set([VERY_LONG_STRING])
+        with pytest.raises(ValueError):
+            w_out.set([VERY_LONG_STRING])

From 84346b87deaf425849a21bd77bae493440396191 Mon Sep 17 00:00:00 2001
From: AlexWells <alex.wells@diamond.ac.uk>
Date: Wed, 29 Mar 2023 11:30:57 +0100
Subject: [PATCH 5/7] Return a list of strings regardless of input

---
 softioc/device.py           |  5 ++-
 tests/test_record_values.py | 62 ++++++++++---------------------------
 2 files changed, 20 insertions(+), 47 deletions(-)

diff --git a/softioc/device.py b/softioc/device.py
index 3055d889..16afe620 100644
--- a/softioc/device.py
+++ b/softioc/device.py
@@ -430,7 +430,10 @@ def _value_to_epics(self, value):
         return numpy.copy(value)
 
     def _epics_to_value(self, value):
-        return value
+        if self._dtype.char == 'S':
+            return [_string_at(s, 40) for s in value]
+        else:
+            return value
 
     def _value_to_dbr(self, value):
         return self._dbf_type_, len(value), value.ctypes.data, value
diff --git a/tests/test_record_values.py b/tests/test_record_values.py
index a637a310..490e9ec8 100644
--- a/tests/test_record_values.py
+++ b/tests/test_record_values.py
@@ -196,65 +196,43 @@ def record_values_names(fixture_value):
         "wIn_byte_string_array",
         builder.WaveformIn,
         [b"AB123", b"CD456", b"EF789"],
-        numpy.array(
-            ["AB123", "CD456", "EF789"], dtype=NUMPY_DTYPE_STRING
-        ),
-        numpy.ndarray,
+        ["AB123", "CD456", "EF789"],
+        list,
     ),
     (
         "wOut_byte_string_array",
         builder.WaveformOut,
         [b"12AB", b"34CD", b"56EF"],
-        numpy.array(
-            ["12AB", "34CD", "56EF"], dtype=NUMPY_DTYPE_STRING
-        ),
-        numpy.ndarray,
+        ["12AB", "34CD", "56EF"],
+        list,
     ),
     (
         "wIn_unicode_string_array",
         builder.WaveformIn,
         ["12€½", "34¾²", "56¹³"],
-        numpy.array(
-            [
-                b'12\xe2\x82\xac\xc2\xbd',
-                b'34\xc2\xbe\xc2\xb2',
-                b'56\xc2\xb9\xc2\xb3'
-            ],
-            dtype=NUMPY_DTYPE_STRING
-        ),
-        numpy.ndarray,
+        ["12€½", "34¾²", "56¹³"],
+        list,
     ),
     (
         "wOut_unicode_string_array",
         builder.WaveformOut,
         ["12€½", "34¾²", "56¹³"],
-        numpy.array(
-            [
-                b'12\xe2\x82\xac\xc2\xbd',
-                b'34\xc2\xbe\xc2\xb2',
-                b'56\xc2\xb9\xc2\xb3'
-            ],
-            dtype=NUMPY_DTYPE_STRING
-        ),
-        numpy.ndarray,
+        ["12€½", "34¾²", "56¹³"],
+        list,
     ),
     (
         "wIn_string_array",
         builder.WaveformIn,
         ["123abc", "456def", "7890ghi"],
-        numpy.array(
-            ["123abc", "456def", "7890ghi"], dtype=NUMPY_DTYPE_STRING
-        ),
-        numpy.ndarray,
+        ["123abc", "456def", "7890ghi"],
+        list,
     ),
     (
         "wOut_string_array",
         builder.WaveformOut,
         ["123abc", "456def", "7890ghi"],
-        numpy.array(
-            ["123abc", "456def", "7890ghi"],  dtype=NUMPY_DTYPE_STRING
-        ),
-        numpy.ndarray,
+        ["123abc", "456def", "7890ghi"],
+        list,
     ),
     (
         "longStringIn_str",
@@ -553,7 +531,7 @@ def is_valid(configuration):
 
                 if (
                     creation_func in [builder.WaveformOut, builder.WaveformIn]
-                    and expected_value.dtype
+                    and hasattr(expected_value, 'dtype')
                     and expected_value.dtype in [numpy.float64, numpy.int32]
                 ):
                     log(
@@ -562,19 +540,11 @@ def is_valid(configuration):
                         "scalar. Therefore we skip this check.")
                     continue
 
-                # caget on a waveform of strings will return unicode. Have to
-                # convert it manually to binary.
+                # caget on a waveform of strings will return a numpy array.
+                # Must extract it out to a list to match .get()
                 if isinstance(rec_val, numpy.ndarray) and len(rec_val) > 1 \
                         and rec_val.dtype.char in ["S", "U"]:
-                    result = numpy.empty(len(rec_val), NUMPY_DTYPE_STRING)
-                    for n, s in enumerate(rec_val):
-                        if isinstance(s, str):
-                            result[n] = s.encode('UTF-8', errors= 'ignore')
-                        else:
-                            result[n] = s
-                    rec_val = result
-                    # caget won't retrieve the full length 40 buffer
-                    rec_val = rec_val.astype(NUMPY_DTYPE_STRING)
+                    rec_val = [s for s in rec_val]
 
             record_value_asserts(
                 creation_func, rec_val, expected_value, expected_type

From 5b59ed7f86a126842467247dfb004ecf21aed561 Mon Sep 17 00:00:00 2001
From: AlexWells <alex.wells@diamond.ac.uk>
Date: Fri, 31 Mar 2023 09:33:48 +0100
Subject: [PATCH 6/7] Code review changes

- Rewrites the note on waveform of strings
- Refactors _require_waveform
- Removes unused DTYPE and add more tests for mixed lists
---
 docs/reference/api.rst      |  6 +++---
 softioc/device.py           | 20 +++++++++----------
 tests/test_record_values.py | 39 +++++++++++++++++++++++++++++++------
 3 files changed, 46 insertions(+), 19 deletions(-)

diff --git a/docs/reference/api.rst b/docs/reference/api.rst
index ae85f200..8b7a23bc 100644
--- a/docs/reference/api.rst
+++ b/docs/reference/api.rst
@@ -343,9 +343,9 @@ All functions return a wrapped `ProcessDeviceSupportIn` or
     if given, or defaults to ``'FLOAT'``.
 
     .. note::
-        When storing arrays of strings, it is possible to store Unicode characters.
-        However, as EPICS has no Unicode support the resultant values will be stored
-        as byte strings. Care must be taken when encoding/decoding the values.
+        Storing arrays of strings differs from other values. String arrays will always
+        be assumed to be encoded as Unicode strings, and will be returned to the user
+        as a Python list rather than a Numpy array.
 
 
 The following functions generates specialised records.
diff --git a/softioc/device.py b/softioc/device.py
index 16afe620..4fcb5add 100644
--- a/softioc/device.py
+++ b/softioc/device.py
@@ -367,18 +367,18 @@ def to_epics_str_array(value):
 
 
 def _require_waveform(value, dtype):
-    if isinstance(value, bytes):
-        # Special case hack for byte arrays.  Surprisingly tricky:
-        value = numpy.frombuffer(value, dtype = numpy.uint8)
-
     if dtype and dtype.char == 'S':
         return to_epics_str_array(value)
-
-    value = numpy.require(value, dtype = dtype)
-    if value.shape == ():
-        value.shape = (1,)
-    assert value.ndim == 1, 'Can\'t write multidimensional arrays'
-    return value
+    else:
+        if isinstance(value, bytes):
+            # Special case hack for byte arrays.  Surprisingly tricky:
+            value = numpy.frombuffer(value, dtype = numpy.uint8)
+
+        value = numpy.require(value, dtype = dtype)
+        if value.shape == ():
+            value.shape = (1,)
+        assert value.ndim == 1, 'Can\'t write multidimensional arrays'
+        return value
 
 
 class WaveformBase(ProcessDeviceSupportCore):
diff --git a/tests/test_record_values.py b/tests/test_record_values.py
index 490e9ec8..b3c882ed 100644
--- a/tests/test_record_values.py
+++ b/tests/test_record_values.py
@@ -31,9 +31,6 @@
     "might think to put into a record that can theoretically hold a huge " \
     "string and so lets test it and prove that shall we?"
 
-# The numpy dtype of all arrays of strings
-NUMPY_DTYPE_STRING = "S40"
-
 
 def record_func_names(fixture_value):
     """Provide a nice name for the record_func fixture"""
@@ -234,6 +231,34 @@ def record_values_names(fixture_value):
         ["123abc", "456def", "7890ghi"],
         list,
     ),
+    (
+        "wIn_mixed_array_1",
+        builder.WaveformIn,
+        ["123abc", 456, "7890ghi"],
+        ["123abc", "456", "7890ghi"],
+        list,
+    ),
+    (
+        "wOut_mixed_array_1",
+        builder.WaveformOut,
+        ["123abc", 456, "7890ghi"],
+        ["123abc", "456", "7890ghi"],
+        list,
+    ),
+    (
+        "wIn_mixed_array_2",
+        builder.WaveformIn,
+        [123, 456, "7890ghi"],
+        ["123", "456", "7890ghi"],
+        list,
+    ),
+    (
+        "wOut_mixed_array_2",
+        builder.WaveformOut,
+        [123, 456, "7890ghi"],
+        ["123", "456", "7890ghi"],
+        list,
+    ),
     (
         "longStringIn_str",
         builder.longStringIn,
@@ -435,7 +460,7 @@ def is_valid(configuration):
 
         if creation_func in (builder.WaveformIn, builder.WaveformOut):
             if isinstance(initial_value, list) and \
-                    all(isinstance(val, (str, bytes)) for val in initial_value):
+                    any(isinstance(val, (str, bytes)) for val in initial_value):
                 if set_enum is not SetValueEnum.INITIAL_VALUE:
                     print(f"Removing {configuration}")
                     return False
@@ -580,7 +605,7 @@ def test_value_pre_init_set(self, record_values):
         if (
             creation_func in [builder.WaveformIn, builder.WaveformOut] and
             isinstance(initial_value, list) and
-            all(isinstance(s, (str, bytes)) for s in initial_value)
+            any(isinstance(s, (str, bytes)) for s in initial_value)
         ):
             pytest.skip("Cannot .set() a list of strings to a waveform, must"
                         "initially specify using initial_value or FTVL")
@@ -974,7 +999,7 @@ def test_waveform_rejects_overlong_values(self):
 
     def test_waveform_rejects_late_strings(self):
         """Test that a waveform won't allow a list of strings to be assigned
-        if no list was given in initial waveform construction"""
+        if no string list was given in initial waveform construction"""
         w_in = builder.WaveformIn("W_IN", length=10)
         w_out = builder.WaveformOut("W_OUT", length=10)
 
@@ -994,11 +1019,13 @@ def test_waveform_rejects_long_array_of_strings(self):
             initial_value=["123abc", "456def", "7890ghi"]
         )
 
+        # Test putting too many elements
         with pytest.raises(AssertionError):
             w_in.set(["1", "2", "3", "4"])
         with pytest.raises(AssertionError):
             w_out.set(["1", "2", "3", "4"])
 
+        # Test putting too long a string
         with pytest.raises(ValueError):
             w_in.set([VERY_LONG_STRING])
         with pytest.raises(ValueError):

From 65f8bd88913fbbe469b535f0632e8d997f66278c Mon Sep 17 00:00:00 2001
From: AlexWells <alex.wells@diamond.ac.uk>
Date: Fri, 31 Mar 2023 09:59:14 +0100
Subject: [PATCH 7/7] Move CHANGELOG message to correct position

---
 CHANGELOG.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 66e83267..9c8ba245 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -17,6 +17,7 @@ Added:
 
 Fixed:
 
+- `Allow arrays of strings to be used with Waveforms <../../pull/102>`_
 - `Fix DeprecationWarning from numpy when using "+" <../../pull/123>`_
 
 4.2.0_ - 2022-11-08
@@ -30,7 +31,6 @@ Added:
 
 - `Allow "status" and "severity" on In record init <../../pull/111>`_
 - `Allow users to reset the list of created records <../../pull/114>`_
-- `Allow arrays of strings to be used with Waveforms <../../pull/102>`_
 
 4.1.0_ - 2022-08-05
 -------------------