diff --git a/dali/operators/generic/lookup_table.cu b/dali/operators/generic/lookup_table.cu
index 05ae1e0b0b..34eaaf6731 100644
--- a/dali/operators/generic/lookup_table.cu
+++ b/dali/operators/generic/lookup_table.cu
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -48,14 +48,14 @@ void LookupTable<GPUBackend>::RunImpl(Workspace &ws) {
 
   auto num_samples = shape.num_samples();
   samples_.resize(num_samples);
+  TensorListShape<1> collapsed_shape(num_samples);
   for (int sample_id = 0; sample_id < num_samples; sample_id++) {
     samples_[sample_id].output = output.raw_mutable_tensor(sample_id);
     samples_[sample_id].input = input.raw_tensor(sample_id);
+    collapsed_shape.tensor_shape_span(sample_id)[0] = shape.tensor_size(sample_id);
   }
   samples_dev_.from_host(samples_, stream);
 
-  auto collapsed_shape = collapse_dims<1>(shape, {std::make_pair(0, shape.sample_dim())});
-
   block_setup_.SetupBlocks(collapsed_shape, true);
   blocks_dev_.from_host(block_setup_.Blocks(), stream);
 
diff --git a/dali/test/python/operator_1/test_lookup_table.py b/dali/test/python/operator_1/test_lookup_table.py
index 4cda68f8dc..04f821eb52 100644
--- a/dali/test/python/operator_1/test_lookup_table.py
+++ b/dali/test/python/operator_1/test_lookup_table.py
@@ -13,14 +13,12 @@
 # limitations under the License.
 
 import numpy as np
-import nvidia.dali.ops as ops
-import nvidia.dali.types as types
 import random
+from nvidia.dali import fn, types, ops, pipeline_def
 from nvidia.dali.pipeline import Pipeline
-
 from test_utils import RandomlyShapedDataIterator
 from test_utils import compare_pipelines
-
+from nose2.tools import params
 
 class LookupTablePipeline(Pipeline):
     def __init__(
@@ -175,3 +173,26 @@ def test_lookup_table_vs_python_op():
                     dictionary_type,
                     default_value,
                 )
+
+@params("cpu", "gpu")
+def test_scalar(device):
+    # Runs lookup_table on scalar (0-D) samples for the backend named by `device`.
+    @pipeline_def(batch_size=64, num_threads=2, device_id=0)
+    def pipe():
+        raw = np.array([[0, 1, 2, 3]])  # single batch of 4 scalars
+        ids = fn.external_source(source=raw, device=device)
+        mapped = fn.lookup_table(
+            ids,
+            keys=[0, 1],
+            values=[100, 200],
+            device=device,
+            dtype=types.INT64,
+        )
+        return mapped, ids
+    p = pipe()
+    p.build()
+    scaled, _ = p.run()
+    if device == 'gpu':
+        scaled = scaled.as_cpu()
+    # ids 2 and 3 have no entry in `keys`; the test expects 0 for them.
+    # assert_array_equal also checks the shape and reports mismatching elements.
+    np.testing.assert_array_equal(scaled.as_array(), [100, 200, 0, 0])