diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index 46aa160630..8a83c8a024 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -59,7 +59,8 @@ returned list of supported extensions. // Retrieve extension string std::unique_ptr returnedExtensions(new char[returnedSize]); - ${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_EXTENSIONS, returnedSize, returnedExtensions.get(), nullptr); + ${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_EXTENSIONS, returnedSize, + returnedExtensions.get(), nullptr); std::string_view ExtensionsString(returnedExtensions.get()); bool CmdBufferSupport = @@ -117,11 +118,15 @@ were obtained from. // Append a memcpy with no sync-point dependencies ${x}_exp_command_buffer_sync_point_t syncPoint; - ${x}CommandBufferAppendMemcpyUSMExp(hCommandBuffer, pDst, pSrc, size, 0, nullptr, &syncPoint); + ${x}CommandBufferAppendMemcpyUSMExp(hCommandBuffer, pDst, pSrc, size, 0, + nullptr, &syncPoint); // Append a kernel launch with syncPoint as a dependency, ignore returned // sync-point - ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, 1, &syncPoint, nullptr); + ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, + pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, 1, &syncPoint, + nullptr); Enqueueing Command-Buffers -------------------------------------------------------------------------------- diff --git a/scripts/core/PROG.rst b/scripts/core/PROG.rst index 280144adcd..9c97823be1 100644 --- a/scripts/core/PROG.rst +++ b/scripts/core/PROG.rst @@ -60,11 +60,13 @@ Initialization and Discovery // Get number of total GPU devices in the platform uint32_t deviceCount = 0; - ${x}DeviceGet(platforms[0], ${X}_DEVICE_TYPE_GPU, &deviceCount, nullptr, nullptr); + ${x}DeviceGet(platforms[0], ${X}_DEVICE_TYPE_GPU, &deviceCount, nullptr, + nullptr); // Get handles of all GPU devices in the platform std::vector<${x}_device_handle_t> devices(deviceCount); - ${x}DeviceGet(platforms[0], ${X}_DEVICE_TYPE_GPU, &deviceCount, devices.data(), devices.size()); + ${x}DeviceGet(platforms[0], ${X}_DEVICE_TYPE_GPU, &deviceCount, + devices.data(), devices.size()); Device handle lifetime ---------------------- @@ -97,7 +99,8 @@ In case where the info size is only known at runtime then two calls are needed, // Size is known beforehand ${x}_device_type_t deviceType; - ${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_TYPE, sizeof(${x}_device_type_t), &deviceType, nullptr); + ${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_TYPE, + sizeof(${x}_device_type_t), &deviceType, nullptr); // Size is only known at runtime size_t infoSize; @@ -105,7 +108,8 @@ In case where the info size is only known at runtime then two calls are needed, std::string deviceName; DeviceName.resize(infoSize); - ${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_NAME, infoSize, deviceName.data(), nullptr); + ${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_NAME, infoSize, + deviceName.data(), nullptr); Device partitioning into sub-devices ------------------------------------ @@ -133,7 +137,8 @@ fixed part of the parent device, which can explicitly be programmed individually if (count > 0) { subDevices.resize(count); - ${x}DevicePartition(Device, &properties, count, &subDevices.data(), nullptr); + ${x}DevicePartition(Device, &properties, count, &subDevices.data(), + nullptr); } The returned sub-devices may be requested for further partitioning into sub-sub-devices, and so on. @@ -158,7 +163,8 @@ events, and programs are explicitly created against a context. A trivial work wi uint32_t deviceCount = 1; ${x}_device_handle_t hDevice; - ${x}DeviceGet(hPlatform, ${X}_DEVICE_TYPE_GPU, &deviceCount, &hDevice, nullptr); + ${x}DeviceGet(hPlatform, ${X}_DEVICE_TYPE_GPU, &deviceCount, &hDevice, + nullptr); // Create a context ${x}_context_handle_t hContext; @@ -234,14 +240,16 @@ queue is created. // Create an out of order queue for hDevice in hContext ${x}_queue_handle_t hQueue; - ${x}QueueCreate(hContext, hDevice, ${X}_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE, &hQueue); + ${x}QueueCreate(hContext, hDevice, + ${X}_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE, &hQueue); - // Lanuch a kernel with 3D workspace partitioning + // Launch a kernel with 3D workspace partitioning const uint32_t nDim = 3; const size_t gWorkOffset = {0, 0, 0}; const size_t gWorkSize = {128, 128, 128}; const size_t lWorkSize = {1, 8, 8}; - ${x}EnqueueKernelLaunch(hQueue, hKernel, nDim, gWorkOffset, gWorkSize, lWorkSize, 0, nullptr, nullptr); + ${x}EnqueueKernelLaunch(hQueue, hKernel, nDim, gWorkOffset, gWorkSize, + lWorkSize, 0, nullptr, nullptr); Queue object lifetime --------------------- diff --git a/scripts/generate_docs.py b/scripts/generate_docs.py index 4b2ca891d3..a17727629e 100644 --- a/scripts/generate_docs.py +++ b/scripts/generate_docs.py @@ -96,7 +96,8 @@ def _generate_valid_rst(fin, fout, namespace, tags, ver, rev, meta): error = False outlines = [] - for iline, line in enumerate(util.textRead(fin)): + iter = enumerate(util.textRead(fin)) + for iline, line in iter: if re.match(RE_ENABLE, line) or re.match(RE_PYCODE_BLOCK_END, line): enable = True @@ -136,6 +137,17 @@ def _generate_valid_rst(fin, fout, namespace, tags, ver, rev, meta): continue if code_block and 'function' == symbol_type: + # If function is split across multiple lines + # then join lines until a ';' is encountered. + try: + line = line.strip() + while not line.endswith(';'): + _, n_line = next(iter) + line = line + n_line.strip() + except StopIteration: + print(f"Function {line[:100]} was not terminated by a ';' character.") + error = True + words = re.sub(RE_EXTRACT_PARAMS, r"\1", line) words = line.split(",") if len(words) != len(meta['function'][symbol]['params']):