From 784e673d0a2ce3f66fae6ead0f75cc116954e4a1 Mon Sep 17 00:00:00 2001
From: Sebastian Sanchez
Date: Tue, 3 Dec 2019 11:20:02 -0800
Subject: [PATCH] enqueueWriteBuffer: Initialize host buffer to obtain accurate measurement

When a host buffer is passed as the source to enqueueWriteBuffer(),
OpenCL copies it with memcpy(). Newly allocated memory initially points
to zero pages; physical memory is only allocated once the memory is
written to. A memcpy() from zero pages is much faster than one from
populated memory, so a write from an untouched buffer reports an
inflated bandwidth. Therefore, initialize the host buffer so that
enqueueWriteBuffer() yields an accurate measurement.

Results on Intel hardware:

Before:
Platform: Intel(R) OpenCL HD Graphics
  Device: Intel(R) Gen9 HD Graphics NEO
    Driver version  : 19.03.0 (Linux x64)
    Compute units   : 48
    Clock frequency : 1200 MHz

    Transfer bandwidth (GBPS)
      enqueueWriteBuffer         : 34.18
      enqueueReadBuffer          : 13.02
      enqueueMapBuffer(for read) : 14316530.00
        memcpy from mapped ptr   : 13.01
      enqueueUnmap(after write)  : inf
        memcpy to mapped ptr     : 13.37

After:
Platform: Intel(R) OpenCL HD Graphics
  Device: Intel(R) Gen9 HD Graphics NEO
    Driver version  : 19.03.0 (Linux x64)
    Compute units   : 48
    Clock frequency : 1200 MHz

    Transfer bandwidth (GBPS)
      enqueueWriteBuffer         : 13.44
      enqueueReadBuffer          : 12.91
      enqueueMapBuffer(for read) : 21474796.00
        memcpy from mapped ptr   : 12.91
      enqueueUnmap(after write)  : inf
        memcpy to mapped ptr     : 13.44
---
 src/transfer_bandwidth.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/transfer_bandwidth.cpp b/src/transfer_bandwidth.cpp
index d1c3183..c41ded9 100644
--- a/src/transfer_bandwidth.cpp
+++ b/src/transfer_bandwidth.cpp
@@ -18,6 +18,7 @@ int clPeak::runTransferBandwidthTest(cl::CommandQueue &queue, cl::Program &prog,
   try
   {
     arr = new float[numItems];
+    memset(arr, 0, numItems * sizeof(float));
     cl::Buffer clBuffer = cl::Buffer(ctx, (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), (numItems * sizeof(float)));
 
     log->print(NEWLINE TAB TAB "Transfer bandwidth (GBPS)" NEWLINE);