From 633533c3a9d48ab7d734b4ad2ca5edb12a53d2cc Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Sun, 14 Apr 2024 21:20:21 +0300 Subject: [PATCH 1/6] update android openblas to 0.3.27 (#2011) --- appveyor.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index dc0445dac8..7d89dec958 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -85,11 +85,12 @@ install: - cmd: IF %NAME%==android IF NOT EXIST C:\ndk\android-ndk-r19c\toolchains\llvm\prebuilt\windows-x86_64 appveyor DownloadFile https://dl.google.com/android/repository/android-ndk-r19c-windows-x86_64.zip - cmd: IF %NAME%==android IF NOT EXIST C:\ndk\android-ndk-r19c\toolchains\llvm\prebuilt\windows-x86_64 7z x android-ndk-r19c-windows-x86_64.zip -oC:\ndk - cmd: IF %NAME%==android set PATH=C:\ndk\android-ndk-r19c\toolchains\llvm\prebuilt\windows-x86_64\bin;%PATH% +- cmd: IF EXIST C:\cache\OpenBLAS\ rd /s /q C:\cache\OpenBLAS - cmd: IF %NAME%==android sed "s/clang+*/&.cmd/" cross-files/aarch64-linux-android >crossfile-aarch64 -- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-aarch64 appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8-2/openblas-android-aarch64.zip +- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-aarch64 appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.27/openblas-android-aarch64.zip - cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-aarch64 7z x openblas-android-aarch64.zip -oC:\cache\OpenBLAS - cmd: IF %NAME%==android sed "s/clang+*/&.cmd/" cross-files/armv7a-linux-android >crossfile-armv7a -- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-armv7a appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8-2/openblas-android-armv7a.zip +- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-armv7a appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.27/openblas-android-armv7a.zip - cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-armv7a 7z x openblas-android-armv7a.zip -oC:\cache\OpenBLAS - cmd: set PKG_FOLDER="C:\cache" - cmd: IF NOT EXIST c:\cache mkdir c:\cache From 5f28d9f2b1412444090e7468c9d13cf37ce79063 Mon Sep 17 00:00:00 2001 From: Hans Ekbrand Date: Sun, 14 Apr 2024 20:20:47 +0200 Subject: [PATCH 2/6] fix warning when comparing res_block_mem <= scratch_size (#2014) --- src/neural/cuda/network_cuda.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/neural/cuda/network_cuda.cc b/src/neural/cuda/network_cuda.cc index cba3a0cc0b..cc44f1a531 100644 --- a/src/neural/cuda/network_cuda.cc +++ b/src/neural/cuda/network_cuda.cc @@ -670,7 +670,8 @@ class CudaNetwork : public Network { stream_attribute.accessPolicyWindow.missProp = cudaAccessPropertyStreaming; if (allow_cache_opt_ && use_res_block_winograd_fuse_opt_ && - (res_block_mem <= scratch_size_) && (res_block_mem <= l2_cache_size_)) { + (static_cast(res_block_mem) <= scratch_size_) && + (res_block_mem <= l2_cache_size_)) { // we can use a single alloc to hold all the required tensors, and enable // persistent L2 caching on it ReportCUDAErrors(cudaStreamSetAttribute( From f19c2521daa4ccfd93cb450799b97cf773ad8d56 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Sun, 14 Apr 2024 21:21:15 +0300 Subject: [PATCH 3/6] compile common cuda code for multiple targets (#2015) --- meson.build | 45 ++++++++++++++++----------------------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/meson.build b/meson.build index ef0e0afcdf..f6c390ef63 100644 --- a/meson.build +++ b/meson.build @@ -490,6 +490,22 @@ if get_option('build_backends') nvcc_extra_args = ['-arch=compute_' + cuda_cc, '-code=sm_' + cuda_cc] elif get_option('native_cuda') and nvcc_help.contains('-arch=native') nvcc_extra_args = ['-arch=native'] + elif nvcc_help.contains('-arch=all-major') + nvcc_extra_args = ['-arch=all-major', '-Wno-deprecated-gpu-targets'] + else + nvcc_extra_args = ['-Wno-deprecated-gpu-targets'] + # Fallback for cuda versions without -arch=all-major. + foreach x : ['35', '50', '60', '70', '80'] + if nvcc_help.contains('sm_' + x) + nvcc_extra_args += '-gencode=arch=compute_' + x + ',code=sm_' + x + endif + endforeach + # For forward compatibility. + if nvcc_help.contains('sm_80') # Cuda 11+ + nvcc_extra_args += '-gencode=arch=compute_80,code=compute_80' + elif nvcc_help.contains('sm_75') # Cuda 10+ + nvcc_extra_args += '-gencode=arch=compute_75,code=compute_75' + endif endif foreach x : get_option('cudnn_include') cuda_arguments += ['-I', x] @@ -507,35 +523,6 @@ if get_option('build_backends') command : [nvcc, nvcc_extra_args, cuda_arguments] ) - # Handling of fp16 cuda code: If nvcc_extra_args is empty add options to - # generate code for the major fp16 capable architectures. - if nvcc_extra_args == [] - nvcc_arch = '-arch=compute_70' - nvcc_sm_list = ['sm_70', 'sm_75', 'sm_80', 'sm_90'] - if host_machine.system() != 'windows' - nvcc_arch = '-arch=compute_60' - nvcc_sm_list += ['sm_60'] - if ['arm', 'aarch64'].contains(host_machine.cpu_family()) - message('Compiling for Jetson.') - nvcc_arch = '-arch=compute_53' - nvcc_sm_list = ['sm_53', 'sm_62', 'sm_72', 'sm_87'] - endif - endif - nvcc_extra_args = [nvcc_arch] - foreach x : nvcc_sm_list - if nvcc_help.contains(x) - nvcc_extra_args += '-code=' + x - endif - endforeach - # For forward compatibility. - if nvcc_help.contains('sm_90') # Cuda 12+ - nvcc_extra_args += '-gencode=arch=compute_90,code=compute_90' - elif nvcc_help.contains('sm_80') # Cuda 11+ - nvcc_extra_args += '-gencode=arch=compute_80,code=compute_80' - elif nvcc_help.contains('sm_75') # Cuda 10+ - nvcc_extra_args += '-gencode=arch=compute_75,code=compute_75' - endif - endif files += custom_target('cuda fp16 code', input : 'src/neural/cuda/fp16_kernels.cu', output : outputname, From 9db2e40bcaeeb9b4e03cb41358778c7e5ec0e071 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Sun, 14 Apr 2024 21:21:37 +0300 Subject: [PATCH 4/6] fix cudnn breakage (#2016) --- src/neural/cuda/network_cudnn.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neural/cuda/network_cudnn.cc b/src/neural/cuda/network_cudnn.cc index dc39906fc4..7129595c89 100644 --- a/src/neural/cuda/network_cudnn.cc +++ b/src/neural/cuda/network_cudnn.cc @@ -938,7 +938,7 @@ class CudnnNetwork : public Network { std::unique_ptr GetInputsOutputs() { std::lock_guard lock(inputs_outputs_lock_); if (free_inputs_outputs_.empty()) { - return std::make_unique(max_batch_size_, wdl_, false, + return std::make_unique(max_batch_size_, wdl_, moves_left_); } else { std::unique_ptr resource = From e7a744162bd3a0307bf4b11f29b28a9d5d165796 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Sun, 14 Apr 2024 21:38:21 +0300 Subject: [PATCH 5/6] increase xla priority (#2017) --- meson.build | 1 + src/neural/xla/network_xla.cc | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index f6c390ef63..519bd98fc5 100644 --- a/meson.build +++ b/meson.build @@ -631,6 +631,7 @@ if get_option('build_backends') 'src/neural/xla/xla_tensor.cc', ] deps += cc.find_library('dl', required: false) + has_backends = true endif endif # if get_option('build_backends') diff --git a/src/neural/xla/network_xla.cc b/src/neural/xla/network_xla.cc index ff9ff7413b..1652ff817e 100644 --- a/src/neural/xla/network_xla.cc +++ b/src/neural/xla/network_xla.cc @@ -311,7 +311,7 @@ std::unique_ptr MakeXlaNetwork(const std::optional& w, w->format().network_format()); } -REGISTER_NETWORK("xla", MakeXlaNetwork, -34) +REGISTER_NETWORK("xla", MakeXlaNetwork, 34) } // namespace } // namespace lczero From f6f966d8f016e616b45d5af48df003fce4821a74 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Mon, 15 Apr 2024 23:42:18 +0300 Subject: [PATCH 6/6] fix dml install script (#2020) --- dist/install-dml.cmd | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/dist/install-dml.cmd b/dist/install-dml.cmd index 099f42958c..ca93411a55 100644 --- a/dist/install-dml.cmd +++ b/dist/install-dml.cmd @@ -2,14 +2,7 @@ where /q tar if errorlevel 1 goto error -where /q lc0.exe -if errorlevel 1 cd /d %~dp0 -where /q lc0.exe -if errorlevel 1 ( - echo This script must run in the lc0 folder. - pause - exit /b -) +cd /d %~dp0 cls echo Installing the DirectML.dll version required by the Lc0 onnx-dml backend.