
Commit

Merge branch 'master' into release/0.31
borg323 committed Apr 15, 2024
2 parents 05172b6 + f6f966d commit e056bd1
Showing 6 changed files with 25 additions and 42 deletions.
5 changes: 3 additions & 2 deletions appveyor.yml
@@ -85,11 +85,12 @@ install:
 - cmd: IF %NAME%==android IF NOT EXIST C:\ndk\android-ndk-r19c\toolchains\llvm\prebuilt\windows-x86_64 appveyor DownloadFile https://dl.google.com/android/repository/android-ndk-r19c-windows-x86_64.zip
 - cmd: IF %NAME%==android IF NOT EXIST C:\ndk\android-ndk-r19c\toolchains\llvm\prebuilt\windows-x86_64 7z x android-ndk-r19c-windows-x86_64.zip -oC:\ndk
 - cmd: IF %NAME%==android set PATH=C:\ndk\android-ndk-r19c\toolchains\llvm\prebuilt\windows-x86_64\bin;%PATH%
+- cmd: IF EXIST C:\cache\OpenBLAS\ rd /s /q C:\cache\OpenBLAS
 - cmd: IF %NAME%==android sed "s/clang+*/&.cmd/" cross-files/aarch64-linux-android >crossfile-aarch64
-- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-aarch64 appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8-2/openblas-android-aarch64.zip
+- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-aarch64 appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.27/openblas-android-aarch64.zip
 - cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-aarch64 7z x openblas-android-aarch64.zip -oC:\cache\OpenBLAS
 - cmd: IF %NAME%==android sed "s/clang+*/&.cmd/" cross-files/armv7a-linux-android >crossfile-armv7a
-- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-armv7a appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8-2/openblas-android-armv7a.zip
+- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-armv7a appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.27/openblas-android-armv7a.zip
 - cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-armv7a 7z x openblas-android-armv7a.zip -oC:\cache\OpenBLAS
 - cmd: set PKG_FOLDER="C:\cache"
 - cmd: IF NOT EXIST c:\cache mkdir c:\cache
9 changes: 1 addition & 8 deletions dist/install-dml.cmd
@@ -2,14 +2,7 @@
 where /q tar
 if errorlevel 1 goto error

-where /q lc0.exe
-if errorlevel 1 cd /d %~dp0
-where /q lc0.exe
-if errorlevel 1 (
-echo This script must run in the lc0 folder.
-pause
-exit /b
-)
+cd /d %~dp0

 cls
 echo Installing the DirectML.dll version required by the Lc0 onnx-dml backend.
46 changes: 17 additions & 29 deletions meson.build
@@ -490,6 +490,22 @@ if get_option('build_backends')
     nvcc_extra_args = ['-arch=compute_' + cuda_cc, '-code=sm_' + cuda_cc]
   elif get_option('native_cuda') and nvcc_help.contains('-arch=native')
     nvcc_extra_args = ['-arch=native']
+  elif nvcc_help.contains('-arch=all-major')
+    nvcc_extra_args = ['-arch=all-major', '-Wno-deprecated-gpu-targets']
+  else
+    nvcc_extra_args = ['-Wno-deprecated-gpu-targets']
+    # Fallback for cuda versions without -arch=all-major.
+    foreach x : ['35', '50', '60', '70', '80']
+      if nvcc_help.contains('sm_' + x)
+        nvcc_extra_args += '-gencode=arch=compute_' + x + ',code=sm_' + x
+      endif
+    endforeach
+    # For forward compatibility.
+    if nvcc_help.contains('sm_80') # Cuda 11+
+      nvcc_extra_args += '-gencode=arch=compute_80,code=compute_80'
+    elif nvcc_help.contains('sm_75') # Cuda 10+
+      nvcc_extra_args += '-gencode=arch=compute_75,code=compute_75'
+    endif
   endif
   foreach x : get_option('cudnn_include')
     cuda_arguments += ['-I', x]
@@ -507,35 +523,6 @@
     command : [nvcc, nvcc_extra_args, cuda_arguments]
   )

-  # Handling of fp16 cuda code: If nvcc_extra_args is empty add options to
-  # generate code for the major fp16 capable architectures.
-  if nvcc_extra_args == []
-    nvcc_arch = '-arch=compute_70'
-    nvcc_sm_list = ['sm_70', 'sm_75', 'sm_80', 'sm_90']
-    if host_machine.system() != 'windows'
-      nvcc_arch = '-arch=compute_60'
-      nvcc_sm_list += ['sm_60']
-      if ['arm', 'aarch64'].contains(host_machine.cpu_family())
-        message('Compiling for Jetson.')
-        nvcc_arch = '-arch=compute_53'
-        nvcc_sm_list = ['sm_53', 'sm_62', 'sm_72', 'sm_87']
-      endif
-    endif
-    nvcc_extra_args = [nvcc_arch]
-    foreach x : nvcc_sm_list
-      if nvcc_help.contains(x)
-        nvcc_extra_args += '-code=' + x
-      endif
-    endforeach
-    # For forward compatibility.
-    if nvcc_help.contains('sm_90') # Cuda 12+
-      nvcc_extra_args += '-gencode=arch=compute_90,code=compute_90'
-    elif nvcc_help.contains('sm_80') # Cuda 11+
-      nvcc_extra_args += '-gencode=arch=compute_80,code=compute_80'
-    elif nvcc_help.contains('sm_75') # Cuda 10+
-      nvcc_extra_args += '-gencode=arch=compute_75,code=compute_75'
-    endif
-  endif
   files += custom_target('cuda fp16 code',
     input : 'src/neural/cuda/fp16_kernels.cu',
     output : outputname,
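A note on the nvcc flags involved: -gencode=arch=compute_X,code=sm_X embeds machine code (SASS) for one specific GPU generation, while code=compute_X embeds PTX that newer drivers can JIT-compile; the PTX entry is what the "For forward compatibility" branches rely on. Which entry a GPU needs is determined by its compute capability. A minimal standalone CUDA sketch, not part of this commit, that prints it:

#include <cstdio>
#include <cuda_runtime.h>

// Prints the compute capability of device 0, e.g. "8.6" for an sm_86 GPU.
// For a kernel to launch, the fat binary must carry SASS for this exact
// architecture, or PTX for an older one that the driver can JIT-compile.
int main() {
  cudaDeviceProp prop;
  if (cudaGetDeviceProperties(&prop, 0) != cudaSuccess) return 1;
  std::printf("compute capability: %d.%d\n", prop.major, prop.minor);
  return 0;
}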
@@ -644,6 +631,7 @@
       'src/neural/xla/xla_tensor.cc',
     ]
     deps += cc.find_library('dl', required: false)
+    has_backends = true
   endif

 endif # if get_option('build_backends')
3 changes: 2 additions & 1 deletion src/neural/cuda/network_cuda.cc
@@ -670,7 +670,8 @@ class CudaNetwork : public Network {
       stream_attribute.accessPolicyWindow.missProp = cudaAccessPropertyStreaming;

       if (allow_cache_opt_ && use_res_block_winograd_fuse_opt_ &&
-          (res_block_mem <= scratch_size_) && (res_block_mem <= l2_cache_size_)) {
+          (static_cast<size_t>(res_block_mem) <= scratch_size_) &&
+          (res_block_mem <= l2_cache_size_)) {
         // we can use a single alloc to hold all the required tensors, and enable
         // persistent L2 caching on it
         ReportCUDAErrors(cudaStreamSetAttribute(
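Background for this guard: CUDA 11 added stream access policy windows, which request that a given address range stay resident in L2; the condition checks that the residual-block working set fits both the scratch buffer and the detected L2 size before turning the optimization on. A self-contained sketch of the pattern (buf and bytes are illustrative names, not lc0's):

#include <cstddef>
#include <cuda_runtime.h>

// Sketch: ask the hardware to keep `bytes` of `buf` resident in L2 for work
// issued on `stream`. The window should not exceed the persisting L2 budget.
void PinToL2(cudaStream_t stream, void* buf, size_t bytes) {
  cudaStreamAttrValue attr = {};
  attr.accessPolicyWindow.base_ptr = buf;
  attr.accessPolicyWindow.num_bytes = bytes;
  attr.accessPolicyWindow.hitRatio = 1.0f;  // try to keep the whole window
  attr.accessPolicyWindow.hitProp = cudaAccessPropertyPersisting;
  attr.accessPolicyWindow.missProp = cudaAccessPropertyStreaming;
  cudaStreamSetAttribute(stream, cudaStreamAttributeAccessPolicyWindow, &attr);
}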
2 changes: 1 addition & 1 deletion src/neural/cuda/network_cudnn.cc
@@ -938,7 +938,7 @@ class CudnnNetwork : public Network {
   std::unique_ptr<InputsOutputs> GetInputsOutputs() {
     std::lock_guard<std::mutex> lock(inputs_outputs_lock_);
     if (free_inputs_outputs_.empty()) {
-      return std::make_unique<InputsOutputs>(max_batch_size_, wdl_, false,
+      return std::make_unique<InputsOutputs>(max_batch_size_, wdl_,
                                              moves_left_);
     } else {
       std::unique_ptr<InputsOutputs> resource =
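For context, GetInputsOutputs() is a mutex-guarded free list: construct a fresh InputsOutputs when the pool is empty, otherwise reuse a cached one; the change itself only drops one constructor argument. A generic sketch of the free-list pattern with hypothetical names (Pool, Acquire, Release):

#include <memory>
#include <mutex>
#include <utility>
#include <vector>

// Generic free-list pool: reuse previously released objects instead of
// reallocating, with a mutex guarding the shared list.
template <typename T>
class Pool {
 public:
  template <typename... Args>
  std::unique_ptr<T> Acquire(Args&&... args) {
    std::lock_guard<std::mutex> lock(mutex_);
    if (free_.empty()) return std::make_unique<T>(std::forward<Args>(args)...);
    std::unique_ptr<T> obj = std::move(free_.back());
    free_.pop_back();
    return obj;
  }
  void Release(std::unique_ptr<T> obj) {
    std::lock_guard<std::mutex> lock(mutex_);
    free_.push_back(std::move(obj));
  }

 private:
  std::mutex mutex_;
  std::vector<std::unique_ptr<T>> free_;
};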
2 changes: 1 addition & 1 deletion src/neural/xla/network_xla.cc
@@ -311,7 +311,7 @@ std::unique_ptr<Network> MakeXlaNetwork(const std::optional<WeightsFile>& w,
                                          w->format().network_format());
 }

-REGISTER_NETWORK("xla", MakeXlaNetwork, -34)
+REGISTER_NETWORK("xla", MakeXlaNetwork, 34)

 }  // namespace
 }  // namespace lczero
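The third argument of REGISTER_NETWORK is the backend's priority in lc0's network factory, which orders backends for automatic selection; this change flips xla from -34 to 34 and so moves it up that ordering (the exact meaning of negative priorities is not visible in this diff). A purely illustrative sketch of priority-ordered registration, assuming the highest priority wins; Registry and Entry are hypothetical names, not lc0's:

#include <algorithm>
#include <functional>
#include <string>
#include <vector>

struct Entry {
  std::string name;
  std::function<void()> factory;  // stand-in for a network factory function
  int priority;
};

class Registry {
 public:
  void Register(Entry e) {
    entries_.push_back(std::move(e));
    // Keep entries sorted by descending priority so the default is first.
    std::stable_sort(entries_.begin(), entries_.end(),
                     [](const Entry& a, const Entry& b) {
                       return a.priority > b.priority;
                     });
  }
  const Entry* Default() const {
    return entries_.empty() ? nullptr : &entries_.front();
  }

 private:
  std::vector<Entry> entries_;
};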
