diff --git a/appveyor.yml b/appveyor.yml index dc0445dac8..7d89dec958 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -85,11 +85,12 @@ install: - cmd: IF %NAME%==android IF NOT EXIST C:\ndk\android-ndk-r19c\toolchains\llvm\prebuilt\windows-x86_64 appveyor DownloadFile https://dl.google.com/android/repository/android-ndk-r19c-windows-x86_64.zip - cmd: IF %NAME%==android IF NOT EXIST C:\ndk\android-ndk-r19c\toolchains\llvm\prebuilt\windows-x86_64 7z x android-ndk-r19c-windows-x86_64.zip -oC:\ndk - cmd: IF %NAME%==android set PATH=C:\ndk\android-ndk-r19c\toolchains\llvm\prebuilt\windows-x86_64\bin;%PATH% +- cmd: IF EXIST C:\cache\OpenBLAS\ rd /s /q C:\cache\OpenBLAS - cmd: IF %NAME%==android sed "s/clang+*/&.cmd/" cross-files/aarch64-linux-android >crossfile-aarch64 -- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-aarch64 appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8-2/openblas-android-aarch64.zip +- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-aarch64 appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.27/openblas-android-aarch64.zip - cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-aarch64 7z x openblas-android-aarch64.zip -oC:\cache\OpenBLAS - cmd: IF %NAME%==android sed "s/clang+*/&.cmd/" cross-files/armv7a-linux-android >crossfile-armv7a -- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-armv7a appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8-2/openblas-android-armv7a.zip +- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-armv7a appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.27/openblas-android-armv7a.zip - cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-armv7a 7z x openblas-android-armv7a.zip -oC:\cache\OpenBLAS - cmd: set PKG_FOLDER="C:\cache" - cmd: IF NOT EXIST c:\cache mkdir c:\cache diff --git 
a/dist/install-dml.cmd b/dist/install-dml.cmd index 099f42958c..ca93411a55 100644 --- a/dist/install-dml.cmd +++ b/dist/install-dml.cmd @@ -2,14 +2,7 @@ where /q tar if errorlevel 1 goto error -where /q lc0.exe -if errorlevel 1 cd /d %~dp0 -where /q lc0.exe -if errorlevel 1 ( - echo This script must run in the lc0 folder. - pause - exit /b -) +cd /d %~dp0 cls echo Installing the DirectML.dll version required by the Lc0 onnx-dml backend. diff --git a/meson.build b/meson.build index ef0e0afcdf..519bd98fc5 100644 --- a/meson.build +++ b/meson.build @@ -490,6 +490,22 @@ if get_option('build_backends') nvcc_extra_args = ['-arch=compute_' + cuda_cc, '-code=sm_' + cuda_cc] elif get_option('native_cuda') and nvcc_help.contains('-arch=native') nvcc_extra_args = ['-arch=native'] + elif nvcc_help.contains('-arch=all-major') + nvcc_extra_args = ['-arch=all-major', '-Wno-deprecated-gpu-targets'] + else + nvcc_extra_args = ['-Wno-deprecated-gpu-targets'] + # Fallback for cuda versions without -arch=all-major. + foreach x : ['35', '50', '60', '70', '80'] + if nvcc_help.contains('sm_' + x) + nvcc_extra_args += '-gencode=arch=compute_' + x + ',code=sm_' + x + endif + endforeach + # For forward compatibility. + if nvcc_help.contains('sm_80') # Cuda 11+ + nvcc_extra_args += '-gencode=arch=compute_80,code=compute_80' + elif nvcc_help.contains('sm_75') # Cuda 10+ + nvcc_extra_args += '-gencode=arch=compute_75,code=compute_75' + endif endif foreach x : get_option('cudnn_include') cuda_arguments += ['-I', x] @@ -507,35 +523,6 @@ if get_option('build_backends') command : [nvcc, nvcc_extra_args, cuda_arguments] ) - # Handling of fp16 cuda code: If nvcc_extra_args is empty add options to - # generate code for the major fp16 capable architectures. 
- if nvcc_extra_args == [] - nvcc_arch = '-arch=compute_70' - nvcc_sm_list = ['sm_70', 'sm_75', 'sm_80', 'sm_90'] - if host_machine.system() != 'windows' - nvcc_arch = '-arch=compute_60' - nvcc_sm_list += ['sm_60'] - if ['arm', 'aarch64'].contains(host_machine.cpu_family()) - message('Compiling for Jetson.') - nvcc_arch = '-arch=compute_53' - nvcc_sm_list = ['sm_53', 'sm_62', 'sm_72', 'sm_87'] - endif - endif - nvcc_extra_args = [nvcc_arch] - foreach x : nvcc_sm_list - if nvcc_help.contains(x) - nvcc_extra_args += '-code=' + x - endif - endforeach - # For forward compatibility. - if nvcc_help.contains('sm_90') # Cuda 12+ - nvcc_extra_args += '-gencode=arch=compute_90,code=compute_90' - elif nvcc_help.contains('sm_80') # Cuda 11+ - nvcc_extra_args += '-gencode=arch=compute_80,code=compute_80' - elif nvcc_help.contains('sm_75') # Cuda 10+ - nvcc_extra_args += '-gencode=arch=compute_75,code=compute_75' - endif - endif files += custom_target('cuda fp16 code', input : 'src/neural/cuda/fp16_kernels.cu', output : outputname, @@ -644,6 +631,7 @@ if get_option('build_backends') 'src/neural/xla/xla_tensor.cc', ] deps += cc.find_library('dl', required: false) + has_backends = true endif endif # if get_option('build_backends') diff --git a/src/neural/cuda/network_cuda.cc b/src/neural/cuda/network_cuda.cc index cba3a0cc0b..cc44f1a531 100644 --- a/src/neural/cuda/network_cuda.cc +++ b/src/neural/cuda/network_cuda.cc @@ -670,7 +670,8 @@ class CudaNetwork : public Network { stream_attribute.accessPolicyWindow.missProp = cudaAccessPropertyStreaming; if (allow_cache_opt_ && use_res_block_winograd_fuse_opt_ && - (res_block_mem <= scratch_size_) && (res_block_mem <= l2_cache_size_)) { + (static_cast<size_t>(res_block_mem) <= scratch_size_) && + (res_block_mem <= l2_cache_size_)) { // we can use a single alloc to hold all the required tensors, and enable // persistent L2 caching on it ReportCUDAErrors(cudaStreamSetAttribute( diff --git a/src/neural/cuda/network_cudnn.cc 
b/src/neural/cuda/network_cudnn.cc index dc39906fc4..7129595c89 100644 --- a/src/neural/cuda/network_cudnn.cc +++ b/src/neural/cuda/network_cudnn.cc @@ -938,7 +938,7 @@ class CudnnNetwork : public Network { std::unique_ptr<InputsOutputs> GetInputsOutputs() { std::lock_guard<std::mutex> lock(inputs_outputs_lock_); if (free_inputs_outputs_.empty()) { - return std::make_unique<InputsOutputs>(max_batch_size_, wdl_, false, + return std::make_unique<InputsOutputs>(max_batch_size_, wdl_, moves_left_); } else { std::unique_ptr<InputsOutputs> resource = diff --git a/src/neural/xla/network_xla.cc b/src/neural/xla/network_xla.cc index ff9ff7413b..1652ff817e 100644 --- a/src/neural/xla/network_xla.cc +++ b/src/neural/xla/network_xla.cc @@ -311,7 +311,7 @@ std::unique_ptr<Network> MakeXlaNetwork(const std::optional<WeightsFile>& w, w->format().network_format()); } -REGISTER_NETWORK("xla", MakeXlaNetwork, -34) +REGISTER_NETWORK("xla", MakeXlaNetwork, 34) } // namespace } // namespace lczero