
Commit

Merge branch 'master' into release/0.31
borg323 committed Apr 15, 2024
2 parents 05172b6 + f6f966d commit e056bd1
Showing 6 changed files with 25 additions and 42 deletions.
5 changes: 3 additions & 2 deletions appveyor.yml
@@ -85,11 +85,12 @@ install:
 - cmd: IF %NAME%==android IF NOT EXIST C:\ndk\android-ndk-r19c\toolchains\llvm\prebuilt\windows-x86_64 appveyor DownloadFile https://dl.google.com/android/repository/android-ndk-r19c-windows-x86_64.zip
 - cmd: IF %NAME%==android IF NOT EXIST C:\ndk\android-ndk-r19c\toolchains\llvm\prebuilt\windows-x86_64 7z x android-ndk-r19c-windows-x86_64.zip -oC:\ndk
 - cmd: IF %NAME%==android set PATH=C:\ndk\android-ndk-r19c\toolchains\llvm\prebuilt\windows-x86_64\bin;%PATH%
+- cmd: IF EXIST C:\cache\OpenBLAS\ rd /s /q C:\cache\OpenBLAS
 - cmd: IF %NAME%==android sed "s/clang+*/&.cmd/" cross-files/aarch64-linux-android >crossfile-aarch64
-- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-aarch64 appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8-2/openblas-android-aarch64.zip
+- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-aarch64 appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.27/openblas-android-aarch64.zip
 - cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-aarch64 7z x openblas-android-aarch64.zip -oC:\cache\OpenBLAS
 - cmd: IF %NAME%==android sed "s/clang+*/&.cmd/" cross-files/armv7a-linux-android >crossfile-armv7a
-- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-armv7a appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8-2/openblas-android-armv7a.zip
+- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-armv7a appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.27/openblas-android-armv7a.zip
 - cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-armv7a 7z x openblas-android-armv7a.zip -oC:\cache\OpenBLAS
 - cmd: set PKG_FOLDER="C:\cache"
 - cmd: IF NOT EXIST c:\cache mkdir c:\cache
9 changes: 1 addition & 8 deletions dist/install-dml.cmd
@@ -2,14 +2,7 @@
 where /q tar
 if errorlevel 1 goto error

-where /q lc0.exe
-if errorlevel 1 cd /d %~dp0
-where /q lc0.exe
-if errorlevel 1 (
-echo This script must run in the lc0 folder.
-pause
-exit /b
-)
+cd /d %~dp0

 cls
 echo Installing the DirectML.dll version required by the Lc0 onnx-dml backend.
46 changes: 17 additions & 29 deletions meson.build
@@ -490,6 +490,22 @@ if get_option('build_backends')
     nvcc_extra_args = ['-arch=compute_' + cuda_cc, '-code=sm_' + cuda_cc]
   elif get_option('native_cuda') and nvcc_help.contains('-arch=native')
     nvcc_extra_args = ['-arch=native']
+  elif nvcc_help.contains('-arch=all-major')
+    nvcc_extra_args = ['-arch=all-major', '-Wno-deprecated-gpu-targets']
+  else
+    nvcc_extra_args = ['-Wno-deprecated-gpu-targets']
+    # Fallback for cuda versions without -arch=all-major.
+    foreach x : ['35', '50', '60', '70', '80']
+      if nvcc_help.contains('sm_' + x)
+        nvcc_extra_args += '-gencode=arch=compute_' + x + ',code=sm_' + x
+      endif
+    endforeach
+    # For forward compatibility.
+    if nvcc_help.contains('sm_80') # Cuda 11+
+      nvcc_extra_args += '-gencode=arch=compute_80,code=compute_80'
+    elif nvcc_help.contains('sm_75') # Cuda 10+
+      nvcc_extra_args += '-gencode=arch=compute_75,code=compute_75'
+    endif
   endif
   foreach x : get_option('cudnn_include')
     cuda_arguments += ['-I', x]
@@ -507,35 +523,6 @@
     command : [nvcc, nvcc_extra_args, cuda_arguments]
   )

-  # Handling of fp16 cuda code: If nvcc_extra_args is empty add options to
-  # generate code for the major fp16 capable architectures.
-  if nvcc_extra_args == []
-    nvcc_arch = '-arch=compute_70'
-    nvcc_sm_list = ['sm_70', 'sm_75', 'sm_80', 'sm_90']
-    if host_machine.system() != 'windows'
-      nvcc_arch = '-arch=compute_60'
-      nvcc_sm_list += ['sm_60']
-      if ['arm', 'aarch64'].contains(host_machine.cpu_family())
-        message('Compiling for Jetson.')
-        nvcc_arch = '-arch=compute_53'
-        nvcc_sm_list = ['sm_53', 'sm_62', 'sm_72', 'sm_87']
-      endif
-    endif
-    nvcc_extra_args = [nvcc_arch]
-    foreach x : nvcc_sm_list
-      if nvcc_help.contains(x)
-        nvcc_extra_args += '-code=' + x
-      endif
-    endforeach
-    # For forward compatibility.
-    if nvcc_help.contains('sm_90') # Cuda 12+
-      nvcc_extra_args += '-gencode=arch=compute_90,code=compute_90'
-    elif nvcc_help.contains('sm_80') # Cuda 11+
-      nvcc_extra_args += '-gencode=arch=compute_80,code=compute_80'
-    elif nvcc_help.contains('sm_75') # Cuda 10+
-      nvcc_extra_args += '-gencode=arch=compute_75,code=compute_75'
-    endif
-  endif
   files += custom_target('cuda fp16 code',
     input : 'src/neural/cuda/fp16_kernels.cu',
     output : outputname,
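A note on the nvcc flags involved: -gencode=arch=compute_X,code=sm_X embeds machine code (SASS) for one specific GPU generation, while code=compute_X embeds PTX that newer drivers can JIT-compile; the PTX entry is what the "For forward compatibility" branches rely on. Which entry a GPU needs is determined by its compute capability. A minimal standalone CUDA sketch, not part of this commit, that prints it:

#include <cstdio>
#include <cuda_runtime.h>

// Prints the compute capability of device 0, e.g. "8.6" for an sm_86 GPU.
// For a kernel to launch, the fat binary must carry SASS for this exact
// architecture, or PTX for an older one that the driver can JIT-compile.
int main() {
  cudaDeviceProp prop;
  if (cudaGetDeviceProperties(&prop, 0) != cudaSuccess) return 1;
  std::printf("compute capability: %d.%d\n", prop.major, prop.minor);
  return 0;
}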
@@ -644,6 +631,7 @@
       'src/neural/xla/xla_tensor.cc',
     ]
     deps += cc.find_library('dl', required: false)
+    has_backends = true
   endif

 endif # if get_option('build_backends')
3 changes: 2 additions & 1 deletion src/neural/cuda/network_cuda.cc
@@ -670,7 +670,8 @@ class CudaNetwork : public Network {
       stream_attribute.accessPolicyWindow.missProp = cudaAccessPropertyStreaming;

       if (allow_cache_opt_ && use_res_block_winograd_fuse_opt_ &&
-          (res_block_mem <= scratch_size_) && (res_block_mem <= l2_cache_size_)) {
+          (static_cast<size_t>(res_block_mem) <= scratch_size_) &&
+          (res_block_mem <= l2_cache_size_)) {
         // we can use a single alloc to hold all the required tensors, and enable
         // persistent L2 caching on it
         ReportCUDAErrors(cudaStreamSetAttribute(
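Background for this guard: CUDA 11 added stream access policy windows, which request that a given address range stay resident in L2; the condition checks that the residual-block working set fits both the scratch buffer and the detected L2 size before turning the optimization on. A self-contained sketch of the pattern (buf and bytes are illustrative names, not lc0's):

#include <cstddef>
#include <cuda_runtime.h>

// Sketch: ask the hardware to keep `bytes` of `buf` resident in L2 for work
// issued on `stream`. The window should not exceed the persisting L2 budget.
void PinToL2(cudaStream_t stream, void* buf, size_t bytes) {
  cudaStreamAttrValue attr = {};
  attr.accessPolicyWindow.base_ptr = buf;
  attr.accessPolicyWindow.num_bytes = bytes;
  attr.accessPolicyWindow.hitRatio = 1.0f;  // try to keep the whole window
  attr.accessPolicyWindow.hitProp = cudaAccessPropertyPersisting;
  attr.accessPolicyWindow.missProp = cudaAccessPropertyStreaming;
  cudaStreamSetAttribute(stream, cudaStreamAttributeAccessPolicyWindow, &attr);
}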
2 changes: 1 addition & 1 deletion src/neural/cuda/network_cudnn.cc
@@ -938,7 +938,7 @@ class CudnnNetwork : public Network {
   std::unique_ptr<InputsOutputs> GetInputsOutputs() {
     std::lock_guard<std::mutex> lock(inputs_outputs_lock_);
     if (free_inputs_outputs_.empty()) {
-      return std::make_unique<InputsOutputs>(max_batch_size_, wdl_, false,
+      return std::make_unique<InputsOutputs>(max_batch_size_, wdl_,
                                              moves_left_);
     } else {
       std::unique_ptr<InputsOutputs> resource =
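For context, GetInputsOutputs() is a mutex-guarded free list: construct a fresh InputsOutputs when the pool is empty, otherwise reuse a cached one; the change itself only drops one constructor argument. A generic sketch of the free-list pattern with hypothetical names (Pool, Acquire, Release):

#include <memory>
#include <mutex>
#include <utility>
#include <vector>

// Generic free-list pool: reuse previously released objects instead of
// reallocating, with a mutex guarding the shared list.
template <typename T>
class Pool {
 public:
  template <typename... Args>
  std::unique_ptr<T> Acquire(Args&&... args) {
    std::lock_guard<std::mutex> lock(mutex_);
    if (free_.empty()) return std::make_unique<T>(std::forward<Args>(args)...);
    std::unique_ptr<T> obj = std::move(free_.back());
    free_.pop_back();
    return obj;
  }
  void Release(std::unique_ptr<T> obj) {
    std::lock_guard<std::mutex> lock(mutex_);
    free_.push_back(std::move(obj));
  }

 private:
  std::mutex mutex_;
  std::vector<std::unique_ptr<T>> free_;
};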
2 changes: 1 addition & 1 deletion src/neural/xla/network_xla.cc
@@ -311,7 +311,7 @@ std::unique_ptr<Network> MakeXlaNetwork(const std::optional<WeightsFile>& w,
                                          w->format().network_format());
 }

-REGISTER_NETWORK("xla", MakeXlaNetwork, -34)
+REGISTER_NETWORK("xla", MakeXlaNetwork, 34)

 }  // namespace
 }  // namespace lczero
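The third argument of REGISTER_NETWORK is the backend's priority in lc0's network factory, which orders backends for automatic selection; this change flips xla from -34 to 34 and so moves it up that ordering (the exact meaning of negative priorities is not visible in this diff). A purely illustrative sketch of priority-ordered registration, assuming the highest priority wins; Registry and Entry are hypothetical names, not lc0's:

#include <algorithm>
#include <functional>
#include <string>
#include <vector>

struct Entry {
  std::string name;
  std::function<void()> factory;  // stand-in for a network factory function
  int priority;
};

class Registry {
 public:
  void Register(Entry e) {
    entries_.push_back(std::move(e));
    // Keep entries sorted by descending priority so the default is first.
    std::stable_sort(entries_.begin(), entries_.end(),
                     [](const Entry& a, const Entry& b) {
                       return a.priority > b.priority;
                     });
  }
  const Entry* Default() const {
    return entries_.empty() ? nullptr : &entries_.front();
  }

 private:
  std::vector<Entry> entries_;
};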
