From 0f3ff5f388da00188782a29a7c6706401e0e92f5 Mon Sep 17 00:00:00 2001 From: Anders Leino Date: Thu, 28 Nov 2024 08:37:35 +0200 Subject: [PATCH 01/13] wgsl: Conditionally enable some tests based on 'half' support (#5694) Require the 'half' rendering feature for WebGPU, so that we can detect when the f16 feature is not available, for the following tests: - tests/language-feature/generics/tuple.slang - tests/language-feature/generics/variadic-0.slang This helps to address issue #5605. --- tests/expected-failure-github.txt | 2 -- tests/language-feature/generics/tuple.slang | 3 +-- tests/language-feature/generics/variadic-0.slang | 3 +-- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/expected-failure-github.txt b/tests/expected-failure-github.txt index df89623827..ba08b2b77f 100644 --- a/tests/expected-failure-github.txt +++ b/tests/expected-failure-github.txt @@ -14,6 +14,4 @@ tests/compute/interface-shader-param-in-struct.slang.4 syn (wgpu) tests/compute/interface-shader-param.slang.5 syn (wgpu) tests/language-feature/constants/static-const-in-generic-interface.slang.1 syn (wgpu) tests/language-feature/enums/strongly-typed-id.slang.1 syn (wgpu) -tests/language-feature/generics/tuple.slang.1 syn (wgpu) -tests/language-feature/generics/variadic-0.slang.4 syn (wgpu) tests/language-feature/shader-params/interface-shader-param-ordinary.slang.4 syn (wgpu) diff --git a/tests/language-feature/generics/tuple.slang b/tests/language-feature/generics/tuple.slang index c12fd12de8..32350ce7bf 100644 --- a/tests/language-feature/generics/tuple.slang +++ b/tests/language-feature/generics/tuple.slang @@ -1,6 +1,5 @@ //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -shaderobj -output-using-type -// WGPU: f16 extension not allowed in current environment #5605 -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-wgpu +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -wgpu -shaderobj -output-using-type -render-features half //TEST_INPUT:ubuffer(data=[0 0 0 
0], stride=4):out,name=outputBuffer RWStructuredBuffer outputBuffer; diff --git a/tests/language-feature/generics/variadic-0.slang b/tests/language-feature/generics/variadic-0.slang index a1f4ce7a48..831d284075 100644 --- a/tests/language-feature/generics/variadic-0.slang +++ b/tests/language-feature/generics/variadic-0.slang @@ -1,8 +1,7 @@ //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -shaderobj -output-using-type //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -cpu -shaderobj -output-using-type //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -vk -shaderobj -output-using-type -// WGPU: f16 extension not allowed in current environment #5605 -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-wgpu +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -wgpu -shaderobj -output-using-type -render-features half //TEST:SIMPLE(filecheck=CHECK): -target glsl // CHECK-NOT: NullDifferential From 2c82b14c476c368c98b6e081aa9f89c878e165fb Mon Sep 17 00:00:00 2001 From: Ellie Hermaszewska Date: Thu, 28 Nov 2024 15:34:59 +0800 Subject: [PATCH 02/13] Sort filenames when generating table of contents (#5659) The order of EnumerateFiles is unspecified --- docs/scripts/Program.cs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/scripts/Program.cs b/docs/scripts/Program.cs index d543f399ec..b256af1b28 100644 --- a/docs/scripts/Program.cs +++ b/docs/scripts/Program.cs @@ -1,7 +1,8 @@ using System; +using System.Collections.Generic; using System.IO; +using System.Linq; using System.Text; -using System.Collections.Generic; namespace toc { public class Builder @@ -128,7 +129,7 @@ public static string Run(string path) { StringBuilder outputSB = new StringBuilder(); outputSB.AppendFormat("Building table of contents from {0}...\n", path); - var files = System.IO.Directory.EnumerateFiles(path, "*.md"); + var files = System.IO.Directory.EnumerateFiles(path, "*.md").OrderBy(f => System.IO.Path.GetFileName(f)); List nodes = new List(); 
foreach (var f in files) { @@ -230,4 +231,4 @@ public static string Run(string path) return outputSB.ToString(); } } -} \ No newline at end of file +} From 947b99e8ebaa81e9c8ee9b0f3e247d8d329041ad Mon Sep 17 00:00:00 2001 From: Ellie Hermaszewska Date: Thu, 28 Nov 2024 15:43:36 +0800 Subject: [PATCH 03/13] Add Table of Contents check to CI, and bot script to regenerate (#5618) * Sort filenames when generating table of contents The order of EnumerateFiles is unspecified * Add build table of contents bash script * Add toc checking to CI * Add --check-only option to toc checking * regenerate ToC --- .github/workflows/check-toc.yml | 13 ++ .github/workflows/regenerate-toc.yml | 82 ++++++++++++ .github/workflows/slash-command-dispatch.yml | 5 + docs/build_toc.sh | 127 +++++++++++++++++++ 4 files changed, 227 insertions(+) create mode 100644 .github/workflows/check-toc.yml create mode 100644 .github/workflows/regenerate-toc.yml create mode 100755 docs/build_toc.sh diff --git a/.github/workflows/check-toc.yml b/.github/workflows/check-toc.yml new file mode 100644 index 0000000000..2b478cb632 --- /dev/null +++ b/.github/workflows/check-toc.yml @@ -0,0 +1,13 @@ +name: Check Table of Contents (comment /regenerate-toc to auto-fix) + +on: + push: + branches: [master] + pull_request: + branches: [master] +jobs: + check-formatting: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - run: ./docs/build_toc.sh --check-only diff --git a/.github/workflows/regenerate-toc.yml b/.github/workflows/regenerate-toc.yml new file mode 100644 index 0000000000..973bbcf950 --- /dev/null +++ b/.github/workflows/regenerate-toc.yml @@ -0,0 +1,82 @@ +name: Regenerate TOC +on: + repository_dispatch: + types: [regenerate-toc-command] +jobs: + regenerate-toc: + runs-on: ubuntu-latest + steps: + - name: Checkout PR branch + uses: actions/checkout@v4 + with: + token: ${{ secrets.SLANGBOT_PAT }} + repository: ${{ github.event.client_payload.pull_request.head.repo.full_name }} + ref: ${{ 
github.event.client_payload.pull_request.head.ref }} + path: pr-branch + + - name: Checkout target branch + uses: actions/checkout@v4 + with: + token: ${{ secrets.SLANGBOT_PAT }} + repository: ${{ github.event.client_payload.pull_request.base.repo.full_name }} + ref: ${{ github.event.client_payload.pull_request.base.ref }} + path: target-branch + + - name: Regenerate Table of Contents + id: regen + run: | + ./target-branch/docs/build_toc.sh --source ./pr-branch + + - name: Configure Git commit signing + id: git-info + run: | + echo "${{ secrets.SLANGBOT_SIGNING_KEY }}" > "${{runner.temp}}"/signing_key + chmod 600 "${{runner.temp}}"/signing_key + git -C pr-branch config commit.gpgsign true + git -C pr-branch config gpg.format ssh + git -C pr-branch config user.signingkey "${{runner.temp}}"/signing_key + bot_info=$(curl -s -H "Authorization: Bearer ${{ secrets.SLANGBOT_PAT }}" \ + "https://api.github.com/user") + echo "bot_identity=$(echo $bot_info | jq --raw-output '.login + " <" + (.id|tostring) + "+" + .login + "@users.noreply.github.com>"')" >> $GITHUB_OUTPUT + echo "bot_name=$(echo $bot_info | jq --raw-output '.login')" >> $GITHUB_OUTPUT + + - name: Create Pull Request + id: create-pr + uses: peter-evans/create-pull-request@v7 + with: + token: ${{ secrets.SLANGBOT_PAT }} + path: pr-branch + commit-message: "regenerate documentation Table of Contents" + title: "Regenerate documentation ToC for PR #${{ github.event.client_payload.pull_request.number }}" + body: "Automated ToC generation for ${{ github.event.client_payload.pull_request.html_url }}" + committer: ${{ steps.git-info.outputs.bot_identity }} + author: ${{ steps.git-info.outputs.bot_identity }} + branch: regenerate-toc-${{ github.event.client_payload.pull_request.number }}-${{ github.event.client_payload.pull_request.head.ref }} + base: ${{ github.event.client_payload.pull_request.head.ref }} + push-to-fork: ${{ steps.git-info.outputs.bot_name }}/slang + delete-branch: true + + - name: Comment on PR + 
uses: peter-evans/create-or-update-comment@v4 + if: always() + with: + token: ${{ secrets.SLANGBOT_PAT }} + repository: ${{ github.event.client_payload.github.payload.repository.full_name }} + issue-number: ${{ github.event.client_payload.pull_request.number }} + body: | + ${{ + steps.regen.conclusion == 'failure' + && format('❌ Table of Contents generation failed. Please check the [workflow run](https://github.com/{0}/actions/runs/{1})', github.repository, github.run_id) + || (steps.create-pr.conclusion == 'failure' + && format('❌ Failed to create regenerate ToC pull request. Please check the [workflow run](https://github.com/{0}/actions/runs/{1})', github.repository, github.run_id) + || format('🌈 Regenerated Table of Contents, please merge the changes from [this PR]({0})', steps.create-pr.outputs.pull-request-url)) + }} + + - name: Add reaction + uses: peter-evans/create-or-update-comment@v4 + with: + token: ${{ secrets.SLANGBOT_PAT }} + repository: ${{ github.event.client_payload.github.payload.repository.full_name }} + comment-id: ${{ github.event.client_payload.github.payload.comment.id }} + reactions-edit-mode: replace + reactions: hooray diff --git a/.github/workflows/slash-command-dispatch.yml b/.github/workflows/slash-command-dispatch.yml index 3d59498da4..0295e72401 100644 --- a/.github/workflows/slash-command-dispatch.yml +++ b/.github/workflows/slash-command-dispatch.yml @@ -19,6 +19,11 @@ jobs: "command": "format", "permission": "none", "issue_type": "pull-request" + }, + { + "command": "regenerate-toc", + "permission": "none", + "issue_type": "pull-request" } ] diff --git a/docs/build_toc.sh b/docs/build_toc.sh new file mode 100755 index 0000000000..9c158d6a2c --- /dev/null +++ b/docs/build_toc.sh @@ -0,0 +1,127 @@ +#!/usr/bin/env bash +set -e + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +project_root="$(dirname "$script_dir")" +check_only=0 + +show_help() { + me=$(basename "$0") + cat <] [--check-only] + +Options: + --help Show this 
help message + --source Path to project root directory (defaults to parent of the script directory) + --check-only Check if TOC needs updating, exit 1 if changes needed +EOF +} + +while [[ "$#" -gt 0 ]]; do + case $1 in + -h | --help) + show_help + exit 0 + ;; + --source) + project_root="$2" + shift + ;; + --check-only) + check_only=1 + ;; + *) + echo "unrecognized argument: $1" >&2 + show_help >&2 + exit 1 + ;; + esac + shift +done + +missing_bin=0 + +require_bin() { + local name="$1" + if ! command -v "$name" &>/dev/null; then + echo "This script needs $name, but it isn't in \$PATH" >&2 + missing_bin=1 + return + fi +} + +require_bin "mcs" +require_bin "mono" + +if [ "$missing_bin" -eq 1 ]; then + exit 1 +fi + +temp_dir=$(mktemp -d) +trap 'rm -rf "$temp_dir"' EXIT + +cd "$project_root/docs" || exit 1 + +cat >"$temp_dir/temp_program.cs" <&2 + exit 1 +fi + +for dir in "user-guide" "gfx-user-guide"; do + if [ -d "$script_dir/$dir" ]; then + if [ "$check_only" -eq 1 ]; then + # Ensure working directory is clean + if ! git diff --quiet "$script_dir/$dir/toc.html" 2>/dev/null; then + echo "Working directory not clean, cannot check TOC" >&2 + exit 1 + fi + fi + + if ! mono "$temp_dir/toc-builder.exe" "$script_dir/$dir"; then + echo "TOC generation failed for $dir" >&2 + exit 1 + fi + + if [ "$check_only" -eq 1 ]; then + if ! git diff --quiet "$script_dir/$dir/toc.html" 2>/dev/null; then + git diff --color "$script_dir/$dir/toc.html" + git checkout -- "$script_dir/$dir/toc.html" 2>/dev/null + exit 1 + fi + fi + else + echo "Directory $dir not found" >&2 + fi +done From 6e52cc811835b82bf9140189ef1f3a8561baf327 Mon Sep 17 00:00:00 2001 From: Anders Leino Date: Thu, 28 Nov 2024 11:27:22 +0200 Subject: [PATCH 04/13] wgsl: signedness mismatch fixes (#5692) * Enable tests/language-feature/enums/strongly-typed-id.slang * Fix operator signedness mismatch issue This helps to address issue #5606. * wgsl: Insert casts for integer type return values This closes #5606. 
* format code --------- Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com> --- source/slang/slang-ir-wgsl-legalize.cpp | 49 +++++++++++++++++++ tests/expected-failure-github.txt | 1 - .../enums/strongly-typed-id.slang | 2 - 3 files changed, 49 insertions(+), 3 deletions(-) diff --git a/source/slang/slang-ir-wgsl-legalize.cpp b/source/slang/slang-ir-wgsl-legalize.cpp index facafb284f..e6e3755928 100644 --- a/source/slang/slang-ir-wgsl-legalize.cpp +++ b/source/slang/slang-ir-wgsl-legalize.cpp @@ -1429,6 +1429,30 @@ struct LegalizeWGSLEntryPointContext } } + void legalizeFunc(IRFunc* func) + { + // Insert casts to convert integer return types + auto funcReturnType = func->getResultType(); + if (isIntegralType(funcReturnType)) + { + for (auto block : func->getBlocks()) + { + if (auto returnInst = as(block->getTerminator())) + { + auto returnedValue = returnInst->getOperand(0); + auto returnedValueType = returnedValue->getDataType(); + if (isIntegralType(returnedValueType)) + { + IRBuilder builder(returnInst); + builder.setInsertBefore(returnInst); + auto newOp = builder.emitCast(funcReturnType, returnedValue); + builder.replaceOperand(returnInst->getOperands(), newOp); + } + } + } + } + } + void legalizeSwitch(IRSwitch* switchInst) { // WGSL Requires all switch statements to contain a default case. @@ -1491,6 +1515,28 @@ struct LegalizeWGSLEntryPointContext inst->getOperand(0)); builder.replaceOperand(inst->getOperands(), newLhs); } + else if ( + isIntegralType(inst->getOperand(0)->getDataType()) && + isIntegralType(inst->getOperand(1)->getDataType())) + { + // If integer operands differ in signedness, convert the signed one to unsigned. + // We're assuming that the cases where this is bad have already been caught by + // common validation checks. 
+ IntInfo opIntInfo[2] = { + getIntTypeInfo(inst->getOperand(0)->getDataType()), + getIntTypeInfo(inst->getOperand(1)->getDataType())}; + if (opIntInfo[0].isSigned != opIntInfo[1].isSigned) + { + int signedOpIndex = (int)opIntInfo[1].isSigned; + opIntInfo[signedOpIndex].isSigned = false; + IRBuilder builder(inst); + builder.setInsertBefore(inst); + auto newOp = builder.emitCast( + builder.getType(getIntTypeOpFromInfo(opIntInfo[signedOpIndex])), + inst->getOperand(signedOpIndex)); + builder.replaceOperand(inst->getOperands() + signedOpIndex, newOp); + } + } } void processInst(IRInst* inst) @@ -1529,6 +1575,9 @@ struct LegalizeWGSLEntryPointContext legalizeBinaryOp(inst); break; + case kIROp_Func: + legalizeFunc(static_cast(inst)); + [[fallthrough]]; default: for (auto child : inst->getModifiableChildren()) { diff --git a/tests/expected-failure-github.txt b/tests/expected-failure-github.txt index ba08b2b77f..81a61133bc 100644 --- a/tests/expected-failure-github.txt +++ b/tests/expected-failure-github.txt @@ -13,5 +13,4 @@ tests/bugs/buffer-swizzle-store.slang.3 syn (wgpu) tests/compute/interface-shader-param-in-struct.slang.4 syn (wgpu) tests/compute/interface-shader-param.slang.5 syn (wgpu) tests/language-feature/constants/static-const-in-generic-interface.slang.1 syn (wgpu) -tests/language-feature/enums/strongly-typed-id.slang.1 syn (wgpu) tests/language-feature/shader-params/interface-shader-param-ordinary.slang.4 syn (wgpu) diff --git a/tests/language-feature/enums/strongly-typed-id.slang b/tests/language-feature/enums/strongly-typed-id.slang index 8625d1a4b8..70f655538b 100644 --- a/tests/language-feature/enums/strongly-typed-id.slang +++ b/tests/language-feature/enums/strongly-typed-id.slang @@ -1,6 +1,4 @@ //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -shaderobj -// WGSL: No matching overload for operator... 
#5606 -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-wgpu enum MyId : uint {} extension MyId { uint get() { return (uint)this; } } From 6bc63681e0b874e66fd9b8881c533eafdc6b9b78 Mon Sep 17 00:00:00 2001 From: Anders Leino Date: Thu, 28 Nov 2024 13:56:41 +0200 Subject: [PATCH 05/13] Update Slang-RHI/slang (#5701) * Update Slang-RHI/slang This brings in new fixes for WebGPU. In particular, the "use_dxc" toggle is now used, which should enable these tests to run on WebGPU, if f16 is otherwise supported: - `tests/language-feature/generics/variadic-0.slang` - `tests/language-feature/generics/tuple.slang` This closes #5605. * Disable tests/autodiff/float-cast.slang for wgpu This test was previously not running for WebGPU because it required the 'half' render feature, and Slang-RHI was previously not reporting it. With the Slang-RHI update, the test now runs on WebGPU. It now fails because the test is using 'double' which is just not supported on WebGPU. Thus this commit disables the test. --- external/slang-rhi | 2 +- tests/autodiff/float-cast.slang | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/external/slang-rhi b/external/slang-rhi index 5b77d09cea..6c495919b9 160000 --- a/external/slang-rhi +++ b/external/slang-rhi @@ -1 +1 @@ -Subproject commit 5b77d09cea9a309b98b65469e8438835d972caca +Subproject commit 6c495919b92754f8489eb0085ad859344963dcd2 diff --git a/tests/autodiff/float-cast.slang b/tests/autodiff/float-cast.slang index 2c9797fa63..0ab00a9356 100644 --- a/tests/autodiff/float-cast.slang +++ b/tests/autodiff/float-cast.slang @@ -1,5 +1,7 @@ //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -output-using-type -render-features half //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -output-using-type -render-features half +// Not supported in WGSL: Double and other unsupported scalar types +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-wgpu //TEST_INPUT:ubuffer(data=[0 0 0 0 0], stride=4):out,name=outputBuffer 
RWStructuredBuffer outputBuffer; From 7a5981eb554eaa97653a1a1f74b1ec9a44b82633 Mon Sep 17 00:00:00 2001 From: Ellie Hermaszewska Date: Fri, 29 Nov 2024 13:31:04 +0800 Subject: [PATCH 06/13] Make ToC regeneration script path handling more robust (#5700) Co-authored-by: Anders Leino --- docs/build_toc.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/build_toc.sh b/docs/build_toc.sh index 9c158d6a2c..9c197cad65 100755 --- a/docs/build_toc.sh +++ b/docs/build_toc.sh @@ -62,7 +62,7 @@ fi temp_dir=$(mktemp -d) trap 'rm -rf "$temp_dir"' EXIT -cd "$project_root/docs" || exit 1 +docs_dir="$project_root/docs" cat >"$temp_dir/temp_program.cs" </dev/null; then + if ! git -C "$project_root" diff --quiet "docs/$dir/toc.html" 2>/dev/null; then echo "Working directory not clean, cannot check TOC" >&2 exit 1 fi fi - if ! mono "$temp_dir/toc-builder.exe" "$script_dir/$dir"; then + if ! mono "$temp_dir/toc-builder.exe" "$docs_dir/$dir"; then echo "TOC generation failed for $dir" >&2 exit 1 fi if [ "$check_only" -eq 1 ]; then - if ! git diff --quiet "$script_dir/$dir/toc.html" 2>/dev/null; then - git diff --color "$script_dir/$dir/toc.html" - git checkout -- "$script_dir/$dir/toc.html" 2>/dev/null + if ! 
git -C "$project_root" diff --quiet "docs/$dir/toc.html" 2>/dev/null; then + git -C "$project_root" diff --color "docs/$dir/toc.html" + git -C "$project_root" checkout -- "docs/$dir/toc.html" 2>/dev/null exit 1 fi fi From 71f97268789164bd77614636536172ba657c6a57 Mon Sep 17 00:00:00 2001 From: Ellie Hermaszewska Date: Fri, 29 Nov 2024 13:56:41 +0800 Subject: [PATCH 07/13] Add missing WGSL intrinsics to test (#5663) Closes https://github.com/shader-slang/slang/issues/5263 --- tests/autodiff/custom-intrinsic-1.slang | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/autodiff/custom-intrinsic-1.slang b/tests/autodiff/custom-intrinsic-1.slang index 1fe204b58a..f1d5116274 100644 --- a/tests/autodiff/custom-intrinsic-1.slang +++ b/tests/autodiff/custom-intrinsic-1.slang @@ -1,7 +1,5 @@ //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -output-using-type //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -output-using-type -// slang-test/WGPU: IR opcode during code emit #5263 -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-wgpu //TEST_INPUT:ubuffer(data=[0 0 0 0 0], stride=4):out,name=outputBuffer RWStructuredBuffer outputBuffer; @@ -19,6 +17,7 @@ namespace myintrinsiclib __target_intrinsic(cpp, "$P_exp($0)") __target_intrinsic(spirv, "12 resultType resultId glsl450 27 _0") __target_intrinsic(metal, "exp($0)") + __target_intrinsic(wgsl, "exp($0)") [ForwardDerivative(d_myexp)] T myexp(T x); @@ -39,6 +38,7 @@ namespace myintrinsiclib __target_intrinsic(cuda, "$P_sin($0)") __target_intrinsic(cpp, "$P_sin($0)") __target_intrinsic(spirv, "12 resultType resultId glsl450 13 _0") + __target_intrinsic(wgsl, "sin($0)") [ForwardDerivative(d_mysin)] T mysin(T x); @@ -58,6 +58,7 @@ namespace myintrinsiclib __target_intrinsic(cuda, "$P_cos($0)") __target_intrinsic(cpp, "$P_cos($0)") __target_intrinsic(spirv, "12 resultType resultId glsl450 14 _0") + __target_intrinsic(wgsl, "cos($0)") [ForwardDerivative(d_mycos)] T mycos(T x); From 
c3557978cf0184aaf75c27c309bc87e84fd6ab79 Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Fri, 29 Nov 2024 14:02:19 +0700 Subject: [PATCH 08/13] docs: Reduce typo count (#5671) Co-authored-by: Ellie Hermaszewska --- docs/64bit-type-support.md | 6 ++-- docs/README.md | 2 +- docs/cpu-target.md | 8 +++--- docs/cuda-target.md | 10 +++---- docs/design/autodiff.md | 6 ++-- docs/design/autodiff/basics.md | 6 ++-- docs/design/autodiff/decorators.md | 4 +-- docs/design/autodiff/ir-overview.md | 18 ++++++------ docs/design/autodiff/types.md | 4 +-- docs/design/capabilities.md | 14 +++++----- docs/design/casting.md | 6 +--- docs/design/coding-conventions.md | 2 +- docs/design/decl-refs.md | 2 +- docs/design/existential-types.md | 2 +- docs/design/interfaces.md | 22 +++++++-------- docs/design/ir.md | 10 +++---- docs/design/overview.md | 26 ++++++++--------- docs/design/semantic-checking.md | 16 +++++------ docs/design/serialization.md | 4 +-- docs/design/stdlib-intrinsics.md | 2 +- docs/faq.md | 2 +- docs/gfx-user-guide/01-getting-started.md | 4 +-- .../derivatives-in-compute.md | 6 ++-- docs/language-guide.md | 10 +++---- docs/language-reference/05-expressions.md | 6 ++-- docs/language-reference/06-statements.md | 6 ++-- docs/language-reference/07-declarations.md | 16 +++++------ docs/language-reference/08-attributes.md | 2 +- docs/layout.md | 4 +-- docs/nvapi-support.md | 2 +- docs/proposals/000-template.md | 2 +- docs/proposals/001-where-clauses.md | 10 +++---- .../002-type-equality-constraints.md | 6 ++-- docs/proposals/003-atomic-t.md | 2 +- docs/proposals/004-initialization.md | 16 +++++------ docs/proposals/005-write-only-textures.md | 2 +- docs/proposals/007-variadic-generics.md | 10 +++---- .../012-language-version-directive.md | 8 +++--- .../implementation/ast-ir-serialization.md | 14 +++++----- docs/proposals/legacy/001-basic-interfaces.md | 2 +- docs/proposals/legacy/002-api-headers.md | 28 +++++++++---------- docs/proposals/legacy/003-error-handling.md | 8 
+++--- docs/proposals/legacy/005-components.md | 14 +++++----- .../legacy/006-artifact-container-format.md | 4 +-- docs/repro.md | 12 ++++---- docs/stdlib-doc.md | 2 +- docs/stdlib-docgen.md | 10 +++---- docs/target-compatibility.md | 8 +++--- docs/update_spirv.md | 4 +-- docs/user-guide/00-introduction.md | 2 +- docs/user-guide/02-conventional-features.md | 8 +++--- docs/user-guide/03-convenience-features.md | 2 +- docs/user-guide/05-capabilities.md | 4 +-- docs/user-guide/06-interfaces-generics.md | 2 +- docs/user-guide/07-autodiff.md | 4 +-- docs/user-guide/08-compiling.md | 2 +- docs/user-guide/09-reflection.md | 4 +-- docs/user-guide/09-targets.md | 6 ++-- .../user-guide/10-link-time-specialization.md | 2 +- docs/user-guide/a1-01-matrix-layout.md | 16 +++++------ docs/user-guide/a1-02-slangpy.md | 8 +++--- docs/user-guide/a1-05-uniformity.md | 6 ++-- .../user-guide/a2-01-spirv-target-specific.md | 8 +++--- docs/user-guide/a2-03-wgsl-target-specific.md | 2 +- .../a3-02-reference-capability-atoms.md | 2 +- docs/user-guide/toc.html | 4 +-- docs/wave-intrinsics.md | 10 +++---- 67 files changed, 238 insertions(+), 244 deletions(-) diff --git a/docs/64bit-type-support.md b/docs/64bit-type-support.md index acff2f7707..3ef5fc5201 100644 --- a/docs/64bit-type-support.md +++ b/docs/64bit-type-support.md @@ -7,7 +7,7 @@ Slang 64-bit Type Support * 64 bit integers generally require later APIs/shader models * When specifying 64 bit literals *always* use the type suffixes (ie `L`, `ULL`, `LL`) * GPU target/s generally do not support all double intrinsics - * Typically missing are trascendentals (sin, cos etc), logarithm and exponental functions + * Typically missing are trascendentals (sin, cos etc), logarithm and exponential functions * CUDA is the exception supporting nearly all double intrinsics * D3D * D3D targets *appear* to support double intrinsics (like sin, cos, log etc), but behind the scenes they are actually being converted to float @@ -26,7 +26,7 @@ The Slang 
language supports 64 bit built in types. Such as This also applies to vector and matrix versions of these types. -Unfortunately if a specific target supports the type or the typical HLSL instrinsic functions (such as sin/cos/max/min etc) depends very much on the target. +Unfortunately if a specific target supports the type or the typical HLSL intrinsic functions (such as sin/cos/max/min etc) depends very much on the target. Special attention has to be made with respect to literal 64 bit types. By default float and integer literals if they do not have an explicit suffix are assumed to be 32 bit. There is a variety of reasons for this design choice - the main one being around by default behavior of getting good performance. The suffixes required for 64 bit types are as follows @@ -107,7 +107,7 @@ On dxc the following intrinsics are available with double:: These are tested in the test `tests/hlsl-intrinsic/scalar-double-d3d-intrinsic.slang`. -There is no suport for transcendentals (`sin`, `cos` etc) or `log`/`exp`. More surprising is that`sqrt`, `rsqrt`, `frac`, `ceil`, `floor`, `trunc`, `step`, `lerp`, `smoothstep` are also not supported. +There is no support for transcendentals (`sin`, `cos` etc) or `log`/`exp`. More surprising is that `sqrt`, `rsqrt`, `frac`, `ceil`, `floor`, `trunc`, `step`, `lerp`, `smoothstep` are also not supported. uint64_t and int64_t Support ============================ diff --git a/docs/README.md b/docs/README.md index 086a921c13..ca6a3ddfe6 100644 --- a/docs/README.md +++ b/docs/README.md @@ -19,7 +19,7 @@ The [target compatibility guide](target-compatibility.md) gives an overview of f The [CPU target guide](cpu-target.md) gives information on compiling Slang or C++ source into shared libraries/executables or functions that can be directly executed. It also covers how to generate C++ code from Slang source. -The [CUDA target guide](cuda-target.md) provides information on compiling Slang/HLSL or CUDA source. 
Slang can compile to equivalent CUDA source, as well as to PTX via the nvrtc CUDA complier. +The [CUDA target guide](cuda-target.md) provides information on compiling Slang/HLSL or CUDA source. Slang can compile to equivalent CUDA source, as well as to PTX via the nvrtc CUDA compiler. Contributors ------------ diff --git a/docs/cpu-target.md b/docs/cpu-target.md index 1229cb3dd7..89a43e09ff 100644 --- a/docs/cpu-target.md +++ b/docs/cpu-target.md @@ -293,7 +293,7 @@ The global can now be set from host code via } ``` -In terms of reflection `__global` variables are not visibile. +In terms of reflection `__global` variables are not visible. ## NativeString @@ -309,7 +309,7 @@ TODO(JS): What happens with String with shader compile style on CPU? Shouldn't i It is currently not possible to step into LLVM-JIT code when using [slang-llvm](#slang-llvm). Fortunately it is possible to step into code compiled via a [regular C/C++ compiler](#regular-cpp). -Below is a code snippet showing how to swich to a [regular C/C++ compiler](#regular-cpp) at runtime. +Below is a code snippet showing how to switch to a [regular C/C++ compiler](#regular-cpp) at runtime. ```C++ SlangPassThrough findRegularCppCompiler(slang::IGlobalSession* slangSession) @@ -401,7 +401,7 @@ struct ComputeVaryingInput `ComputeVaryingInput` allows specifying a range of groupIDs to execute - all the ids in a grid from startGroup to endGroup, but not including the endGroupIDs. Most compute APIs allow specifying an x,y,z extent on 'dispatch'. This would be equivalent as having startGroupID = { 0, 0, 0} and endGroupID = { x, y, z }. The exported function allows setting a range of groupIDs such that client code could dispatch different parts of the work to different cores. This group range mechanism was chosen as the 'default' mechanism as it is most likely to achieve the best performance. -There are two other functions that consist of the entry point name postfixed with `_Thread` and `_Group`. 
For the entry point 'computeMain' these functions would be accessable from the shared library interface as `computeMain_Group` and `computeMain_Thread`. `_Group` has the same signature as the listed for computeMain, but it doesn't execute a range, only the single group specified by startGroupID (endGroupID is ignored). That is all of the threads within the group (as specified by `[numthreads]`) will be executed in a single call. +There are two other functions that consist of the entry point name postfixed with `_Thread` and `_Group`. For the entry point 'computeMain' these functions would be accessible from the shared library interface as `computeMain_Group` and `computeMain_Thread`. `_Group` has the same signature as the listed for computeMain, but it doesn't execute a range, only the single group specified by startGroupID (endGroupID is ignored). That is all of the threads within the group (as specified by `[numthreads]`) will be executed in a single call. It may be desirable to have even finer control of how execution takes place down to the level of individual 'thread's and this can be achieved with the `_Thread` style. The signature looks as follows @@ -566,7 +566,7 @@ It may be useful to be able to include `slang-cpp-types.h` in C++ code to access Would wrap all the Slang prelude types in the namespace `CPPPrelude`, such that say a `StructuredBuffer` could be specified in C++ source code as `CPPPrelude::StructuredBuffer`. -The code that sets up the prelude for the test infrastucture and command line usage can be found in ```TestToolUtil::setSessionDefaultPrelude```. Essentially this determines what the absolute path is to `slang-cpp-prelude.h` is and then just makes the prelude `#include "the absolute path"`. +The code that sets up the prelude for the test infrastructure and command line usage can be found in ```TestToolUtil::setSessionDefaultPrelude```. 
Essentially this determines what the absolute path is to `slang-cpp-prelude.h` is and then just makes the prelude `#include "the absolute path"`. The *default* prelude is set to the contents of the files for C++ held in the prelude directory and is held within the Slang shared library. It is therefore typically not necessary to distribute Slang with prelude files. diff --git a/docs/cuda-target.md b/docs/cuda-target.md index 6c59690daa..a80dc59f9c 100644 --- a/docs/cuda-target.md +++ b/docs/cuda-target.md @@ -30,7 +30,7 @@ The following are a work in progress or not implemented but are planned to be so For producing PTX binaries Slang uses [NVRTC](https://docs.nvidia.com/cuda/nvrtc/index.html). NVRTC dll/shared library has to be available to Slang (for example in the appropriate PATH for example) for it to be able to produce PTX. -The NVRTC compiler can be accessed directly via the pass through mechanism and is identifed by the enum value `SLANG_PASS_THROUGH_NVRTC`. +The NVRTC compiler can be accessed directly via the pass through mechanism and is identified by the enum value `SLANG_PASS_THROUGH_NVRTC`. Much like other targets that use downstream compilers Slang can be used to compile CUDA source directly to PTX via the pass through mechansism. The Slang command line options will broadly be mapped down to the appropriate options for the NVRTC compilation. In the API the `SlangCompileTarget` for CUDA is `SLANG_CUDA_SOURCE` and for PTX is `SLANG_PTX`. These can also be specified on the Slang command line as `-target cuda` and `-target ptx`. @@ -126,11 +126,11 @@ The UniformState and UniformEntryPointParams struct typically vary by shader. Un Read only textures will be bound as the opaque CUDA type CUtexObject. This type is the combination of both a texture AND a sampler. This is somewhat different from HLSL, where there can be separate `SamplerState` variables. This allows access of a single texture binding with different types of sampling. 
-If code relys on this behavior it will be necessary to bind multiple CtexObjects with different sampler settings, accessing the same texture data. +If code relies on this behavior it will be necessary to bind multiple CUtexObjects with different sampler settings, accessing the same texture data. Slang has some preliminary support for TextureSampler type - a combined Texture and SamplerState. To write Slang code that can target CUDA and other platforms using this mechanism will expose the semantics appropriately within the source. -Load is only supported for Texture1D, and the mip map selection argument is ignored. This is because there is tex1Dfetch and no higher dimensional equivalents. CUDA also only allows such access if the backing array is linear memory - meaning the bound texture cannot have mip maps - thus making the mip map parameter superflous anyway. RWTexture does allow Load on other texture types. +Load is only supported for Texture1D, and the mip map selection argument is ignored. This is because there is tex1Dfetch and no higher dimensional equivalents. CUDA also only allows such access if the backing array is linear memory - meaning the bound texture cannot have mip maps - thus making the mip map parameter superfluous anyway. RWTexture does allow Load on other texture types. ## RWTexture @@ -239,7 +239,7 @@ That for pass-through usage, prelude is not pre-pended, preludes are for code ge void setDownstreamCompilerPrelude(SlangPassThrough passThrough, const char* preludeText); ``` -The code that sets up the prelude for the test infrastucture and command line usage can be found in ```TestToolUtil::setSessionDefaultPrelude```. Essentially this determines what the absolute path is to `slang-cpp-prelude.h` is and then just makes the prelude `#include "the absolute path"`. +The code that sets up the prelude for the test infrastructure and command line usage can be found in ```TestToolUtil::setSessionDefaultPrelude```.
Essentially this determines what the absolute path is to `slang-cpp-prelude.h` is and then just makes the prelude `#include "the absolute path"`. Half Support ============ @@ -292,7 +292,7 @@ Will require 3 times as many steps as the earlier scalar example just using a si ## WaveGetLaneIndex -'WaveGetLaneIndex' defaults to `(threadIdx.x & SLANG_CUDA_WARP_MASK)`. Depending on how the kernel is launched this could be incorrect. There other ways to get lane index, for example using inline assembly. This mechanism though is apparently slower than the simple method used here. There is support for using the asm mechnism in the CUDA prelude using the `SLANG_USE_ASM_LANE_ID` preprocessor define to enable the feature. +'WaveGetLaneIndex' defaults to `(threadIdx.x & SLANG_CUDA_WARP_MASK)`. Depending on how the kernel is launched this could be incorrect. There are other ways to get lane index, for example using inline assembly. This mechanism though is apparently slower than the simple method used here. There is support for using the asm mechanism in the CUDA prelude using the `SLANG_USE_ASM_LANE_ID` preprocessor define to enable the feature. There is potential to calculate the lane id using the [numthreads] markup in Slang/HLSL, but that also requires some assumptions of how that maps to a lane index. diff --git a/docs/design/autodiff.md b/docs/design/autodiff.md index 29d7c82c7a..8bf26baa90 100644 --- a/docs/design/autodiff.md +++ b/docs/design/autodiff.md @@ -201,7 +201,7 @@ DP f_SSA_Proped(DP dpa, DP dpb) } // Note here that we have to 'store' all the intermediaries - // _t1, _t2, _q4, _t3, _q5, _t3_d, _t4 and _q1. This is fundementally + // _t1, _t2, _q4, _t3, _q5, _t3_d, _t4 and _q1. This is fundamentally // the tradeoff between fwd_mode and rev_mode if (_b1) @@ -288,7 +288,7 @@ void f_SSA_Rev(inout DP dpa, inout DP dpb, float dout) } // Note here that we have to 'store' all the intermediaries - // _t1, _t2, _q4, _t3, _q5, _t3_d, _t4 and _q1. 
This is fundementally + // _t1, _t2, _q4, _t3, _q5, _t3_d, _t4 and _q1. This is fundamentally // the tradeoff between fwd_mode and rev_mode if (_b1) @@ -330,4 +330,4 @@ void f_SSA_Rev(inout DP dpa, inout DP dpb, float dout) } } -``` \ No newline at end of file +``` diff --git a/docs/design/autodiff/basics.md b/docs/design/autodiff/basics.md index 189260aff0..43ed164ad4 100644 --- a/docs/design/autodiff/basics.md +++ b/docs/design/autodiff/basics.md @@ -4,7 +4,7 @@ This documentation is intended for Slang contributors and is written from a comp ## What is Automatic Differentiation? -Before diving into the design of the automatic differentiation (for brevity, we will call it 'auto-diff') passes, it is important to understand the end goal of what auto-diff tries to acheive. +Before diving into the design of the automatic differentiation (for brevity, we will call it 'auto-diff') passes, it is important to understand the end goal of what auto-diff tries to achieve. The over-arching goal of Slang's auto-diff is to enable the user to compute derivatives of a given shader program or function's output w.r.t its input parameters. This critical compiler feature enables users to quickly use their shaders with gradient-based parameter optimization algorithms, which forms the backbone of modern machine learning systems. It enables users to train and deploy graphics systems that contain ML primitives (like multi-layer perceptron's or MLPs) or use their shader programs as differentiable primitives within larger ML pipelines. @@ -60,7 +60,7 @@ DifferentialPair fwd_f(DifferentialPair dpx) } ``` -Note that `(2 * x)` is the multiplier corresponding to $Df(x)$. We refer to $x$ and $f(x)$ as "*primal*" values and the pertubations $dx$ and $Df(x)\cdot dx$ as "*differential*" values. The reason for this separation is that the "*differential*" output values are always linear w.r.t their "*differential*" inputs. +Note that `(2 * x)` is the multiplier corresponding to $Df(x)$. 
We refer to $x$ and $f(x)$ as "*primal*" values and the perturbations $dx$ and $Df(x)\cdot dx$ as "*differential*" values. The reason for this separation is that the "*differential*" output values are always linear w.r.t their "*differential*" inputs. As the name implies, `DifferentialPair` is a special pair type used by Slang to hold values and their corresponding differentials. @@ -256,7 +256,7 @@ void rev_f(inout DifferentialPair dpx, inout DifferentialPair dpy, Note that `rev_f` accepts derivatives w.r.t the output value as the input, and returns derivatives w.r.t inputs as its output (through `inout` parameters). `rev_f` still needs the primal values `x` and `y` to compute the derivatives, so those are still passed in as an input through the primal part of the differential pair. -Also note that the reverse-mode derivative function does not have to compute the primal result value (its return is void). The reason for this is a matter of convenience: reverse-mode derivatives are often invoked after all the primal fuctions, and there is typically no need for these values. We go into more detail on this topic in the checkpointing chapter. +Also note that the reverse-mode derivative function does not have to compute the primal result value (its return is void). The reason for this is a matter of convenience: reverse-mode derivatives are often invoked after all the primal functions, and there is typically no need for these values. We go into more detail on this topic in the checkpointing chapter. 
The reverse mode function can be used to compute both `dOutput/dx` and `dOutput/dy` with a single invocation (unlike the forward-mode case where we had to invoke `fwd_f` once for each input) diff --git a/docs/design/autodiff/decorators.md b/docs/design/autodiff/decorators.md index 626f8bc4cc..27bf0e3d00 100644 --- a/docs/design/autodiff/decorators.md +++ b/docs/design/autodiff/decorators.md @@ -45,7 +45,7 @@ interface IFoo_after_checking_and_lowering ### `[TreatAsDifferentiable]` In large codebases where some interfaces may have several possible implementations, it may not be reasonable to have to mark all possible implementations with `[Differentiable]`, especially if certain implementations use hacks or workarounds that need additional consideration before they can be marked `[Differentiable]` -In such cases, we provide the `[TreatAsDifferentiable]` decoration (AST node: `TreatAsDifferentiableAttribute`, IR: `OpTreatAsDifferentiableDecoration`), which instructs the auto-diff passes to construct an 'empty' function that returns a 0 (or 0-equivalent) for the derivative values. This allows the signature of a `[TreatAsDifferentiable]` function to match a `[Differentiable]` requirment without actually having to produce a derivative. +In such cases, we provide the `[TreatAsDifferentiable]` decoration (AST node: `TreatAsDifferentiableAttribute`, IR: `OpTreatAsDifferentiableDecoration`), which instructs the auto-diff passes to construct an 'empty' function that returns a 0 (or 0-equivalent) for the derivative values. This allows the signature of a `[TreatAsDifferentiable]` function to match a `[Differentiable]` requirement without actually having to produce a derivative. ## Custom derivative decorators In many cases, it is desirable to manually specify the derivative code for a method rather than let the auto-diff pass synthesize it from the method body. 
This is usually desirable if: @@ -68,7 +68,7 @@ In some cases, we face the opposite problem that inspired custom derivatives. Th This frequently occurs with hardware intrinsic operations that are lowered into special op-codes that map to hardware units, such as texture sampling & interpolation operations. However, these operations do have reference 'software' implementations which can be used to produce the derivative. -To allow user code to use the fast hardward intrinsics for the primal pass, but use synthesized derivatives for the derivative pass, we provide decorators `[PrimalSubstitute(ref-fn)]` and `[PrimalSubstituteOf(orig-fn)]` (AST Node: `PrimalSubstituteAttribute`/`PrimalSubstituteOfAttribute`, IR: `OpPrimalSubstituteDecoration`), that can be used to provide a reference implementation for the auto-diff pass. +To allow user code to use the fast hardware intrinsics for the primal pass, but use synthesized derivatives for the derivative pass, we provide decorators `[PrimalSubstitute(ref-fn)]` and `[PrimalSubstituteOf(orig-fn)]` (AST Node: `PrimalSubstituteAttribute`/`PrimalSubstituteOfAttribute`, IR: `OpPrimalSubstituteDecoration`), that can be used to provide a reference implementation for the auto-diff pass. Example: ```C diff --git a/docs/design/autodiff/ir-overview.md b/docs/design/autodiff/ir-overview.md index a6b3ec2070..83391e27f2 100644 --- a/docs/design/autodiff/ir-overview.md +++ b/docs/design/autodiff/ir-overview.md @@ -17,7 +17,7 @@ At this step, there are 2 other variants that can appear `IRBackwardDifferentiat 4. This process from (1.) is run in a loop. This is because we can have nested differentiation requests such as `IRForwardDifferentiate(IRBackwardDifferentiate(a : IRFuncType))`. The inner request is processed in the first pass, and the outer request gets processed in the next pass. ## Auto-Diff Passes for `IRForwardDifferentiate` -For forward-mode derivatives, we only require a single pass implemented wholly in `ForwardDiffTranscriber`. 
This implementes the linearization algorithm, which roughly follows this logic: +For forward-mode derivatives, we only require a single pass implemented wholly in `ForwardDiffTranscriber`. This implements the linearization algorithm, which roughly follows this logic: 1. Create a clone of the original function 2. Perform pre-autodiff transformations, the most @@ -357,7 +357,7 @@ The unzipping pass uses the decorations from the linearization step to figure ou The separation process uses the following high-level logic: 1. Create two clones of all the blocks in the provided function (one for primal insts, one for differential insts), and hold a mapping between each original (mixed) block to each primal and differential block. The return statement of the current final block is **removed**. 2. Process each instruction of each block: instructions marked as **primal** are moved to the corresponding **primal block**, instructions marked **differential** are moved to the corresponding **differential block**. -3. Instructions marked **mixed** need op-specific handling, and so are dispatched to the appropriate splitting function. For instance, block parameters that are holding differential-pair values are split into parameters for holding primal and differential values (the exception is function parameters, which are not affected). Simlarly, `IRVar`s, `IRTerminatorInst`s (control-flow) and `IRCall`s are all split into multiple insts. +3. Instructions marked **mixed** need op-specific handling, and so are dispatched to the appropriate splitting function. For instance, block parameters that are holding differential-pair values are split into parameters for holding primal and differential values (the exception is function parameters, which are not affected). Similarly, `IRVar`s, `IRTerminatorInst`s (control-flow) and `IRCall`s are all split into multiple insts. 4. 
Except for `IRReturn`, all other control-flow insts are effectively duplicated so that the control-flow between the primal blocks and differential blocks both follow the original blocks' control-flow. The main difference is that PHI arguments are split (primal blocks carry primal values in their PHI arguments, and differential blocks carry diff values) between the two. Note that condition values (i.e. booleans) are used by both the primal and differential control-flow insts. However, since booleans are always primal values, they are always defined in the primal blocks. @@ -522,7 +522,7 @@ We synthesize a CFG that satisfies this property through the following steps: %da_rev = OpAdd %da_rev_1 %da_rev_2 : %float ``` - Derivative accumulation is acheived through two ways: + Derivative accumulation is achieved through two ways: **Within** a block, we keep a list all the reverse derivative insts for each inst and only **materialize** the total derivative when it is required as an operand. This is the most efficient way to do this, because we can apply certain optimizations for composite types (derivative of an array element, vector element, struct field, etc..). @@ -756,12 +756,12 @@ After AD passes, this results in the following code: { /*...*/ } ``` -4. Construct the reverse control-flow (`reveseCFGRegion()`) by going through the reference forward-mode blocks, and cloning the control-flow onto the reverse-mode blocks, but in reverse. This is acheived by running `reverseCFGRegion()` recursively on each sub-region, where a *region* is defined as a set of blocks with a single entry block and a single exit block. This definition of a region only works because we normalized the CFG into this form. +4. Construct the reverse control-flow (`reverseCFGRegion()`) by going through the reference forward-mode blocks, and cloning the control-flow onto the reverse-mode blocks, but in reverse.
This is achieved by running `reverseCFGRegion()` recursively on each sub-region, where a *region* is defined as a set of blocks with a single entry block and a single exit block. This definition of a region only works because we normalized the CFG into this form. The reversal logic follows these general rules: 1. **Unconditional Branch**: For an unconditional branch from `A->B` we simply have to map the reverse version of B with that of A. i.e. `rev[B] -> rev[A]` 2. **If-Else**: For an if-else of the form `A->[true = T->...->T_last->M, false = F->...->F_last->M]`, we construct `rev[M]->[true = rev[T_last]->...->rev[T_last]->rev[A], false = rev[F_last]->...->rev[F]->rev[A]]`. That is, we reverse each sub-region, and start from the merge block and end at the split block. - Note that we need to identify `T_last` and `F_last` i.e. the last two blocks in the true and false regions. We make the last block in the region an additional return value of `reverseCFGRegion()`, so that when reversing the true and false sub-regions, we also get the relevent last block as an additional output. Also note that additional empty blocks may be inserted to carry derivatives of the phi arguments, but this does not alter the control-flow. + Note that we need to identify `T_last` and `F_last` i.e. the last two blocks in the true and false regions. We make the last block in the region an additional return value of `reverseCFGRegion()`, so that when reversing the true and false sub-regions, we also get the relevant last block as an additional output. Also note that additional empty blocks may be inserted to carry derivatives of the phi arguments, but this does not alter the control-flow. 3. **Switch-case**: Proceeds in exactly the same way as `if-else` reversal, but with multiple cases instead of just 2. 4. **Loop**: After normalization, all (non-trivial) loops are of the form: `A->C->[true = T->...->T_last->C, false=B->...->M]`. 
We reverse this loop into `rev[M]->...rev[B]->rev[C]->[true=rev[T_last]->...->rev[T]->rev[C], false=rev[A]]`. The actual reversal logic also handles some corner cases by inserting additional blank blocks to avoid situations where regions may share the same merge block. @@ -975,12 +975,12 @@ When storing values this way, we must consider that instructions within loops ca **Indexed Region Processing:** In order to be able to allocate the right array and use the right indices, we need information about which blocks are part of which loop (and loops can be nested, so blocks can be part of multiple loops). To do this, we run a pre-processing step that maps all blocks to all relevant loop regions, the corresponding index variables and the inferred iteration limits (maximum times a loop can run). Note that if an instruction appears in a nested block, we create a multi-dimensional array and use multiple indices. - **Loop State Variables:** Certain variables cannot be classified as recompute. Major examples are loop state variables which are defined as variables that are read from and written to within the loop. In practice, they appear as phi-variables on the first loop block after SSA simplification. Their uses _must_ be classifed as 'store', because recomputing them requires duplicating the primal loop within the differential loop. This is because the differential loop runs backwards so the state of a primal variable at loop index $N$ cannot be recomputed when the loop is running backwards ($N+1 \to N \to N-1$), and involves running the primal loop up to $N$ times within the current iteration of the differential loop. In terms of complexity, this turns an $O(N)$ loop into an $O(N^2)$ loop, and so we disallow this. + **Loop State Variables:** Certain variables cannot be classified as recompute. Major examples are loop state variables which are defined as variables that are read from and written to within the loop. 
In practice, they appear as phi-variables on the first loop block after SSA simplification. Their uses _must_ be classified as 'store', because recomputing them requires duplicating the primal loop within the differential loop. This is because the differential loop runs backwards so the state of a primal variable at loop index $N$ cannot be recomputed when the loop is running backwards ($N+1 \to N \to N-1$), and involves running the primal loop up to $N$ times within the current iteration of the differential loop. In terms of complexity, this turns an $O(N)$ loop into an $O(N^2)$ loop, and so we disallow this. It is possible that the resulting $O(N^2)$ loop may end up being faster in practice due to reduced memory requirements, but we currently lack the infrastructure to robustly allow such loop duplication while keeping the user informed of the potentially drastic complexity issues. 3. **Process 'Recompute' insts:** Insert a copy of the primal instruction into a corresponding 'recomputation' block that is inserted into the differential control-flow so that it dominates the use-site. - **Insertion of Recompute Blocks:** In order to accomodate recomputation, we first preprocess the function, by going through each **breakable (i.e. loop) region** in the differential blocks, looking up the corresponding **primal region** and cloning all the primal blocks into the beginning of the differential region. Note that this cloning process does not actually clone the instructions within each block, only the control-flow (i.e. terminator) insts. This way, there is a 1:1 mapping between the primal blocks and the newly created **recompute blocks**, This way, if we decide to 'recompute' an instruction, we can simply clone it into the corresponding recompute block, and we have a guarantee that the definition and use-site are within the same loop scope, and that the definition comes before the use. 
+ **Insertion of Recompute Blocks:** In order to accommodate recomputation, we first preprocess the function, by going through each **breakable (i.e. loop) region** in the differential blocks, looking up the corresponding **primal region** and cloning all the primal blocks into the beginning of the differential region. Note that this cloning process does not actually clone the instructions within each block, only the control-flow (i.e. terminator) insts. This way, there is a 1:1 mapping between the primal blocks and the newly created **recompute blocks**. This way, if we decide to 'recompute' an instruction, we can simply clone it into the corresponding recompute block, and we have a guarantee that the definition and use-site are within the same loop scope, and that the definition comes before the use. **Legalizing Accesses from Branches:** Our per-loop-region recompute blocks ensure that the recomputed inst is always within the same region as its uses, but it can still be out-of-scope if it is defined within a branch (i.e. if-else). We therefore still run a light-weight hoisting pass that detects these uses, inserts an `IRVar` at the immediate dominator of the def and use, and inserts loads and stores accordingly. Since they occur within the same loop region, there is no need to worry about arrays/indices (unlike the 'store' case). @@ -1363,7 +1363,7 @@ struct f_Intermediates }; -// After extraction: primal context funtion +// After extraction: primal context function float s_primal_ctx_f(float x, out f_Intermediates ctx) { // @@ -1459,4 +1459,4 @@ void outer_rev(DifferentialPair dpx, float d_output) dpx = _dpx; } -``` \ No newline at end of file +``` diff --git a/docs/design/autodiff/types.md b/docs/design/autodiff/types.md index 2655b5c252..3860f0dfb4 100644 --- a/docs/design/autodiff/types.md +++ b/docs/design/autodiff/types.md @@ -74,9 +74,9 @@ T.Differential dmul(S s, T.Differential a) 5.
During auto-diff, the compiler can sometimes synthesize new aggregate types. The most common case is the intermediate context type (`kIROp_BackwardDerivativeIntermediateContextType`), which is lowered into a standard struct once the auto-diff pass is complete. It is important to synthesize the `IDifferentiable` conformance for such types since they may be further differentiated (through higher-order differentiation). This implementation is contained in `fillDifferentialTypeImplementationForStruct(...)` and is roughly analogous to the AST-side synthesis. ### Differentiable Type Dictionaries -During auto-diff, the IR passes frequently need to perform lookups to check if an `IRType` is differentiable, and retreive references to the corresponding `IDifferentiable` methods. These lookups also need to work on generic parameters (that are defined inside generic containers), and existential types that are interface-typed parameters. +During auto-diff, the IR passes frequently need to perform lookups to check if an `IRType` is differentiable, and retrieve references to the corresponding `IDifferentiable` methods. These lookups also need to work on generic parameters (that are defined inside generic containers), and existential types that are interface-typed parameters. -To accomodate this range of different type systems, Slang uses a type dictionary system that associates a dictionary of relevant types with each function. This works in the following way: +To accommodate this range of different type systems, Slang uses a type dictionary system that associates a dictionary of relevant types with each function. This works in the following way: 1. When `CheckTerm()` is called on an expression within a function that is marked differentiable (`[Differentiable]`), we check if the resolved type conforms to `IDifferentiable`. If so, we add this type to the dictionary along with the witness to its differentiability. 
The dictionary is currently located on `DifferentiableAttribute` that corresponds to the `[Differentiable]` modifier. 2. When lowering to IR, we create a `DifferentiableTypeDictionaryDecoration` which holds the IR versions of all the types in the dictionary as well as a reference to their `IDifferentiable` witness tables. diff --git a/docs/design/capabilities.md b/docs/design/capabilities.md index a4f4fa3965..b4bd4c099a 100644 --- a/docs/design/capabilities.md +++ b/docs/design/capabilities.md @@ -31,7 +31,7 @@ struct Texture2D { ... - // Implicit-graident sampling operation. + // Implicit-gradient sampling operation. [availableFor(implicit_gradient_texture_fetches)] float4 Sample(SamplerState s, float2 uv); } @@ -54,7 +54,7 @@ capability fragment : implicit_gradient_texture_fetches; Here we've said that whenever the `fragment` capability is available, we can safely assume that the `implicit_gradient_texture_fetches` capability is available (but not vice versa). -Given even a rudientary tool like that, we can start to build up capabilities that relate closely to the "profiles" in things like D3D: +Given even a rudimentary tool like that, we can start to build up capabilities that relate closely to the "profiles" in things like D3D: ``` capability d3d; @@ -77,12 +77,12 @@ capability opengl : khronos; Here we are saying that `sm_5_1` supports everything `sm_5_0` supports, and potentially more. We are saying that `d3d12` supports `sm_6_0` but maybe not, e.g., `sm_6_3`. We are expressing that fact that having a `glsl_*` capability means you are on some Khronos API target, but that it doesn't specify which one. -(The extact details of these declarations obviously aren't the point; getting a good hierarchy of capabilites will take time.) +(The exact details of these declarations obviously aren't the point; getting a good hierarchy of capabilities will take time.) 
Capability Composition ---------------------- -Sometimes we'll want to give a distinct name to a specific combination of capabilties, but not say that it supports anything new: +Sometimes we'll want to give a distinct name to a specific combination of capabilities, but not say that it supports anything new: ``` capability ps_5_1 = sm_5_1 & fragment; @@ -129,7 +129,7 @@ For a given function definition `F`, the front end will scan its body and see wh If `F` doesn't have an `[availableFor(...)]` attribute, then we can derive its *effective* `[availableFor(...)]` capability as `R` (this probably needs to be expressed as an iterative dataflow problem over the call graph, to handle cycles). -If `F` *does* have one or more `[availabelFor(...)]` clauses that amount to a declared capability `C` (again in disjunctive normal form), then we can check that `C` implies `R` and error out if it is not the case. +If `F` *does* have one or more `[availableFor(...)]` clauses that amount to a declared capability `C` (again in disjunctive normal form), then we can check that `C` implies `R` and error out if it is not the case. A reasonable implementation would track which calls introduced which requirements, and be able to explain *why* `C` does not capture the stated requirements. For a shader entry point, we should check it as if it had an `[availableFor(...)]` that is the OR of all the specified target profiles (e.g., `sm_5_0 | glsl_450 | ...`) ANDed with the specified stage (e.g., `fragment`). @@ -152,7 +152,7 @@ It should be possible to define multiple versions of a function, having differen ``` For front-end checking, these should be treated as if they were a single definition of `myFunc` with an ORed capability (e.g., `vulkan | d3d12`). -Overload resoultion will pick the "best" candidate at a call site based *only* on the signatures of the function (note that this differs greatly from how profile-specific function overloading works in Cg). 
+Overload resolution will pick the "best" candidate at a call site based *only* on the signatures of the function (note that this differs greatly from how profile-specific function overloading works in Cg). The front-end will then generate initial IR code for each definition of `myFunc`. Each of the IR functions will have the *same* mangled name, but different bodies, and each will have appropriate IR decorations to indicate the capabilities it requires. @@ -213,7 +213,7 @@ Certain compositions of capabilities make no sense. If a user declared a functio Knowing that certain capabilities are disjoint can also help improve the overall user experience. If a function requires `(vulkan & extensionA) | (d3d12 & featureb)` and we know we are compiling for `vulkan` we should be able to give the user a pointed error message saying they need to ask for `extensionA`, because adding `featureB` isn't going to do any good. -As a first-pass model we could have a notion of `abstract` capabilities that are used to model the root of hierarcies of disjoint capabilities: +As a first-pass model we could have a notion of `abstract` capabilities that are used to model the root of hierarchies of disjoint capabilities: ``` abstract capability api; diff --git a/docs/design/casting.md b/docs/design/casting.md index 80c1f149fc..6eafea1acc 100644 --- a/docs/design/casting.md +++ b/docs/design/casting.md @@ -146,9 +146,5 @@ The following code shows the change in behavior of 'as' is based on the source * SLANG_ASSERT(as(exprType) == nullptr); // dynamicCast is always the same object returned, so must match - SLANG_ASSERT(dynamcCast(exprType) == exprType); ``` - - - - \ No newline at end of file diff --git a/docs/design/coding-conventions.md b/docs/design/coding-conventions.md index 4223bee93f..bc540783aa 100644 --- a/docs/design/coding-conventions.md +++ b/docs/design/coding-conventions.md @@ -237,7 +237,7 @@ enum Note that the type name reflects the plural case, while the cases that represent 
individual bits are named with a singular prefix. -In public APIs, all `enum`s should use the style of separating the type defintion from the `enum`, and all cases should use `SCREAMING_SNAKE_CASE`: +In public APIs, all `enum`s should use the style of separating the type definition from the `enum`, and all cases should use `SCREAMING_SNAKE_CASE`: ```c++ typedef unsigned int SlangAxes; diff --git a/docs/design/decl-refs.md b/docs/design/decl-refs.md index 34b74a6f40..5c1958694e 100644 --- a/docs/design/decl-refs.md +++ b/docs/design/decl-refs.md @@ -25,7 +25,7 @@ Why do we need `DeclRef`s? -------------------------- In a compiler for a simple language, we might represent a reference to a declaration as simply a pointer to the AST node for the declaration, or some kind of handle/ID that references that AST node. -A reprsentation like that will work in simple cases, for example: +A representation like that will work in simple cases, for example: ```hlsl struct Cell { int value }; diff --git a/docs/design/existential-types.md b/docs/design/existential-types.md index 06e2613e3b..0f34690518 100644 --- a/docs/design/existential-types.md +++ b/docs/design/existential-types.md @@ -194,7 +194,7 @@ When dealing with a value type, though, we have to deal with things like making ``` interface IWritable { [mutating] void write(int val); } -stuct Cell : IWritable { int data; void write(int val) { data = val; } } +struct Cell : IWritable { int data; void write(int val) { data = val; } } T copyAndClobber(T obj) { diff --git a/docs/design/interfaces.md b/docs/design/interfaces.md index c0e284f59b..b0c4843274 100644 --- a/docs/design/interfaces.md +++ b/docs/design/interfaces.md @@ -13,7 +13,7 @@ Introduction The basic problem here is not unique to shader programming: you want to write code that accomplished one task, while abstracting over how to accomplish another task. 
As an example, we might want to write code to integrate incident radiance over a list of lights, while not concerning ourself with how to evaluate a reflectance function at each of those lights. -If we were doing this task on a CPU, and performance wasn't critical, we could probably handle this with higher-order functions or an equivalent mechansim like function pointers: +If we were doing this task on a CPU, and performance wasn't critical, we could probably handle this with higher-order functions or an equivalent mechanism like function pointers: float4 integrateLighting( Light[] lights, @@ -39,7 +39,7 @@ Depending on the scenario, we might be able to generate statically specialized c } Current shading languages support neither higher-order functions nor templates/generics, so neither of these options is viable. -Instead practicioners typically use preprocessor techniques to either stich together the final code, or to substitute in different function/type definitions to make a definition like `integrateLighting` reusable. +Instead practitioners typically use preprocessor techniques to either stitch together the final code, or to substitute in different function/type definitions to make a definition like `integrateLighting` reusable. These ad hoc approaches actually work well in practice; we aren't proposing to replace them *just* to make code abstractly "cleaner." Rather, we've found that the ad hoc approaches end up interacting poorly with the resource binding model in modern APIs, so that *something* less ad hoc is required to achieve our performance goals. @@ -48,7 +48,7 @@ At that point, we might as well ensure that the mechanism we introduce is also a Overview -------- -The baisc idea for our approach is as follows: +The basic idea for our approach is as follows: - Start with the general *semantics* of a generic-based ("template") approach @@ -63,7 +63,7 @@ Interfaces ---------- An **interface** in Slang is akin to a `protocol` in Swift or a `trait` in Rust. 
-The choice of the `interface` keyword is to hilight the overlap with the conceptually similar construct that appeared in Cg, and then later in HLSL. +The choice of the `interface` keyword is to highlight the overlap with the conceptually similar construct that appeared in Cg, and then later in HLSL. ### Declaring an interface @@ -263,7 +263,7 @@ Then what should `BRDFParams` be? The two-parameter or six-parameter case? An **associated type** is a concept that solves exactly this problem. We don't care *what* the concrete type of `BRDFParams` is, so long as *every* implementation of `Material` has one. -The exact `BRDFParams` type can be different for each implementation of `Material`; the type is *assocaited* with a particular implementation. +The exact `BRDFParams` type can be different for each implementation of `Material`; the type is *associated* with a particular implementation. We will crib our syntax for this entirely from Swift, where it is verbose but explicit: @@ -276,7 +276,7 @@ We will crib our syntax for this entirely from Swift, where it is verbose but ex float3 evaluateBRDF(BRDFParams param, float3 wi, float3 wo); } -In this example we've added an assocaited type requirement so that every implementation of `Material` must supply a type named `BRDFParams` as a member. +In this example we've added an associated type requirement so that every implementation of `Material` must supply a type named `BRDFParams` as a member. We've also added a requirement that is a function to evaluate the BRDF given its parameters and incoming/outgoing directions. Using this declaration one can now define a generic function that works on any material: @@ -300,7 +300,7 @@ Some quick notes: - The use of `associatedtype` (for associated types) and `typealias` (for `typedef`-like definitions) as distinct keywords in Swift was well motivated by their experience (they used to use `typealias` for both). I would avoid having the two cases be syntactically identical. 
-- Swift has a pretty involved inference system where a type doesn't actually need to explicitly provide a type member with the chosen name. Instead, if you have a required method that takes or returns the assocaited type, then the compiler can infer what the type is by looking at the signature of the methods that meet other requirements. This is a complex and magical feature, and we shouldn't try to duplicate it. +- Swift has a pretty involved inference system where a type doesn't actually need to explicitly provide a type member with the chosen name. Instead, if you have a required method that takes or returns the associated type, then the compiler can infer what the type is by looking at the signature of the methods that meet other requirements. This is a complex and magical feature, and we shouldn't try to duplicate it. - Both Rust and Swift call this an "associated type." They are related to "virtual types" in things like Scala (which are in turn related to virtual classes in beta/gbeta). There are similar ideas that arise in Haskell-like languages with type classes (IIRC, the term "functional dependencies" is relevant). @@ -308,7 +308,7 @@ Some quick notes: I want to point out a few alternatives to the `Material` design above, just to show that associated types seem to be an elegant solution compared to the alternatives. -First, note that we could break `Material` into two interfaces, so long as we are allowed to place type constraints on assocaited types: +First, note that we could break `Material` into two interfaces, so long as we are allowed to place type constraints on associated types: interface BRDF { @@ -412,7 +412,7 @@ can in principle be desugared into: } with particular loss in what can be expressed. -The same desugaring appraoch should apply to global-scope functions that want to return an existential type (just with a global `typealias` instead of an `associatedtype`). 
+The same desugaring approach should apply to global-scope functions that want to return an existential type (just with a global `typealias` instead of an `associatedtype`). It might be inconvenient for the user to have to explicitly write the type-level expression that yields the result type (consider cases where C++ template metaprogrammers would use `auto` as a result type), but there is really no added power. @@ -434,12 +434,12 @@ The intent seems to be clear if we instead write: We could consider the latter to be sugar for the former, and allow users to write in familiar syntax akin to what ws already supported in Cg. -We'd have to be careful with such sugar, though, because there is a real and menaingful difference between saying: +We'd have to be careful with such sugar, though, because there is a real and meaningful difference between saying: - "`material` has type `Material` which is an interface type" - "`material` has type `M` where `M` implements `Material`" -In particular, if we start to work with assocaited types: +In particular, if we start to work with associated types: let b = material.evaluatePattern(...); diff --git a/docs/design/ir.md b/docs/design/ir.md index c7f4ffeb22..ba156c2f6c 100644 --- a/docs/design/ir.md +++ b/docs/design/ir.md @@ -15,7 +15,7 @@ We will start by enumerating these goals (and related non-goals) explicitly so t * As a particular case of analysis and optimization, it should be possible to validate flow-dependent properties of an input function/program (e.g., whether an `[unroll]` loop is actually unrollable) using the IR, and emit meaningful error messages that reference the AST-level names/locations of constructs involved in an error. -* It should be posible to compile modules to the IR separately and then "link" them in a way that depends only on IR-level (not AST-level) constructs. 
We want to allow changing implementation details of a module without forcing a re-compile of IR code using that module (what counts as "implementation details") is negotiable. +* It should be possible to compile modules to the IR separately and then "link" them in a way that depends only on IR-level (not AST-level) constructs. We want to allow changing implementation details of a module without forcing a re-compile of IR code using that module (what counts as "implementation details") is negotiable. * There should be a way to serialize IR modules in a round-trip fashion preserving all of the structure. As a long-term goal, the serialized format should provide stability across compiler versions (working more as an IL than an IR) @@ -81,7 +81,7 @@ The only exception to this rule is instructions that represent literal constants The in-memory encoding places a few more restrictions on top of this so that, e.g., currently an instruction can either have operands of children, but not both. -Because everything that could be used as an operand is also an instruction, the operands of an instruction are stored in a highly uniform way as a contiguous array of `IRUse` values (even the type is continguous with this array, so that it can be treated as an additional operand when required). +Because everything that could be used as an operand is also an instruction, the operands of an instruction are stored in a highly uniform way as a contiguous array of `IRUse` values (even the type is contiguous with this array, so that it can be treated as an additional operand when required). The `IRUse` type maintains explicit links for use-def information, currently in a slightly bloated fashion (there are well-known techniques for reducing the size of this information). 
### A Class Hierarchy Mirrored in Opcodes @@ -112,7 +112,7 @@ The idea doesn't really start in Swift, but rather in the existing observation t Like Swift, we do not use an explicit CPS representation, but instead find a middle ground of a traditional SSA IR where instead of phi instructions basic blocks have parameters. The first N instructions in a Slang basic block are its parameters, each of which is an `IRParam` instruction. -A block that would have had N phi instrutions now has N parameters, but the parameters do not have operands. +A block that would have had N phi instructions now has N parameters, but the parameters do not have operands. Instead, a branch instruction that targets that block will have N *arguments* to match the parameters, representing the values to be assigned to the parameters when this control-flow edge is taken. This encoding is equivalent in what it represents to traditional phi instructions, but nicely solves the problems outlined above: @@ -123,7 +123,7 @@ This encoding is equivalent in what it represents to traditional phi instruction - There is no special work required to track which phi operands come from which predecessor block, since the operands are attached to the terminator instruction of the predecessor block itself. There is no need to update phi instructions after a CFG change that might affect the predecessor list of a block. The trade-off is that any change in the *number* of parameters of a block now requires changes to the terminator of each predecessor, but that is a less common change (isolated to passes that can introduce or eliminate block parameters/phis). -- It it much more clear how to give an operational semantics to a "branch with arguments" instead of phi instructions: compute the target block, copy the argumenst to temporary storage (because of the simultaneity requirement), and then copy the temporaries over the parameters of the target block. 
+It is much more clear how to give an operational semantics to a "branch with arguments" instead of phi instructions: compute the target block, copy the arguments to temporary storage (because of the simultaneity requirement), and then copy the temporaries over the parameters of the target block. The main caveat of this representation is that it requires branch instructions to have room for arguments to the target block. For an ordinary unconditional branch this is pretty easy: we just put a variable number of arguments after the operand for the target block. For branch instructions like a two-way conditional, we might need to encode two argument lists - one for each target block - and an N-way `switch` branch only gets more complicated. @@ -138,7 +138,7 @@ This constraint could be lifted at some point, but it is important to note that A traditional SSA IR represents a function as a bunch of basic blocks of instructions, where each block ends in a *terminator* instruction. Terminators are instructions that can branch to another block, and are only allowed at the end of a block. The potential targets of a terminator determine the *successors* of the block where it appears, and contribute to the *predecessors* of any target block. -The succesor-to-predecessor edges form a graph over the basic blocks called the control-flow graph (CFG). +The successor-to-predecessor edges form a graph over the basic blocks called the control-flow graph (CFG). A simple representation of a function would store the CFG explicitly as a graph data structure, but in that case the data structure would need to be updated whenever a change is made to the terminator instruction of a branch in a way that might change the successor/predecessor relationship. 
diff --git a/docs/design/overview.md b/docs/design/overview.md index c81853f1a9..24c3160386 100644 --- a/docs/design/overview.md +++ b/docs/design/overview.md @@ -11,7 +11,7 @@ Compilation is always performed in the context of a *compile request*, which bun Inside the code, there is a type `CompileRequest` to represent this. The user specifies some number of *translation units* (represented in the code as a `TranslationUnitRequest`) which comprise some number of *sources* (files or strings). -HLSL follows the traditional C model where a "translaiton unit" is more or less synonymous with a source file, so when compiling HLSL code the command-line `slangc` will treat each source file as its own translation unit. +HLSL follows the traditional C model where a "translation unit" is more or less synonymous with a source file, so when compiling HLSL code the command-line `slangc` will treat each source file as its own translation unit. For Slang code, the command-line tool will by default put all source files into a single translation unit (so that they represent a shared namespace0). The user can also specify some number of *entry points* in each translation unit (`EntryPointRequest`), which combines the name of a function to compile with the pipeline stage to compile for. @@ -23,7 +23,7 @@ It might not be immediately clear why we have such fine-grained concepts as this The "Front End" --------------- -The job of the Slang front-end is to turn textual source code into a combination of code in our custom intermediate represnetation (IR) plus layout and binding information for shader parameters. +The job of the Slang front-end is to turn textual source code into a combination of code in our custom intermediate representation (IR) plus layout and binding information for shader parameters. 
### Lexing @@ -60,7 +60,7 @@ The parser (`Parser` in `parser.{h,cpp}`) is mostly a straightforward recursive- Because the input is already tokenized before we start, we can use arbitrary lookahead, although we seldom look ahead more than one token. Traditionally, parsing of C-like languages requires context-sensitive parsing techniques to distinguish types from values, and deal with stuff like the C++ "most vexing parse." -Slang instead uses heuristic approaches: for example, when we encouter an `<` after an identifier, we first try parsing a generic argument list with a closing `>` and then look at the next token to determine if this looks like a generic application (in which case we continue from there) or not (in which case we backtrack). +Slang instead uses heuristic approaches: for example, when we encounter an `<` after an identifier, we first try parsing a generic argument list with a closing `>` and then look at the next token to determine if this looks like a generic application (in which case we continue from there) or not (in which case we backtrack). There are still some cases where we use lookup in the current environment to see if something is a type or a value, but officially we strive to support out-of-order declarations like most modern languages. In order to achieve that goal we will eventually move to a model where we parse the bodies of declarations and functions in a later pass, after we have resolved names in the global scope. @@ -97,7 +97,7 @@ An expression that ends up referring to a type will have a `TypeType` as its typ The most complicated thing about semantic checking is that we strive to support out-of-order declarations, which means we may need to check a function declaration later in the file before checking a function body early in the file. In turn, that function declaration might depend on a reference to a nested type declared somewhere else, etc. 
-We currently solve this issue by doing some amount of on-demand checking; when we have a reference to a function declaration and we need to know its type, we will first check if the function has been through semantic checking yet, and if not we will go ahead and recurisvely type check that function before we proceed. +We currently solve this issue by doing some amount of on-demand checking; when we have a reference to a function declaration and we need to know its type, we will first check if the function has been through semantic checking yet, and if not we will go ahead and recursively type check that function before we proceed. This kind of unfounded recursion can lead to real problems (especially when the user might write code with circular dependencies), so we have made some attempts to more strictly "phase" the semantic checking, but those efforts have not yet been done systematically. @@ -105,7 +105,7 @@ When code involved generics and/or interfaces, the semantic checking phase is re ### Lowering and Mandatory Optimizations -The lowering step (`lower-to-ir.{h,cpp}`) is responsible for converting semantically valid ASTs into an intermediate representation that is more suitable for specialization, optimization, and code generaton. +The lowering step (`lower-to-ir.{h,cpp}`) is responsible for converting semantically valid ASTs into an intermediate representation that is more suitable for specialization, optimization, and code generation. The main thing that happens at this step is that a lot of the "sugar" in a high-level language gets baked out. For example: - A "member function" in a type will turn into an ordinary function that takes an initial `this` parameter @@ -116,29 +116,29 @@ The main thing that happens at this step is that a lot of the "sugar" in a high- The lowering step is done once for each translation unit, and like semantic checking it does *not* depend on any particular compilation target. 
During this step we attach "mangled" names to any imported or exported symbols, so that each function overload, etc. has a unique name. -After IR code has been generated for a translation unit (now called a "module") we next perform a set of "mandatory" optimizations, including SSA promotion and simple copy propagation and elmination of dead control-flow paths. +After IR code has been generated for a translation unit (now called a "module") we next perform a set of "mandatory" optimizations, including SSA promotion and simple copy propagation and elimination of dead control-flow paths. These optimizations are not primarily motivated by a desire to speed up code, but rather to ensure that certain "obvious" simplifications have been performed before the next step of validation. After the IR has been "optimized" we perform certain validation/checking tasks that would have been difficult or impossible to perform on the AST. For example, we can validate that control flow never reached the end of a non-`void` function, and issue an error otherwise. 
There are other validation tasks that can/should be performed at this step, although not all of them are currently implemented: -- We should check that any `[unroll]` loops can actually be unrolled, by ensuring tha their termination conditions can be resolved to a compile-time constant (even if we don't know the constant yet) +- We should check that any `[unroll]` loops can actually be unrolled, by ensuring that their termination conditions can be resolved to a compile-time constant (even if we don't know the constant yet) - We should check that any resource types are being used in ways that can be statically resolved (e.g., that the code never conditionally computes a resource to reference), since this is a requirement for all our current targets -- We should check that the operands to any operation that requires a compile-time constant (e.g., the texel offset argument to certain `Sample()` calls) are passed values that end up being compile-time cosntants +- We should check that the operands to any operation that requires a compile-time constant (e.g., the texel offset argument to certain `Sample()` calls) are passed values that end up being compile-time constants The goal is to eliminate any possible sources of failure in low-level code generation, without needing to have a global view of all the code in a program. Any error conditions we have to push off until later starts to limit the value of our separate compilation support. ### Parameter Binding and Type Layout -The next phase of parameter binding (`parameter-binding.{h,cpp}`) is independednt of IR generation, and proceeds based on the AST that came out of semantic checking. +The next phase of parameter binding (`parameter-binding.{h,cpp}`) is independent of IR generation, and proceeds based on the AST that came out of semantic checking. Parameter binding is the task of figuring out what locations/bindings/offsets should be given to all shader parameters referenced by the user's code. 
Parameter binding is done once for each target (because, e.g., Vulkan may bind parameters differently than D3D12), and it is done for the whole compile request (all translation units) rather than one at a time. -This is because when users compile something like HLSL vertex and fragment shaders in distinct translation units, they will often share the "same" parameter via a header, and we need to ensure that it gets just one locaton. +This is because when users compile something like HLSL vertex and fragment shaders in distinct translation units, they will often share the "same" parameter via a header, and we need to ensure that it gets just one location. At a high level, parameter binding starts by computing the *type layout* of each shader parameter. A type layout describes the amount of registers/bindings/bytes/etc. that a type consumes, and also encodes the information needed to compute offsets/registers for individual `struct` fields or array elements. @@ -190,7 +190,7 @@ This step is where we can select between, say, a built-in definition of the `sat ### API Legalization -If we are targetting a GLSL-based platform, we need to translate "varying" shader entry point parameters into global variables used for cross-stage data passing. +If we are targeting a GLSL-based platform, we need to translate "varying" shader entry point parameters into global variables used for cross-stage data passing. We also need to translate any "system value" semantics into uses of the special built-in `gl_*` variables. We currently handle this kind of API-specific legalization quite early in the process, performing it right after linking. @@ -208,7 +208,7 @@ At the end of specialization, we should have code that makes no use of user-defi ### Type Legalization While HLSL and Slang allow a single `struct` type to contain both "ordinary" data like a `float3` and "resources" like a `Texture2D`, the rules for GLSL and SPIR-V are more restrictive. 
-Ther are some additional wrinkles that arise for such "mixed" types, so we prefer to always "legalize" the types in the users code by replacing an aggregate type like: +There are some additional wrinkles that arise for such "mixed" types, so we prefer to always "legalize" the types in the users code by replacing an aggregate type like: ```hlsl struct Material { float4 baseColor; Texture2D detailMap; }; @@ -230,7 +230,7 @@ Changing the "shape" of a type like this (so that a single variable becomes more We dont' currently apply many other optimizations on the IR code in the back-end, under the assumption that the lower-level compilers below Slang will do some of the "heavy lifting." -That said, there are certain optimizations that Slang must do eventually, for semantic completeness. One of the most important examples of these is implementing the sematncis of the `[unroll]` attribute, since we can't always rely on downstream compilers to have a capable unrolling implementation. +That said, there are certain optimizations that Slang must do eventually, for semantic completeness. One of the most important examples of these is implementing the semantics of the `[unroll]` attribute, since we can't always rely on downstream compilers to have a capable unrolling implementation. We expect that over time it will be valuable for Slang to support a wider array of optimization passes, as long as they are ones that are considered "safe" to do above the driver interface, because they won't interfere with downstream optimization opportunities. diff --git a/docs/design/semantic-checking.md b/docs/design/semantic-checking.md index 0617aa0ab2..10ddd51426 100644 --- a/docs/design/semantic-checking.md +++ b/docs/design/semantic-checking.md @@ -22,7 +22,7 @@ Checking Terms ### Some Terminology for Terms -We use the word "term" to refer genericaly to something that can be evaluated to produce a result, but where we do not yet know if the result will be a type or a value. 
For example, `Texture2D` might be a term that results in a type, while `main` might be a term that results in a value (of function type), but both start out as a `NameExpr` in the AST. Thus the AST uses the class hierarchy under `Expr` to represent terms, whether they evaluate to values or types. +We use the word "term" to refer generically to something that can be evaluated to produce a result, but where we do not yet know if the result will be a type or a value. For example, `Texture2D` might be a term that results in a type, while `main` might be a term that results in a value (of function type), but both start out as a `NameExpr` in the AST. Thus the AST uses the class hierarchy under `Expr` to represent terms, whether they evaluate to values or types. There is also the `Type` hierarchy, but it is important to understand that `Type` represents types as their logical immutable selves, while `Expr`s that evaluate to types are *type expressions* which can be concretely pointed to in the user's code. Type expressions have source locations, because they represent something the user wrote in their code, while `Type`s don't have singular locations by default. @@ -67,7 +67,7 @@ If we can't reasonably form an expression to return *at all* then we will return These classes are designed to make sure that subsequent code won't crash on them (since we have non-null objects), but to help avoid cascading errors. Some semantic checking logic will detect `ErrorType`s on sub-expressions and skip its own checking logic (e.g., this happens for function overload resolution), producing an `ErrorType` further up. In other cases, expressions with `ErrorType` can be silently consumed. -For example, an errorneous expression is implicitly convertible to *any* type, which means that assignment of an error expression to a local variable will always succeed, regardless of variable's type. 
+For example, an erroneous expression is implicitly convertible to *any* type, which means that assignment of an error expression to a local variable will always succeed, regardless of variable's type. ### Overload Resolution @@ -139,14 +139,14 @@ Checking of declarations is the most complicated and involved part of semantic c Simple approaches to semantic checking of declarations fall into two camps: -1. One can define a total ordering on declarations (usually textual order in the source file) and only allow dependecies to follow that order, so that checking can follow the same order. This is the style of C/C++, which is inherited from the legacy of traditional single-pass compilers. +1. One can define a total ordering on declarations (usually textual order in the source file) and only allow dependencies to follow that order, so that checking can follow the same order. This is the style of C/C++, which is inherited from the legacy of traditional single-pass compilers. 2. One can define a total ordering on *phases* of semantic checking, so that every declaration in the file is checked at phase N before any is checked at phase N+1. E.g., the types of all variables and functions must be determined before any expressions that use those variables/functions can be checked. This is the style of, e.g., Java and C#, which put a premium on defining context-free languages that don't dictate order of declaration. Slang tries to bridge these two worlds: it has inherited features from HLSL that were inspired by C/C++, while it also strives to support out-of-order declarations like Java/C#. -Unsurprisngly, this leads to unique challenges. +Unsurprisingly, this leads to unique challenges. -Supporting out-of-order declarations meeans that there is no simple total order on declarations (we can have mutually recursive function or type declarations), and supporting generics with value parameters means there is no simple total order on phases. 
+Supporting out-of-order declarations means that there is no simple total order on declarations (we can have mutually recursive function or type declarations), and supporting generics with value parameters means there is no simple total order on phases. For that last part observe that: * Resolving an overloaded function call requires knowing the types of the parameters for candidate functions. @@ -191,13 +191,13 @@ As a programmer contributing to the semantic checking infrastructure, the declar Name Lookup ----------- -Lookup is the processing of resolving the contextual meaning of names either in a lexical scope (e.g., the user wrote `foo` in a function body - what does it refer to?) or in the scope of scome type (e.g., the user wrote `obj.foo` for some value `obj` of type `T` - what does it refer to?). +Lookup is the processing of resolving the contextual meaning of names either in a lexical scope (e.g., the user wrote `foo` in a function body - what does it refer to?) or in the scope of some type (e.g., the user wrote `obj.foo` for some value `obj` of type `T` - what does it refer to?). Lookup can be tied to semantic analysis quite deeply. In order to know what a member reference like `obj.foo` refers to, we not only need to know the type of `obj`, but we may also need to know what interfaces that type conforms to (e.g., it might be a type parameter `T` with a constraint `T : IFoo`). In order to support lookup in the presence of our declaration-checking strategy described above, the lookup logic may be passed a `SemanticsVisitor` that it can use to `ensureDecl()` declarations before it relies on their properties. -However, lookup also currently gets used during parsing, and in those cases it may need ot be applied without access to the semantics-checking infrastructure (since we currently separate parsing and semantic analysis). 
+However, lookup also currently gets used during parsing, and in those cases it may need to be applied without access to the semantics-checking infrastructure (since we currently separate parsing and semantic analysis). In those cases a null `SemanticsVisitor` is passed in, and the lookup process will avoid using lookup approaches that rely on derived semantic information. This is fine in practice because the main thing that gets looked up during parsing are names of `SyntaxDecl`s (which are all global) and also global type/function/variable names. @@ -210,7 +210,7 @@ Just like a C/C++ parser, the Slang parser sometimes needs to disambiguate wheth Ideally the way forward is some combination of the following two strategies: -* We should strive to make parsing at the "global scope" fully context-insensitive (e.g., by using similar lookahead heuristics to C#). We are already close to this goal today, but will need to be careful that we do not introduce regressions compared to the old parser (perhaps a "compatiblity" mode for legacy HLSL code is needed?) +* We should strive to make parsing at the "global scope" fully context-insensitive (e.g., by using similar lookahead heuristics to C#). We are already close to this goal today, but will need to be careful that we do not introduce regressions compared to the old parser (perhaps a "compatibility" mode for legacy HLSL code is needed?) * We should delay the parsing of nested scopes (both function and type bodies bracketed with `{}`) until later steps of the compiler. Ideally, parsing of function bodies can be done in a context-sensitive manner that interleaves with semantic checking, closer to the traditional C/C++ model (since we don't care about out-of-order declarations in function bodies). 
diff --git a/docs/design/serialization.md b/docs/design/serialization.md index c05c60ad8b..008fd6da6c 100644 --- a/docs/design/serialization.md +++ b/docs/design/serialization.md @@ -24,7 +24,7 @@ We could imagine a mechanism that saved off each instance, by writing off the ad * If we try to read back on a different machine, with a different pointer size, the object layout will be incompatible * If we try to read back on the same machine where the source is compiled by a different compiler, the object layout might be incompatible (say bool or enum are different size) -* Endianess might be different +* Endianness might be different * Knowing where all the pointers are and what they point to and therefore what to serialize is far from simple. * The alignment of types might be different across different processors and different compilers @@ -304,7 +304,7 @@ Riff Container [Riff](https://en.wikipedia.org/wiki/Resource_Interchange_File_Format) is used as a mechanism to store binary sections. The format allows for a hierarchy of `chunks` that hold binary data. How the data is interpreted depends on the [FOURCC](https://en.wikipedia.org/wiki/FourCC) associated with each chunk. -As previously touched on there are multiple different mechanisms used for serialization. IR serialization, generalized serialization, SourceLoc serialization - there are also other uses, such as serializing of entry point information. Riff is used to combine all of these incompatible binary parts togther such that they can be stored together. +As previously touched on there are multiple different mechanisms used for serialization. IR serialization, generalized serialization, SourceLoc serialization - there are also other uses, such as serializing of entry point information. Riff is used to combine all of these incompatible binary parts together such that they can be stored together. The handling of these riff containers is held within the `SerialContainerUtil` class. 
diff --git a/docs/design/stdlib-intrinsics.md b/docs/design/stdlib-intrinsics.md index 6e86f4c3fb..a9369138db 100644 --- a/docs/design/stdlib-intrinsics.md +++ b/docs/design/stdlib-intrinsics.md @@ -21,7 +21,7 @@ Looking at these files will demonstrate the features in use. Most of the intrinsics and attributes have names that indicate that they are not for normal use. This is typically via a `__` prefix. -The `.meta.slang` files look largely like Slang source files, but their contents can also be generated programatically with C++ code. A section of code can drop into `C++` code if it is proceeded by `${{{{`. The C++ section is closed with a closing `}}}}`. This mechanism is typically used to generate different versions of a similar code sequence. Values from the C++ code can be accessed via the `$()`, where the contents of the brackets specifies something that can be calculated from within the C++ code. +The `.meta.slang` files look largely like Slang source files, but their contents can also be generated programmatically with C++ code. A section of code can drop into `C++` code if it is preceded by `${{{{`. The C++ section is closed with a closing `}}}}`. This mechanism is typically used to generate different versions of a similar code sequence. Values from the C++ code can be accessed via the `$()`, where the contents of the brackets specifies something that can be calculated from within the C++ code. As an example, to produce an an array with values 0 to 9 we could write... diff --git a/docs/faq.md b/docs/faq.md index 5f021d7d09..824d99679c 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -34,7 +34,7 @@ The implementation of Slang has so far focused heavily on the needs of Falcor. ### Won't we all just be using C/C++ for shaders soon? -The great thing about both Vulkan and D3D12 moving to publicly-documented binary intermediate langugaes (SPIR-V and DXIL, respectively) is that there is plenty of room for language innovation on top of these interfaces. 
+The great thing about both Vulkan and D3D12 moving to publicly-documented binary intermediate languages (SPIR-V and DXIL, respectively) is that there is plenty of room for language innovation on top of these interfaces. Having support for writing GPU shaders in a reasonably-complete C/C++ language would be great. We are supportive of efforts in the "C++ for shaders" direction. diff --git a/docs/gfx-user-guide/01-getting-started.md b/docs/gfx-user-guide/01-getting-started.md index 41b9738f68..ae270450ac 100644 --- a/docs/gfx-user-guide/01-getting-started.md +++ b/docs/gfx-user-guide/01-getting-started.md @@ -136,7 +136,7 @@ SLANG_RETURN_ON_FAIL(device->createBufferResource( Creating a Pipeline State --------------------------- -A pipeline state object encapsulates the shader program to execute on the GPU device, as well as other fix function states for graphics rendering. In this example, we will be compiling and runing a simple compute shader written in Slang. To do that we need to create a compute pipeline state from a Slang `IComponentType`. We refer the reader to the (Slang getting started tutorial)[../user-guide/01-getting-started.html] on how to create a Slang `IComponentType` from a shader file. The following source creates a Graphics layer `IPipelineState` object from a shader module represented by a `slang::IComponentType` object: +A pipeline state object encapsulates the shader program to execute on the GPU device, as well as other fixed function states for graphics rendering. In this example, we will be compiling and running a simple compute shader written in Slang. To do that we need to create a compute pipeline state from a Slang `IComponentType`. We refer the reader to the [Slang getting started tutorial](../user-guide/01-getting-started.html) on how to create a Slang `IComponentType` from a shader file. 
The following source creates a Graphics layer `IPipelineState` object from a shader module represented by a `slang::IComponentType` object: ```cpp void createComputePipelineFromShader( @@ -261,4 +261,4 @@ buffer0View->release(); gDevice->release(); ``` -The order of calls to `release` does not matter, as long as all objects are released from the user. \ No newline at end of file +The order of calls to `release` does not matter, as long as all objects are released from the user. diff --git a/docs/gpu-feature/derivatives-in-compute/derivatives-in-compute.md b/docs/gpu-feature/derivatives-in-compute/derivatives-in-compute.md index 139111365a..038ea148e3 100644 --- a/docs/gpu-feature/derivatives-in-compute/derivatives-in-compute.md +++ b/docs/gpu-feature/derivatives-in-compute/derivatives-in-compute.md @@ -1,9 +1,9 @@ ### Derivatives In Compute -An entry point may be decorated with `[DerivativeGroupQuad]` or `[DerivativeGroupLinear]` to specifiy how to use derivatives in compute shaders. +An entry point may be decorated with `[DerivativeGroupQuad]` or `[DerivativeGroupLinear]` to specify how to use derivatives in compute shaders. -GLSL syntax may also be used, but is not reccomended (`derivative_group_quadsNV`/`derivative_group_linearNV`). +GLSL syntax may also be used, but is not recommended (`derivative_group_quadsNV`/`derivative_group_linearNV`). Targets: * **_SPIRV:_** Enables `DerivativeGroupQuadsNV` or `DerivativeGroupLinearNV`. * **_GLSL:_** Enables `derivative_group_quadsNV` or `derivative_group_LinearNV`. -* **_HLSL:_** Does nothing. `sm_6_6` is required to use derivatives in compute shaders. HLSL uses an equivlent of `DerivativeGroupQuad`. +* **_HLSL:_** Does nothing. `sm_6_6` is required to use derivatives in compute shaders. HLSL uses an equivalent of `DerivativeGroupQuad`. 
diff --git a/docs/language-guide.md b/docs/language-guide.md index 28cdae4756..d445051d68 100644 --- a/docs/language-guide.md +++ b/docs/language-guide.md @@ -53,7 +53,7 @@ When it comes time to generate output code, Slang will output any declarations f A few other details worth knowing about `import` declarations: -* The name you use on the `import` line gets translated into a file name with some very simple rules. An underscore (`_`) in the name turns into a dash (`-`) in the file name, and dot separators (`.`) turn into directory seprators (`/`). After these substitutions, `.slang` is added to the end of the name. +* The name you use on the `import` line gets translated into a file name with some very simple rules. An underscore (`_`) in the name turns into a dash (`-`) in the file name, and dot separators (`.`) turn into directory separators (`/`). After these substitutions, `.slang` is added to the end of the name. * If there are multiple `import` declarations naming the same file, it will only be imported once. This is also true for nested imports. @@ -61,12 +61,12 @@ A few other details worth knowing about `import` declarations: * If file `A.slang` imports `B.slang`, and then some other file does `import A;`, then only the names from `A.slang` are brought into scope, not those from `B.slang`. This behavior can be controlled by having `A.slang` use `__exported import B;` to also re-export the declarations it imports from `B`. -* An import is *not* like a `#include`, and so the file that does the `import` can't see preprocessor macros defined in the imported file (and vice versa). Think of `import foo;` as closer to `using namspace foo;` in C++ (perhaps without the same baggage). +* An import is *not* like a `#include`, and so the file that does the `import` can't see preprocessor macros defined in the imported file (and vice versa). Think of `import foo;` as closer to `using namespace foo;` in C++ (perhaps without the same baggage). 
### Explicit Parameter Blocks -One of the most important new features of modern APIs like Direct3D 12 and Vulkan is an interface for providing shader parameters using efficient *parameter blocks* that can be stored in GPU memory (these are implemented as descritpor tables/sets in D3D12/Vulkan, and "attribute buffers" in Metal). -However, HLSL and GLSL don't support explicit syntax for parmaeter blocks, and so shader programmers are left to manually pack parameters into blocks either using `register`/`layout` modifiers, or with API-based remapping (in the D3D12 case). +One of the most important new features of modern APIs like Direct3D 12 and Vulkan is an interface for providing shader parameters using efficient *parameter blocks* that can be stored in GPU memory (these are implemented as descriptor tables/sets in D3D12/Vulkan, and "attribute buffers" in Metal). +However, HLSL and GLSL don't support explicit syntax for parameter blocks, and so shader programmers are left to manually pack parameters into blocks either using `register`/`layout` modifiers, or with API-based remapping (in the D3D12 case). Slang supports a simple and explicit syntax for exploiting parameter blocks: @@ -190,7 +190,7 @@ interface IMaterial What is the type `???` that `evalPattern` should return? We know that it needs to be a type that supports `IBRDF`, but *which* type? One material might want to use `DisneyBRDF` while another wants to use `KajiyaKay`. 
-The solution in Slang, as in modern languages like Swift and Rust, is to use *associated types* to express the depdence of the BRDF type on the material type: +The solution in Slang, as in modern languages like Swift and Rust, is to use *associated types* to express the dependence of the BRDF type on the material type: ```hlsl interface IMaterial diff --git a/docs/language-reference/05-expressions.md b/docs/language-reference/05-expressions.md index da0921ff28..64bee737af 100644 --- a/docs/language-reference/05-expressions.md +++ b/docs/language-reference/05-expressions.md @@ -105,7 +105,7 @@ If the member name of a swizzle consists of a single character, then the express If the member name of a swizzle consists of `M` characters, then the result is a `vector` built from the elements of the base vector with the corresponding indices. -A vector swizzle expression is an l-value if the base expression was an l-value and the list of indices corresponding to the characeters of the member name contains no duplicates. +A vector swizzle expression is an l-value if the base expression was an l-value and the list of indices corresponding to the characters of the member name contains no duplicates. 
### Matrix Swizzles @@ -224,7 +224,7 @@ A cast expression can perform both built-in type conversions and invoke any sing ### Compatibility Feature -As a compatiblity feature for older code, Slang supports using a cast where the base expression is an integer literal zero and the target type is a user-defined structure type: +As a compatibility feature for older code, Slang supports using a cast where the base expression is an integer literal zero and the target type is a user-defined structure type: ```hlsl MyStruct s = (MyStruct) 0; @@ -338,7 +338,7 @@ With the exception of the assignment operator (`=`), an infix operator expressio ### Conditional Expression -The conditonal operator, `?:`, is used to select between two expressions based on the value of a condition: +The conditional operator, `?:`, is used to select between two expressions based on the value of a condition: ```hlsl useNegative ? -1.0f : 1.0f diff --git a/docs/language-reference/06-statements.md b/docs/language-reference/06-statements.md index 7a4770d994..5c3b77ad45 100644 --- a/docs/language-reference/06-statements.md +++ b/docs/language-reference/06-statements.md @@ -3,7 +3,7 @@ Statements ========== -Statements are used to define the bodies of functions and deterine order of evaluation and control flow for an entire program. +Statements are used to define the bodies of functions and determine order of evaluation and control flow for an entire program. Statements are distinct from expressions in that statements do not yield results and do not have types. This section lists the kinds of statements supported by Slang. @@ -101,7 +101,7 @@ default: break; ``` -A _case label_ consists of the keyword `case` followed by an expresison and a colon (`:`). +A _case label_ consists of the keyword `case` followed by an expression and a colon (`:`). The expression must evaluate to a compile-time constant integer. A _default label_ consists of the keyword `default` followed by a colon (`:`). 
@@ -203,7 +203,7 @@ The value returned must be able to coerce to the result type of the lexically en ### Discard Statement -A `discard` statement can only be used in the context of a fragment shader, in which case it causes the current invocation to terminate and the graphics system to discard the corresponding fragment so that it does not get combined with the framebuffer pixel at its coordintes. +A `discard` statement can only be used in the context of a fragment shader, in which case it causes the current invocation to terminate and the graphics system to discard the corresponding fragment so that it does not get combined with the framebuffer pixel at its coordinates. Operations with side effects that were executed by the invocation before a `discard` will still be performed and their results will become visible according to the rules of the platform. diff --git a/docs/language-reference/07-declarations.md b/docs/language-reference/07-declarations.md index e3b7aa60d0..2c6e6bdbe7 100644 --- a/docs/language-reference/07-declarations.md +++ b/docs/language-reference/07-declarations.md @@ -85,7 +85,7 @@ var y = 9.0; A `let` declaration introduces an immutable variable, which may not be assigned to or used as the argument for an `in out` or `out` parameter. A `var` declaration introduces a mutable variable. -An explicit type may be given for a variable by placing it afte the variable name and a colon (`:`): +An explicit type may be given for a variable by placing it after the variable name and a colon (`:`): ```hlsl let x : int = 7; @@ -160,7 +160,7 @@ A global shader parameter may include an initial-value epxression, but such an e ### Variables at Function Scope -Variables declared at _function scope_ (in the body of a function, initializer, subscript acessor, etc.) may be either a function-scope constant, function-scope static variable, or a local variable. +Variables declared at _function scope_ (in the body of a function, initializer, subscript accessor, etc.) 
may be either a function-scope constant, function-scope static variable, or a local variable. #### Function-Scope Constants @@ -216,7 +216,7 @@ The available directions are: * `in out` or `inout` indicates pass-by-value-result (copy-in and copy-out) semantics. The callee receives a copy of the argument passed by the caller, it may manipulate the copy, and then when the call returns the final value is copied back to the argument of the caller. An implementation may assume that at every call site the arguments for `out` or `in out` parameters never alias. -Under those assumptions, the `out` and `inout` cases may be optimized to use pass-by-refernece instead of copy-in and copy-out. +Under those assumptions, the `out` and `inout` cases may be optimized to use pass-by-reference instead of copy-in and copy-out. > Note: Applications that rely on the precise order in which write-back for `out` and `in out` parameters is performed are already on shaky semantic ground. @@ -332,7 +332,7 @@ struct Things { ... } When a structure declarations ends without a semicolon, the closing curly brace (`}`) must be the last non-comment, non-whitespace token on its line. -For compatiblity with C-style code, a structure type declaration may be used as the type specifier in a traditional-style variable declaration: +For compatibility with C-style code, a structure type declaration may be used as the type specifier in a traditional-style variable declaration: ```hlsl struct Association @@ -372,7 +372,7 @@ An optional trailing comma may terminate the lis of cases. A _case declaration_ consists of the name of the case, along with an optional initial-value expression that specifies the _tag value_ for that case. If the first case declaration in the body elides an initial-value expression, the value `0` is used for the tag value. -If any other case decalration elides an initial-value expresison, its tag value is one greater than the tag value of the immediately preceding case declaration. 
+If any other case declaration elides an initial-value expression, its tag value is one greater than the tag value of the immediately preceding case declaration. An enumeration case is referred to as if it were a `static` member of the enumeration type (e.g., `Color.Red`). @@ -429,7 +429,7 @@ typedef int Height; Constant Buffers and Texture Buffers ------------------------------------ -As a compatiblity feature, the `cbuffer` and `tbuffer` keywords can be used to introduce variable declarations. +As a compatibility feature, the `cbuffer` and `tbuffer` keywords can be used to introduce variable declarations. A declaration of the form: @@ -528,7 +528,7 @@ It is an error to declare an associated type anywhere other than the body of an An associated type declaration may have an inheritance clause. The inheritance clause of an associated type may only list interfaces; these are the _required interfaces_ for the associated type. -A concrete type that is used to satisfy an associated type requirement must conform to all of the required interaces of the associated type. +A concrete type that is used to satisfy an associated type requirement must conform to all of the required interfaces of the associated type. Initializers ------------ @@ -599,7 +599,7 @@ MyVector v = ...; float f = v[0]; ``` -A subscript declaration lists one or more parameters inside parantheses, followed by a result type clause starting with `->`. +A subscript declaration lists one or more parameters inside parentheses, followed by a result type clause starting with `->`. The result type clause of a subscript declaration cannot be elided. The body of a subscript declaration consists of _accessor declarations_. 
diff --git a/docs/language-reference/08-attributes.md b/docs/language-reference/08-attributes.md index 7ffe0f0fec..f4d900d33f 100644 --- a/docs/language-reference/08-attributes.md +++ b/docs/language-reference/08-attributes.md @@ -11,7 +11,7 @@ Attributes This attribute is only available for Vulkan SPIR-V output. -The attibute allows access to SPIR-V intrinsics, by supplying a function declaration with the appropriate signature for the SPIR-V op and no body. The intrinsic takes a single parameter which is the integer value for the SPIR-V op. +The attribute allows access to SPIR-V intrinsics, by supplying a function declaration with the appropriate signature for the SPIR-V op and no body. The intrinsic takes a single parameter which is the integer value for the SPIR-V op. In the example below the add function, uses the mechanism to directly use the SPIR-V integer add 'op' which is 128 in this case. diff --git a/docs/layout.md b/docs/layout.md index 12144c155d..75e4b9863d 100644 --- a/docs/layout.md +++ b/docs/layout.md @@ -46,7 +46,7 @@ The order of parameters in the user's code is derived by "walking" through the c * Walk through each source file of a translation unit in the order they were added/listed -* Walk through global-scope shader paramter declarations (global variables, `cbuffer`s, etc.) in the order they are listed in the (preprocessed) file. +* Walk through global-scope shader parameter declarations (global variables, `cbuffer`s, etc.) in the order they are listed in the (preprocessed) file. * After all global parameters for a translation unit have been walked, walk through any entry points in the translation unit. @@ -64,7 +64,7 @@ Computing Resource Requirements Each shader parameter computes its resource requirements based on its type, and how it is declared. 
-* Global-scope parameters, entry point `uniform` parameters, and `cbuffer` decalrations all use the "default" layout rules +* Global-scope parameters, entry point `uniform` parameters, and `cbuffer` declarations all use the "default" layout rules * Entry point non-`uniform` parameters use "varying" layout rules, either input or output diff --git a/docs/nvapi-support.md b/docs/nvapi-support.md index a50a93bcaf..cb96f65fd7 100644 --- a/docs/nvapi-support.md +++ b/docs/nvapi-support.md @@ -46,7 +46,7 @@ Thus causing the prelude to include nvHLSLExtns.h, and specifying the slot and p The actual values for the slot and optionally the space, are found by Slang examining the values of those values at the end of preprocessing input Slang source files. -This means that if you compile Slang source that has implicit use NVAPI, the slot and optionally the space must be defined. This can be achieved with a command line -D, throught the API or through having suitable `#define`s in the Slang source code. +This means that if you compile Slang source that has implicit use NVAPI, the slot and optionally the space must be defined. This can be achieved with a command line -D, through the API or through having suitable `#define`s in the Slang source code. It is worth noting if you *replace* the default HLSL prelude, and use NVAPI then it will be necessary to have something like the default HLSL prelude part of your custom prelude. diff --git a/docs/proposals/000-template.md b/docs/proposals/000-template.md index 3803d856b6..cb0377886d 100644 --- a/docs/proposals/000-template.md +++ b/docs/proposals/000-template.md @@ -13,7 +13,7 @@ Status Status: Design Review/Planned/Implementation In-Progress/Implemented/Partially Implemented. Note here whether the proposal is unimplemented, in-progress, has landed, etc. -Implemtation: [PR 000] [PR 001] ... (list links to PRs) +Implementation: [PR 000] [PR 001] ... (list links to PRs) Author: authors of the design doc and the implementation. 
diff --git a/docs/proposals/001-where-clauses.md b/docs/proposals/001-where-clauses.md index 0e49735e2f..02f60a08fd 100644 --- a/docs/proposals/001-where-clauses.md +++ b/docs/proposals/001-where-clauses.md @@ -81,7 +81,7 @@ C# is broadly similar, but uses multiple `where` clauses, one per constraint: While Haskell is a quite different language from the others mentioned here, Haskell typeclasses have undeniably influenced the concept of traits/protocols in Rust/Swift. -In Haskell a typeclass is not somethign a type "inherits" from, and instead uses type parameter for even the `This` type. +In Haskell a typeclass is not something a type "inherits" from, and instead uses type parameter for even the `This` type. Type parameters in Haskell are also introduced implicitly rather than explicitly. The `resolve` example above would become something like: @@ -89,7 +89,7 @@ The `resolve` example above would become something like: ResolutionContext u -> List t -> v We see here that the constraints are all grouped together in the `(...) =>` clause before the actual type signature of the function. -That clause serves a simlar semantic role to `where` clauses in these other languages. +That clause serves a similar semantic role to `where` clauses in these other languages. Proposed Approach ----------------- @@ -223,7 +223,7 @@ Technically it was already possible to have redundancy in a constraint by using void f( ... ) { ... } -One question that is raised by the possiblity of redundant constraints is whether the compiler should produce a diagnostic for them and, if so, whether it should be a warning or an error. +One question that is raised by the possibility of redundant constraints is whether the compiler should produce a diagnostic for them and, if so, whether it should be a warning or an error. 
While it may seem obvious that redundant constraints are to be avoided, it is possible that refactoring of `interface` hierarchies could change whether existing constraints are redundant or not, potentially forcing widespread edits to code that is semantically unambiguous (and just a little more verbose than necessary). We propose that redundant constraints should probably produce a warning, with a way to silence that warning easily. @@ -231,7 +231,7 @@ We propose that redundant constraints should probably produce a warning, with a The long and short of the above section is that there can be multiple ways to write semantically equivalent generic declarations, by changing the form, order, etc. of constraints. We want the signature of a function (and its mangled name, etc.) to be identical for semantically equivalent declaration syntax. -In order to ensure that a declaration's mangled name is indepenent of the form of its constraints, we must have a way to *canonicalize* those constraints. +In order to ensure that a declaration's mangled name is independent of the form of its constraints, we must have a way to *canonicalize* those constraints. The Swift compiler codebase includes a document that details the rules used for canonicalization of constraints for that compiler, and we can take inspiration from it. Our constraints are currently much more restricted, so canonicalization can follow a much simpler process, such as: @@ -288,7 +288,7 @@ In the context of `class`-based hierarchies, we can also consider having constra ### Allow `where` clauses on non-generic declarations -We could consider allowing `where` clauses to appear on any declaration nested under a generic, such that those declarations are only usable when certain additinal constraints are met. +We could consider allowing `where` clauses to appear on any declaration nested under a generic, such that those declarations are only usable when certain additional constraints are met. 
E.g.,: struct MyDictionary diff --git a/docs/proposals/002-type-equality-constraints.md b/docs/proposals/002-type-equality-constraints.md index 44562075ee..33612720cb 100644 --- a/docs/proposals/002-type-equality-constraints.md +++ b/docs/proposals/002-type-equality-constraints.md @@ -20,7 +20,7 @@ As of proposal [001](001-where-clauses.md), Slang allows for generic declaration Currently, the language only accepts *conformance* constraints of the form `T : IFoo`, where `T` is one of the parameters of the generic, and `IFoo` is either an `interface` or a conjunction of interfaces, which indicate that the type `T` must conform to `IFoo`. -This proposal is motivated by the observation that when an interface has associated types, there is currently no way for a programmer to introduce a generic that is only applicable when an associated type satisfies certain constriants. +This proposal is motivated by the observation that when an interface has associated types, there is currently no way for a programmer to introduce a generic that is only applicable when an associated type satisfies certain constraints. As an example, consider an interface for types that can be "packed" into a smaller representation for in-memory storage (instead of a default representation optimized for access from registers): @@ -146,7 +146,7 @@ The choice of how to represent equality constraints is more subtle. One option is to lower an equality constraint to *nothing* at the IR level, under the assumption that the casts that reference these constraints should lower to nothing. Doing so would introduce yet another case where the IR we generate doesn't "type-check." The other option is to lower a type equality constraint to an explicit generic parameter which is then applied via an explicit op to convert between the associated type and its known concrete equivalent. 
-The representation of the witnesses required to provide *arguments* for such parameters is something that hasn't been fully explored, so for now we prpose to take the first (easier) option. +The representation of the witnesses required to provide *arguments* for such parameters is something that hasn't been fully explored, so for now we propose to take the first (easier) option. ### Canonicalization @@ -156,7 +156,7 @@ Conformane constraints involving associated types should already be order-able a We propose the following approach: * Take all of the equality constraints that arise after any expansion steps -* Divide the types named on either side of any equality constraint into *equivalence classes*, where if `X == Y` is a constraint, then `X` and `Y` must in teh same equivalence class +* Divide the types named on either side of any equality constraint into *equivalence classes*, where if `X == Y` is a constraint, then `X` and `Y` must in the same equivalence class * Each type in an equivalence class will either be an associated type of the form `T.A.B...Z`, derived from a generic type parameter, or a *independent* type, which here means anything other than those associated types. * Because of the rules enforced during semantic checking, each equivalence class must have at least one associated type in it. * Each equivalence class may have zero or more independent types in it. diff --git a/docs/proposals/003-atomic-t.md b/docs/proposals/003-atomic-t.md index 76bb3bf811..c846ad3c70 100644 --- a/docs/proposals/003-atomic-t.md +++ b/docs/proposals/003-atomic-t.md @@ -143,7 +143,7 @@ For non-WGSL/Metal targets, we can simply lower the type out of existence into i `Atomic` type exists in almost all CPU programming languages and is the proven way to express atomic operations over different architectures that have different memory models. WGSL and Metal follows this trend to require atomic operations being expressed -this way. 
This proposal is to make Slang follow this trend and make `Atomic` the recommened way to express atomic operation +this way. This proposal is to make Slang follow this trend and make `Atomic` the recommended way to express atomic operation going forward. # Future Work diff --git a/docs/proposals/004-initialization.md b/docs/proposals/004-initialization.md index 4605cd0309..6a41cab231 100644 --- a/docs/proposals/004-initialization.md +++ b/docs/proposals/004-initialization.md @@ -8,7 +8,7 @@ Status Status: Design Approved, implementation in-progress. -Implemtation: N/A +Implementation: N/A Author: Yong He @@ -29,7 +29,7 @@ C++ has many different ways and syntax to initialize an object: through explicit A variable in C++ can also be in an uninitialized state after its declaration. HLSL inherits most of these behvior from C++ by allowing variables to be uninitialized. On the other hand, languages like C# and Swift has a set of well defined rules to ensure every variable is initialized after its declaration. -C++ allows using the initilization list syntax to initialize an object. The semantics of initialization lists depends on whether or not explicit constructors +C++ allows using the initialization list syntax to initialize an object. The semantics of initialization lists depends on whether or not explicit constructors are defined on the type. Proposed Approach @@ -92,10 +92,10 @@ void foo() ### Synthesis of constructors for member initialization -If a type already defines any explicit constructors, do not synthesize any constructors for initializer list call. An intializer list expression +If a type already defines any explicit constructors, do not synthesize any constructors for initializer list call. An initializer list expression for the type must exactly match one of the explicitly defined constructors. 
-If the type doesn't provide any explicit constructors, the compiler need to synthesize the constructors for the calls that that the intializer +If the type doesn't provide any explicit constructors, the compiler needs to synthesize the constructors for the calls that the initializer lists translate into, so that an initializer list expression can be used to initialize a variable of the type. For each type, we will synthesize one constructor at the same visibility of the type itself: @@ -110,7 +110,7 @@ If `member0`'s type is not default initializable and the the member doesn't prov The synthesized constructor will be marked as `[Synthesized]` by the compiler, so the call site can inject additional compatibility logic when calling a synthesized constructor. -The body of the constructor will initialize each member with the value comming from the corresponding constructor argument if such argument exists, +The body of the constructor will initialize each member with the value coming from the corresponding constructor argument if such argument exists, otherwise the member will be initialized to its default value either defined by the init expr of the member, or the default value of the type if the type is default-initializable. If the member type is not default-initializable and a default value isn't provided on the member, then such the constructor synthesis will fail and the constructor will not be added to the type. Failure to synthesis a constructor is not an error, and an error will appear @@ -149,7 +149,7 @@ S obj = {}; S obj = S(); ``` -Note that initializer list of a single argument does not translate into a type cast, unlike the constructor call syntax. Initializing with a single element in the intializer list always translates directly into a constructor call. For example: +Note that initializer list of a single argument does not translate into a type cast, unlike the constructor call syntax. 
Initializing with a single element in the initializer list always translates directly into a constructor call. For example: ```csharp void test() { @@ -359,7 +359,7 @@ internal struct Visibility4 { // Visibility4 type is considered as C-style struct. // And we still synthesize a ctor for member initialization. - // Because Visiblity4 has no public members, the synthesized + // Because Visibility4 has no public members, the synthesized // ctor will take 0 arguments. internal int x = 1; internal int y = 2; @@ -398,7 +398,7 @@ so implicit initialization for these variables can come with serious performance ### Should `out` parameters be default initialized? -Following the same philosphy of not initializing any declarations, `out` parameters are also not default-initialized. +Following the same philosophy of not initializing any declarations, `out` parameters are also not default-initialized. Alternatives Considered ----------------------- diff --git a/docs/proposals/005-write-only-textures.md b/docs/proposals/005-write-only-textures.md index a68ed8c407..698ea6e866 100644 --- a/docs/proposals/005-write-only-textures.md +++ b/docs/proposals/005-write-only-textures.md @@ -9,7 +9,7 @@ Status Status: Design Review. -Implemtation: N/A +Implementation: N/A Author: Yong He diff --git a/docs/proposals/007-variadic-generics.md b/docs/proposals/007-variadic-generics.md index 8e5de1f04e..8034c03120 100644 --- a/docs/proposals/007-variadic-generics.md +++ b/docs/proposals/007-variadic-generics.md @@ -212,7 +212,7 @@ void k() {} // Error. void h() {} // OK. ``` -Addtionally, we establish these restrictions on how `expand` and `each` maybe used: +Additionally, we establish these restrictions on how `expand` and `each` maybe used: - The pattern type of an `expand` type expression must capture at least one generic type pack parameter in an `each` expression. 
- The type expression after `each` must refer to a generic type pack parameter, and the `each` expression can only appear inside an `expand` expression. @@ -222,7 +222,7 @@ Similarly, when using `expand` and `each` on values, we require that: - The pattern expression of an `expand` expression must capture at least one value whose type is a generic type pack parameter. - The expression after `each` must refer to a value whose type is a generic type pack parameter, and the `each` expression can only appear inside an `expand` expression. -Combined with type euqality constriants, variadic generic type pack can be used to define homogeneously typed parameter pack: +Combined with type equality constraints, variadic generic type pack can be used to define homogeneously typed parameter pack: ``` void calcInts(expand each T values) where T == int { @@ -653,8 +653,8 @@ by packing up all the values computed at each "loop iteration" in an `IRMakeValu } ``` -With this, we can replace the original `IRExpand` inst with `%expand` and specailization is done. The specialized instructions like `IRGetTupleElement(%v, 0)` will be picked up -in the follow-up step during specalization and replaced with the actual value at the specified index since `%v` is a known value pack represented by `IRMakeValuePack`. So after +With this, we can replace the original `IRExpand` inst with `%expand` and specialization is done. The specialized instructions like `IRGetTupleElement(%v, 0)` will be picked up -in the follow-up step during specialization and replaced with the actual value at the specified index since `%v` is a known value pack represented by `IRMakeValuePack`. 
So after folding and other simplifications, we should result in ``` %expand = IRMakeValuePack(2,3,4) @@ -673,7 +673,7 @@ After the specialization pass, there should be no more `IRExpand` and `IRExpandT Alternatives Considered ----------------------- -We considered the C++ `...` oeprator syntax and Swift's `repeat each` syntax and ended up picking Swift's design because it is easier to parse and is less ambiguous. Swift is strict about requiring `each` to precede a generic type pack parameter so `void f(T v)` is not a valid syntax to prevent confusion on what `T` is in this context. In Slang we don't require this because `expand each T` is always simplified down to `T`, and refer to the type pack. +We considered the C++ `...` operator syntax and Swift's `repeat each` syntax and ended up picking Swift's design because it is easier to parse and is less ambiguous. Swift is strict about requiring `each` to precede a generic type pack parameter so `void f(T v)` is not a valid syntax to prevent confusion on what `T` is in this context. In Slang we don't require this because `expand each T` is always simplified down to `T`, and refer to the type pack. We also considered not adding variadic generics support to the language at all, and just implement `Tuple` and `IFunc` as special system builtin types, like how it is done in C#. However we believe that this approach is too limited when it comes to what the user can do with tuples and `IFunc`. Given Slang's position as a high performance GPU-first language, it is more important for Slang than other CPU languages to have a powerful type system that can provide zero-cost abstraction for meta-programming tasks. That lead us to believe that the language and the users can benefit from proper support of variadic generics. 
diff --git a/docs/proposals/012-language-version-directive.md b/docs/proposals/012-language-version-directive.md index 4c8178ffbf..3a75e50693 100644 --- a/docs/proposals/012-language-version-directive.md +++ b/docs/proposals/012-language-version-directive.md @@ -53,7 +53,7 @@ There are some key lessons from the history of GLSL that are worth paying attent * When OpenGL ES was introduced, the OpenGL ES Shading Language also used an identical `#version` directive, but the meaning of a given version number was different between GLSL and GLSL ES (that is, different language features/capabilities were implied by the same `#version`, depending on whether one was compiling with a GLSL or GLSL ES compiler). The use of the optional profile name is highly encouraged when there might be differences in capability not encoded by just the version number. -* Initially, the version numbers for OpenGL and GLSL were not aligned. For example, OpenGL 2.0 used GLSL 1.10 by default. This led to confusion for developers, who needed to keep track of what API version coresponded to what language version. The version numbers for OpenGL and GLSL became aligned starting with OpenGL 3.3 and GLSL 3.30. +* Initially, the version numbers for OpenGL and GLSL were not aligned. For example, OpenGL 2.0 used GLSL 1.10 by default. This led to confusion for developers, who needed to keep track of what API version corresponded to what language version. The version numbers for OpenGL and GLSL became aligned starting with OpenGL 3.3 and GLSL 3.30. * A common, but minor, gotcha for developers is that the GLSL `#version` directive can only be preceded by trivia (whitespace and comments) and, importantly, cannot be preceded by any other preprocessor directives. This limitation has created problems when applications want to, e.g., prepend a sequence of `#define`s to an existing shader that starts with a `#version`. 
@@ -106,10 +106,10 @@ We will differentiate between two kinds of versions, which will have aligned num This proposal doesn't intend to dictate the format used for version numbers, since that is tied into the release process for the Slang toolset. We expect that version numbers will start with a year, so that, e.g., `2025.0` would be the first release in the year 2025. -A given version of the Slang toolset (e.g, `2024.10`) should always support the matching language verson. +A given version of the Slang toolset (e.g, `2024.10`) should always support the matching language version. If this proposal is accepted, we expect releases of the Slang toolset to support a *range* of language versions, ideally covering a full year or more of backwards compatibility. -This proposal does not seek to make any guarantees about the level of backwards compatiblity, leaving that the Slang project team to determine in collaboration with users. +This proposal does not seek to make any guarantees about the level of backwards compatibility, leaving that the Slang project team to determine in collaboration with users. ### `#language` Directives ### @@ -151,7 +151,7 @@ Alternatives Considered ### Compiler Options ### The main alternative here is to allow the language version to be specified via compiler options. -The exising `-lang` option for `slangc` could be extended to include a language version: e.g., `slang2025.1`. +The existing `-lang` option for `slangc` could be extended to include a language version: e.g., `slang2025.1`. This proposal is motivated by extensive experience with the pain points that arise when semantically-significant options, flags, and capabilities required by a project are encoded not in its source code, but only in its build scripts or other configuration files. 
Anybody who has been handed a single `.hlsl` file and asked to simply compile it (e.g., to reproduce a bug or performance issue) likely knows the litany of questions that need to be answered before that file is usable: what is the entry point name? What stage? What shader model? diff --git a/docs/proposals/implementation/ast-ir-serialization.md b/docs/proposals/implementation/ast-ir-serialization.md index 248b752a14..cc65c07159 100644 --- a/docs/proposals/implementation/ast-ir-serialization.md +++ b/docs/proposals/implementation/ast-ir-serialization.md @@ -19,7 +19,7 @@ Either the entire `Decl` hierarchy of the AST is deserialized and turned into in Similarly, we can either construct the `IRInst` hierarchy for an entire module, or none of it. Releases of the Slang compiler typically included a serialized form of the core module, and the runtime cost of deserializing this module has proven to be a problem for users of the compiler. -Becuse parts of the Slang compiler are not fully thread-safe/reentrant, the core module must be deserialized for each "global session," so that deserialization cost is incurred per-thread in scenarios with thread pools. +Because parts of the Slang compiler are not fully thread-safe/reentrant, the core module must be deserialized for each "global session," so that deserialization cost is incurred per-thread in scenarios with thread pools. Even in single-threaded scenarios, the deserialization step adds significantly to the startup time for the compiler, making single-file compiles less efficient than compiling large batches of files in a single process. Overview of Proposed Solution @@ -49,7 +49,7 @@ The linker creates a *fresh* `IRModule` for the linked result, and clones/copies 1. Given an instruction in an input module to be copied over, use an `IRBuilder` on the output module to create a deep copy of that instruction and its children. -2. 
Whenever an instruction being copied over references another top-level instruction local to the same input module (that is, one without a linkage decoration), either construct a deep copy of the refereneced instruction in the output module, or find and re-use a copy that was made previously. +2. Whenever an instruction being copied over references another top-level instruction local to the same input module (that is, one without a linkage decoration), either construct a deep copy of the referenced instruction in the output module, or find and re-use a copy that was made previously. 3. Whenever an instruction being copied over references a top-level instruction that might be resolved to come from another module (that is, one with a linkage decoration), use the mangled name on the linkage decoration to search *all* of the input modules for candidate instructions that match. Use some fancy logic to pick one of them (the details aren't relevant at this exact moment) and then copy the chosen instruction over, more or less starting at (1) above. @@ -117,7 +117,7 @@ The two main ways that the child declarations are accessed are: Currently the `memberDictionary` field is private, and has access go through methods that check whether the dictionary needs to be rebuilt. The `members` field should also be made private, so that we can carefully intercept any code that wants to enumerate all members of a declaration. -We should probably also make the `memberDictionary` field map from a name to the *index* of a declaration in `members`, instead of direclty to a `Decl*`. +We should probably also make the `memberDictionary` field map from a name to the *index* of a declaration in `members`, instead of directly to a `Decl*`. > Note: We're ignoring the `ContainerDecl::transparentMembers` field here, but it does need to be taken into account in the actual implementation. 
@@ -137,7 +137,7 @@ The `members` array will either be empty, or will be correctly-sized for the num The entries in `members` may be null, however, if the corresponding child declaration has not been deserialized. We will need to attach a pointer to information related to lazy-loading to the `ContainerDecl`. -The simplest approach would be to add a field to `ContainerDecl`, but we could also consider using a custom `Modifier` if we are concerend about bloat. +The simplest approach would be to add a field to `ContainerDecl`, but we could also consider using a custom `Modifier` if we are concerned about bloat. #### Enumeration @@ -187,7 +187,7 @@ These complications lead to two big consequences for the encoding: * The array of *AST entries* will not just contain the entries for top-level `Decl`s. It needs to contain an entry for each `Decl` that might be referenced from elsewhere in the AST. For simplicity, it will probably contain *all* `Decl`s that are not explicitly stripped as part of producing the serialized AST. -* The array won't even consist of just `Decl`s. It will also need to have entries for things like `DeclRef`s and `Type`s that can also be referened as operands of AST nodes. +* The array won't even consist of just `Decl`s. It will also need to have entries for things like `DeclRef`s and `Type`s that can also be referenced as operands of AST nodes. As a stab at a simple representation, each AST entry should include: @@ -195,7 +195,7 @@ As a stab at a simple representation, each AST entry should include: * A range of bytes in the raw data that holds the serialized representation of that node (e.g., its operands) -An entry for a `ContainerDecl` should include (whether direclty or encoded in the raw data...) +An entry for a `ContainerDecl` should include (whether directly or encoded in the raw data...) 
* A contiguous range of AST entry indices that represent the direct children of the node, in declaration order (the order they'd appear in `ContainerDecl::members`) @@ -208,7 +208,7 @@ Given the above representation, there is no need to explicitly encode the parent Given an AST entry index for a `Decl`, we can find its parent by recursing through the hierarchy starting at the root, and doing a binary search at each hierarchy level to find the (unique) child declaration at that level which contains that index in its range of descendents. When there is a request to on-demand deserialize a `Decl` based on its AST entry index, we would need to first deserialize each of its ancestors, up the hierarchy. -That on-demand deserialization of the ancestors can follow the flow given above for recursively walking the hirarchy to find which declaration at each level contains the given index. +That on-demand deserialization of the ancestors can follow the flow given above for recursively walking the hierarchy to find which declaration at each level contains the given index. In order to support lookup of members of a declaration by name, we propose the following: diff --git a/docs/proposals/legacy/001-basic-interfaces.md b/docs/proposals/legacy/001-basic-interfaces.md index 6083c8ae93..669537b423 100644 --- a/docs/proposals/legacy/001-basic-interfaces.md +++ b/docs/proposals/legacy/001-basic-interfaces.md @@ -232,7 +232,7 @@ These would be the first `interface`s officially exposed by the core module. While most of our existing code written in Slang uses an `I` prefix as the naming convention for `interface`s (e.g., `IThing`), we have never really discussed that choice in detail. Whatever we decide to expose for this stuff is likely to become the de facto convention for Slang code. -The `I` prefix is precedented in COM and C#/.net/CLR, which are likely to be familiar to many devleopers using Slang. 
+The `I` prefix is precedented in COM and C#/.net/CLR, which are likely to be familiar to many developers using Slang. Because of COM, it is also the convention used in the C++ API headers for Slang and GFX. The Rust/Swift languages do not distinguish between traits/protocols and other types. diff --git a/docs/proposals/legacy/002-api-headers.md b/docs/proposals/legacy/002-api-headers.md index 66b649228b..5efbc1d16b 100644 --- a/docs/proposals/legacy/002-api-headers.md +++ b/docs/proposals/legacy/002-api-headers.md @@ -29,7 +29,7 @@ We know that we cannot remove support for difficult cases, but it would be good Related Work ------------ -There are obviously far too many C/C++ APIs and approachs to design for C/C++ APIs for us to review them all. +There are obviously far too many C/C++ APIs and approaches to design for C/C++ APIs for us to review them all. We will simply note a few key examples that can be relevant for comparison. The gold standard for C/C++ APIs is ultimately plain C. Plain C is easy for most systems programmers to understand and benefits from having a well-defined ABI on almost every interesting platform. FFI systems for other languages tend to work with plain C APIs. Clarity around ABI makes it easy to know what changes/additions to a plain C API will and will not break binary compatibility. The Cg compiler API and the Vulkan GPU API are good examples of C-based APIs in the same domains as Slang and GFX, respectively. These APIs reveal some of the challenges of using plain C for large and complicated APIs: @@ -64,7 +64,7 @@ Across such APIs, we see a wide variety of strategies to dealing with extensibil * Vulkan uses "desc" structures (usually called "info" or "create info" structures), which contain a baseline set of state/fields, along with a linked list of dynamically-typed/-tagged extension structures. New functionality that only requires changes to "desc" structures can be added by defining a new tag and extension structure. 
New operations are added in a manner similar to OpenGL. -* D3D12 also uses COM interfaces and "desc" structures (although now officialy called "descriptions" to not overload the use of "descriptor" in descriptor tables), much like D3D11, and sometimes uses the same approach to extensibility (e.g., there are currently `ID3D12Device`, `ID3D12Device`, ... `ID2D12Device9`). In addition, D3D12 has also added two variations on Vulkan-like models for creating pipeline state (`ID3D12Device2::CreatePipelineState` and `ID3D12Device5::CreateStateObject`), using a notion of more fine-grained "subojects" that are dynamically-typed/-tagged and each have their own "desc". +* D3D12 also uses COM interfaces and "desc" structures (although now officially called "descriptions" to not overload the use of "descriptor" in descriptor tables), much like D3D11, and sometimes uses the same approach to extensibility (e.g., there are currently `ID3D12Device`, `ID3D12Device`, ... `ID2D12Device9`). In addition, D3D12 has also added two variations on Vulkan-like models for creating pipeline state (`ID3D12Device2::CreatePipelineState` and `ID3D12Device5::CreateStateObject`), using a notion of more fine-grained "subobjects" that are dynamically-typed/-tagged and each have their own "desc". It is important to note that even with the nominal flexibility that COM provides around versioning, D3D12 has opted for a more fine-grained approach when dealing with something as complicated as GPU pipeline state. @@ -91,7 +91,7 @@ At the end of this document there is a lengthy code block that sketches a possib Questions --------- -### Will we generate all or some of the API header? If so, what will be the "ground truth" verison? +### Will we generate all or some of the API header? If so, what will be the "ground truth" version? Note that Vulkan and SPIR-V benefit from having ground-truth computer-readable definitions, allowing both header files and tooling code to be generated. 
@@ -278,7 +278,7 @@ namespace slang members initializers in `slang::SessionDesc`, but it *will* compile under C++14 and later. - If we want to deal with C++11 compatiblity in that case, we can, but + If we want to deal with C++11 compatibility in that case, we can, but it would slightly clutter up the way we declare these things. Realistically, we'd just split the two types: @@ -325,7 +325,7 @@ namespace slang struct SessionDesc0 { ... the original ... }; struct SessionDesc1 { ... the new one ... }; - typedef SessionDesc SesssionDesc1; + typedef SessionDesc SessionDesc1; At the point where we introduce a second version, it is probably the right time to enable developers to lock in to any version they choose. In the code above @@ -334,7 +334,7 @@ namespace slang at the point they compile. (If we wanted to get really "future-proof" we'd define every struct with the `0` - prefix right out of the gate, and always have the `typedef` in place. I'm not conviced + prefix right out of the gate, and always have the `typedef` in place. I'm not convinced that would ever pay off.) I expect most of this to be a non-issue if we are zealous about using fine-grained @@ -368,7 +368,7 @@ namespace slang UUID const& uuid, void** outObject) = 0; - /* Instead, most users will direclty call the operations only through + /* Instead, most users will directly call the operations only through wrappers that provide conveniently type-safe behavior: */ inline Result createCompileRequest( @@ -574,7 +574,7 @@ extern "C" and also to provide versioning support. We could define the same set of overloads here, with the same names, for - use by clients who don't actually care about C compatiblity but just like + use by clients who don't actually care about C compatibility but just like a C-style API. That is probably worth doing. 
Otherwise, we realistically need to start defining some de-facto naming @@ -612,7 +612,7 @@ extern "C" /* Finally, the C API level is where we should define the core factory entry point for creating and initializing the Slang global session (just like - in the current header). Here we jsut generalize it for creaitng "any" global + in the current header). Here we just generalize it for creating "any" global object, based on a UUID and a bunch of descs. */ SLANG_API SlangResult slang_createObject( @@ -645,7 +645,7 @@ generating as much of the API as possible anyway. /* Basic Types */ - /* We just define the basic types direclty, without the indirection + /* We just define the basic types directly, without the indirection through the declarations in the `slang::` namespace. */ @@ -846,7 +846,7 @@ use of exceptions instead of `Result`s: SLANG_SMART_PTR(ICompileRequest) createCompileRequest( CompileRequestDesc const& desc) { - SLANG_SMART_PTR(ICompileReqest) compileRequest; + SLANG_SMART_PTR(ICompileRequest) compileRequest; SLANG_THROW_IF_FAIL(_createCompileRequest( &desc, 1, SLANG_UUID_OF(IComileRequest), comileRequest.writeRef())); return compileRequest; @@ -857,7 +857,7 @@ use of exceptions instead of `Result`s: #endif } -Both for the sake of C API and especialy for gfx (both C and C++), we should consider +Both for the sake of C API and especially for gfx (both C and C++), we should consider defining some coarse-grained aggregate desc types as utilities: struct SimpleRasterizationPipelineStateDesc @@ -886,7 +886,7 @@ defining some coarse-grained aggregate desc types as utilities: // List members for attachments, etc. rather than just pointer-and-count: private: List colorAttachments; - public: AttachmentDesc& addColorAttachement(); + public: AttachmentDesc& addColorAttachment(); // There should also be convenience constructors common cases // (especially relevant for things like textures). 
@@ -924,7 +924,7 @@ defining some coarse-grained aggregate desc types as utilities: }; While the implementation of this monolithic desc types would not necessarily be pretty, -it would enable users who want the benefits of the "one big struct" appraoch to get +it would enable users who want the benefits of the "one big struct" approach to get what they seem to want. The next step down this road is to take these aggregate desc types and turn them into diff --git a/docs/proposals/legacy/003-error-handling.md b/docs/proposals/legacy/003-error-handling.md index 23c7bdc408..e8fb44402e 100644 --- a/docs/proposals/legacy/003-error-handling.md +++ b/docs/proposals/legacy/003-error-handling.md @@ -43,10 +43,10 @@ Related Work In the absence of language support, developers typically signal and propagate errors using *error codes*. The COM `HRESULT` type is a notable example of a well-defined system for using error codes in C/C++ and other languages. Error codes have the benefit of being easy to implement, and relatively light-weight. The main drawback of error codes is that developers often forget to check and/or propagate them, and when they do remember to do so it adds a lot of boilerplate. -Additonally, reserving the return value of every function for returning an error code makes code more complex because the *actual* return value must be passed via a function parameter. +Additionally, reserving the return value of every function for returning an error code makes code more complex because the *actual* return value must be passed via a function parameter. C++ uses *exceptions* for errors in various categories, including unpredictable but recoverable failures. -Propagation of errors up the call stack is entirely automatic, with unwinding of call frames and destruction of their local state occuring as part of the search for a handler. 
+Propagation of errors up the call stack is entirely automatic, with unwinding of call frames and destruction of their local state occurring as part of the search for a handler. Neither functions that may throw nor call sites to such functions are syntactically marked. Exceptions in C++ have often been implemented in ways that add overhead and require complicated support in platform ABIs and intermediate languages to support. @@ -66,7 +66,7 @@ Functions that return `X` and those that return `Result` are not directl Swift provides more syntactic support for errors than Rust, although the underlying mechanism is similar. A Swift function may have `throws` added between the parameter list and return type to indicate that a function may yield an error. All errors in Swift must implement the `Error` protocol, and all functions that can `throw` may produce any `Error` (although there are proposals to extend Swift with "typed `throws`"). -Any call site to a `throws` function must have a prefix `try` (e.g., `try f(a, b)`), which works simiarly to Rust's `?`; any error produced by the called function is propagated, and the ordinary result is returned. +Any call site to a `throws` function must have a prefix `try` (e.g., `try f(a, b)`), which works similarly to Rust's `?`; any error produced by the called function is propagated, and the ordinary result is returned. Swift provides an explicit `do { ... } catch ...` construct that allows handlers to be established. It also provides for conversion between exceptions and an explicit `Result` type, akin to Rust's. Higher-order functions may be declared as `rethrows` to indicate that whether or not they throw depends on whether or not any of their function-type parameters is actually a `throws` function at a call site. 
@@ -203,7 +203,7 @@ let y : int = 1 + _tmp; ### Desugar `throw` Expressions -For every `throw` site in a function body, there will either be no in-scope `catch` clause that matches the type thrown, or there will be eactly one most-deeply-nested `catch` that statically matches. +For every `throw` site in a function body, there will either be no in-scope `catch` clause that matches the type thrown, or there will be exactly one most-deeply-nested `catch` that statically matches. Front-end semantic checking should be able to associate each `throw` with the appropriate `catch` if any. For `throw` sites with no matching `catch`, the operation simply translates to a `return` of the thrown error (because of the way we transformed the function signature). diff --git a/docs/proposals/legacy/005-components.md b/docs/proposals/legacy/005-components.md index b257140a7b..ff53d0f770 100644 --- a/docs/proposals/legacy/005-components.md +++ b/docs/proposals/legacy/005-components.md @@ -239,7 +239,7 @@ class X ``` In the above, an instance of `X` can always find the `Y` it depends on easily and (relatively) efficiently. -There is no particularly high overhead to having `X` diretly store an indirect reference to `Y` (at least not for coarse-grained units), and it is trivial for multiple units like `X` to all share the same *instance* of `Y` (potentially even including mutable state, for applications that like that sort of thing). +There is no particularly high overhead to having `X` directly store an indirect reference to `Y` (at least not for coarse-grained units), and it is trivial for multiple units like `X` to all share the same *instance* of `Y` (potentially even including mutable state, for applications that like that sort of thing). In general most CPU languages (and especially OOP ones) can express the concepts of "is-a" and "has-a" but they often don't distinguish between when "has-as" means "refers-to-and-depends-on-a" vs. when it means "aggregates-and-owns-a". 
This is important when looking at a GPU language like Slang, where "aggergates-and-owns-a" is easy (we have `struct` types), but "refers-to-and-depends-on-a" is harder (not all of our targets can really support pointers). @@ -279,7 +279,7 @@ interface IMaterialSystem void doMaterialStuff(); } -__component_type DefualtMaterialSystem : IMaterialSystem +__component_type DefaultMaterialSystem : IMaterialSystem { __require lighting : ILightingSystem; @@ -365,7 +365,7 @@ At the point where an `__aggregate SomeType` member is declared, the front-end s For example, because `DefaultIntegratorSystem` declares `__require IMaterialSystem`, the compiler searches in the current context for a value that can provide that interface. It finds a single suitable value: the value implicitly defined by `__aggregate DefaultMaterialSystem`, and thus "wires up" the input dependency of the `DefaultIntegratorSystem`. -It is posible for a `__require` in an `__aggregate`d member to be satisfied via another `__require` of its outer type: +It is possible for a `__require` in an `__aggregate`d member to be satisfied via another `__require` of its outer type: ``` __component_type MyUnit @@ -384,7 +384,7 @@ While the above examples do not show it, component types should be allowed to co Detailed Explanation -------------------- -Component types need to be restricted in where and how they can be used, to avoid creating situations that would give them all the flexiblity of arbitrary `class`es. +Component types need to be restricted in where and how they can be used, to avoid creating situations that would give them all the flexibility of arbitrary `class`es. The only places where a component type may be used are: * `__require` and `__aggregate` declarations @@ -434,7 +434,7 @@ Note that when the generated code invokes an operation through one of the `__agg Effectively, the compiler generates all of the boilerplate parameter-passing that the programmer would have otherwise had to write by hand. 
-It might or might not be obvious that the notion of "component type" being described here has a clear correspondance to the `IComponentType` interface provided by the Slang runtime/compilation API. +It might or might not be obvious that the notion of "component type" being described here has a clear correspondence to the `IComponentType` interface provided by the Slang runtime/compilation API. It should be possible for us to provide reflection services that allow a programmer to look up a component type by name and get an `IComponentType`. The existing APIs for composing, specializing, and linking `IComponentType`s should Just Work for explicit `__component_type`s. Large aggregates akin to `MyProgram` above can be defined entirely via the C++ `IComponentType` API at program runtime. @@ -456,7 +456,7 @@ That last point is important, since a component type allows users to define a co ### Can the `__component_type` construct just be subsumed by either `struct` or `class`? -Maybe. The key challenge is that component types need to provide the "look and feel" of by-refernece re-use rather than by-value copying. A `__require T` should effectively act like a `T*` and not a bare `T` value, so I am reluctant to say that should map to `struct`. +Maybe. The key challenge is that component types need to provide the "look and feel" of by-reference re-use rather than by-value copying. A `__require T` should effectively act like a `T*` and not a bare `T` value, so I am reluctant to say that should map to `struct`. ### But what about `[mutating]` operations and writing to fields of component types, then? @@ -484,7 +484,7 @@ __component_type C { __require A; ... } __component_type D { __require B; __require C; ... } ``` -The Spark shading language research project used multiple mixin class inheritance to compose units of shader code akin to what are being proposed here as coponent types (hmm... I guess that should go into related work...). 
+The Spark shading language research project used multiple mixin class inheritance to compose units of shader code akin to what are being proposed here as component types (hmm... I guess that should go into related work...). In general, using inheritance to model something that isn't an "is-a" relationship is poor modeling. Inheritance as a modelling tool cannot capture some patterns that are possible with `__aggregate` (notably, with mixin inheritance you can't get multiple "copies" of a component). diff --git a/docs/proposals/legacy/006-artifact-container-format.md b/docs/proposals/legacy/006-artifact-container-format.md index 81910151b9..04daf3bd49 100644 --- a/docs/proposals/legacy/006-artifact-container-format.md +++ b/docs/proposals/legacy/006-artifact-container-format.md @@ -491,7 +491,7 @@ The grouping - how does it actually work? It might require specifying what group An advantage to this approach is that policy of how naming works as a user space problem. It is also powerful in that it allows control on compilation that has some independence from the name. -We could have some options that are named, but do not appear as part of the name/path within the container. The purpose of this is to allow customization of a compilation, without that customization necessarily appearing withing the application code. The container could store group of named options that is used, such that it is possible to recreate the compilation or perhaps to detect there is a difference. +We could have some options that are named, but do not appear as part of the name/path within the container. The purpose of this is to allow customization of a compilation, without that customization necessarily appearing within the application code. The container could store group of named options that is used, such that it is possible to recreate the compilation or perhaps to detect there is a difference. 
### JSON options

@@ -776,7 +776,7 @@ Discussion: Container

A typical container will contain kernels - in effect blobs. The blobs themselves, or the blob names are not going to be sufficient to express the amount of information that is necessary to meet the goals laid out at the start of this document. Some extra information may be user supplied. Some extra information might be user based to know how to classify different kernels. Therefore it is necessary to have some system to handle this metadata.

-As previously discussed the underlying container format is a file system. Some limited information could be infered from the filename. For example a .spv extension file is probably SPIR-V blob. For more rich meta data describing a kernel something more is needed. Two possible approaches could be to have a 'manifest' that described the contents of the container. Another approach would to have a file associated with the kernel that describes it's contents.
+As previously discussed the underlying container format is a file system. Some limited information could be inferred from the filename. For example a .spv extension file is probably SPIR-V blob. For more rich meta data describing a kernel something more is needed. Two possible approaches could be to have a 'manifest' that described the contents of the container. Another approach would be to have a file associated with the kernel that describes its contents.

Single Manifest
Pros
diff --git a/docs/repro.md b/docs/repro.md
index 54ad8b9788..4d469fa280 100644
--- a/docs/repro.md
+++ b/docs/repro.md
@@ -3,9 +3,9 @@ Slang Compilation Reproduction

Slang has both API and command line support for reproducing compilations, so called 'repro' functionality.

-One use of the feature is if a compilation fails, or produces an unexpected or wrong result, it provides a simple to use mechanism where the compilation can be repeated or 'reproduced', most often on another machine.
Instead of having to describe all the options, and make sure all of the files that are used are copied, and in such a way that it repeats the result, all that is required is for the compilation to be run on the host machine with repro capture enabled, and then that 'repro' used for a compilation on the test machine. There are also some mechanisms where the contents of the orginal compilation can be altered. +One use of the feature is if a compilation fails, or produces an unexpected or wrong result, it provides a simple to use mechanism where the compilation can be repeated or 'reproduced', most often on another machine. Instead of having to describe all the options, and make sure all of the files that are used are copied, and in such a way that it repeats the result, all that is required is for the compilation to be run on the host machine with repro capture enabled, and then that 'repro' used for a compilation on the test machine. There are also some mechanisms where the contents of the original compilation can be altered. -The actual data saved is the contents of the SlangCompileReqest. Currently no state is saved from the SlangSession. Saving and loading a SlangCompileRequest into a new SlangCompileRequest should provide two SlangCompileRequests with the same state, and with the second compile request having access to all the files contents the original request had directly in memory. +The actual data saved is the contents of the SlangCompileRequest. Currently no state is saved from the SlangSession. Saving and loading a SlangCompileRequest into a new SlangCompileRequest should provide two SlangCompileRequests with the same state, and with the second compile request having access to all the files contents the original request had directly in memory. There are a few command line options @@ -26,9 +26,9 @@ First it is worth just describing what is required to reproduce a compilation. 
M In order to capture a complete repro file typically a compilation has to be attempted. The state before compilation can be recorded (through the API for example), but it may not be enough to repeat a compilation, as files referenced by the compilation would not yet have been accessed. The repro feature records all of these accesses and contents of such files such that compilation can either be completed or at least to the same point as was reached on the host machine. -One of the more subtle issues around reproducing a compilation is around filenames. Using the API, a client can specify source files without names, or multiple files with the same name. If files are loaded via `ISlangFileSystem`, they are typically part of a hiearchical file system. This could mean they are referenced relatively. This means there can be distinct files with the same name but differenciated by directory. The files may not easily be reconstructed back into a similar hieararchical file system - as depending on the include paths (or perhaps other mechanisms) the 'files' and their contents could be arranged in a manner very hard to replicate. To work around this the repro feature does not attempt to replicate a hierarchical file system. Instead it gives every file a unique name based on their original name. If there are multiple files with the same name it will 'uniquify' them by appending an index. Doing so means that the contents of the file system can just be held as a flat collection of files. This is not enough to enable repeating the compilation though, as we now need Slang to know which files to reference when they are requested, as they are now no longer part of a hierarchical file system and their names may have been altered. To achieve this the repro functionality stores off a map of all path requests to their contents (or lack there of). 
Doing so means that the file system still appears to Slang as it did in the original compilation, even with all the files being actually stored using the simpler 'flat' arrangement.
+One of the more subtle issues around reproducing a compilation is around filenames. Using the API, a client can specify source files without names, or multiple files with the same name. If files are loaded via `ISlangFileSystem`, they are typically part of a hierarchical file system. This could mean they are referenced relatively. This means there can be distinct files with the same name but differentiated by directory. The files may not easily be reconstructed back into a similar hierarchical file system - as depending on the include paths (or perhaps other mechanisms) the 'files' and their contents could be arranged in a manner very hard to replicate. To work around this the repro feature does not attempt to replicate a hierarchical file system. Instead it gives every file a unique name based on their original name. If there are multiple files with the same name it will 'uniquify' them by appending an index. Doing so means that the contents of the file system can just be held as a flat collection of files. This is not enough to enable repeating the compilation though, as we now need Slang to know which files to reference when they are requested, as they are now no longer part of a hierarchical file system and their names may have been altered. To achieve this the repro functionality stores off a map of all path requests to their contents (or lack there of). Doing so means that the file system still appears to Slang as it did in the original compilation, even with all the files being actually stored using the simpler 'flat' arrangement.

-This means that when a repro is 'extracted' it does so to a directory which holds the files with their unique 'flat' names.
The name of the directory is the name of the repro file without it's extension, or if it has no extension, with the postfix '-files'. This directory will be referered to from now on as the `repro directory`. +This means that when a repro is 'extracted' it does so to a directory which holds the files with their unique 'flat' names. The name of the directory is the name of the repro file without it's extension, or if it has no extension, with the postfix '-files'. This directory will be referred to from now on as the `repro directory`. When a repro is loaded, before files are loaded from the repro itself, they will first be looked for via their unique names in the `repro directory`. If they are not there the contents of the repro file will be used. If they are there, their contents will be used instead of the contents in the repro. This provides a simple mechanism to be able to alter the source in a repro. The steps more concretely would be... @@ -85,6 +85,6 @@ The function `spExtractRepro` allows for extracting the files used in a request The function `spLoadReproAsFileSystem` creates a file system that can access the contents of the repro with the same paths that were used on the originating system. The ISlangFileSystemExt produced can be set on a request and used for compilation. -Repro files are currently stored in a binary format. This format is sensitive to changes in the API, as well as internal state within a SlangCompileRequest. This means that the functionality can only be guarenteed to work with exactly the same version of Slang on the same version of compiler. In practice things are typically not so draconian, and future versions will aim to provide a more clear slang repro versioning system, and work will be performed to make more generally usable. +Repro files are currently stored in a binary format. This format is sensitive to changes in the API, as well as internal state within a SlangCompileRequest. 
This means that the functionality can only be guaranteed to work with exactly the same version of Slang on the same version of compiler. In practice things are typically not so draconian, and future versions will aim to provide a more clear slang repro versioning system, and work will be performed to make more generally usable. -Finally this version of the repo system does not take into account endianess at all. The system the repro is saved from must have the same endianess as the system loaded on. +Finally this version of the repo system does not take into account endianness at all. The system the repro is saved from must have the same endianness as the system loaded on. diff --git a/docs/stdlib-doc.md b/docs/stdlib-doc.md index 1431fdfb2b..a3b69cbed6 100644 --- a/docs/stdlib-doc.md +++ b/docs/stdlib-doc.md @@ -60418,7 +60418,7 @@ matrix fwidth(matrix x); This function can be applied to scalars, vectors, and matrices of built-in scalar types. - Note: these functions are not curently implemented for Vulkan/SPIR-V output. + Note: these functions are not currently implemented for Vulkan/SPIR-V output. ## Signature diff --git a/docs/stdlib-docgen.md b/docs/stdlib-docgen.md index ceaffd76ea..37c4d30e73 100644 --- a/docs/stdlib-docgen.md +++ b/docs/stdlib-docgen.md @@ -30,14 +30,14 @@ located in `types`, `attributes`, `interfaces` and `global-decls` directory. Note that the `index.md` in root is not generated. -You should review the generated markdown file to make sure it is formated correctly after making comment edits in the +You should review the generated markdown file to make sure it is formatted correctly after making comment edits in the `*.meta.slang` files. ## Writing and Updating Documentation The core module documentation is done directly in comments inside `source/slang/*.meta.slang` files. 
-A documentation comment should be placed directly above the declaration, either insde a `/** */` comment block, or +A documentation comment should be placed directly above the declaration, either inside a `/** */` comment block, or after `///`. The following directives are allowed in comments: - `@param paramName description` documents a parameter or a generic parameter. @@ -45,7 +45,7 @@ after `///`. The following directives are allowed in comments: - `@see` starts the "See also" section. - `@return` starts the `Return value" section. - `@example` starts the "Example" section. -- `@category categoryID Category Name` marks the decl to be in a category. The cateogry name is only required for the first time `categoryID` is used, and omitted for the remaining `@category` lines. +- `@category categoryID Category Name` marks the decl to be in a category. The category name is only required for the first time `categoryID` is used, and omitted for the remaining `@category` lines. - `@internal` marks the declaration as internal. - `@experimental` marks the declaration as experimental. - `@deprecated` marks the declaration as deprecated. @@ -105,7 +105,7 @@ as with `@remarks`. - Include examples if needed in the examples section. - Provide a see also section with links to related declarations. -After updating comments, build `slangc`, and run `slangc -compile-core-module -doc` in `stdlib-reference` diretory to update the markdown files for preview. +After updating comments, build `slangc`, and run `slangc -compile-core-module -doc` in `stdlib-reference` directory to update the markdown files for preview. Your PR only needs to include changes to *.meta.slang files. Once your PR is merged, slang CI will run `slangc` and push the updated markdown files to the `stdlib-reference` repo. @@ -113,7 +113,7 @@ the `stdlib-reference` repo. Use `// @hidden:` to hide all declarations after the line for docgen purpose. Use `// @public: ` to stop hiding all declarations after the line. 
These two special lines act like -C++'s visiblity modifiers: they apply to everything after it. +C++'s visibility modifiers: they apply to everything after it. ## How to preview generated html page locally diff --git a/docs/target-compatibility.md b/docs/target-compatibility.md index e10c244091..d9ccb484ea 100644 --- a/docs/target-compatibility.md +++ b/docs/target-compatibility.md @@ -91,7 +91,7 @@ tex.GetDimensions is the GetDimensions method on 'texture' objects. This is not ## SM6.0 Wave Intrinsics -CUDA has premliminary support for Wave Intrinsics, introduced in [PR #1352](https://github.com/shader-slang/slang/pull/1352). Slang synthesizes the 'WaveMask' based on program flow and the implied 'programmer view' of exectution. This support is built on top of WaveMask intrinsics with Wave Intrinsics being replaced with WaveMask Intrinsic calls with Slang generating the code to calculate the appropriate WaveMasks. +CUDA has premliminary support for Wave Intrinsics, introduced in [PR #1352](https://github.com/shader-slang/slang/pull/1352). Slang synthesizes the 'WaveMask' based on program flow and the implied 'programmer view' of execution. This support is built on top of WaveMask intrinsics with Wave Intrinsics being replaced with WaveMask Intrinsic calls with Slang generating the code to calculate the appropriate WaveMasks. Please read [PR #1352](https://github.com/shader-slang/slang/pull/1352) for a better description of the status. @@ -105,7 +105,7 @@ SM6.5 Wave Intrinsics are supported, but requires a downstream DXC compiler that ## WaveMask Intrinsics -In order to map better to the CUDA sync/mask model Slang supports 'WaveMask' intrinsics. They operate in broadly the same way as the Wave intrinsics, but require the programmer to specify the lanes that are involved. To write code that uses wave intrinsics acrosss targets including CUDA, currently the WaveMask intrinsics must be used. 
For this to work, the masks passed to the WaveMask functions should exactly match the 'Active lanes' concept that HLSL uses, otherwise the result is undefined.
+In order to map better to the CUDA sync/mask model Slang supports 'WaveMask' intrinsics. They operate in broadly the same way as the Wave intrinsics, but require the programmer to specify the lanes that are involved. To write code that uses wave intrinsics across targets including CUDA, currently the WaveMask intrinsics must be used. For this to work, the masks passed to the WaveMask functions should exactly match the 'Active lanes' concept that HLSL uses, otherwise the result is undefined.

The WaveMask intrinsics are not part of HLSL and are only available on Slang.

@@ -243,9 +243,9 @@ On the CPU atomics are not supported, but will be in the future.

The HLSL [sampler feedback feature](https://microsoft.github.io/DirectX-Specs/d3d/SamplerFeedback.html) is available for DirectX12. The features requires shader model 6.5 and therefore a version of [DXC](https://github.com/Microsoft/DirectXShaderCompiler) that supports that model or higher. The Shader Model 6.5 requirement also means only DXIL binary format is supported.

-There doesn't not appear to be a similar feature available in Vulkan yet, but when it is available support should be addeed.
+There does not appear to be a similar feature available in Vulkan yet, but when it is available support should be added.

-For CPU targets there is the IFeedbackTexture interface that requires an implemention for use. Slang does not currently include CPU implementations for texture types.
+For CPU targets there is the IFeedbackTexture interface that requires an implementation for use. Slang does not currently include CPU implementations for texture types.

diff --git a/docs/update_spirv.md b/docs/update_spirv.md
index 4410680c51..19dd474164 100644
--- a/docs/update_spirv.md
+++ b/docs/update_spirv.md
@@ -95,7 +95,7 @@ cmake.exe --build build --config Release
## Copy the generated files from `spirv-tools` to `spirv-tools-generated`

Copy some of generated files from `external/spirv-tools/build/` to `external/spirv-tools-generated/`.
-The following files are ones you need to copy at the momment, but the list may change in the future.
+The following files are ones you need to copy at the moment, but the list may change in the future.
```
DebugInfo.h
NonSemanticShaderDebugInfo100.h
@@ -166,7 +166,7 @@ git submodule update --init --recursive
```

You need to stage and commit the latest commit IDs of spirv-tools and spirv-headers.
-Note that when you want to use a new commit IDs of the submodules, you have to stage with git-add command for the directy of the submodule itself.
+Note that when you want to use a new commit IDs of the submodules, you have to stage with git-add command for the directory of the submodule itself.
```
cd external
diff --git a/docs/user-guide/00-introduction.md b/docs/user-guide/00-introduction.md
index 6bc64dd364..023256b9c0 100644
--- a/docs/user-guide/00-introduction.md
+++ b/docs/user-guide/00-introduction.md
@@ -66,7 +66,7 @@ Who is this guide for?
----------------------

The content of this guide is written for real-time graphics programmers with a moderate or higher experience level.
-It assumes the reader has previously used a real-time shading langauge like HLSL, GLSL, or MetalSL together with an API like Direct3D 11/12, Vulkan, or Metal.
+It assumes the reader has previously used a real-time shading language like HLSL, GLSL, or MetalSL together with an API like Direct3D 11/12, Vulkan, or Metal.
We also assume that the reader is familiar enough with C/C++ to understand code examples and API signatures in those languages.
If you are new to programming entirely, this guide is unlikely to be helpful. diff --git a/docs/user-guide/02-conventional-features.md b/docs/user-guide/02-conventional-features.md index 9e2743aa3e..6973cbef1f 100644 --- a/docs/user-guide/02-conventional-features.md +++ b/docs/user-guide/02-conventional-features.md @@ -123,7 +123,7 @@ void f( int b[] ) It is allowed to pass a sized array as argument to an unsized array parameter when calling a function. -Array types has a `getCount()` memeber function that returns the length of the array. +Array types has a `getCount()` member function that returns the length of the array. ```hlsl int f( int b[] ) @@ -976,7 +976,7 @@ int a[2] = {1, 2} #### Array Of Aggregate's ```csharp -// Equivlent to `float3 a[2]; a[0] = {1,2,3}; b[1] = {4,5,6};` +// Equivalent to `float3 a[2]; a[0] = {1,2,3}; b[1] = {4,5,6};` float3 a[2] = { {1,2,3}, {4,5,6} }; ``` #### Flattened Array Initializer @@ -1050,7 +1050,7 @@ struct GenerateCtor1 : GenerateCtorInner1 GenerateCtor1 val[2] = { { 3 }, { 2 } }; ``` -In addition, Slang also provides compatbility support for C-style initializer lists with `struct`s. C-style initializer lists can use [Partial Initializer List's](#Partial-Initializer-List's) and [Flattened Array Initializer With Struct's](#Flattened-Array-Initializer-With-Struct) +In addition, Slang also provides compatibility support for C-style initializer lists with `struct`s. C-style initializer lists can use [Partial Initializer List's](#Partial-Initializer-List's) and [Flattened Array Initializer With Struct's](#Flattened-Array-Initializer-With-Struct) A struct is considered a C-style struct if: 1. User never defines a custom constructor with **more than** 0 parameters @@ -1109,7 +1109,7 @@ float3 val2 = {}; #### Struct Type -1. Atempt to call default constructor (`__init()`) of a `struct` +1. 
Attempt to call default constructor (`__init()`) of a `struct` ```csharp diff --git a/docs/user-guide/03-convenience-features.md b/docs/user-guide/03-convenience-features.md index 65357ce1a8..aeec8eb280 100644 --- a/docs/user-guide/03-convenience-features.md +++ b/docs/user-guide/03-convenience-features.md @@ -672,7 +672,7 @@ struct MaxValueAttribute uniform int scaleFactor; ``` -In the above code, the `MaxValueAttribute` struct type is decorated with the `[__AttributeUsage]` attribute, which informs that `MaxValueAttribute` type should be interpreted as a definiton for a user-defined attribute, `[MaxValue]`, that can be used to decorate all variables or fields. The members of the struct defines the argument list for the attribute. +In the above code, the `MaxValueAttribute` struct type is decorated with the `[__AttributeUsage]` attribute, which informs that `MaxValueAttribute` type should be interpreted as a definition for a user-defined attribute, `[MaxValue]`, that can be used to decorate all variables or fields. The members of the struct defines the argument list for the attribute. The `scaleFactor` uniform parameter is declared with the user defined `[MaxValue]` attribute, providing two arguments for `value` and `description`. diff --git a/docs/user-guide/05-capabilities.md b/docs/user-guide/05-capabilities.md index 8f377f5a47..6426cb37c7 100644 --- a/docs/user-guide/05-capabilities.md +++ b/docs/user-guide/05-capabilities.md @@ -92,7 +92,7 @@ public void myFunc() } ``` -## Inferrence of Capability Requirements +## Inference of Capability Requirements By default, Slang will infer the capability requirements of a function given its definition, as long as the function has `internal` or `private` visibility. For example, given: ```csharp @@ -110,7 +110,7 @@ Slang will automatically deduce that `myFunc` has capability ``` Since `discard` statement requires capability `fragment`. 
-## Inferrence on target_switch +## Inference on target_switch A `__target_switch` statement will introduce disjunctions in its inferred capability requirement. For example: ```csharp diff --git a/docs/user-guide/06-interfaces-generics.md b/docs/user-guide/06-interfaces-generics.md index 3d35d2bf52..7a42484420 100644 --- a/docs/user-guide/06-interfaces-generics.md +++ b/docs/user-guide/06-interfaces-generics.md @@ -464,7 +464,7 @@ struct ArrayFloatContainer ``` Because C++ does not require a template function to define _constraints_ on the templated type, there are no interfaces or inheritances involved in the definition of `ArrayFloatContainer`. However `ArrayFloatContainer` still needs to define what its `Iterator` type is, so the `sum` function can be successfully specialized with an `ArrayFloatContainer`. -Note that the biggest difference between C++ templates and generics is that templates are not type-checked prior to specialization, and therefore the code that consumes a templated type (`TContainer` in this example) can simply assume `container` has a method named `getElementAt`, and the `TContainer` scope provides a type definition for `TContainer::Iterator`. Compiler error only arises when the programmer is attempting to specialize the `sum` function with a type that does not meet these assumptions. Contrarily, Slang requires all possible uses of a generic type be declared through an interface. By stating that `TContainer:IContainer` in the generics declaration, the Slang compiler can verify that `container.getElementAt` is calling a valid function. Similarily, the interface also tells the compiler that `TContainer.Iterator` is a valid type and enables the compiler to fully type check the `sum` function without specializing it first. 
+Note that the biggest difference between C++ templates and generics is that templates are not type-checked prior to specialization, and therefore the code that consumes a templated type (`TContainer` in this example) can simply assume `container` has a method named `getElementAt`, and the `TContainer` scope provides a type definition for `TContainer::Iterator`. Compiler error only arises when the programmer is attempting to specialize the `sum` function with a type that does not meet these assumptions. Contrarily, Slang requires all possible uses of a generic type be declared through an interface. By stating that `TContainer:IContainer` in the generics declaration, the Slang compiler can verify that `container.getElementAt` is calling a valid function. Similarly, the interface also tells the compiler that `TContainer.Iterator` is a valid type and enables the compiler to fully type check the `sum` function without specializing it first. ### Similarity to Swift and Rust diff --git a/docs/user-guide/07-autodiff.md b/docs/user-guide/07-autodiff.md index 9fc5205e71..0664d2499c 100644 --- a/docs/user-guide/07-autodiff.md +++ b/docs/user-guide/07-autodiff.md @@ -501,7 +501,7 @@ The following built-in functions are backward differentiable and both their forw ## Primal Substitute Functions -Sometimes it is desirable to replace a function with another when generating forward or backward derivative propagation code. For example, the following code shows a function that computes the integral of some term by sampling and we want to use a different sampling stragegy when computing the derivatives. +Sometimes it is desirable to replace a function with another when generating forward or backward derivative propagation code. For example, the following code shows a function that computes the integral of some term by sampling and we want to use a different sampling strategy when computing the derivatives. 
```csharp float myTerm(float x) { @@ -535,7 +535,7 @@ float getSampleForDerivativeComputation(float a, float b) Here, the `[PrimalSubstituteOf(getSample)]` attributes marks the `getSampleForDerivativeComputation` function as the substitute for `getSample` in derivative propagation functions. When a function has a primal substitute, the compiler will treat all calls to that function as if it is a call to the substitute function when generating derivative code. Note that this only applies to compiler generated derivative function and does not affect user provided derivative functions. If a user provided derivative function calls `getSample`, it will not be replaced by `getSampleForDerivativeComputation` by the compiler. -Similar to `[ForwardDerivative]` and `[ForwardDerivativeOf]` attributes, The `[PrimalSubsitute(substFunc)]` attribute works the other way around: it specifies the primal substitute function of the function being marked. +Similar to `[ForwardDerivative]` and `[ForwardDerivativeOf]` attributes, The `[PrimalSubstitute(substFunc)]` attribute works the other way around: it specifies the primal substitute function of the function being marked. Primal substitute can be used as another way to make a function differentiable. A function is considered differentiable if it has a primal substitute that is differentiable. The following code illustrates this mechanism. ```csharp diff --git a/docs/user-guide/08-compiling.md b/docs/user-guide/08-compiling.md index 23a33d9147..765dba5a5a 100644 --- a/docs/user-guide/08-compiling.md +++ b/docs/user-guide/08-compiling.md @@ -956,7 +956,7 @@ meanings of their `CompilerOptionValue` encodings. | VulkanUseGLLayout | When set, will use std430 layout instead of D3D buffer layout for raw buffer load/stores. `intValue0` specifies a bool value for the setting. | | VulkanEmitReflection | Specifies the `-fspv-reflect` option. When set will include additional reflection instructions in the output SPIRV. 
`intValue0` specifies a bool value for the setting. | | GLSLForceScalarLayout | Specifies the `-force-glsl-scalar-layout` option. When set will use `scalar` layout for all buffers when generating SPIRV. `intValue0` specifies a bool value for the setting. | -| EnableEffectAnnotations | When set will turn on compatibilty mode to parse legacy HLSL effect annoation syntax. `intValue0` specifies a bool value for the setting. | +| EnableEffectAnnotations | When set will turn on compatibility mode to parse legacy HLSL effect annotation syntax. `intValue0` specifies a bool value for the setting. | | EmitSpirvViaGLSL | When set will emit SPIRV by emitting GLSL first and then use glslang to produce the final SPIRV code. `intValue0` specifies a bool value for the setting. | | EmitSpirvDirectly | When set will use Slang's direct-to-SPIRV backend to generate SPIRV directly from Slang IR. `intValue0` specifies a bool value for the setting. | | SPIRVCoreGrammarJSON | When set will use the provided SPIRV grammar file to parse SPIRV assembly blocks. `stringValue0` specifies a path to the spirv core grammar json file. | diff --git a/docs/user-guide/09-reflection.md b/docs/user-guide/09-reflection.md index ff874fb5fd..4e810fd1f2 100644 --- a/docs/user-guide/09-reflection.md +++ b/docs/user-guide/09-reflection.md @@ -137,7 +137,7 @@ If you have a type layout with kind `Array` you can query information about the ```c++ size_t arrayElementCount = typeLayout->getElementCount(); slang::TypeLayoutReflection* elementTypeLayout = typeLayout->getElementTypeLayout(); -sie_t arrayElementStride = typeLayout->getElementStride(category); +size_t arrayElementStride = typeLayout->getElementStride(category); ``` An array of unknown size will currently report zero elements. 
@@ -196,7 +196,7 @@ In the case of a compute shader entry point, you can also query the user-specifi ```c++ SlangUInt threadGroupSize[3]; -entryPoint->getComputeThreadGruopSize(3, &threadGroupSize[0]); +entryPoint->getComputeThreadGroupSize(3, &threadGroupSize[0]); ``` ## Function Reflection diff --git a/docs/user-guide/09-targets.md b/docs/user-guide/09-targets.md index f3f146625b..4f25aa6000 100644 --- a/docs/user-guide/09-targets.md +++ b/docs/user-guide/09-targets.md @@ -129,9 +129,9 @@ The D3D11 rasterization pipeline can include up to five programmable stages, alt Shader parameters are passed to each D3D11 stage via slots. Each stage has its own slots of the following types: -- **Constant buffers** are used for passing relatively small (4KB or less) amounts of data that will be read by GPU code. Constant bufers are passed via `b` registers. +- **Constant buffers** are used for passing relatively small (4KB or less) amounts of data that will be read by GPU code. Constant buffers are passed via `b` registers. -- **Shader resource views** (SRVs) include most textures, buffers, and other opaque resource types thare are read or sampled by GPU code. SRVs use `t` registers. +- **Shader resource views** (SRVs) include most textures, buffers, and other opaque resource types there are read or sampled by GPU code. SRVs use `t` registers. - **Unordered access views** (UAVs) include textures, buffers, and other opaque resource types used for write or read-write operations in GPU code. UAVs use `u` registers. @@ -221,7 +221,7 @@ A D3D12 pipeline layout can specify that certain root parameters or certain slot The D3D12 ray tracing pipeline adds a new mechanism for passing shader parameters. In addition to allowing shader parameters to be passed to the entire pipeline via root parameters, each shader table entry provides storage space for passing argument data specific to that entry. 
-Similar to the use of a pipline layout (root signature) to configure the use of root parameters, each kernel used within shader entries must be configured with a "local root signature" that defines how the storage space in the shader table entry is to be used. +Similar to the use of a pipeline layout (root signature) to configure the use of root parameters, each kernel used within shader entries must be configured with a "local root signature" that defines how the storage space in the shader table entry is to be used. Shader parameters are still bound to registers and spaces as for non-ray-tracing code, and the local root signature simply allows those same registers/spaces to be associated with locations in a shader table entry. One important detail is that some shader table entries are associated with a kernel for a single stage (e.g., a single miss shader), while other shader table entries are associated with a "hit group" consisting of up to one each of an intersection, any-hit, and closest-hit kernel. diff --git a/docs/user-guide/10-link-time-specialization.md b/docs/user-guide/10-link-time-specialization.md index 4541a6f552..14b10c63e1 100644 --- a/docs/user-guide/10-link-time-specialization.md +++ b/docs/user-guide/10-link-time-specialization.md @@ -5,7 +5,7 @@ permalink: /user-guide/link-time-specialization # Link-time Specialization and Module Precompilation -Traditionally, graphics developers have been relying on the preprocesor defines to specialize their shader code for high-performance GPU execution. +Traditionally, graphics developers have been relying on the preprocessor defines to specialize their shader code for high-performance GPU execution. While functioning systems can be built around preprocessor macros, overusing them leads to many problems: - Long compilation time. With preprocessors defines, specialzation happens before parsing, which is a very early stage in the compilation flow. 
This means that the compiler must redo almost all work from the scratch with every specialized variant, including parsing, type checking, IR generation diff --git a/docs/user-guide/a1-01-matrix-layout.md b/docs/user-guide/a1-01-matrix-layout.md index 3a9921db88..cb301ce8f5 100644 --- a/docs/user-guide/a1-01-matrix-layout.md +++ b/docs/user-guide/a1-01-matrix-layout.md @@ -24,14 +24,14 @@ Two conventions of matrix transform math Depending on the platform a developer is used to, a matrix-vector transform can be expressed as either `v*m` (`mul(v, m)` in HLSL), or `m*v` (`mul(m,v)` in HLSL). This convention, together with the matrix layout (column-major or row-major), determines how a matrix should be filled out in host code. -In HLSL/Slang the order of vector and matrix parameters to `mul` determine how the *vector* is interpretted. This interpretation is required because a vector does not in as of it's self differentiate between being a row or a column. +In HLSL/Slang the order of vector and matrix parameters to `mul` determine how the *vector* is interpreted. This interpretation is required because a vector does not in as of it's self differentiate between being a row or a column. -* `mul(v, m)` - v is interpretted as a row vector -* `mul(m, v)` - v is interpretted as a column vector. +* `mul(v, m)` - v is interpreted as a row vector. +* `mul(m, v)` - v is interpreted as a column vector. Through this mechanism a developer is able to write transforms in their preferred style. -These two styles are not directly interchangable - for a given `v` and `m` then generally `mul(v, m) != mul(m, v)`. For that the matrix needs to be transposed so +These two styles are not directly interchangeable - for a given `v` and `m` then generally `mul(v, m) != mul(m, v)`. For that the matrix needs to be transposed so * `mul(v, m) == mul(transpose(m), v)` * `mul(m, v) == mul(v, transpose(m))` @@ -42,7 +42,7 @@ This behavior is *independent* of how a matrix layout in memory. 
Host code needs Another way to think about this difference is in terms of where translation terms should be placed in memory when filling a typical 4x4 transform matrix. When transforming a row vector (ie `mul(v, m)`) with a `row-major` matrix layout, translation will be at `m + 12, 13, 14`. For a `column-major` matrix layout, translation will be at `m + 3, 7, 11`. -Note it is a *HLSL*/*Slang* convention that the parameter ordering of `mul(v, m)` means v is a *row* vector. A host maths library *could* have a transform function `SomeLib::transform(v, m)` such that `v` is a interpretted as *column* vector. For simplicitys sake the remainder of this discussion assumes that the `mul(v, m)` in equivalent in host code follows the interpretation that `v` is *row* vector. +Note it is a *HLSL*/*Slang* convention that the parameter ordering of `mul(v, m)` means v is a *row* vector. A host maths library *could* have a transform function `SomeLib::transform(v, m)` such that `v` is interpreted as a *column* vector. For simplicity's sake the remainder of this discussion assumes that the `mul(v, m)` equivalent in host code follows the interpretation that `v` is *row* vector. Discussion ---------- @@ -64,7 +64,7 @@ If we accept 2, then there are only two possible combinations - either both host This is simple, but is perhaps not the end of the story. First lets assume that we want our Slang code to be as portable as possible. As previously discussed for CUDA and C++/CPU targets Slang ignores the matrix layout settings - the matrix layout is *always* `row-major`. -Second lets consider performance. The matrix layout in a host maths libray is not arbitrary from a performance point of view. A performant host maths library will want to use SIMD instructions. With both x86/x64 SSE and ARM NEON SIMD it makes a performance difference which layout is used, depending on if `column` or `row` is the *prefered* vector interpretation. 
If the `row` vector interpretation is prefered, it is most performant to have `row-major` matrix layout. Conversely if `column` vector interpretation is prefered `column-major` matrix will be the most performant. +Second lets consider performance. The matrix layout in a host maths library is not arbitrary from a performance point of view. A performant host maths library will want to use SIMD instructions. With both x86/x64 SSE and ARM NEON SIMD it makes a performance difference which layout is used, depending on if `column` or `row` is the *preferred* vector interpretation. If the `row` vector interpretation is preferred, it is most performant to have `row-major` matrix layout. Conversely if `column` vector interpretation is preferred `column-major` matrix will be the most performant. The performance difference comes down to a SIMD implementation having to do a transpose if the layout doesn't match the preferred vector interpretation. @@ -78,7 +78,7 @@ The only combination that fulfills all aspects is `row-major` matrix layout and It's worth noting that for targets that honor the default matrix layout - that setting can act like a toggle transposing a matrix layout. If for some reason the combination of choices leads to inconsistent vector transforms, an implementation can perform this transform in *host* code at the boundary between host and the kernel. This is not the most performant or convenient scenario, but if supported in an implementation it could be used for targets that do not support kernel matrix layout settings. 
-If only targetting platforms that honor matrix layout, there is more flexibility, our constraints are +If only targeting platforms that honor matrix layout, there is more flexibility, our constraints are 1) Consistency : Same vector interpretation in shader and host code 2) Performance: Host vector interpretation should match host matrix layout @@ -88,7 +88,7 @@ Then there are two combinations that work 1) `row-major` matrix layout for host and kernel, and `row` vector interpretation. 2) `column-major` matrix layout for host and kernel, and `column` vector interpretation. -If the host maths library is not performance orientated, it may be arbitray from a performance point of view if a `row` or `column` vector interpretation is used. In that case assuming shader and host vector interpretation is the same it is only important that the kernel and maths library matrix layout match. +If the host maths library is not performance orientated, it may be arbitrary from a performance point of view if a `row` or `column` vector interpretation is used. In that case assuming shader and host vector interpretation is the same it is only important that the kernel and maths library matrix layout match. Another way of thinking about these combinations is to think of each change in `row-major`/`column-major` matrix layout and `row`/`column` vector interpretation is a transpose. If there are an *even* number of flips then all the transposes cancel out. Therefore the following combinations work diff --git a/docs/user-guide/a1-02-slangpy.md b/docs/user-guide/a1-02-slangpy.md index f00ca0a14b..8a98285574 100644 --- a/docs/user-guide/a1-02-slangpy.md +++ b/docs/user-guide/a1-02-slangpy.md @@ -308,7 +308,7 @@ float computeOutputPixel(TensorView input, uint2 pixelLoc) } } - // Comptue the average value. + // Compute the average value. sumValue /= count; return sumValue; @@ -390,7 +390,7 @@ float computeOutputPixel( } } - // Comptue the average value. + // Compute the average value. 
sumValue /= count; return sumValue; @@ -494,7 +494,7 @@ TorchTensor square(TorchTensor input) ``` Here, we mark the function with the `[TorchEntryPoint]` attribute, so it will be compiled to C++ and exported as a python callable. -Since this is a host function, we can perform tensor allocations. For instnace, `square()` calls `TorchTensor.zerosLike` to allocate a 2D-tensor that has the same size as the input. +Since this is a host function, we can perform tensor allocations. For instance, `square()` calls `TorchTensor.zerosLike` to allocate a 2D-tensor that has the same size as the input. `zerosLike` returns a `TorchTensor` object that represents a CPU handle of a PyTorch tensor. Then we launch `square_kernel` with the `__dispatch_kernel` syntax. Note that we can directly pass @@ -729,7 +729,7 @@ Marks a function for export to Python. Functions marked with `[TorchEntryPoint]` Marks a function as a CUDA device function, and ensures the compiler to include it in the generated CUDA source. #### `[AutoPyBindCUDA]` attribute -Markes a cuda kernel for automatic binding generation so that it may be invoked from python without having to hand-code the torch entry point. The marked function **must** also be marked with `[CudaKernel]`. If the marked function is also marked with `[Differentiable]`, this will also generate bindings for the derivative methods. +Marks a cuda kernel for automatic binding generation so that it may be invoked from python without having to hand-code the torch entry point. The marked function **must** also be marked with `[CudaKernel]`. If the marked function is also marked with `[Differentiable]`, this will also generate bindings for the derivative methods. 
Restriction: methods marked with `[AutoPyBindCUDA]` will not operate diff --git a/docs/user-guide/a1-05-uniformity.md b/docs/user-guide/a1-05-uniformity.md index 630dfb8024..be07f89c01 100644 --- a/docs/user-guide/a1-05-uniformity.md +++ b/docs/user-guide/a1-05-uniformity.md @@ -5,13 +5,13 @@ layout: user-guide Uniformity Analysis =========== -On certain hardwares, accessing resources with a non-uniform index may lead to significant performance degradation. Developers can often benefit from a compiler warning for unintentional non-uniform resource access. +On certain hardware, accessing resources with a non-uniform index may lead to significant performance degradation. Developers can often benefit from a compiler warning for unintentional non-uniform resource access. Starting from v2024.1.0, Slang provides uniformity analysis that can warn users if a non-dynamically-uniform value is being used unintentionally. This feature is not enabled by default but can be turned on with the `-validate-uniformity` commandline option when using `slangc`, or the `CompilerOptionName::ValidateUniformity` compiler option when using the API. In addition to specifying the compiler option, the source code must be augmented with the `dynamic_uniform` modifier to mark function parameters, struct fields or local variables as expecting a dynamic uniform value. -For example, the following code will triger a warning: +For example, the following code will trigger a warning: ```csharp // Indicate that the `v` parameter needs to be dynamic uniform. float f(dynamic_uniform float v) @@ -101,4 +101,4 @@ void main() { expectUniform(f()); // Warning. 
} -``` \ No newline at end of file +``` diff --git a/docs/user-guide/a2-01-spirv-target-specific.md b/docs/user-guide/a2-01-spirv-target-specific.md index e96b81162b..048318a09a 100644 --- a/docs/user-guide/a2-01-spirv-target-specific.md +++ b/docs/user-guide/a2-01-spirv-target-specific.md @@ -60,7 +60,7 @@ The system-value semantics are translated to the following SPIR-V code. | `SV_SampleIndex` | `BuiltIn SampleId` | | `SV_ShadingRate` | `BuiltIn PrimitiveShadingRateKHR` | | `SV_StartVertexLocation` | `*Not supported* | -| `SV_StartInstanceLocation` | `*Not suported* | +| `SV_StartInstanceLocation` | `*Not supported* | | `SV_StencilRef` | `BuiltIn FragStencilRefEXT` | | `SV_Target` | `Location` | | `SV_TessFactor` | `BuiltIn TessLevelOuter` | @@ -139,7 +139,7 @@ StructuredBuffer and ByteAddressBuffer are translated to a shader storage buffer RWStructuredBuffer and RWByteAddressBuffer are translated to a shader storage buffer with `read-write` access. RasterizerOrderedStructuredBuffer and RasterizerOrderedByteAddressBuffer will use an extension, `SPV_EXT_fragment_shader_interlock`. -If you need to apply a different buffer layout for indivisual `ConstantBuffer` or `StructuredBuffer`, you can specify the layout as a second generic argument. E.g., `ConstantBuffer`, `StructuredBuffer`, `StructuredBuffer` or `StructuredBuffer`. +If you need to apply a different buffer layout for individual `ConstantBuffer` or `StructuredBuffer`, you can specify the layout as a second generic argument. E.g., `ConstantBuffer`, `StructuredBuffer`, `StructuredBuffer` or `StructuredBuffer`. Note that there are compiler options, "-fvk-use-scalar-layout" / "-force-glsl-scalar-layout" and "-fvk-use-dx-layout". These options do the same but they are applied globally. 
@@ -153,7 +153,7 @@ In contrast to `ConstantBuffer`, a `ParameterBlock` introduces a new descript `ParameterBlock` is designed specifically for D3D12/Vulkan/Metal/WebGPU, so that parameters defined in `T` can be placed into an independent descriptor table/descriptor set/argument buffer/binding group. -For example, when targeting Vulkan, when a ParameterBlock doesn't contain nested parameter block fields, it will always map to a single descriptor set, with a dedicated set number and every resources is placed into the set with binding index starting from 0. This allows the user application to create and pre-populate the descriptor set and reuse it during command encoding, without explicilty specifying the binding index for each individual parameter. +For example, when targeting Vulkan, when a ParameterBlock doesn't contain nested parameter block fields, it will always map to a single descriptor set, with a dedicated set number and every resource is placed into the set with binding index starting from 0. This allows the user application to create and pre-populate the descriptor set and reuse it during command encoding, without explicitly specifying the binding index for each individual parameter. When both ordinary data fields and resource typed fields exist in a parameter block, all ordinary data fields will be grouped together into a uniform buffer and appear as a binding 0 of the resulting descriptor set. @@ -237,7 +237,7 @@ To generate a valid SPIR-V with multiple entry points, use `-fvk-use-entrypoint- Global memory pointers ------------------------------ -Slang supports global memory pointers when targetting SPIRV. See [an example and explanation](convenience-features.html#pointers-limited). +Slang supports global memory pointers when targeting SPIRV. See [an example and explanation](convenience-features.html#pointers-limited). `float4*` in user code will be translated to a pointer in PhysicalStorageBuffer storage class in SPIRV. 
When a slang module uses a pointer type, the resulting SPIRV will be using the SpvAddressingModelPhysicalStorageBuffer64 addressing mode. Modules without use of pointers will use SpvAddressingModelLogical addressing mode. diff --git a/docs/user-guide/a2-03-wgsl-target-specific.md b/docs/user-guide/a2-03-wgsl-target-specific.md index 7d1ec87f6f..75a2a6da32 100644 --- a/docs/user-guide/a2-03-wgsl-target-specific.md +++ b/docs/user-guide/a2-03-wgsl-target-specific.md @@ -43,7 +43,7 @@ The system-value semantics are translated to the following WGSL code. | SV_SampleIndex | `@builtin(sample_index)` | | SV_ShadingRate | *Not supported* | | SV_StartVertexLocation | *Not supported* | -| SV_StartInstanceLocation | *Not suported* | +| SV_StartInstanceLocation | *Not supported* | | SV_StencilRef | *Not supported* | | SV_Target | *Not supported* | | SV_TessFactor | *Not supported* | diff --git a/docs/user-guide/a3-02-reference-capability-atoms.md b/docs/user-guide/a3-02-reference-capability-atoms.md index 9f673eaf8e..e7a9b1bb41 100644 --- a/docs/user-guide/a3-02-reference-capability-atoms.md +++ b/docs/user-guide/a3-02-reference-capability-atoms.md @@ -1046,7 +1046,7 @@ Compound Capabilities > Capabilities required to use GLSL-style subgroup operations 'subgroup_shuffle' `subgroup_shufflerelative` -> Capabilities required to use GLSL-style subgroup operations 'subgroup_shufle_relative' +> Capabilities required to use GLSL-style subgroup operations 'subgroup_shuffle_relative' `subgroup_clustered` > Capabilities required to use GLSL-style subgroup operations 'subgroup_clustered' diff --git a/docs/user-guide/toc.html b/docs/user-guide/toc.html index 6566919c15..7ab3b5b91c 100644 --- a/docs/user-guide/toc.html +++ b/docs/user-guide/toc.html @@ -65,8 +65,8 @@
  • Capability Atoms and Capability Requirements
  • Conflicting Capabilities
  • Requirements in Parent Scope
  • -
  • Inferrence of Capability Requirements
  • -
  • Inferrence on target_switch
  • +
  • Inference of Capability Requirements
  • +
  • Inference on target_switch
  • Capability Aliases
  • Validation of Capability Requirements
  • diff --git a/docs/wave-intrinsics.md b/docs/wave-intrinsics.md index 640158f79b..aa46f72a1e 100644 --- a/docs/wave-intrinsics.md +++ b/docs/wave-intrinsics.md @@ -31,7 +31,7 @@ Using WaveMask intrinsics is generally more verbose and prone to error than the * Might allow for higher performance (for example it gives more control of divergence) * Maps most closely to CUDA -On D3D12 and Vulkan the WaveMask instrinsics can be used, but the mask is effectively ignored. For this to work across targets including CUDA, the mask must be calculated such that it exactly matches that of HLSL defined 'active' lanes, else the behavior is undefined. +On D3D12 and Vulkan the WaveMask intrinsics can be used, but the mask is effectively ignored. For this to work across targets including CUDA, the mask must be calculated such that it exactly matches that of HLSL defined 'active' lanes, else the behavior is undefined. The WaveMask intrinsics are a non standard Slang feature, and may change in the future. @@ -41,7 +41,7 @@ RWStructuredBuffer outputBuffer; [numthreads(4, 1, 1)] void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { - // It is the programmers responsibility to determine the inital mask, and that is dependent on the launch + // It is the programmers responsibility to determine the initial mask, and that is dependent on the launch // It's common to launch such that all lanes are active - with CUDA this would mean 32 lanes. // Here the launch only has 4 lanes active, and so the initial mask is 0xf. const WaveMask mask0 = 0xf; @@ -212,7 +212,7 @@ T WaveBroadcastLaneAt(T value, constexpr int lane); ``` All lanes receive the value specified in lane. Lane must be an active lane, otherwise the result is undefined. -This is a more restricive version of `WaveReadLaneAt` - which can take a non constexpr lane, *but* must be the same value for all lanes in the warp. Or 'dynamically uniform' as described in the HLSL documentation. 
+This is a more restrictive version of `WaveReadLaneAt` - which can take a non constexpr lane, *but* must be the same value for all lanes in the warp. Or 'dynamically uniform' as described in the HLSL documentation. ``` T WaveShuffle(T value, int lane); @@ -220,7 +220,7 @@ T WaveShuffle(T value, int lane); Shuffle is a less restrictive version of `WaveReadLaneAt` in that it has no restriction on the lane value - it does *not* require the value to be same on all lanes. -There isn't explicit support for WaveShuffle in HLSL, and for now it will emit `WaveReadLaneAt`. As it turns out for a sizable set of hardware WaveReadLaneAt does work correctly when the lane is not 'dynamically uniform'. This is not necessarily the case for hardware general though, so if targetting HLSL it is important to make sure that this does work correctly on your target hardware. +There isn't explicit support for WaveShuffle in HLSL, and for now it will emit `WaveReadLaneAt`. As it turns out for a sizable set of hardware WaveReadLaneAt does work correctly when the lane is not 'dynamically uniform'. This is not necessarily the case for hardware general though, so if targeting HLSL it is important to make sure that this does work correctly on your target hardware. Our intention is that Slang will support the appropriate HLSL mechanism that makes this work on all hardware when it's available. 
@@ -338,5 +338,3 @@ T WaveMaskReadLaneAt(WaveMask mask, T value, int lane); T WaveMaskShuffle(WaveMask mask, T value, int lane); ``` - - \ No newline at end of file From 55bea7d72c7efe694cb8afb13f6219c11df9c911 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 Nov 2024 23:02:43 -0800 Subject: [PATCH 09/13] Fix markdown mistakes on user guide (#5672) * Fix markdown typos * Fix indentation * Fix indentation 2 * Fix indentation 3 * Fix indentation more --------- Co-authored-by: Ellie Hermaszewska --- docs/user-guide/02-conventional-features.md | 319 ++++++++++---------- 1 file changed, 162 insertions(+), 157 deletions(-) diff --git a/docs/user-guide/02-conventional-features.md b/docs/user-guide/02-conventional-features.md index 6973cbef1f..58554928ea 100644 --- a/docs/user-guide/02-conventional-features.md +++ b/docs/user-guide/02-conventional-features.md @@ -801,143 +801,147 @@ Auto-Generated Constructors ### Auto-Generated Constructors - Struct Slang has the following rules: -1. Auto-generate a `__init()` if not already defined -> Assume -```csharp -struct DontGenerateCtor -{ - int a; - int b = 5; - - // Since the user has explicitly defined a constructor - // here, Slang will not synthesize a conflicting - // constructor. - __init() - { - // b = 5; - a = 5; - b = 6; - } -}; - -struct GenerateCtor -{ - int a; - int b = 5; - - // Slang will automatically generate an implicit constructor: - // __init() - // { - // b = 5; - // } -}; -``` +1. Auto-generate a `__init()` if not already defined. + + Assume: + ```csharp + struct DontGenerateCtor + { + int a; + int b = 5; + + // Since the user has explicitly defined a constructor + // here, Slang will not synthesize a conflicting + // constructor. + __init() + { + // b = 5; + a = 5; + b = 6; + } + }; + + struct GenerateCtor + { + int a; + int b = 5; + + // Slang will automatically generate an implicit constructor: + // __init() + // { + // b = 5; + // } + }; + ``` 2. 
If all members have equal visibility, auto-generate a 'member-wise constructor' if not conflicting with a user defined constructor. -```csharp -struct GenerateCtorInner -{ - int a; - - // Slang will automatically generate an implicit - // __init(int in_a) - // { - // a = in_a; - // } -}; -struct GenerateCtor : GenerateCtorInner -{ - int b; - int c = 5; + ```csharp + struct GenerateCtorInner + { + int a; + + // Slang will automatically generate an implicit + // __init(int in_a) + // { + // a = in_a; + // } + }; + struct GenerateCtor : GenerateCtorInner + { + int b; + int c = 5; + + // Slang will automatically generate an implicit + // __init(int in_a, int in_b, int in_c) + // { + // c = 5; + // + // this = GenerateCtorInner(in_a); + // + // b = in_b; + // c = in_c; + // } + }; + ``` - // Slang will automatically generate an implicit - // __init(int in_a, int in_b, int in_c) - // { - // c = 5; - // - // this = GenerateCtorInner(in_a); - // - // b = in_b; - // c = in_c; - // } -}; -``` 3. If not all members have equal visibility, auto-generate a 'member-wise constructor' based on member visibility if not conflicting with a user defined constructor. - * We generate 3 different visibilities of 'member-wise constructor's in order: - 1. `public` 'member-wise constructor' - * Contains members of visibility: `public` - * Do not generate if `internal` or `private` member lacks an init expression - 2. `internal` 'member-wise constructor' - * Contains members of visibility: `internal`, `public` - * Do not generate if `private` member lacks an init expression - 3. 
`private` 'member-wise constructor' - * Contains members of visibility: `private`, `internal`, `public` -```csharp -struct GenerateCtorInner1 -{ - internal int a = 0; - - // Slang will automatically generate an implicit - // internal __init(int in_a) - // { - // a = 0; - // - // a = in_a; - // } -}; -struct GenerateCtor1 : GenerateCtorInner1 -{ - internal int b = 0; - public int c; - // Slang will automatically generate an implicit - // internal __init(int in_a, int in_b, int in_c) - // { - // b = 0; - // - // this = GenerateCtorInner1(in_a); - // - // b = in_b; - // c = in_c; - // } - // - // public __init(int in_c) - // { - // b = 0; - // - // this = GenerateCtorInner1(); - // - // c = in_c; - // } -}; - -struct GenerateCtorInner2 -{ - internal int a; - // Slang will automatically generate an implicit - // internal __init(int in_a) - // { - // a = in_a; - // } -}; -struct GenerateCtor2 : GenerateCtorInner2 -{ - internal int b; - public int c; - - /// Note: `internal b` is missing init expression, - // Do not generate a `public` 'member-wise' constructor. - - // Slang will automatically generate an implicit - // internal __init(int in_a, int in_b, int in_c) - // { - // this = GenerateCtorInner2(in_a); - // - // b = in_b; - // c = in_c; - // } -}; -``` + We generate 3 different visibilities of 'member-wise constructor's in order: + 1. `public` 'member-wise constructor' + - Contains members of visibility: `public` + - Do not generate if `internal` or `private` member lacks an init expression + 2. `internal` 'member-wise constructor' + - Contains members of visibility: `internal`, `public` + - Do not generate if `private` member lacks an init expression + 3. 
`private` 'member-wise constructor' + - Contains members of visibility: `private`, `internal`, `public` + + ```csharp + struct GenerateCtorInner1 + { + internal int a = 0; + + // Slang will automatically generate an implicit + // internal __init(int in_a) + // { + // a = 0; + // + // a = in_a; + // } + }; + struct GenerateCtor1 : GenerateCtorInner1 + { + internal int b = 0; + public int c; + + // Slang will automatically generate an implicit + // internal __init(int in_a, int in_b, int in_c) + // { + // b = 0; + // + // this = GenerateCtorInner1(in_a); + // + // b = in_b; + // c = in_c; + // } + // + // public __init(int in_c) + // { + // b = 0; + // + // this = GenerateCtorInner1(); + // + // c = in_c; + // } + }; + + struct GenerateCtorInner2 + { + internal int a; + // Slang will automatically generate an implicit + // internal __init(int in_a) + // { + // a = in_a; + // } + }; + struct GenerateCtor2 : GenerateCtorInner2 + { + internal int b; + public int c; + + /// Note: `internal b` is missing init expression, + // Do not generate a `public` 'member-wise' constructor. + + // Slang will automatically generate an implicit + // internal __init(int in_a, int in_b, int in_c) + // { + // this = GenerateCtorInner2(in_a); + // + // b = in_b; + // c = in_c; + // } + }; + ``` Initializer Lists ---------- @@ -1111,38 +1115,39 @@ float3 val2 = {}; 1. Attempt to call default constructor (`__init()`) of a `struct` + ```csharp + struct Foo + { + int a; + int b; + __init() + { + a = 5; + b = 5; + } + }; -```csharp -struct Foo -{ - int a; - int b; - __init() - { - a = 5; - b = 5; - } -}; + ... -... + // Equivalent to `Foo val = Foo();` + Foo val = {}; + ``` -// Equivalent to `Foo val = Foo();` -Foo val = {}; -``` 2. As a fallback, zero-initialize the struct -```csharp -struct Foo -{ - int a; - int b; -}; + ```csharp + struct Foo + { + int a; + int b; + }; -... + ... 
+ + // Equivalent to `Foo val; val.a = 0; val.b = 0;` + Foo val = {}; + ``` -// Equivalent to `Foo val; val.a = 0; val.b = 0;` -Foo val = {}; -``` ### Initializer Lists - Other features Slang allows calling a default-initializer inside a default-constructor. From e53ca9872f62ceb2bb86f5a00bdec033c1fda6fb Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 Nov 2024 23:40:52 -0800 Subject: [PATCH 10/13] Support nonuniformEXT for GLSL (#5695) Adding nonuniformEXT intrinsic for GLSL Co-authored-by: Ellie Hermaszewska --- source/slang/glsl.meta.slang | 11 +++++++++++ tests/cross-compile/non-uniform-indexing.slang | 5 +++++ 2 files changed, 16 insertions(+) diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang index ada908306d..1d91930e45 100644 --- a/source/slang/glsl.meta.slang +++ b/source/slang/glsl.meta.slang @@ -9601,3 +9601,14 @@ public bool allInvocationsEqual(bool value) { return WaveActiveAllEqual(value); } + + +/// extensions +/// https://github.com/KhronosGroup/GLSL/blob/main/extensions/ext/GL_EXT_nonuniform_qualifier.txt + +__generic +__intrinsic_op($(kIROp_NonUniformResourceIndex)) +[require(cpp_cuda_glsl_hlsl_spirv, nonuniformqualifier)] +public T nonuniformEXT(T index); + + diff --git a/tests/cross-compile/non-uniform-indexing.slang b/tests/cross-compile/non-uniform-indexing.slang index 73483afae4..dc53d5922f 100644 --- a/tests/cross-compile/non-uniform-indexing.slang +++ b/tests/cross-compile/non-uniform-indexing.slang @@ -1,4 +1,5 @@ //TEST:SIMPLE(filecheck=CHECK):-target spirv-assembly -entry main -stage fragment -verify-debug-serial-ir +//TEST:SIMPLE(filecheck=CHECK):-target spirv-assembly -entry main -stage fragment -verify-debug-serial-ir -DGLSL -allow-glsl // Confirm that `NonUniformResourceIndex` translates to SPIR-V as expected @@ -11,5 +12,9 @@ float4 main( float3 uv : UV) : SV_Target { +#if defined(GLSL) + return t[nonuniformEXT(int(uv.z))].Sample(s, uv.xy); +#else return 
t[NonUniformResourceIndex(int(uv.z))].Sample(s, uv.xy); +#endif } From 0b92e1de87cf805888127615457eea906fd0bc39 Mon Sep 17 00:00:00 2001 From: Anders Leino Date: Fri, 29 Nov 2024 09:51:49 +0200 Subject: [PATCH 11/13] Explain why tests/autodiff/diff-ptr-type-array.slang is disabled for WGPU (#5702) This closes #5613. Co-authored-by: Ellie Hermaszewska --- tests/autodiff/diff-ptr-type-array.slang | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/autodiff/diff-ptr-type-array.slang b/tests/autodiff/diff-ptr-type-array.slang index cbf692574c..7f048a095e 100644 --- a/tests/autodiff/diff-ptr-type-array.slang +++ b/tests/autodiff/diff-ptr-type-array.slang @@ -1,6 +1,6 @@ //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -shaderobj -output-using-type //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -vk -shaderobj -output-using-type -// WGSL: Runtime-sized arrays not in 'storage' address space are being generated #5613 +// Not supported in WGSL: Arrays of textures or buffers //DISABLE_TEST(compute):COMPARE_COMPUTE:-wgpu //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer From c005fe9978ec7d432f0a72c0b38f545f215e1244 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 Nov 2024 23:56:47 -0800 Subject: [PATCH 12/13] Emit OpExecutionMode SpacingEqual for Domain stage (#5696) Domain stage should emit one more `OpExecutionMode` with `SpacingEqual`, similarly to how Hull stage does. 
Currently Hull stage emits four OpExecutionMode as follows: OpExecutionMode %hullMain SpacingEqual OpExecutionMode %hullMain OutputVertices 4 OpExecutionMode %hullMain VertexOrderCw OpExecutionMode %hullMain Quads And Domain stage emits only one OpExecutionMode: OpExecutionMode %domainMain Quads This commit adds the following instruction for Domain stage: OpExecutionMode %domainMain SpacingEqual It is because the Vulkan Validation Layer prints an error when the Domain shader didn't have `OpExecutionMode SpacingEqual`. `SpacingEqual` corresponds to an attribute, `[partitioning("integer")]`, given to the Hull stage. Although Domain stage is not marked with the same attribute, it is assumed to use the same value used for the matching Hull stage. The error message from VVL is as follows: ``` vkCreateShadersEXT(): pCreateInfos[2].stage is VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, but spacing is not specified. The Vulkan spec states: If codeType is VK_SHADER_CODE_TYPE_SPIRV_EXT, and stage is VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, pCode must contain an OpExecutionMode instruction specifying the spacing of segments on the edges of tessellated primitives (https://vulkan.lunarg.com/doc/view/1.3.283.0/windows/1.3-extensions/vkspec.html#VUID-VkShaderCreateInfoEXT-codeType-08874) ``` Co-authored-by: Ellie Hermaszewska --- source/slang/slang-emit-spirv.cpp | 4 +- tests/spirv/tessellation.slang | 65 +++++++++++++++++++++++-------- 2 files changed, 50 insertions(+), 19 deletions(-) diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 04ddee9909..90d0db5d74 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -4267,6 +4267,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex ensureExtensionDeclaration(UnownedStringSlice("SPV_EXT_mesh_shader")); break; case Stage::Hull: + case Stage::Domain: { requireSPIRVCapability(SpvCapabilityTessellation); @@ -4288,10 +4289,7 @@ struct 
SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex arg); } requireSPIRVExecutionMode(nullptr, getIRInstSpvID(entryPoint), mode); - break; } - case Stage::Domain: - requireSPIRVCapability(SpvCapabilityTessellation); break; default: break; diff --git a/tests/spirv/tessellation.slang b/tests/spirv/tessellation.slang index deb6ed2989..9ac5860f9f 100644 --- a/tests/spirv/tessellation.slang +++ b/tests/spirv/tessellation.slang @@ -1,22 +1,23 @@ -//TEST:SIMPLE(filecheck=CHECK): -target spirv +//TEST:SIMPLE(filecheck=HULL): -target spirv -stage hull -entry hullMain +//TEST:SIMPLE(filecheck=DOMAIN): -target spirv -stage domain -entry domainMain -// CHECK-DAG: OpExecutionMode %main SpacingEqual +// HULL-DAG: OpExecutionMode %hullMain SpacingEqual +// HULL-DAG: OpExecutionMode %hullMain OutputVertices 4 +// HULL-DAG: OpExecutionMode %hullMain VertexOrderCw +// HULL-DAG: OpExecutionMode %hullMain Quads -// CHECK-DAG: OpExecutionMode %main OutputVertices 4 +// HULL: OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter +// HULL: OpDecorate %gl_TessLevelOuter Patch +// HULL: OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner +// HULL: OpDecorate %gl_TessLevelInner Patch -// CHECK-DAG: OpExecutionMode %main VertexOrderCw +// HULL: OpControlBarrier %uint_2 %uint_4 %uint_0 -// CHECK-DAG: OpExecutionMode %main Quads +// HULL: OpStore %gl_TessLevelOuter +// HULL: OpStore %gl_TessLevelInner -// CHECK: OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter -// CHECK: OpDecorate %gl_TessLevelOuter Patch -// CHECK: OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner -// CHECK: OpDecorate %gl_TessLevelInner Patch - -// CHECK: OpControlBarrier %uint_2 %uint_4 %uint_0 - -// CHECK: OpStore %gl_TessLevelOuter -// CHECK: OpStore %gl_TessLevelInner +// DOMAIN-DAG: OpExecutionMode %domainMain SpacingEqual +// DOMAIN-DAG: OpExecutionMode %domainMain Quads struct VS_OUT { @@ -34,13 +35,18 @@ struct HSC_OUT float InsideTessFactor[2] : SV_InsideTessFactor; }; +struct DS_OUT +{ 
+ float4 position : SV_Position; +}; + // Hull Shader (HS) [domain("quad")] [partitioning("integer")] [outputtopology("triangle_cw")] [outputcontrolpoints(4)] [patchconstantfunc("constants")] -HS_OUT main(InputPatch patch, uint i : SV_OutputControlPointID) +HS_OUT hullMain(InputPatch patch, uint i : SV_OutputControlPointID) { HS_OUT o; o.position = patch[i].position; @@ -62,4 +68,31 @@ HSC_OUT constants(InputPatch patch) o.InsideTessFactor[0] = lerp(o.EdgeTessFactor[1], o.EdgeTessFactor[3], 0.5); o.InsideTessFactor[1] = lerp(o.EdgeTessFactor[0], o.EdgeTessFactor[2], 0.5); return o; -} \ No newline at end of file +} + +[domain("quad")] +DS_OUT domainMain( + float2 uv : SV_DomainLocation, // Tessellated coordinates (u, v) + const OutputPatch patch, // Control points from the hull shader + const HSC_OUT patchConstants // Patch constants calculated by the hull shader +) +{ + DS_OUT o; + + // Interpolate the position of the tessellated point within the patch + float3 p0 = patch[0].position; + float3 p1 = patch[1].position; + float3 p2 = patch[2].position; + float3 p3 = patch[3].position; + + // Bilinear interpolation of the position in the quad + float3 interpolatedPosition = + p0 * (1 - uv.x) * (1 - uv.y) + + p1 * uv.x * (1 - uv.y) + + p3 * uv.x * uv.y + + p2 * (1 - uv.x) * uv.y; + + // Output final position in clip space + o.position = float4(interpolatedPosition, 1.0); + return o; +} From 136c2e22b80d3ebf500d09d5ce6f4fa47dcac8a0 Mon Sep 17 00:00:00 2001 From: Ellie Hermaszewska Date: Sat, 30 Nov 2024 00:51:11 +0800 Subject: [PATCH 13/13] fetch and extract prebuilt deps lazily (#5707) Closes https://github.com/shader-slang/slang/issues/5706 and https://github.com/shader-slang/slang/issues/5657 --- cmake/FetchedSharedLibrary.cmake | 53 +++++++++++++++++++------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/cmake/FetchedSharedLibrary.cmake b/cmake/FetchedSharedLibrary.cmake index fa0bd9f5e4..bd7eb10f9f 100644 --- 
a/cmake/FetchedSharedLibrary.cmake +++ b/cmake/FetchedSharedLibrary.cmake @@ -1,35 +1,44 @@ # Helper function to download and extract an archive function(download_and_extract archive_name url) - get_filename_component(extension ${url} EXT) - set(archive_path "${CMAKE_CURRENT_BINARY_DIR}/${archive_name}${extension}") - set(extract_dir "${CMAKE_CURRENT_BINARY_DIR}/${archive_name}") + cmake_path(GET url FILENAME filename_with_ext) + cmake_path(GET url STEM LAST_ONLY file_stem) + set(archive_path "${CMAKE_CURRENT_BINARY_DIR}/${filename_with_ext}") + set(extract_dir "${CMAKE_CURRENT_BINARY_DIR}/${file_stem}") - if(EXISTS ${url}) - message(STATUS "Using local file for ${archive_name}: ${url}") - set(archive_path ${url}) + # Check if already extracted + file(GLOB EXTRACT_DIR_CONTENTS "${extract_dir}/*") + if(EXTRACT_DIR_CONTENTS) + message(STATUS "Using existing extracted files in ${extract_dir}") else() - message(STATUS "Fetching ${archive_name} from ${url}") - file( - DOWNLOAD ${url} ${archive_path} - # SHOW_PROGRESS - STATUS status - ) - - list(GET status 0 status_code) - list(GET status 1 status_string) - if(NOT status_code EQUAL 0) + # Check if archive already exists + if(EXISTS ${url}) + message(STATUS "Using local file for ${archive_name}: ${url}") + set(archive_path ${url}) + elseif(EXISTS ${archive_path}) message( - WARNING - "Failed to download ${archive_name} from ${url}: ${status_string}" + STATUS + "Using existing archive for ${archive_name}: ${archive_path}" ) - return() + else() + message(STATUS "Fetching ${archive_name} from ${url}") + file(DOWNLOAD ${url} ${archive_path} STATUS status) + + list(GET status 0 status_code) + list(GET status 1 status_string) + if(NOT status_code EQUAL 0) + message( + WARNING + "Failed to download ${archive_name} from ${url}: ${status_string}" + ) + return() + endif() endif() - endif() - file(ARCHIVE_EXTRACT INPUT ${archive_path} DESTINATION ${extract_dir}) + file(ARCHIVE_EXTRACT INPUT ${archive_path} DESTINATION 
${extract_dir}) + message(STATUS "${archive_name} extracted to ${extract_dir}") + endif() set(${archive_name}_SOURCE_DIR ${extract_dir} PARENT_SCOPE) - message(STATUS "${archive_name} downloaded and extracted to ${extract_dir}") endfunction() # Add rules to copy & install shared library of name 'library_name' in the 'module_subdir' directory.