Skip to content

Commit fce7022

Browse files
committed
Rename nWorkgroups to totalWorkgroups; add a KernelCode constructor overload that supports totalWorkgroups substitution
1 parent cae3a11 commit fce7022

5 files changed

Lines changed: 117 additions & 85 deletions

File tree

examples/gpu_puzzles/key.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ void puzzle1(Context &ctx) {
5454
Tensor a = createTensor(ctx, {N}, kf32, makeData<N>().data());
5555
Tensor output = createTensor(ctx, {N}, kf32);
5656
Kernel op = createKernel(ctx, {kPuzzle1, N}, Bindings{a, output},
57-
/*nWorkgroups */ {1, 1, 1});
57+
/*totalWorkgroups */ {1, 1, 1});
5858
showResult<N>(ctx, op, output);
5959
}
6060

@@ -146,7 +146,7 @@ void puzzle4(Context &ctx) {
146146
};
147147
Kernel op =
148148
createKernel(ctx, {kPuzzle4, /*workgroup size*/ {Wx, Wy, 1}},
149-
Bindings{input, output}, /* nWorkgroups */ {1, 1, 1}, Params{N});
149+
Bindings{input, output}, /* totalWorkgroups */ {1, 1, 1}, Params{N});
150150
showResult<N, N, N>(ctx, op, output);
151151
}
152152

experimental/fasthtml/gpu_puzzles/client.js

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,72 +18,72 @@ const PuzzleSpec = [
1818
{
1919
name: "Map",
2020
description:
21-
'Implement a "kernel" (GPU function) that adds 10 to each position of vector `a` and stores it in vector `out`. You have 1 thread per position.',
21+
'Implement a "kernel" (GPU function) that adds 10 to each position of vector `in1` and stores it in vector `out`. You have 1 thread per position.',
2222
},
2323
{
2424
name: "Zip",
2525
description:
26-
"Implement a kernel that adds together each position of `a` and `b` and stores it in `out`. You have 1 thread per position.",
26+
"Implement a kernel that adds together each position of `in1` and `in2` and stores it in `out`. You have 1 thread per position.",
2727
},
2828
{
2929
name: "Guards",
3030
description:
31-
"Implement a kernel that adds 10 to each position of `a` and stores it in `out`. You have more threads than positions.",
31+
"Implement a kernel that adds 10 to each position of `in1` and stores it in `out`. You have more threads than positions.",
3232
},
3333
{
3434
name: "Map 2D",
3535
description:
36-
"Implement a kernel that adds 10 to each position of `a` and stores it in `out`. Input `a` is 2D and square. You have more threads than positions.",
36+
"Implement a kernel that adds 10 to each position of `in1` and stores it in `out`. Input `in1` is 2D and square. You have more threads than positions.",
3737
},
3838
{
3939
name: "Broadcast",
4040
description:
41-
"Implement a kernel that adds `a` and `b` and stores it in `out`. Inputs `a` and `b` are vectors. You have more threads than positions.",
41+
"Implement a kernel that adds `in1` and `in2` and stores it in `out`. Inputs `in1` and `in2` are vectors. You have more threads than positions.",
4242
},
4343
{
4444
name: "Blocks",
4545
description:
46-
"Implement a kernel that adds 10 to each position of `a` and stores it in `out`. You have fewer threads per block than the size of `a`.",
46+
"Implement a kernel that adds 10 to each position of `in1` and stores it in `out`. You have fewer threads per block than the size of `in1`.",
4747
},
4848
{
4949
name: "Blocks 2D",
5050
description:
51-
"Implement the same kernel in 2D. You have fewer threads per block than the size of `a` in both directions.",
51+
"Implement the same kernel in 2D. You have fewer threads per block than the size of `in1` in both directions.",
5252
},
5353
{
5454
name: "Shared",
5555
description:
56-
"Implement a kernel that adds 10 to each position of `a` and stores it in `out`. You have fewer threads per block than the size of `a`. Use shared memory and `cuda.syncthreads` to ensure threads do not cross.",
56+
"Implement a kernel that adds 10 to each position of `in1` and stores it in `out`. You have fewer threads per block than the size of `in1`. Use shared memory and `cuda.syncthreads` to ensure threads do not cross.",
5757
},
5858
{
5959
name: "Pooling",
6060
description:
61-
"Implement a kernel that sums together the last 3 positions of `a` and stores it in `out`. You have 1 thread per position.",
61+
"Implement a kernel that sums together the last 3 positions of `in1` and stores it in `out`. You have 1 thread per position.",
6262
},
6363
{
6464
name: "Dot Product",
6565
description:
66-
"Implement a kernel that computes the dot-product of `a` and `b` and stores it in `out`. You have 1 thread per position.",
66+
"Implement a kernel that computes the dot-product of `in1` and `in2` and stores it in `out`. You have 1 thread per position.",
6767
},
6868
{
6969
name: "1D Convolution",
7070
description:
71-
"Implement a kernel that computes a 1D convolution between `a` and `b` and stores it in `out`. Handle the general case.",
71+
"Implement a kernel that computes a 1D convolution between `in1` and `in2` and stores it in `out`. Handle the general case.",
7272
},
7373
{
7474
name: "Prefix Sum",
7575
description:
76-
"Implement a kernel that computes a sum over `a` and stores it in `out`. If the size of `a` is greater than the block size, only store the sum of each block using parallel prefix sum.",
76+
"Implement a kernel that computes a sum over `in1` and stores it in `out`. If the size of `in1` is greater than the block size, only store the sum of each block using parallel prefix sum.",
7777
},
7878
{
7979
name: "Axis Sum",
8080
description:
81-
"Implement a kernel that computes a sum over each column of `a` and stores it in `out`.",
81+
"Implement a kernel that computes a sum over each column of `in1` and stores it in `out`.",
8282
},
8383
{
8484
name: "Matrix Multiply",
8585
description:
86-
"Implement a kernel that multiplies square matrices `a` and `b` and stores the result in `out`. Optimize by using shared memory for partial dot-products.",
86+
"Implement a kernel that multiplies square matrices `in1` and `in2` and stores the result in `out`. Optimize by using shared memory for partial dot-products.",
8787
},
8888
];
8989

@@ -315,10 +315,11 @@ function render() {
315315
document.getElementById("output").style.display = "none";
316316
// use puzzleIndex to get markdown
317317
document.getElementById("writeup").innerHTML = "<zero-md src=\"/assets/markdown/puzzle" + (AppState.puzzleIndex + 1) + ".md\"></zero-md>";
318-
console.log("writeup: ", document.getElementById("writeup").innerHTML);
318+
document.getElementById("solution").textContent = "Hide Solution";
319319
document.getElementById("writeup").style.display = "block";
320320
} else {
321321
document.getElementById("output").style.display = "block";
322322
document.getElementById("writeup").style.display = "none";
323+
document.getElementById("solution").textContent = "Show Solution";
323324
}
324325
}

0 commit comments

Comments
 (0)