// webgl-matmul-shared-alongY.js
/**
 * Runs a matrix multiplication on the GPU in which the shared (reduction)
 * dimension is laid out along the Y axis of both input textures.
 *
 * For every output texel (x, y) the fragment shader accumulates
 * `A(column y, row k) * B(column x, row k)` for k in [0, sharedDim) and writes
 * the sum to all four components of the output colour.
 *
 * The output size and the loop bound are baked into the shader source as
 * literals, so a dedicated program is built on every call. That program and
 * its fragment shader are released before returning (also on failure) so that
 * repeated calls do not accumulate GPU objects.
 *
 * @param {WebGL2RenderingContext} gl - Rendering context (the shader targets GLSL ES 3.00).
 * @param {WebGLTexture} texA - Left operand, sampled through the red channel.
 * @param {WebGLTexture} texB - Right operand, sampled through the red channel.
 * @param {number} width - Output width in texels; expected to be an integer.
 * @param {number} height - Output height in texels; expected to be an integer.
 * @param {number} sharedDim - Number of rows to reduce over in both inputs.
 * @param {WebGLTexture} texC - Texture handed to `attachOutputTexture` as the render target.
 * @returns {Promise<void>} Settles once `waitForSync(gl)` settles.
 */
async function runMatMulSharedAlongY(gl, texA, texB, width, height, sharedDim, texC) {
  const fragmentShaderSource = `#version 300 es
    precision highp float;
    in vec2 TexCoord;
    out vec4 TexelValue;
    // Texture samplers
    uniform sampler2D A;
    uniform sampler2D B;
    void main()
    {
      float value = 0.0;
      int x = int(TexCoord.s * ${width}.0); // rescale
      int y = int(TexCoord.t * ${height}.0); // rescale
      // loop over the shared dim
      for(int k=0; k < ${sharedDim}; ++k) {
        float a = texelFetch(A, ivec2(y, k), 0).r;
        float b = texelFetch(B, ivec2(x, k), 0).r;
        value += a * b;
      }
      TexelValue = vec4(value);
    }`;

  const vertexShader = getDefaultVertexShader(gl);
  const fragmentShader = compileShader(gl, fragmentShaderSource, gl.FRAGMENT_SHADER);
  let program = null;
  try {
    program = createProgram(gl, vertexShader, fragmentShader);
    const handleA = gl.getUniformLocation(program, 'A');
    const handleB = gl.getUniformLocation(program, 'B');

    gl.useProgram(program);
    attachOutputTexture(gl, texC);
    gl.viewport(0, 0, width, height);

    // A is sampled from texture unit 0, B from texture unit 1.
    gl.activeTexture(gl.TEXTURE0);
    gl.bindTexture(gl.TEXTURE_2D, texA);
    gl.uniform1i(handleA, 0);
    gl.activeTexture(gl.TEXTURE1);
    gl.bindTexture(gl.TEXTURE_2D, texB);
    gl.uniform1i(handleB, 1);

    gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4);
    await waitForSync(gl);
  } finally {
    // The program is specific to this call's dimensions and is never reused.
    // The vertex shader is left alone: it comes from a shared helper whose
    // ownership is not visible here.
    if (program) {
      gl.deleteProgram(program);
    }
    if (fragmentShader) {
      gl.deleteShader(fragmentShader);
    }
  }
}
/**
 * CPU reference implementation, used only to check the GPU result.
 *
 * Both inputs are flat row-major buffers whose first dimension is the shared
 * one: `a` has shape [shapeA[0], shapeA[1]] and `b` is indexed with a row
 * stride of shapeB[1]. The output is written row-major with shapeA[1] rows
 * and shapeB[1] columns:
 *
 *   c[row * shapeB[1] + col] = Σ_k a[k * shapeA[1] + row] * b[k * shapeB[1] + col]
 *
 * @param {ArrayLike<number>} a - Flat left operand.
 * @param {number[]} shapeA - [sharedDim, rows of the result].
 * @param {ArrayLike<number>} b - Flat right operand.
 * @param {number[]} shapeB - [unused here, columns of the result].
 * @param {Float32Array|number[]} c - Output buffer, filled in place.
 */
function cpuMatMulSharedAlongY(a, shapeA, b, shapeB, c) {
  const [sharedDim, rowCount] = shapeA;
  const colCount = shapeB[1];

  for (let row = 0; row < rowCount; row++) {
    const rowStart = row * colCount;
    for (let col = 0; col < colCount; col++) {
      let acc = 0;
      for (let k = 0; k < sharedDim; k++) {
        acc += a[k * rowCount + row] * b[k * colCount + col];
      }
      c[rowStart + col] = acc;
    }
  }
}
/**
 * Returns the list of shape pairs to benchmark.
 *
 * Each entry has the form `{ a: [sharedDim, colsA], b: [sharedDim, colsB] }`;
 * the first element is the same for `a` and `b` in every case.
 *
 * @returns {{a: number[], b: number[]}[]} A freshly built array on every call.
 */
function getTestData() {
  // One row per case: [sharedDim, second dim of A, second dim of B].
  const cases = [
    [8, 3, 4],
    [64, 56 * 56, 64],
    [64, 56 * 56, 256],
    [256, 7 * 7, 64],
    [512, 56 * 56, 256],
    [768, 28 * 28, 128],
    [2304, 28 * 28, 128],
    [1024, 14 * 14, 256],
    [2048, 7 * 7, 512],
  ];
  return cases.map(([sharedDim, colsA, colsB]) => ({
    a: [sharedDim, colsA],
    b: [sharedDim, colsB],
  }));
}
//
// Main
//
/**
 * Benchmark driver: for every shape pair from `getTestData()` it uploads
 * random inputs, runs the GPU matmul, reads the result back and compares it
 * against the CPU reference with an absolute tolerance argument of 0.1.
 *
 * Textures created for a test case are always released before moving on,
 * including when upload, execution, read-back or verification throws, so a
 * failing case cannot leak GPU memory.
 *
 * @returns {Promise<void>} Resolves after all cases ran; rejects with the
 *   first error raised by any step.
 */
async function main() {
  const canvas = createCanvas(1, 1);
  const gl = getContext(canvas);
  setupVBO(gl);
  createFrameBuffer(gl);

  for (const { a: shapeA, b: shapeB } of getTestData()) {
    const sharedDim = shapeA[0];
    console.info(`Running matmul-shared-alongY for [${shapeA.toString()}]-[${shapeB.toString()}]`);

    // output texture dimensions
    const width = shapeB[1];
    const height = shapeA[1];

    // Every texture is registered here as soon as it exists so the finally
    // block can release exactly what was created, even after a partial setup.
    const textures = [];
    try {
      // Input textures are created as (second dim) wide by (shared dim) tall.
      const a = createRandomArray(shapeA[0] * shapeA[1]);
      const texA = createTexture(gl, gl.R32F, gl.RED, gl.FLOAT, shapeA[1], shapeA[0], a);
      textures.push(texA);

      const b = createRandomArray(shapeB[0] * shapeB[1]);
      const texB = createTexture(gl, gl.R32F, gl.RED, gl.FLOAT, shapeB[1], shapeB[0], b);
      textures.push(texB);

      const c = new Float32Array(width * height);
      const texC = createTexture(gl, gl.R32F, gl.RED, gl.FLOAT, width, height, null);
      textures.push(texC);

      console.time('matmul');
      await runMatMulSharedAlongY(gl, texA, texB, width, height, sharedDim, texC);
      console.timeEnd('matmul');

      console.time('readpixels');
      readOutput(gl, width, height, gl.RED, gl.FLOAT, c);
      console.timeEnd('readpixels');

      const expected = new Float32Array(width * height);
      cpuMatMulSharedAlongY(a, shapeA, b, shapeB, expected);
      if (!compareOutputs(c, expected, 0.1)) {
        console.error('Expected and Actual did not match');
        console.log(c);
        console.log(expected);
      } else {
        console.info('Actual and expected matched!');
      }
    } finally {
      for (const texture of textures) {
        gl.deleteTexture(texture);
      }
    }
  }
}
// Script entry point. The promise returned by main() is intentionally not
// awaited; `void` marks that explicitly. A rejection is not handled here.
void main();