Skip to content

Commit

Permalink
Optimize Decompose()/Dispose()
Browse files: browse the repository at this point in the history
  • Loading branch information
WrathfulSpatula committed Nov 1, 2024
1 parent 0dd1e6c commit 399e28b
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 29 deletions.
21 changes: 5 additions & 16 deletions src/common/qengine.cu
Original file line number Diff line number Diff line change
Expand Up @@ -671,9 +671,7 @@ __global__ void decomposeprob(qCudaCmplx* stateVec, bitCapIntOcl* bitCapIntOclPt
qCudaReal1_f partProb = ZERO_R1_CUDA;

for (bitCapIntOcl k = 0U; k < partPower; k++) {
bitCapIntOcl l = j | (k << start);

const qCudaCmplx amp = stateVec[l];
const qCudaCmplx amp = stateVec[j | (k << start)];
const qCudaReal1_f nrm = (qCudaReal1_f)qCudaDot(amp, amp);
partProb += nrm;
partStateAngle[k] += qCudaArg(amp) * (qCudaReal1)nrm;
Expand Down Expand Up @@ -734,9 +732,7 @@ __global__ void disposeprob(qCudaCmplx* stateVec, bitCapIntOcl* bitCapIntOclPtr,
qCudaReal1 partProb = ZERO_R1_CUDA;

for (bitCapIntOcl k = 0U; k < partPower; k++) {
bitCapIntOcl l = j | (k << start);

qCudaCmplx amp = stateVec[l];
qCudaCmplx amp = stateVec[j | (k << start)];
qCudaReal1 nrm = qCudaDot(amp, amp);
partProb += nrm;
}
Expand All @@ -754,16 +750,9 @@ __global__ void disposeprob(qCudaCmplx* stateVec, bitCapIntOcl* bitCapIntOclPtr,
l |= (k ^ l) << len;
l = j | l;

qCudaCmplx amp = stateVec[l];
qCudaReal1_f nrm = (qCudaReal1_f)qCudaDot(amp, amp);

if (nrm >= REAL1_EPSILON_CUDA) {
qCudaReal1 currentAngle = qCudaArg(amp);
if (firstAngle < angleThresh) {
firstAngle = currentAngle;
}
remainderStateAngle[k] = currentAngle - (qCudaReal1)firstAngle;
}
const qCudaCmplx amp = stateVec[l];
const qCudaReal1_f nrm = (qCudaReal1_f)qCudaDot(amp, amp);
remainderStateAngle[k] += qCudaArg(amp) * (qCudaReal1)nrm;
}
}
}
Expand Down
22 changes: 9 additions & 13 deletions src/qengine/state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1206,26 +1206,23 @@ void QEngineCPU::DecomposeDispose(bitLenInt start, bitLenInt length, QEngineCPUP

if (destination) {
par_for(0U, remainderPower, [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
bitCapIntOcl j;
j = lcv & pow2MaskOcl(start);
bitCapIntOcl j = lcv & pow2MaskOcl(start);
j |= (lcv ^ j) << length;

for (bitCapIntOcl k = 0U; k < partPower; ++k) {
bitCapIntOcl l = j | (k << start);

const complex amp = stateVec->read(l);
const complex amp = stateVec->read(j | (k << start));
const real1 nrm = norm(amp);
remainderStateProb[lcv] += nrm;
partStateAngle[k] += arg(amp) * nrm;
}
});

par_for(0U, partPower, [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
bitCapIntOcl j;
j = lcv << start;
const bitCapIntOcl startMask = pow2MaskOcl(start);
const bitCapIntOcl j = lcv << start;

for (bitCapIntOcl k = 0U; k < remainderPower; ++k) {
bitCapIntOcl l = k & pow2MaskOcl(start);
bitCapIntOcl l = k & startMask;
l |= j | ((k ^ l) << length);

const complex amp = stateVec->read(l);
Expand Down Expand Up @@ -1253,17 +1250,16 @@ void QEngineCPU::DecomposeDispose(bitLenInt start, bitLenInt length, QEngineCPUP
j |= (lcv ^ j) << length;

for (bitCapIntOcl k = 0U; k < partPower; ++k) {
bitCapIntOcl l = j | (k << start);
remainderStateProb[lcv] += norm(stateVec->read(l));
remainderStateProb[lcv] += norm(stateVec->read(j | (k << start)));
}
});

par_for(0U, partPower, [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
bitCapIntOcl j;
j = lcv << start;
const bitCapIntOcl startMask = pow2MaskOcl(start);
const bitCapIntOcl j = lcv << start;

for (bitCapIntOcl k = 0U; k < remainderPower; ++k) {
bitCapIntOcl l = k & pow2MaskOcl(start);
bitCapIntOcl l = k & startMask;
l |= j | ((k ^ l) << length);

const complex amp = stateVec->read(l);
Expand Down

0 comments on commit 399e28b

Please sign in to comment.