diff --git a/visa/BuildCISAIR.h b/visa/BuildCISAIR.h index d002d5b67504..3fd6af20681b 100644 --- a/visa/BuildCISAIR.h +++ b/visa/BuildCISAIR.h @@ -669,6 +669,9 @@ class CISA_IR_Builder : public VISABuilder { bool CISA_create_nbarrier(bool isWait, VISA_opnd *barrierId, VISA_opnd *threadCount, int lineNum); + bool CISA_create_nbarrier_signal(VISA_opnd *barrierId, VISA_opnd *barrierType, + VISA_opnd *numProds, VISA_opnd *numCons, + int lineNum); bool CISA_create_lsc_typed_block2d_inst( diff --git a/visa/BuildCISAIRImpl.cpp b/visa/BuildCISAIRImpl.cpp index a8a2a29d0db0..e8aecb8090f0 100644 --- a/visa/BuildCISAIRImpl.cpp +++ b/visa/BuildCISAIRImpl.cpp @@ -4192,13 +4192,16 @@ bool CISA_IR_Builder::CISA_create_lsc_fence(LSC_SFID sfid, LSC_FENCE_OP fence, return true; } -bool CISA_IR_Builder::CISA_create_nbarrier(bool isWait, VISA_opnd *barrierId, +bool CISA_IR_Builder::CISA_create_nbarrier(bool isWait, + VISA_opnd *barrierId, VISA_opnd *threadCount, int lineNum) { if (isWait) { + // wait VISA_CALL_TO_BOOL(AppendVISANamedBarrierWait, static_cast(barrierId)); } else { + // signal VISA_CALL_TO_BOOL(AppendVISANamedBarrierSignal, static_cast(barrierId), static_cast(threadCount)); @@ -4206,6 +4209,19 @@ bool CISA_IR_Builder::CISA_create_nbarrier(bool isWait, VISA_opnd *barrierId, return true; } +bool CISA_IR_Builder::CISA_create_nbarrier_signal(VISA_opnd *barrierId, + VISA_opnd *barrierType, + VISA_opnd *numProds, + VISA_opnd *numCons, + int lineNum) { + VISA_CALL_TO_BOOL(AppendVISANamedBarrierSignal, + static_cast(barrierId), + static_cast(barrierType), + static_cast(numProds), + static_cast(numCons)); + return true; +} + bool CISA_IR_Builder::CISA_create_lsc_typed_block2d_inst( LSC_OP opcode, LSC_CACHE_OPTS caching, LSC_ADDR_TYPE addrModel, diff --git a/visa/BuildIR.h b/visa/BuildIR.h index 4a56ce65a7a7..2c22e458d4a9 100644 --- a/visa/BuildIR.h +++ b/visa/BuildIR.h @@ -2308,7 +2308,6 @@ class IR_Builder { int status = VISA_SUCCESS; return translateLscFence(pred, sfid, 
fenceOp, scope, status); } - enum class NamedBarrierType { PRODUCER, CONSUMER, BOTH }; //////////////////////////////////////////////////////////////////////// // default barrier functions @@ -2320,12 +2319,10 @@ class IR_Builder { //////////////////////////////////////////////////////////////////////// // named barrier functions int translateVISANamedBarrierSignal(G4_Predicate *prd, G4_Operand *barrierId, - G4_Operand *threadCount); + G4_Operand *barrierType, + G4_Operand *numProducers, + G4_Operand *numConsumers); int translateVISANamedBarrierWait(G4_Predicate *prd, G4_Operand *barrierId); - void generateNamedBarrier(G4_Predicate *prd, int numProducer, int numConsumer, - NamedBarrierType type, G4_Operand *barrierId); - void generateNamedBarrier(G4_Predicate *prd, G4_Operand *barrierId, - G4_SrcRegRegion *threadValue); //////////////////////////////////////////////////////////////////////// // fence etc diff --git a/visa/ByteCodeReaderNG.cpp b/visa/ByteCodeReaderNG.cpp index 9f86328fd3b6..9574822a3a07 100644 --- a/visa/ByteCodeReaderNG.cpp +++ b/visa/ByteCodeReaderNG.cpp @@ -725,15 +725,27 @@ static void readInstructionCommonNG(unsigned &bytePos, const char *buf, uint32_t mode = readOtherOperandNG(bytePos, buf, ISA_TYPE_UB); kernelBuilder->AppendVISASplitBarrierInst(mode != 0); } else if (opcode == ISA_NBARRIER) { + // Still support reading visa binary ? 
uint32_t mode = readOtherOperandNG(bytePos, buf, ISA_TYPE_UB); auto barrierId = readVectorOperandNG(bytePos, buf, container, false); - VISA_VectorOpnd *threadCount = + VISA_VectorOpnd *barrierType = readVectorOperandNG(bytePos, buf, container, false); - bool isWait = (mode & 1) == 0; + VISA_VectorOpnd *numProds = + readVectorOperandNG(bytePos, buf, container, false); + VISA_VectorOpnd *numCons = + readVectorOperandNG(bytePos, buf, container, false); + bool isWait = (mode == 0); if (isWait) { kernelBuilder->AppendVISANamedBarrierWait(barrierId); } else { - kernelBuilder->AppendVISANamedBarrierSignal(barrierId, threadCount); + const auto &vo = barrierType->_opnd.v_opnd; + if ((vo.tag & 0x7) == OPERAND_IMMEDIATE && + vo.opnd_val.const_opnd._val.lval == 0 && numProds == numCons) { + kernelBuilder->AppendVISANamedBarrierSignal(barrierId, numProds); + } else { + kernelBuilder->AppendVISANamedBarrierSignal(barrierId, barrierType, + numProds, numCons); + } } } else { bool hasMask = (opcode == ISA_FENCE); diff --git a/visa/CISA.y b/visa/CISA.y index d023bdb27e7d..5a3b22dc132f 100644 --- a/visa/CISA.y +++ b/visa/CISA.y @@ -1306,6 +1306,9 @@ SynchronizationInstruction: | SBARRIER_WAIT { pBuilder->CISA_create_sbarrier_instruction(false, CISAlineno); } + | NBARRIER_SIGNAL VecSrcOperand_G_I_IMM VecSrcOperand_G_I_IMM VecSrcOperand_G_I_IMM VecSrcOperand_G_I_IMM { + pBuilder->CISA_create_nbarrier_signal($2.cisa_gen_opnd, $3.cisa_gen_opnd, $4.cisa_gen_opnd, $5.cisa_gen_opnd, CISAlineno); + } | NBARRIER_SIGNAL VecSrcOperand_G_I_IMM VecSrcOperand_G_I_IMM { pBuilder->CISA_create_nbarrier(false, $2.cisa_gen_opnd, $3.cisa_gen_opnd, CISAlineno); } diff --git a/visa/IsaDescription.cpp b/visa/IsaDescription.cpp index 214da2e4c051..422d0fe623d8 100644 --- a/visa/IsaDescription.cpp +++ b/visa/IsaDescription.cpp @@ -117,7 +117,7 @@ struct ISA_Inst_Info ISA_Inst_Table[ISA_OPCODE_ENUM_SIZE] = { {ISA_RAW_SEND, ISA_Inst_Misc, "raw_send", 0, 0}, {ISA_RESERVED_5E, ISA_Inst_Reserved, "reserved5E", 0, 
0}, {ISA_YIELD, ISA_Inst_Sync, "yield", 0, 0}, - {ISA_NBARRIER, ISA_Inst_Sync, "nbarrier", 0, 1}, + {ISA_NBARRIER, ISA_Inst_Sync, "nbarrier", 4, 0}, {ISA_RESERVED_61, ISA_Inst_Reserved, "reserved61", 0, 0}, {ISA_RESERVED_62, ISA_Inst_Reserved, "reserved62", 0, 0}, {ISA_RESERVED_63, ISA_Inst_Reserved, "reserved63", 0, 0}, @@ -1828,12 +1828,14 @@ VISA_INST_Desc CISA_INST_table[ISA_NUM_OPCODE] = { ISA_NBARRIER, ISA_Inst_Sync, "nbarrier", - 3, + 5, 0, { {OPND_IMM, ISA_TYPE_UB, 0}, {OPND_VECTOR_SRC_G_I_IMM, ISA_TYPE_UB, 0}, {OPND_VECTOR_SRC_G_I_IMM, ISA_TYPE_UB, 0}, + {OPND_VECTOR_SRC_G_I_IMM, ISA_TYPE_UB, 0}, + {OPND_VECTOR_SRC_G_I_IMM, ISA_TYPE_UB, 0}, }, }, diff --git a/visa/IsaDisassembly.cpp b/visa/IsaDisassembly.cpp index d8d921042b06..43cfe32a52d5 100644 --- a/visa/IsaDisassembly.cpp +++ b/visa/IsaDisassembly.cpp @@ -886,11 +886,19 @@ static std::string printInstructionCommon(const print_format_provider_t *header, sstr << (mode ? ".signal" : ".wait"); } else if (opcode == ISA_NBARRIER) { uint8_t mode = getPrimitiveOperand(inst, i); - bool isSignal = mode & 1; + bool isSignal = (mode > 0); sstr << (isSignal ? 
".signal" : ".wait"); sstr << printOperand(header, inst, 1, opt); if (isSignal) { - sstr << printOperand(header, inst, 2, opt); + if (mode == 1) { + // nbarrier.signal <barrierId> <threadCount> + sstr << printOperand(header, inst, 3, opt); + } else { + // nbarrier.signal <barrierId> <barrierType> <numProds> <numCons> + sstr << printOperand(header, inst, 2, opt); + sstr << printOperand(header, inst, 3, opt); + sstr << printOperand(header, inst, 4, opt); + } } } } diff --git a/visa/VISAKernel.h b/visa/VISAKernel.h index 1c82c5fb4058..6a153de96947 100644 --- a/visa/VISAKernel.h +++ b/visa/VISAKernel.h @@ -918,9 +918,14 @@ class VISAKernelImpl : public VISAFunction { VISA_BUILDER_API int AppendVISANamedBarrierWait(VISA_VectorOpnd *barrierId) override; + // Named barrier with the same number of producers and consumers VISA_BUILDER_API int AppendVISANamedBarrierSignal(VISA_VectorOpnd *barrierId, VISA_VectorOpnd *barrierCount) override; + // General producer-consumer named barrier + VISA_BUILDER_API int AppendVISANamedBarrierSignal( + VISA_VectorOpnd *barrierId, VISA_VectorOpnd *barrierType, + VISA_VectorOpnd *numProducers, VISA_VectorOpnd *numConsumers) override; /********** APPEND INSTRUCTION APIS END ******************/ diff --git a/visa/VISAKernelImpl.cpp b/visa/VISAKernelImpl.cpp index 84f75590905c..7577cafab82e 100644 --- a/visa/VISAKernelImpl.cpp +++ b/visa/VISAKernelImpl.cpp @@ -8417,19 +8417,29 @@ VISAKernelImpl::AppendVISANamedBarrierSignal(VISA_VectorOpnd *barrierId, int status = VISA_SUCCESS; + VISA_VectorOpnd *barrierType; + uint16_t value = 0; + status = CreateVISAImmediate(barrierType, &value, ISA_TYPE_UW); + if (status != VISA_SUCCESS) + return status; + if (IS_GEN_BOTH_PATH) { status = m_builder->translateVISANamedBarrierSignal( - nullptr, barrierId->g4opnd, barrierCount->g4opnd); + nullptr, barrierId->g4opnd, barrierType->g4opnd, barrierCount->g4opnd, + barrierCount->g4opnd); } if (IS_VISA_BOTH_PATH) { VISA_INST_Desc *inst_desc = &CISA_INST_table[ISA_NBARRIER]; - VISA_opnd *opnd[3]; + VISA_opnd *opnd[5]; int num_operands = 
0; - uint8_t mode = 1; // signal + // signal 1: nbarrier.signal <barrierId> <threadCount> + uint8_t mode = 1; ADD_OPND(num_operands, opnd, CreateOtherOpndHelper(0, num_operands, inst_desc, mode)); ADD_OPND(num_operands, opnd, barrierId); + ADD_OPND(num_operands, opnd, barrierType); + ADD_OPND(num_operands, opnd, barrierCount); ADD_OPND(num_operands, opnd, barrierCount); CisaFramework::CisaInst *inst = new (m_mem) CisaFramework::CisaInst(m_mem); @@ -8443,6 +8453,45 @@ VISAKernelImpl::AppendVISANamedBarrierSignal(VISA_VectorOpnd *barrierId, return status; } +VISA_BUILDER_API int VISAKernelImpl::AppendVISANamedBarrierSignal( + VISA_VectorOpnd *barrierId, VISA_VectorOpnd *barrierType, + VISA_VectorOpnd *numProducers, VISA_VectorOpnd *numConsumers) { + TIME_SCOPE(VISA_BUILDER_APPEND_INST); + + AppendVISAInstCommon(); + + int status = VISA_SUCCESS; + + if (IS_GEN_BOTH_PATH) { + status = m_builder->translateVISANamedBarrierSignal( + nullptr, barrierId->g4opnd, barrierType->g4opnd, numProducers->g4opnd, + numConsumers->g4opnd); + } + if (IS_VISA_BOTH_PATH) { + VISA_INST_Desc *inst_desc = &CISA_INST_table[ISA_NBARRIER]; + VISA_opnd *opnd[5]; + int num_operands = 0; + + // signal 2: nbarrier.signal <barrierId> <barrierType> <numProds> <numCons> + uint8_t mode = 2; + ADD_OPND(num_operands, opnd, + CreateOtherOpndHelper(0, num_operands, inst_desc, mode)); + ADD_OPND(num_operands, opnd, barrierId); + ADD_OPND(num_operands, opnd, barrierType); + ADD_OPND(num_operands, opnd, numProducers); + ADD_OPND(num_operands, opnd, numConsumers); + + CisaFramework::CisaInst *inst = new (m_mem) CisaFramework::CisaInst(m_mem); + + inst->createCisaInstruction(ISA_NBARRIER, EXEC_SIZE_1, 0, + PredicateOpnd::getNullPred(), opnd, + num_operands, inst_desc); + addInstructionToEnd(inst); + } + + return status; +} + uint32_t VISAKernelImpl::addStringPool(std::string str) { if (str.empty()) { return 0; diff --git a/visa/VisaToG4/TranslateSendSync.cpp b/visa/VisaToG4/TranslateSendSync.cpp index 32e2c3cd848f..e299b69aa5b8 100644 --- a/visa/VisaToG4/TranslateSendSync.cpp +++ 
b/visa/VisaToG4/TranslateSendSync.cpp @@ -152,98 +152,126 @@ G4_INST *IR_Builder::translateLscFence(G4_Predicate *pred, SFID sfid, return fenceInst; } +static void generateNamedBarrier(int &status, IR_Builder &irb, + G4_Predicate *prd, G4_Operand *numProds, + G4_Operand *numCons, + G4_Operand *barrierType, + G4_Operand *barrierId) { + // We only need three dwords but they must be GRF aligned + // (The payload only uses HDR.2:d) + G4_Declare *header = irb.createTempVar(3, Type_UD, irb.getGRFAlign()); + // + // The approach here is to set all immediate values via an initial mov; + // then copy in arguments that come from variable after the fact. + // HDR.2[31:24] = Num Consumers + // HDR.2[23:16] = Num Producers + // HDR.2[15:14] = BarrierType + // HDR.2[13:8] = [undefined] + // HDR.2[7:0] = Named BarrierID + uint32_t immVal = 0; + auto tryEncImmOp = + [&](G4_Operand *op, int offset, uint64_t mask) { + if (op->isImm()) { + auto imm = op->asImm()->getImm(); + vISA_ASSERT((imm & ~mask) == 0, "invalid operand count"); + immVal |= imm << offset; + } + return !op->isImm(); + }; + // collect and group all immediate parameters into the initial value + bool typeNeedsMov = tryEncImmOp(barrierType, 14, 0x3); + bool consNeedsMov = tryEncImmOp(numCons, 24, 0xFF); + bool prodsNeedsMov = tryEncImmOp(numProds, 16, 0xFF); + bool barIdNeedsMov = tryEncImmOp(barrierId, 0, 0xFF); + + if (prd) { + // if the sequence accepts a predicate and one is given + // we must emulate + prd = irb.duplicateOperand(prd); + vISA_ASSERT(prd->getControl() == PRED_DEFAULT, + "predication must be default"); + prd->setControl(G4_Predicate_Control::PRED_ANY_WHOLE); + } -void IR_Builder::generateNamedBarrier(G4_Predicate *prd, int numProducer, - int numConsumer, NamedBarrierType type, - G4_Operand *barrierId) { - struct NamedBarrierPayload { - uint32_t id : 8; - uint32_t fence : 4; - uint32_t padding : 2; - uint32_t type : 2; - uint32_t consumer : 8; - uint32_t producer : 8; - }; - - union { - 
NamedBarrierPayload payload; - uint32_t data; - } payload; - - payload.data = 0; - payload.payload.consumer = numConsumer; - payload.payload.producer = numProducer; - - auto getVal = [](NamedBarrierType type) { - switch (type) { - case NamedBarrierType::BOTH: - return 0; - case NamedBarrierType::PRODUCER: - return 1; - case NamedBarrierType::CONSUMER: - return 2; - default: - vISA_ASSERT_UNREACHABLE("unrecognized NM barreir type"); - return -1; - } - }; - payload.payload.type = getVal(type); - - G4_Declare *header = createTempVar(8, Type_UD, getGRFAlign()); - if (barrierId->isImm()) { - payload.payload.id = (uint8_t)barrierId->asImm()->getInt(); - auto dst = createDst(header->getRegVar(), 0, 2, 1, Type_UD); - auto src = createImm(payload.data, Type_UD); - createMov(prd, g4::SIMD1, dst, src, InstOpt_WriteEnable, true); + // at the very least we need to encode the barrier type (even if 0's) + // Start that as the value in HDR.2:ud + // + // Special case: + // if immVal is zero and barIdNeedsMov. Just create a barrierId mov as + // the initialization to payload.ud[2]. 
+ if (immVal == 0 && barIdNeedsMov) { + // Avoid redundant inst: mov payload.ud[2], 0 + // Just do: mov payload.ud[2], barrierId:ub + vISA_ASSERT(barrierId->isSrcRegRegion() && + IS_BTYPE(barrierId->getType()), + "barrier id should be srcRegRegion with byte type"); + G4_DstRegRegion *dst = irb.createDst(header->getRegVar(), 0, 2, 1, Type_UD); + G4_SrcRegRegion *src = barrierId->asSrcRegRegion(); + G4_INST *i = + irb.createMov(prd, g4::SIMD1, dst, src, InstOpt_WriteEnable, true); + i->setComments("init payload.ud[2] for prod+cons with barrierId"); + barIdNeedsMov = false; } else { - // barrier id should be a srcRegion with int type - // and (1) Hdr.2:ud barrierId 0xFF - // or (1) Hdr.2:ud Hdr.2 payload.data - vISA_ASSERT(barrierId->isSrcRegRegion() && IS_INT(barrierId->getType()), - "expect barrier id to be int"); - auto dst = createDst(header->getRegVar(), 0, 2, 1, Type_UD); - auto src1 = createImm(0xFF, Type_UD); - createBinOp(prd, G4_and, g4::SIMD1, dst, barrierId, src1, - InstOpt_WriteEnable, true); - dst = createDst(header->getRegVar(), 0, 2, 1, Type_UD); - auto orSrc0 = - createSrc(header->getRegVar(), 0, 2, getRegionScalar(), Type_UD); - auto orSrc1 = createImm(payload.data, Type_UD); - createBinOp(prd, G4_or, g4::SIMD1, dst, orSrc0, orSrc1, InstOpt_WriteEnable, - true); + G4_INST *i = irb.createMov( + prd, g4::SIMD1, irb.createDst(header->getRegVar(), 0, 2, 1, Type_UD), + irb.createImm(immVal, Type_UD), InstOpt_WriteEnable, true); + i->setComments("init payload.ud[2] with all immediates"); } - // 1 message length, 0 response length, no header, no ack - int desc = (0x1 << 25) + 0x4; - - auto msgDesc = createSyncMsgDesc(SFID::GATEWAY, desc); - createSendInst(prd, G4_send, g4::SIMD1, createNullDst(Type_UD), - createSrcRegRegion(header, getRegionStride1()), - createImm(desc, Type_UD), InstOpt_WriteEnable, msgDesc, true); -} - -void IR_Builder::generateNamedBarrier(G4_Predicate *prd, G4_Operand *barrierId, - G4_SrcRegRegion *threadCount) { - G4_Declare *header = 
createTempVar(8, Type_UD, getGRFAlign()); + auto isSame = [](G4_Operand *O0, G4_Operand *O1) { + return (O0 == O1 || (O0->isSrcRegRegion() && O1->isSrcRegRegion() && + *O0->asSrcRegRegion() == *O1->asSrcRegRegion())); + }; - // mov (1) Hdr.2<1>:ud 0x0 - // mov (2) Hdr.10<1>:ub threadcount:ub - // mov (1) Hdr.8<1>:ub barrierId:ub - auto dst = createDst(header->getRegVar(), 0, 2, 1, Type_UD); - auto src = createImm(0, Type_UD); - createMov(g4::SIMD1, dst, src, InstOpt_WriteEnable, true); - dst = createDst(header->getRegVar(), 0, 10, 1, Type_UB); - createMov(g4::SIMD2, dst, threadCount, InstOpt_WriteEnable, true); - dst = createDst(header->getRegVar(), 0, 8, 1, Type_UB); - createMov(g4::SIMD1, dst, barrierId, InstOpt_WriteEnable, true); + // For anything that was indirect (probably most things here) + // we must move manually. + if (prodsNeedsMov && consNeedsMov && isSame(numProds, numCons)) { + // optimization to use SIMD2 byte move for both producer and consumer thread counts + G4_DstRegRegion *dst = irb.createDst(header->getRegVar(), 0, 10, 1, Type_UB); + G4_INST *i = irb.createMov(prd, g4::SIMD2, dst, numProds, InstOpt_WriteEnable, true); + i->setComments("set producer+consumer"); + consNeedsMov = prodsNeedsMov = false; + } + // Explicitly move in the non-immediate stragglers that come from registers + if (barIdNeedsMov) { + G4_DstRegRegion *dst = + irb.createDst(header->getRegVar(), 0, 8, 1, Type_UB); + G4_INST *i = irb.createMov(prd, g4::SIMD1, dst, barrierId, InstOpt_WriteEnable, true); + i->setComments("set barrierId"); + } + if (typeNeedsMov) { + G4_Declare *tmpUD = irb.createTempVar(1, Type_UD, G4_SubReg_Align::Even_Word); + auto tDst = irb.createDst(tmpUD->getRegVar(), 0, 0, 1, Type_UD); + G4_INST *typeI = irb.createBinOp(nullptr, G4_shl, g4::SIMD1, tDst, barrierType, + irb.createImm(6, Type_UW), InstOpt_WriteEnable, true); + typeI->setComments("prepare barrierType(shl)"); + G4_DstRegRegion *dst = irb.createDst(header->getRegVar(), 0, 9, 1, Type_UB); + auto 
tSrc = + irb.createSrc(tmpUD->getRegVar(), 0, 0, irb.getRegionScalar(), Type_UB); + G4_INST *i = + irb.createMov(prd, g4::SIMD1, dst, tSrc, InstOpt_WriteEnable, true); + i->setComments("set barrierType"); + } + if (prodsNeedsMov) { + G4_DstRegRegion *dst = + irb.createDst(header->getRegVar(), 0, 10, 1, Type_UB); + G4_INST *i = irb.createMov(prd, g4::SIMD1, dst, numProds, InstOpt_WriteEnable, true); + i->setComments("set producer"); + } + if (consNeedsMov) { + G4_DstRegRegion *dst = + irb.createDst(header->getRegVar(), 0, 11, 1, Type_UB); + G4_INST *i = irb.createMov(prd, g4::SIMD1, dst, numCons, InstOpt_WriteEnable, true); + i->setComments("set consumer"); + } - // 1 message length, 0 response length, no header, no ack int desc = (0x1 << 25) + 0x4; - auto msgDesc = createSyncMsgDesc(SFID::GATEWAY, desc); - createSendInst(nullptr, G4_send, g4::SIMD1, createNullDst(Type_UD), - createSrcRegRegion(header, getRegionStride1()), - createImm(desc, Type_UD), InstOpt_WriteEnable, msgDesc, true); + auto msgDesc = irb.createSyncMsgDesc(SFID::GATEWAY, desc); + (void)irb.createSendInst( + prd, G4_send, g4::SIMD1, irb.createNullDst(Type_UD), + irb.createSrcRegRegion(header, irb.getRegionStride1()), + irb.createImm(desc, Type_UD), InstOpt_WriteEnable, msgDesc, true); } @@ -309,6 +337,21 @@ static void checkNamedBarrierSrc(G4_Operand *src, bool isBarrierId, } } +static void checkNamedBarrierType(G4_Operand *src) { + enum class NamedBarrierType { BOTH = 0, PRODUCER = 1, CONSUMER = 2 }; + if (src->isImm()) { + uint32_t val = (uint32_t)src->asImm()->getInt(); + vISA_ASSERT(val == 0 || val == 1 || val == 2, "illegal named barrier type"); + } else if (src->isSrcRegRegion()) { + vISA_ASSERT(src->asSrcRegRegion()->isScalar(), + "barrier type should have scalar region"); + vISA_ASSERT(IS_WTYPE(src->getType()) && IS_INT(src->getType()), + "barrier type operand should be byte type"); + } else { + vISA_ASSERT(false, "illegal barrier type operand"); + } +} + void 
IR_Builder::updateNamedBarrier(G4_Operand *barrierId) { if (barrierId->isImm()) { // Mark the barrier id is being used. @@ -378,23 +421,22 @@ int IR_Builder::translateVISANamedBarrierWait(G4_Predicate *pred, int IR_Builder::translateVISANamedBarrierSignal(G4_Predicate *pred, G4_Operand *barrierId, - G4_Operand *threadCount) { + G4_Operand *barrierType, + G4_Operand *numProducers, + G4_Operand *numConsumers) { TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION); - checkNamedBarrierSrc(barrierId, true, kernel); - checkNamedBarrierSrc(threadCount, false, kernel); + checkNamedBarrierSrc(barrierId, true /* barrierId */, kernel); + checkNamedBarrierType(barrierType); + checkNamedBarrierSrc(numProducers, false /* numProds */, kernel); + checkNamedBarrierSrc(numConsumers, false /* numCons */, kernel); updateNamedBarrier(barrierId); - if (threadCount->isImm()) { - int numThreads = (int)threadCount->asImm()->getInt(); - generateNamedBarrier(pred, numThreads, numThreads, NamedBarrierType::BOTH, - barrierId); - } else { - generateNamedBarrier(pred, barrierId, threadCount->asSrcRegRegion()); - } - - return VISA_SUCCESS; + int status = VISA_SUCCESS; + generateNamedBarrier(status, *this, pred, numProducers, numConsumers, + barrierType, barrierId); + return status; } // create a fence instruction to the data cache diff --git a/visa/include/VISABuilderAPIDefinition.h b/visa/include/VISABuilderAPIDefinition.h index 0675f73ec8c6..dc1a2051ec51 100644 --- a/visa/include/VISABuilderAPIDefinition.h +++ b/visa/include/VISABuilderAPIDefinition.h @@ -544,6 +544,10 @@ class VISAKernel { AppendVISANamedBarrierSignal(VISA_VectorOpnd *barrierId, VISA_VectorOpnd *barrierCount) = 0; + VISA_BUILDER_API virtual int AppendVISANamedBarrierSignal( + VISA_VectorOpnd *barrierId, VISA_VectorOpnd *barrierType, + VISA_VectorOpnd *numProducers, VISA_VectorOpnd *numConsumers) = 0; + /// FIXME: we should probably have separate API for logic and shift /// instructions, as the arguments they expect are quite different /// 
AppendVISALogicOrShiftInst -- append a two-dst, two-source vISA arithmetic