-
Notifications
You must be signed in to change notification settings - Fork 10
how to generate code for switch option
The condition expression of switch should be integer type in C
language, so dose the case expression, since it uses one or more branch tables to deal with switch in assemble/machine code.
But both condition and case expressions can be Object type in JavaScript language, even can be complex expressions.
According to Reference, Java use hash code to deal with String type in switch. So could jslang to deal with null
, true
, false
, number
, string
, and other constant expressions.
Break expression can be used in case expression, but if there isn't a break expression at the end of case expression, it should flow to next case expression. So, each case expression should has two BasicBlocks, one for check, another to hold real instructions of the case expression, and the latter should be in the same order of the code.
In one word, the pseudo-code is:
//switch(expr)
//{
//caseA:
// stmtA
//caseD:
// stmtD
//default:
// stmtdefault
//casea:
// stmta
//caseb:
// stmtb
//casec:
// stmtc
//
// note: caseA and casea have the same hashCode
//
//Output as:
// expr
// switch [caseA.checkBB, caseD.checkBB, caseb.checkBB, casec.checkBB]
//caseA.checkBB:
// cond (expr==exprA) & br caseA.realBB, casea.checkBB
//caseD.checkBB:
// cond (expr==exprD) & br caseD.realBB, default.realBB
//casea.checkBB:
// cond (expr==expra) & br casea.realBB, default.realBB
//caseb.checkBB:
// cond (expr==exprb) & br caseb.realBB, default.realBB
//casec.checkBB:
// cond (expr==exprc) & br casec.realBB, default.realBB
//caseA.realBB:
// stmtA
//caseD.realBB:
// stmtD
//default.realBB:
// stmtdefault
//casea.realBB:
// stmta
//caseb.realBB:
// stmtb
//casec.realBB:
// stmtc
//endBB:
// ...
But I need another way to deal with Objects and other complex expressions, if/else is a suitable solution. Deal with complex expressions with if/else first, then create SwitchInst to deal with other case expressions if needed.
- Create a struct to hold extra data:
typedef struct CaseInfo
{
CaseClauseNode *caseNode;
SwitchType type;
unsigned int hashCode;
llvm::BasicBlock *checkBB; //the check block of the case, maybe shared block if have the same hashCode
llvm::BasicBlock *realBB; //the real block of the case
struct CaseInfo *next;
}CaseInfo;
`caseNode` is the case expression node;
`type` could be `SwitchDouble`, `SwitchString`, `SwitchUnkown`, `SwitchDefault`;
`hashCode` is used when `type` is `SwitchDouble`, `SwitchString`, to store the hash code of expression;
`checkBB` and `realBB` are described as above;
`next` to deal with no break expression case;
- Init std::vector with the ClauseListNode list as follow:
static SwitchType initCaseInfo(ClauseListNode *list, std::vector<CaseInfo> &caseInfos)
{
CaseInfo caseInfo;
JSValue jsValue;
BooleanNode *booleanNode;
NumberNode *numberNode;
StringNode *stringNode;
SwitchType switchType = SwitchInteger;
for (; list; list = list->next())
{
ExpressionNode *exprNode = list->getClause()->expr();
memset((void *)&caseInfo, 0x00, sizeof(CaseInfo));
caseInfo.caseNode = list->getClause();
switch (exprNode->opcodeID())
{
case op_const_undefined:
caseInfo.type = SwitchDouble;
caseInfo.hashCode = hash6432shift(((uint64_t)JSVAL_TAG_UNDEFINED)<<32 | (uint64_t)0x00000000);
break;
case op_const_null:
caseInfo.type = SwitchDouble;
caseInfo.hashCode = hash6432shift(((uint64_t)JSVAL_TAG_NULL)<<32 | (uint64_t)0x00000000);
break;
case op_const_boolean:
booleanNode = (BooleanNode *)exprNode;
caseInfo.type = SwitchDouble;
caseInfo.hashCode = hash6432shift(((uint64_t)JSVAL_TAG_BOOLEAN)<<32 | (uint64_t)booleanNode->value());
break;
case op_const_number:
numberNode = (NumberNode *)exprNode;
jsValue.asDouble = numberNode->value();
caseInfo.type = SwitchDouble;
caseInfo.hashCode = hash6432shift(jsValue.asInt64);
break;
case op_const_string:
stringNode = (StringNode *)exprNode;
caseInfo.type = SwitchString;
#if defined(JSLANG_UTF8)
caseInfo.hashCode = bkdr_hash(stringNode->value().name.c_str());
#else
caseInfo.hashCode = bkdr_hash(stringNode->value().name.c_str());
#err "Can't deal with charset !"
#endif
break;
default:
caseInfo.type = SwitchUnkown;
break;
}
switchType = (SwitchType)((int)switchType | (int)caseInfo.type);
caseInfos.push_back(caseInfo);
}
return switchType;
}
- Init all cases in caseInfos, in original order: m_list1, m_defaultClause, m_list2(to deal with no break case)
std::vector<CaseInfo> caseInfos;
initCaseInfo(m_block->list1(), caseInfos);
if (NULL != m_block->defaultClause())
{
CaseInfo caseInfo;
memset((void *)&caseInfo, 0x00, sizeof(CaseInfo));
caseInfo.caseNode = m_block->defaultClause();
caseInfo.type = SwitchDefault;
caseInfos.push_back(caseInfo);
}
initCaseInfo( m_block->list2(), caseInfos);
- merge case via check haseCode, and create block for each case
std::map<unsigned int, llvm::BasicBlock*> hashBBs;
for (int indexX = realCount - 1; indexX >= 0; indexX--)
{
if (SwitchDefault == caseInfos[indexX].type)
{
// default case has no checkBB
defaultBB = BasicBlock::Create(getGlobalContext(), "sw.default");
caseInfos[indexX].realBB = defaultBB;
continue;
}
else if (SwitchUnkown == caseInfos[indexX].type)
{
//link all non-const expressions together with next point
caseInfos[indexX].checkBB = BasicBlock::Create(getGlobalContext(), "sw.nonconst.case");
caseInfos[indexX].realBB = BasicBlock::Create(getGlobalContext(), "sw.real.case");
for (int indexY = indexX + 1; indexY < realCount; indexY++)
{
if (SwitchUnkown == caseInfos[indexY].type)
{
caseInfos[indexX].next = &caseInfos[indexY];
break;
}
}
continue;
}
hashCode = caseInfos[indexX].hashCode % realCount;
checkBB = hashBBs[hashCode];
if (NULL == checkBB)
{
checkCount++;
}
else
{
for (int indexY = indexX + 1; indexY < realCount; indexY++)
{
if (hashCode == caseInfos[indexY].hashCode)
{
caseInfos[indexX].next = &caseInfos[indexY];
break;
}
}
if (NULL == caseInfos[indexX].next)
{
std::cout << __FUNCTION__ << " hash conflict but can't find it !" << std::endl;
abort();
}
}
checkBB = BasicBlock::Create(getGlobalContext(), "sw.check.case");
hashBBs[hashCode] = checkBB;
//store the real hashCode, so we can get hashCode directly atfer the first loop
caseInfos[indexX].hashCode = hashCode;
caseInfos[indexX].checkBB = checkBB;
caseInfos[indexX].realBB = BasicBlock::Create(getGlobalContext(), "sw.real.case");
}
- init and push all non-const checkBBs
for (int indexX = 0; indexX < realCount; indexX++)
{
if (SwitchUnkown != caseInfos[indexX].type)
{
continue;
}
ExpressionNode *caseExpr = (ExpressionNode *)caseInfos[indexX].caseNode->expr();
//in non-const check:
// current non-const expression's checkBB is used for next non-const expression
// since the first non-const expression use "currentBlock"
// the last non-const expression's checkBB is used as new "currentBlock"
llvm::Value *condV = CodeGenJSValue32FunctionCmp(context, exprRef, caseExpr->emitBytecode(context, dst), op_stricteq);
BranchInst::Create(caseInfos[indexX].realBB, caseInfos[indexX].checkBB, condV, context.currentBlock());
currentFunction->getBasicBlockList().push_back(caseInfos[indexX].checkBB);
context.pushBlock(caseInfos[indexX].checkBB);
}
- create switch inst at currentBB if needed, if there aren't any non-complex expressions, SwitchInst isn't needed, just branch to default case or the end.
if (checkCount> 0)
{
llvm::IntegerType *int32Type = llvm::Type::getInt32Ty(llvm::getGlobalContext());
RegisterID *hashV = CodeGenJSValue32FunctionGetHashCode(context, exprRef);
BinaryOperator* switchV = BinaryOperator::Create(Instruction::URem, hashV, llvm::ConstantInt::get(int32Type, realCount), "mod", context.currentBlock());
llvm::SwitchInst *switchInst = llvm::SwitchInst::Create(switchV, defaultBB, realCount, context.currentBlock());
//init and push all const checkBBs
for (int indexX = 0; indexX < realCount; indexX++)
{
if (SwitchDefault == caseInfos[indexX].type)
{
continue;
}
if (SwitchUnkown == caseInfos[indexX].type)
{
continue;
}
hashCode = caseInfos[indexX].hashCode;
if (caseInfos[indexX].checkBB == hashBBs[hashCode])
{
//the merged entry a same hashCode, add to switchInst
switchInst->addCase(llvm::ConstantInt::get(int32Type, hashCode), caseInfos[indexX].checkBB);
}
ExpressionNode *caseExpr = (ExpressionNode *)caseInfos[indexX].caseNode->expr();
llvm::BasicBlock* falseBB = defaultBB;
if (NULL != caseInfos[indexX].next)
{
falseBB = caseInfos[indexX].next->checkBB;
}
currentFunction->getBasicBlockList().push_back(caseInfos[indexX].checkBB);
context.pushBlock(caseInfos[indexX].checkBB);
llvm::Value *condV = CodeGenJSValue32FunctionCmp(context, exprRef, caseExpr->emitBytecode(context, dst), op_stricteq);
BranchInst::Create(caseInfos[indexX].realBB, falseBB, condV, context.currentBlock());
context.popBlock(caseInfos[indexX].checkBB);
}
}
else
{
//if all the cases are non-const cases, no need to create SwitchInst, just branch to defaultBB(endBB if there is no defult) of the statement
BranchInst::Create(defaultBB, context.currentBlock());
}
- init and push all realBBs in the original order
context.setBreakBlock(endBB);
for (int indexX = 0; indexX < realCount; indexX++)
{
SourceElements *caseStmt = (SourceElements *)caseInfos[indexX].caseNode->statments();
llvm::BasicBlock* continueBB = endBB;
if (indexX + 1 < realCount)
{
continueBB = caseInfos[indexX + 1].realBB;
}
currentFunction->getBasicBlockList().push_back(caseInfos[indexX].realBB);
context.pushBlock(caseInfos[indexX].realBB);
caseStmt->emitBytecode(context, dst);
if (NULL == context.currentBlock()->getTerminator())
{
BranchInst::Create(continueBB, context.currentBlock());
}
context.popBlock(caseInfos[indexX].realBB);
}
- push endBB
currentFunction->getBasicBlockList().push_back(endBB);
context.pushBlock(endBB);
Okay, let's test with it, IR code generated by jslang for JavaScript
language:
var a = 0;
switch (a)
{
case null:
console.log("null");
break;
case undefined=1:
console.log("undefined");
break;
case a:
console.log("number1");
//break;
default:
console.log("unknown");
break;
case "0":
console.log("string");
break;
case false:
console.log("boolean");
break;
case 0:
console.log("number");
break;
}
define i32 @main(i32, i8**) {
entry:
call void @jsextern_print_tick()
call void @AllInOneInit()
%asDouble = bitcast %union.JSValue* @a to double*
store double 0.000000e+00, double* %asDouble, align 8
%asDouble1 = bitcast %union.JSValue* @undefined to double*
store double 1.000000e+00, double* %asDouble1, align 8
%2 = call i1 @js_op_strict_eq(%union.JSValue* @a, %union.JSValue* @undefined)
br i1 %2, label %sw.real.case13, label %sw.nonconst.case
sw.nonconst.case: ; preds = %entry
br i1 true, label %sw.real.case19, label %sw.nonconst.case2
sw.nonconst.case2: ; preds = %sw.nonconst.case
%3 = call i32 @JSValueHash(%union.JSValue* @a)
%mod = urem i32 %3, 7
switch i32 %mod, label %sw.default [
i32 5, label %sw.check.case
i32 6, label %sw.check.case3
i32 4, label %sw.check.case4
]
sw.check.case: ; preds = %sw.nonconst.case2
%4 = alloca %union.JSValue, align 8
%asIntPtr = bitcast %union.JSValue* %4 to i32*
%5 = load i32* @jsValue32TagPart
%tagRef = getelementptr i32* %asIntPtr, i32 %5
store i32 -121, i32* %tagRef, align 4
%6 = load i32* @jsValue32PayloadPart
%payloadRef = getelementptr i32* %asIntPtr, i32 %6
store i32 0, i32* %payloadRef, align 4
%7 = call i1 @js_op_strict_eq(%union.JSValue* @a, %union.JSValue* %4)
br i1 %7, label %sw.real.case, label %sw.end
sw.check.case3: ; preds = %sw.nonconst.case2
%8 = call i64 @js_string_new_utf8_len(i8* getelementptr inbounds ([2 x i8]* @"0", i32 0, i32 0), i32 1)
%jsValueRightRef = alloca %union.JSValue, align 8
%asInt64 = bitcast %union.JSValue* %jsValueRightRef to i64*
store i64 %8, i64* %asInt64, align 8
%9 = call i1 @js_op_strict_eq(%union.JSValue* @a, %union.JSValue* %jsValueRightRef)
br i1 %9, label %sw.real.case30, label %sw.check.case8
sw.check.case4: ; preds = %sw.nonconst.case2
%10 = alloca %union.JSValue, align 8
%asIntPtr5 = bitcast %union.JSValue* %10 to i32*
%11 = load i32* @jsValue32TagPart
%tagRef6 = getelementptr i32* %asIntPtr5, i32 %11
store i32 -123, i32* %tagRef6, align 4
%12 = load i32* @jsValue32PayloadPart
%payloadRef7 = getelementptr i32* %asIntPtr5, i32 %12
store i32 0, i32* %payloadRef7, align 4
%13 = call i1 @js_op_strict_eq(%union.JSValue* @a, %union.JSValue* %10)
br i1 %13, label %sw.real.case36, label %sw.end
sw.check.case8: ; preds = %sw.check.case3
%jsValueRightRef9 = alloca %union.JSValue, align 8
%asDouble10 = bitcast %union.JSValue* %jsValueRightRef9 to double*
store double 0.000000e+00, double* %asDouble10, align 8
%14 = call i1 @js_op_strict_eq(%union.JSValue* @a, %union.JSValue* %jsValueRightRef9)
br i1 %14, label %sw.real.case42, label %sw.end
sw.real.case: ; preds = %sw.check.case
%15 = call i64 @js_object_getprop_utf8(%union.JSValue* @js_console, i8* getelementptr inbounds ([4 x i8]* @log3, i32 0, i32 0))
%log = alloca %union.JSValue, align 8
%asInt6411 = bitcast %union.JSValue* %log to i64*
store i64 %15, i64* %asInt6411, align 8
%16 = call i64 @js_string_new_utf8_len(i8* getelementptr inbounds ([5 x i8]* @null, i32 0, i32 0), i32 4)
%array = alloca [1 x %union.JSValue]
%17 = bitcast [1 x %union.JSValue]* %array to %union.JSValue*
%argvIndex = getelementptr %union.JSValue* %17, i32 0
%asInt6412 = bitcast %union.JSValue* %argvIndex to i64*
store i64 %16, i64* %asInt6412, align 8
%18 = call i64 @js_invoke_closure(%union.JSValue* %log, %union.JSValue* @js_console, i32 1, %union.JSValue* %17)
br label %sw.end
sw.real.case13: ; preds = %entry
%19 = call i64 @js_object_getprop_utf8(%union.JSValue* @js_console, i8* getelementptr inbounds ([4 x i8]* @log4, i32 0, i32 0))
%log14 = alloca %union.JSValue, align 8
%asInt6415 = bitcast %union.JSValue* %log14 to i64*
store i64 %19, i64* %asInt6415, align 8
%20 = call i64 @js_string_new_utf8_len(i8* getelementptr inbounds ([10 x i8]* @undefined5, i32 0, i32 0), i32 9)
%array16 = alloca [1 x %union.JSValue]
%21 = bitcast [1 x %union.JSValue]* %array16 to %union.JSValue*
%argvIndex17 = getelementptr %union.JSValue* %21, i32 0
%asInt6418 = bitcast %union.JSValue* %argvIndex17 to i64*
store i64 %20, i64* %asInt6418, align 8
%22 = call i64 @js_invoke_closure(%union.JSValue* %log14, %union.JSValue* @js_console, i32 1, %union.JSValue* %21)
br label %sw.end
sw.real.case19: ; preds = %sw.nonconst.case
%23 = call i64 @js_object_getprop_utf8(%union.JSValue* @js_console, i8* getelementptr inbounds ([4 x i8]* @log6, i32 0, i32 0))
%log20 = alloca %union.JSValue, align 8
%asInt6421 = bitcast %union.JSValue* %log20 to i64*
store i64 %23, i64* %asInt6421, align 8
%24 = call i64 @js_string_new_utf8_len(i8* getelementptr inbounds ([8 x i8]* @number1, i32 0, i32 0), i32 7)
%array22 = alloca [1 x %union.JSValue]
%25 = bitcast [1 x %union.JSValue]* %array22 to %union.JSValue*
%argvIndex23 = getelementptr %union.JSValue* %25, i32 0
%asInt6424 = bitcast %union.JSValue* %argvIndex23 to i64*
store i64 %24, i64* %asInt6424, align 8
%26 = call i64 @js_invoke_closure(%union.JSValue* %log20, %union.JSValue* @js_console, i32 1, %union.JSValue* %25)
br label %sw.default
sw.default: ; preds = %sw.real.case19, %sw.nonconst.case2
%27 = call i64 @js_object_getprop_utf8(%union.JSValue* @js_console, i8* getelementptr inbounds ([4 x i8]* @log7, i32 0, i32 0))
%log25 = alloca %union.JSValue, align 8
%asInt6426 = bitcast %union.JSValue* %log25 to i64*
store i64 %27, i64* %asInt6426, align 8
%28 = call i64 @js_string_new_utf8_len(i8* getelementptr inbounds ([8 x i8]* @unknown, i32 0, i32 0), i32 7)
%array27 = alloca [1 x %union.JSValue]
%29 = bitcast [1 x %union.JSValue]* %array27 to %union.JSValue*
%argvIndex28 = getelementptr %union.JSValue* %29, i32 0
%asInt6429 = bitcast %union.JSValue* %argvIndex28 to i64*
store i64 %28, i64* %asInt6429, align 8
%30 = call i64 @js_invoke_closure(%union.JSValue* %log25, %union.JSValue* @js_console, i32 1, %union.JSValue* %29)
br label %sw.end
sw.real.case30: ; preds = %sw.check.case3
%31 = call i64 @js_object_getprop_utf8(%union.JSValue* @js_console, i8* getelementptr inbounds ([4 x i8]* @log8, i32 0, i32 0))
%log31 = alloca %union.JSValue, align 8
%asInt6432 = bitcast %union.JSValue* %log31 to i64*
store i64 %31, i64* %asInt6432, align 8
%32 = call i64 @js_string_new_utf8_len(i8* getelementptr inbounds ([7 x i8]* @string, i32 0, i32 0), i32 6)
%array33 = alloca [1 x %union.JSValue]
%33 = bitcast [1 x %union.JSValue]* %array33 to %union.JSValue*
%argvIndex34 = getelementptr %union.JSValue* %33, i32 0
%asInt6435 = bitcast %union.JSValue* %argvIndex34 to i64*
store i64 %32, i64* %asInt6435, align 8
%34 = call i64 @js_invoke_closure(%union.JSValue* %log31, %union.JSValue* @js_console, i32 1, %union.JSValue* %33)
br label %sw.end
sw.real.case36: ; preds = %sw.check.case4
%35 = call i64 @js_object_getprop_utf8(%union.JSValue* @js_console, i8* getelementptr inbounds ([4 x i8]* @log9, i32 0, i32 0))
%log37 = alloca %union.JSValue, align 8
%asInt6438 = bitcast %union.JSValue* %log37 to i64*
store i64 %35, i64* %asInt6438, align 8
%36 = call i64 @js_string_new_utf8_len(i8* getelementptr inbounds ([8 x i8]* @boolean, i32 0, i32 0), i32 7)
%array39 = alloca [1 x %union.JSValue]
%37 = bitcast [1 x %union.JSValue]* %array39 to %union.JSValue*
%argvIndex40 = getelementptr %union.JSValue* %37, i32 0
%asInt6441 = bitcast %union.JSValue* %argvIndex40 to i64*
store i64 %36, i64* %asInt6441, align 8
%38 = call i64 @js_invoke_closure(%union.JSValue* %log37, %union.JSValue* @js_console, i32 1, %union.JSValue* %37)
br label %sw.end
sw.real.case42: ; preds = %sw.check.case8
%39 = call i64 @js_object_getprop_utf8(%union.JSValue* @js_console, i8* getelementptr inbounds ([4 x i8]* @log11, i32 0, i32 0))
%log43 = alloca %union.JSValue, align 8
%asInt6444 = bitcast %union.JSValue* %log43 to i64*
store i64 %39, i64* %asInt6444, align 8
%40 = call i64 @js_string_new_utf8_len(i8* getelementptr inbounds ([7 x i8]* @number, i32 0, i32 0), i32 6)
%array45 = alloca [1 x %union.JSValue]
%41 = bitcast [1 x %union.JSValue]* %array45 to %union.JSValue*
%argvIndex46 = getelementptr %union.JSValue* %41, i32 0
%asInt6447 = bitcast %union.JSValue* %argvIndex46 to i64*
store i64 %40, i64* %asInt6447, align 8
%42 = call i64 @js_invoke_closure(%union.JSValue* %log43, %union.JSValue* @js_console, i32 1, %union.JSValue* %41)
br label %sw.end
sw.end: ; preds = %sw.real.case42, %sw.real.case36, %sw.real.case30, %sw.default, %sw.real.case13, %sw.real.case, %sw.check.case8, %sw.check.case4, %sw.check.case
call void @jsextern_print_tick()
ret i32 0
}
The output is:
---0(ms)---
litterendian
AllInOneInit:115
number1
unknown
---11(ms)---