98 {
"sinc", Intrinsic::not_intrinsic},
99 {
"sincn", Intrinsic::not_intrinsic},
100 {
"cos", Intrinsic::cos},
101 {
"sin", Intrinsic::sin},
102 {
"tan", Intrinsic::not_intrinsic},
103 {
"acos", Intrinsic::not_intrinsic},
104 {
"__nv_frcp_rd", Intrinsic::not_intrinsic},
105 {
"__nv_frcp_rn", Intrinsic::not_intrinsic},
106 {
"__nv_frcp_ru", Intrinsic::not_intrinsic},
107 {
"__nv_frcp_rz", Intrinsic::not_intrinsic},
108 {
"__nv_drcp_rd", Intrinsic::not_intrinsic},
109 {
"__nv_drcp_rn", Intrinsic::not_intrinsic},
110 {
"__nv_drcp_ru", Intrinsic::not_intrinsic},
111 {
"__nv_drcp_rz", Intrinsic::not_intrinsic},
112 {
"asin", Intrinsic::not_intrinsic},
113 {
"__nv_asin", Intrinsic::not_intrinsic},
114 {
"atan", Intrinsic::not_intrinsic},
115 {
"atan2", Intrinsic::not_intrinsic},
116 {
"__nv_atan2", Intrinsic::not_intrinsic},
117#if LLVM_VERSION_MAJOR >= 19
118 {
"cosh", Intrinsic::cosh},
119 {
"sinh", Intrinsic::sinh},
120 {
"tanh", Intrinsic::tanh},
122 {
"cosh", Intrinsic::not_intrinsic},
123 {
"sinh", Intrinsic::not_intrinsic},
124 {
"tanh", Intrinsic::not_intrinsic},
126 {
"acosh", Intrinsic::not_intrinsic},
127 {
"asinh", Intrinsic::not_intrinsic},
128 {
"atanh", Intrinsic::not_intrinsic},
129 {
"exp", Intrinsic::exp},
130 {
"exp2", Intrinsic::exp2},
131 {
"exp10", Intrinsic::not_intrinsic},
132 {
"log", Intrinsic::log},
133 {
"log10", Intrinsic::log10},
134 {
"expm1", Intrinsic::not_intrinsic},
135 {
"log1p", Intrinsic::not_intrinsic},
136 {
"log2", Intrinsic::log2},
137 {
"logb", Intrinsic::not_intrinsic},
138 {
"pow", Intrinsic::pow},
139 {
"sqrt", Intrinsic::sqrt},
140 {
"cbrt", Intrinsic::not_intrinsic},
141 {
"hypot", Intrinsic::not_intrinsic},
143 {
"__mulsc3", Intrinsic::not_intrinsic},
144 {
"__muldc3", Intrinsic::not_intrinsic},
145 {
"__multc3", Intrinsic::not_intrinsic},
146 {
"__mulxc3", Intrinsic::not_intrinsic},
148 {
"__divsc3", Intrinsic::not_intrinsic},
149 {
"__divdc3", Intrinsic::not_intrinsic},
150 {
"__divtc3", Intrinsic::not_intrinsic},
151 {
"__divxc3", Intrinsic::not_intrinsic},
153 {
"Faddeeva_erf", Intrinsic::not_intrinsic},
154 {
"Faddeeva_erfc", Intrinsic::not_intrinsic},
155 {
"Faddeeva_erfcx", Intrinsic::not_intrinsic},
156 {
"Faddeeva_erfi", Intrinsic::not_intrinsic},
157 {
"Faddeeva_dawson", Intrinsic::not_intrinsic},
158 {
"Faddeeva_erf_re", Intrinsic::not_intrinsic},
159 {
"Faddeeva_erfc_re", Intrinsic::not_intrinsic},
160 {
"Faddeeva_erfcx_re", Intrinsic::not_intrinsic},
161 {
"Faddeeva_erfi_re", Intrinsic::not_intrinsic},
162 {
"Faddeeva_dawson_re", Intrinsic::not_intrinsic},
163 {
"erf", Intrinsic::not_intrinsic},
164 {
"erfi", Intrinsic::not_intrinsic},
165 {
"erfc", Intrinsic::not_intrinsic},
166 {
"erfinv", Intrinsic::not_intrinsic},
168 {
"__fd_sincos_1", Intrinsic::not_intrinsic},
169 {
"sincospi", Intrinsic::not_intrinsic},
170 {
"cmplx_inv", Intrinsic::not_intrinsic},
173 {
"j0", Intrinsic::not_intrinsic},
174 {
"j1", Intrinsic::not_intrinsic},
175 {
"jn", Intrinsic::not_intrinsic},
176 {
"y0", Intrinsic::not_intrinsic},
177 {
"y1", Intrinsic::not_intrinsic},
178 {
"yn", Intrinsic::not_intrinsic},
179 {
"tgamma", Intrinsic::not_intrinsic},
180 {
"lgamma", Intrinsic::not_intrinsic},
181 {
"logabsgamma", Intrinsic::not_intrinsic},
182 {
"ceil", Intrinsic::ceil},
183 {
"__nv_ceil", Intrinsic::ceil},
184 {
"floor", Intrinsic::floor},
185 {
"fmod", Intrinsic::not_intrinsic},
186 {
"trunc", Intrinsic::trunc},
187 {
"round", Intrinsic::round},
188 {
"rint", Intrinsic::rint},
189 {
"nearbyint", Intrinsic::nearbyint},
190 {
"remainder", Intrinsic::not_intrinsic},
191 {
"copysign", Intrinsic::copysign},
192 {
"nextafter", Intrinsic::not_intrinsic},
193 {
"nexttoward", Intrinsic::not_intrinsic},
194 {
"fdim", Intrinsic::not_intrinsic},
195 {
"fmax", Intrinsic::maxnum},
196 {
"fmin", Intrinsic::minnum},
197 {
"fabs", Intrinsic::fabs},
198 {
"fma", Intrinsic::fma},
199 {
"ilogb", Intrinsic::not_intrinsic},
200 {
"scalbn", Intrinsic::not_intrinsic},
201 {
"scalbln", Intrinsic::not_intrinsic},
202 {
"powi", Intrinsic::powi},
203 {
"cabs", Intrinsic::not_intrinsic},
204 {
"ldexp", Intrinsic::not_intrinsic},
205 {
"fmod", Intrinsic::not_intrinsic},
206 {
"finite", Intrinsic::not_intrinsic},
207 {
"isinf", Intrinsic::not_intrinsic},
208 {
"isnan", Intrinsic::not_intrinsic},
209 {
"lround", Intrinsic::lround},
210 {
"llround", Intrinsic::llround},
211 {
"lrint", Intrinsic::lrint},
212 {
"llrint", Intrinsic::llrint}};
362 std::map<Value *, std::set<int64_t>> &intseen,
363 ScalarEvolution &SE)
const {
364 if (
auto constant = dyn_cast<ConstantInt>(val)) {
365#if LLVM_VERSION_MAJOR > 14
366 if (constant->getValue().getSignificantBits() > 64)
369 if (constant->getValue().getMinSignedBits() > 64)
372 return {constant->getSExtValue()};
375 if (isa<ConstantPointerNull>(val)) {
381 if (
auto arg = dyn_cast<llvm::Argument>(val)) {
385 llvm::errs() <<
" KnownValues[" << *pair.first <<
"] - "
386 << pair.first->getParent()->getName() <<
"\n";
388 llvm::errs() <<
" arg: " << *arg <<
" - " << arg->getParent()->getName()
392 return found->second;
395 if (intseen.find(val) != intseen.end())
399 if (
auto ci = dyn_cast<CastInst>(val)) {
403 auto insert = [&](int64_t v) {
404 if (intseen[val].size() == 0) {
405 intseen[val].insert(v);
407 if (intseen[val].size() == 1) {
409 if (abs(*intseen[val].begin()) > abs(v)) {
410 intseen[val].clear();
411 intseen[val].insert(v);
419 intseen[val].insert(v);
426 intseen[val].insert(v);
431 if (
auto II = dyn_cast<IntrinsicInst>(val)) {
432 switch (II->getIntrinsicID()) {
433#if LLVM_VERSION_MAJOR >= 12
440 case Intrinsic::nvvm_read_ptx_sreg_tid_x:
441 case Intrinsic::nvvm_read_ptx_sreg_tid_y:
442 case Intrinsic::nvvm_read_ptx_sreg_tid_z:
443 case Intrinsic::nvvm_read_ptx_sreg_ctaid_x:
444 case Intrinsic::nvvm_read_ptx_sreg_ctaid_y:
445 case Intrinsic::nvvm_read_ptx_sreg_ctaid_z:
446 case Intrinsic::amdgcn_workitem_id_x:
447 case Intrinsic::amdgcn_workitem_id_y:
448 case Intrinsic::amdgcn_workitem_id_z:
455 if (
auto LI = dyn_cast<LoadInst>(val)) {
456 if (
auto AI = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
457 StoreInst *SI =
nullptr;
459 for (
auto u : AI->users()) {
460 if (
auto SIu = dyn_cast<StoreInst>(u)) {
461 if (SI && SIu->getValueOperand() == AI) {
466 }
else if (!isa<LoadInst>(u)) {
467 if (!cast<Instruction>(u)->mayReadOrWriteMemory() &&
468 cast<Instruction>(u)->use_empty())
470 if (
auto CI = dyn_cast<CallBase>(u)) {
471 if (
auto F = CI->getCalledFunction()) {
472 auto funcName = F->getName();
473 if (funcName ==
"__kmpc_for_static_init_4" ||
474 funcName ==
"__kmpc_for_static_init_4u" ||
475 funcName ==
"__kmpc_for_static_init_8" ||
476 funcName ==
"__kmpc_for_static_init_8u") {
485 if (SI && !failed && DT.dominates(SI, LI)) {
493 if (
auto pn = dyn_cast<PHINode>(val)) {
494 if (SE.isSCEVable(pn->getType()))
495 if (
auto S = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(pn))) {
496 if (
auto StartC = dyn_cast<SCEVConstant>(S->getStart())) {
497 auto L = S->getLoop();
498 auto BE = SE.getBackedgeTakenCount(L);
499 if (BE != SE.getCouldNotCompute()) {
500 if (
auto Iters = dyn_cast<SCEVConstant>(BE)) {
501 uint64_t ival = Iters->getAPInt().getZExtValue();
507 bool rotated =
false;
508 BasicBlock *Latch = L->getLoopLatch();
509 rotated = Latch && L->isLoopExiting(Latch);
519 if (
auto StepC = dyn_cast<SCEVConstant>(S->getOperand(1))) {
520 APInt StartI = StartC->getAPInt();
521 APInt A = StepC->getAPInt();
530 ival = std::min(ival, (uint64_t)0);
544 ival = std::min(ival, (uint64_t)0);
547 ival = std::min(ival, (uint64_t)0);
551 for (uint64_t i = istart; i <= ival; i++) {
552 if (
auto Val = dyn_cast<SCEVConstant>(S->evaluateAtIteration(
553 SE.getConstant(Iters->getType(), i,
false),
555 insert(Val->getAPInt().getSExtValue());
564 for (
unsigned i = 0; i < pn->getNumIncomingValues(); ++i) {
565 auto a = pn->getIncomingValue(i);
566 auto b = pn->getIncomingBlock(i);
569 if (pn->getParent() == b || DT.dominates(pn, b)) {
576 for (
auto pval : inset) {
577 if (pval < 20 && pval > -20) {
585 if (
auto bo = dyn_cast<BinaryOperator>(a)) {
586 if (bo->getOperand(0) == pn || bo->getOperand(1) == pn) {
587 if (bo->getOpcode() == BinaryOperator::Add ||
588 bo->getOpcode() == BinaryOperator::Sub) {
597 if (
auto bo = dyn_cast<BinaryOperator>(val)) {
600 if (bo->getOpcode() == BinaryOperator::Mul) {
602 if (inset0.size() == 1 || inset1.size() == 1) {
603 for (
auto val0 : inset0) {
604 for (
auto val1 : inset1) {
610 if (inset0.count(0) || inset1.count(0)) {
611 intseen[val].insert(0);
615 if (bo->getOpcode() == BinaryOperator::Add) {
616 if (inset0.size() == 1 || inset1.size() == 1) {
617 for (
auto val0 : inset0) {
618 for (
auto val1 : inset1) {
624 if (bo->getOpcode() == BinaryOperator::Sub) {
625 if (inset0.size() == 1 || inset1.size() == 1) {
626 for (
auto val0 : inset0) {
627 for (
auto val1 : inset1) {
634 if (bo->getOpcode() == BinaryOperator::SDiv) {
635 if (inset0.size() == 1 || inset1.size() == 1) {
636 for (
auto val0 : inset0) {
637 for (
auto val1 : inset1) {
644 if (bo->getOpcode() == BinaryOperator::Shl) {
645 if (inset0.size() == 1 || inset1.size() == 1) {
646 for (
auto val0 : inset0) {
647 for (
auto val1 : inset1) {
648 insert(val0 << val1);
656 if (bo->getOpcode() == BinaryOperator::AShr ||
657 bo->getOpcode() == BinaryOperator::LShr) {
658 if (inset0.size() == 1 || inset1.size() == 1) {
659 for (
auto val0 : inset0) {
660 for (
auto val1 : inset1) {
661 insert(val0 >> val1);
1228 for (Instruction &I : BB) {
1229 if (
auto MD = I.getMetadata(
"enzyme_type")) {
1232 auto RegSize = (DL.getTypeSizeInBits(I.getType()) + 7) / 8;
1233 for (
const auto &pair : TT.getMapping()) {
1234 if (pair.first[0] != -1) {
1235 if ((
size_t)pair.first[0] >= RegSize) {
1236 llvm::errs() <<
" bad enzyme_type " << TT.str()
1237 <<
" RegSize=" << RegSize <<
" I:" << I <<
"\n";
1238 llvm::report_fatal_error(
"Canonicalization failed");
1245 if (CallBase *call = dyn_cast<CallBase>(&I)) {
1246#if LLVM_VERSION_MAJOR >= 14
1247 size_t num_args = call->arg_size();
1249 size_t num_args = call->getNumArgOperands();
1252 if (call->getAttributes().hasAttribute(AttributeList::ReturnIndex,
1254 auto attr = call->getAttributes().getAttribute(
1255 AttributeList::ReturnIndex,
"enzyme_type");
1259 auto RegSize = I.getType()->isVoidTy()
1261 : (DL.getTypeSizeInBits(I.getType()) + 7) / 8;
1262 for (
const auto &pair : TT.getMapping()) {
1263 if (pair.first[0] != -1) {
1264 if ((
size_t)pair.first[0] >= RegSize) {
1265 llvm::errs() <<
" bad enzyme_type " << TT.str()
1266 <<
" RegSize=" << RegSize <<
" I:" << I <<
"\n";
1267 llvm::report_fatal_error(
"Canonicalization failed");
1273 for (
size_t i = 0; i < num_args; i++) {
1274 if (call->getAttributes().hasParamAttr(i,
"enzyme_type")) {
1275 auto attr = call->getAttributes().getParamAttr(i,
"enzyme_type");
1278 auto RegSize = I.getType()->isVoidTy()
1280 : (DL.getTypeSizeInBits(I.getType()) + 7) / 8;
1281 for (
const auto &pair : TT.getMapping()) {
1282 if (pair.first[0] != -1) {
1283 if ((
size_t)pair.first[0] >= RegSize) {
1284 llvm::errs() <<
" bad enzyme_type " << TT.str()
1285 <<
" RegSize=" << RegSize <<
" I:" << I <<
"\n";
1286 llvm::report_fatal_error(
"Canonicalization failed");
1294 Function *F = call->getCalledFunction();
1297 if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
1299 auto attr = F->getAttributes().getAttribute(
1300 AttributeList::ReturnIndex,
"enzyme_type");
1303 auto RegSize = I.getType()->isVoidTy()
1305 : (DL.getTypeSizeInBits(I.getType()) + 7) / 8;
1306 for (
const auto &pair : TT.getMapping()) {
1307 if (pair.first[0] != -1) {
1308 if ((
size_t)pair.first[0] >= RegSize) {
1309 llvm::errs() <<
" bad enzyme_type " << TT.str()
1310 <<
" RegSize=" << RegSize <<
" I:" << I <<
"\n";
1311 llvm::report_fatal_error(
"Canonicalization failed");
1317 size_t f_num_args = F->arg_size();
1318 for (
size_t i = 0; i < f_num_args; i++) {
1319 if (F->getAttributes().hasParamAttr(i,
"enzyme_type")) {
1320 auto attr = F->getAttributes().getParamAttr(i,
"enzyme_type");
1323 auto RegSize = I.getType()->isVoidTy()
1325 : (DL.getTypeSizeInBits(I.getType()) + 7) / 8;
1326 for (
const auto &pair : TT.getMapping()) {
1327 if (pair.first[0] != -1) {
1328 if ((
size_t)pair.first[0] >= RegSize) {
1330 <<
" bad enzyme_type " << TT.str()
1331 <<
" RegSize=" << RegSize <<
" I:" << I <<
"\n";
1332 llvm::report_fatal_error(
"Canonicalization failed");
1341 if (
auto castinst = dyn_cast<ConstantExpr>(call->getCalledOperand())) {
1342 if (castinst->isCast())
1343 if (
auto fn = dyn_cast<Function>(castinst->getOperand(0))) {
1347 if (F && F->getName().contains(
"__enzyme_float")) {
1348 assert(num_args == 1 || num_args == 2);
1349 assert(call->getArgOperand(0)->getType()->isPointerTy());
1352 if (num_args == 2) {
1353 assert(isa<ConstantInt>(call->getArgOperand(1)));
1354 auto CI = cast<ConstantInt>(call->getArgOperand(1));
1355 if (CI->isNegative())
1358 num = CI->getLimitedValue();
1361 TT.
insert({(int)num}, Type::getFloatTy(call->getContext()));
1363 for (
size_t i = 0; i < (size_t)num; i += 4)
1364 TT.
insert({(int)i}, Type::getFloatTy(call->getContext()));
1368 if (F && F->getName().contains(
"__enzyme_double")) {
1369 assert(num_args == 1 || num_args == 2);
1370 assert(call->getArgOperand(0)->getType()->isPointerTy());
1373 if (num_args == 2) {
1374 assert(isa<ConstantInt>(call->getArgOperand(1)));
1375 num = cast<ConstantInt>(call->getArgOperand(1))->getLimitedValue();
1377 for (
size_t i = 0; i < num; i += 8)
1378 TT.
insert({(int)i}, Type::getDoubleTy(call->getContext()));
1382 if (F && F->getName().contains(
"__enzyme_integer")) {
1383 assert(num_args == 1 || num_args == 2);
1384 assert(call->getArgOperand(0)->getType()->isPointerTy());
1386 if (num_args == 2) {
1387 assert(isa<ConstantInt>(call->getArgOperand(1)));
1388 num = cast<ConstantInt>(call->getArgOperand(1))->getLimitedValue();
1391 for (
size_t i = 0; i < num; i++)
1396 if (F && F->getName().contains(
"__enzyme_pointer")) {
1397 assert(num_args == 1 || num_args == 2);
1398 assert(call->getArgOperand(0)->getType()->isPointerTy());
1401 if (num_args == 2) {
1402 assert(isa<ConstantInt>(call->getArgOperand(1)));
1403 num = cast<ConstantInt>(call->getArgOperand(1))->getLimitedValue();
1405 for (
size_t i = 0; i < num;
1406 i += ((DL.getPointerSizeInBits() + 7) / 8))
1412 StringSet<> JuliaKnownTypes = {
"julia.gc_alloc_obj",
1413 "jl_alloc_array_1d",
1414 "jl_alloc_array_2d",
1415 "jl_alloc_array_3d",
1416 "ijl_alloc_array_1d",
1417 "ijl_alloc_array_2d",
1418 "ijl_alloc_array_3d",
1419 "jl_gc_alloc_typed",
1420 "ijl_gc_alloc_typed",
1421 "jl_alloc_genericmemory",
1422 "ijl_alloc_genericmemory",
1423 "jl_alloc_genericmemory_unchecked",
1424 "ijl_alloc_genericmemory_unchecked",
1427 if (JuliaKnownTypes.count(F->getName())) {
1441 if (CallBase *call = dyn_cast<CallBase>(&I)) {
1442 if (call->getCalledFunction() &&
1443 (call->getCalledFunction()->getIntrinsicID() == Intrinsic::memcpy ||
1444 call->getCalledFunction()->getIntrinsicID() ==
1445 Intrinsic::memmove)) {
1446 int64_t copySize = 1;
1449 copySize =
max(copySize, val);
1460 }
else if (call->getCalledFunction() &&
1461 (call->getCalledFunction()->getIntrinsicID() ==
1462 Intrinsic::memset ||
1463 call->getCalledFunction()->getName() ==
1464 "memset_pattern16")) {
1465 int64_t copySize = 1;
1468 copySize =
max(copySize, val);
1478#if LLVM_VERSION_MAJOR >= 20
1479 }
else if (call->getCalledFunction() &&
1480 (call->getCalledFunction()->getIntrinsicID() ==
1481 Intrinsic::experimental_memset_pattern)) {
1482 int64_t copySize = 1;
1485 copySize =
max(copySize, val);
1496 }
else if (call->getCalledFunction() &&
1497 call->getCalledFunction()->getIntrinsicID() ==
1498 Intrinsic::masked_gather) {
1499 auto VT = cast<VectorType>(call->getType());
1500 auto LoadSize = (DL.getTypeSizeInBits(VT) + 7) / 8;
1504 }
else if (call->getCalledFunction() &&
1505 call->getCalledFunction()->getIntrinsicID() ==
1506 Intrinsic::masked_scatter) {
1508 }
else if (call->getCalledFunction() &&
1509 call->getCalledFunction()->getIntrinsicID() ==
1510 Intrinsic::masked_load) {
1511 auto VT = cast<VectorType>(call->getType());
1512 auto LoadSize = (DL.getTypeSizeInBits(VT) + 7) / 8;
1516 }
else if (call->getCalledFunction() &&
1517 call->getCalledFunction()->getIntrinsicID() ==
1518 Intrinsic::masked_store) {
1520 }
else if (call->getType()->isPointerTy()) {
1523 llvm::errs() <<
" unknown tbaa call instruction user inst: " << I
1524 <<
" vdptr: " << vdptr.
str() <<
"\n";
1526 }
else if (
auto SI = dyn_cast<StoreInst>(&I)) {
1528 (DL.getTypeSizeInBits(SI->getValueOperand()->getType()) + 7) / 8;
1541 }
else if (
auto LI = dyn_cast<LoadInst>(&I)) {
1542 auto LoadSize = (DL.getTypeSizeInBits(
LI->getType()) + 7) / 8;
1556 llvm::errs() <<
" inst: " << I <<
" vdptr: " << vdptr.
str() <<
"\n";
1557 assert(0 &&
"unknown tbaa instruction user");
1558 llvm_unreachable(
"unknown tbaa instruction user");
3028 llvm::Instruction::BinaryOps Opcode,
3031 Instruction *origin) {
3032 if (Opcode == BinaryOperator::FAdd || Opcode == BinaryOperator::FSub ||
3033 Opcode == BinaryOperator::FMul || Opcode == BinaryOperator::FDiv ||
3034 Opcode == BinaryOperator::FRem) {
3035 auto ty = T->getScalarType();
3036 assert(ty->isFloatingPointTy());
3039 bool LegalOr =
true;
3044 raw_string_ostream ss(
str);
3045 ss <<
"Illegal updateAnalysis prev:" << LHS.
str()
3046 <<
" new: " << Data.
str() <<
"\n";
3047 ss <<
"val: " << *
Args[0];
3048 ss <<
"origin: " << *origin;
3051 wrap(origin),
nullptr);
3056 raw_string_ostream ss(
str);
3057 ss <<
"Illegal updateAnalysis prev:" << RHS.
str()
3058 <<
" new: " << Data.
str() <<
"\n";
3059 ss <<
"val: " << *
Args[1];
3060 ss <<
"origin: " << *origin;
3063 wrap(origin),
nullptr);
3069 auto size = (dl.getTypeSizeInBits(T) + 7) / 8;
3070 auto AnalysisLHS = LHS.
Data0();
3071 auto AnalysisRHS = RHS.
Data0();
3072 auto AnalysisRet = Ret.
Data0();
3075 case BinaryOperator::Sub:
3082 LHS |=
TypeTree(AnalysisRHS[{}]).PurgeAnything().
Only(-1,
nullptr);
3083 RHS |=
TypeTree(AnalysisLHS[{}]).PurgeAnything().
Only(-1,
nullptr);
3096 case BinaryOperator::Add:
3097 case BinaryOperator::Mul:
3107 case BinaryOperator::Xor:
3109 for (
int i = 0; i < 2; ++i) {
3117 ((i == 0) ? RHS : LHS)
3118 .checkedOrIn(
TypeTree(FT).Only(-1,
nullptr),
3123 raw_string_ostream ss(
str);
3129 ss <<
"Illegal updateBinop (xor up) Analysis " << *origin <<
"\n";
3130 ss <<
" (i=" << i <<
") " << (i == 0 ?
"RHS" :
"LHS") <<
" "
3131 << ((i == 0) ? RHS : LHS).str() <<
" FT from ret: " << *FT
3136 wrap(origin),
nullptr);
3138 EmitFailure(
"IllegalUpdateAnalysis", origin->getDebugLoc(),
3140 report_fatal_error(
"Performed illegal updateAnalysis");
3145 case BinaryOperator::Or:
3146 for (
int i = 0; i < 2; ++i) {
3151 bool validXor =
false;
3152 if (
auto CIT = dyn_cast_or_null<ConstantInt>(
Args[i])) {
3153 if (dl.getTypeSizeInBits(FT) != dl.getTypeSizeInBits(CIT->getType()))
3155 auto CI = CIT->getValue();
3156#if LLVM_VERSION_MAJOR > 16
3159 if (CI.isNullValue())
3166#
if LLVM_VERSION_MAJOR > 16
3167 && (CI & ~0b01111111100000000000000000000000ULL).isZero()
3169 && (CI & ~0b01111111100000000000000000000000ULL).isNullValue()
3173#
if LLVM_VERSION_MAJOR > 16
3176 ~0b0111111111110000000000000000000000000000000000000000000000000000ULL)
3181 ~0b0111111111110000000000000000000000000000000000000000000000000000ULL)
3187 }
else if (
auto CV = dyn_cast_or_null<ConstantVector>(
Args[i])) {
3189 if (dl.getTypeSizeInBits(FT) !=
3190 dl.getTypeSizeInBits(CV->getOperand(i)->getType()))
3192 for (
size_t i = 0, end = CV->getNumOperands(); i < end; ++i) {
3193 auto CI = dyn_cast<ConstantInt>(CV->getOperand(i))->getValue();
3195#if LLVM_VERSION_MAJOR > 16
3198 if (CI.isNullValue())
3204#
if LLVM_VERSION_MAJOR > 16
3205 && (CI & ~0b01111111100000000000000000000000ULL).isZero()
3207 && (CI & ~0b01111111100000000000000000000000ULL).isNullValue()
3211#
if LLVM_VERSION_MAJOR > 16
3214 ~0b0111111111110000000000000000000000000000000000000000000000000000ULL)
3219 ~0b0111111111110000000000000000000000000000000000000000000000000000ULL)
3226 }
else if (
auto CV = dyn_cast_or_null<ConstantDataVector>(
Args[i])) {
3228 if (dl.getTypeSizeInBits(FT) !=
3229 dl.getTypeSizeInBits(CV->getElementType()))
3231 for (
size_t i = 0, end = CV->getNumElements(); i < end; ++i) {
3232 auto CI = CV->getElementAsAPInt(i);
3233#if LLVM_VERSION_MAJOR > 16
3236 if (CI.isNullValue())
3242#
if LLVM_VERSION_MAJOR > 16
3243 && (CI & ~0b01111111100000000000000000000000ULL).isZero()
3245 && (CI & ~0b01111111100000000000000000000000ULL).isNullValue()
3249#
if LLVM_VERSION_MAJOR > 16
3252 ~0b0111111111110000000000000000000000000000000000000000000000000000ULL)
3257 ~0b0111111111110000000000000000000000000000000000000000000000000000ULL)
3266 ((i == 0) ? RHS : LHS) |=
TypeTree(FT).
Only(-1,
nullptr);
3277 Result.
binopIn(Legal, AnalysisRHS, Opcode);
3280 raw_string_ostream ss(
str);
3286 ss <<
"Illegal updateBinop Analysis " << *origin <<
"\n";
3287 ss <<
"Illegal binopIn(down): " << Opcode <<
" lhs: " << Result.
str()
3288 <<
" rhs: " << AnalysisRHS.str() <<
"\n";
3292 wrap(origin),
nullptr);
3294 EmitFailure(
"IllegalUpdateAnalysis", origin->getDebugLoc(), origin,
3296 report_fatal_error(
"Performed illegal updateAnalysis");
3298 if (Opcode == BinaryOperator::And) {
3299 for (
int i = 0; i < 2; ++i) {
3303 if (andval <= 16 && andval >= 0) {
3305 }
else if (andval < 0 && andval >= -64) {
3309 Result = (i == 0 ? AnalysisRHS : AnalysisLHS);
3314 if (
Args[i] && isa<ConstantInt>(
Args[i]) &&
3315 (i == 0 ? AnalysisRHS : AnalysisLHS).Inner0() ==
3320 }
else if (Opcode == BinaryOperator::Add ||
3321 Opcode == BinaryOperator::Sub) {
3322 for (
int i = 0; i < 2; ++i) {
3323 if (i == 1 || Opcode == BinaryOperator::Add)
3324 if (
auto CI = dyn_cast_or_null<ConstantInt>(
Args[i])) {
3325 if (CI->isNegative() || CI->isZero() ||
3326 CI->getLimitedValue() <= 4096) {
3330 Result = (i == 0 ? AnalysisRHS : AnalysisLHS);
3334 }
else if (Opcode == BinaryOperator::Mul) {
3335 for (
int i = 0; i < 2; ++i) {
3338 if (
Args[i] && isa<ConstantInt>(
Args[i]) &&
3343 }
else if (Opcode == BinaryOperator::URem) {
3344 if (
auto CI = dyn_cast_or_null<ConstantInt>(
Args[1])) {
3346 if (CI->getLimitedValue() <= 4096) {
3350 }
else if (Opcode == BinaryOperator::Xor) {
3351 for (
int i = 0; i < 2; ++i) {
3353 if (!(FT = (i == 0 ? RHS : LHS).IsAllFloat(size, dl)))
3361 }
else if (Opcode == BinaryOperator::Or) {
3362 for (
int i = 0; i < 2; ++i) {
3364 if (!(FT = (i == 0 ? RHS : LHS).IsAllFloat(size, dl)))
3367 bool validXor =
false;
3368 if (
auto CIT = dyn_cast_or_null<ConstantInt>(
Args[i])) {
3369 if (dl.getTypeSizeInBits(FT) !=
3370 dl.getTypeSizeInBits(CIT->getType()))
3372 auto CI = CIT->getValue();
3373#if LLVM_VERSION_MAJOR > 16
3376 if (CI.isNullValue())
3383#
if LLVM_VERSION_MAJOR > 16
3384 && (CI & ~0b01111111100000000000000000000000ULL).isZero()
3386 && (CI & ~0b01111111100000000000000000000000ULL).isNullValue()
3390#
if LLVM_VERSION_MAJOR > 16
3393 ~0b0111111111110000000000000000000000000000000000000000000000000000ULL)
3398 ~0b0111111111110000000000000000000000000000000000000000000000000000ULL)
3404 }
else if (
auto CV = dyn_cast_or_null<ConstantVector>(
Args[i])) {
3406 if (dl.getTypeSizeInBits(FT) !=
3407 dl.getTypeSizeInBits(CV->getOperand(i)->getType()))
3409 for (
size_t i = 0, end = CV->getNumOperands(); i < end; ++i) {
3410 auto CI = dyn_cast<ConstantInt>(CV->getOperand(i))->getValue();
3411#if LLVM_VERSION_MAJOR > 16
3414 if (CI.isNullValue())
3420#
if LLVM_VERSION_MAJOR > 16
3421 && (CI & ~0b01111111100000000000000000000000ULL).isZero()
3424 (CI & ~0b01111111100000000000000000000000ULL).isNullValue()
3428#
if LLVM_VERSION_MAJOR > 16
3431 ~0b0111111111110000000000000000000000000000000000000000000000000000ULL)
3436 ~0b0111111111110000000000000000000000000000000000000000000000000000ULL)
3443 }
else if (
auto CV = dyn_cast_or_null<ConstantDataVector>(
Args[i])) {
3445 if (dl.getTypeSizeInBits(FT) !=
3446 dl.getTypeSizeInBits(CV->getElementType()))
3448 for (
size_t i = 0, end = CV->getNumElements(); i < end; ++i) {
3449 auto CI = CV->getElementAsAPInt(i);
3450#if LLVM_VERSION_MAJOR > 16
3453 if (CI.isNullValue())
3459#
if LLVM_VERSION_MAJOR > 16
3460 && (CI & ~0b01111111100000000000000000000000ULL).isZero()
3463 (CI & ~0b01111111100000000000000000000000ULL).isNullValue()
3467#
if LLVM_VERSION_MAJOR > 16
3470 ~0b0111111111110000000000000000000000000000000000000000000000000000ULL)
3475 ~0b0111111111110000000000000000000000000000000000000000000000000000ULL)
3489 Ret = Result.
Only(-1,
nullptr);
3589 switch (I.getIntrinsicID()) {
3590 case Intrinsic::ctpop:
3591 case Intrinsic::ctlz:
3592 case Intrinsic::cttz:
3593 case Intrinsic::nvvm_read_ptx_sreg_tid_x:
3594 case Intrinsic::nvvm_read_ptx_sreg_tid_y:
3595 case Intrinsic::nvvm_read_ptx_sreg_tid_z:
3596 case Intrinsic::nvvm_read_ptx_sreg_ntid_x:
3597 case Intrinsic::nvvm_read_ptx_sreg_ntid_y:
3598 case Intrinsic::nvvm_read_ptx_sreg_ntid_z:
3599 case Intrinsic::nvvm_read_ptx_sreg_ctaid_x:
3600 case Intrinsic::nvvm_read_ptx_sreg_ctaid_y:
3601 case Intrinsic::nvvm_read_ptx_sreg_ctaid_z:
3602 case Intrinsic::nvvm_read_ptx_sreg_nctaid_x:
3603 case Intrinsic::nvvm_read_ptx_sreg_nctaid_y:
3604 case Intrinsic::nvvm_read_ptx_sreg_nctaid_z:
3605 case Intrinsic::nvvm_read_ptx_sreg_warpsize:
3606 case Intrinsic::amdgcn_workitem_id_x:
3607 case Intrinsic::amdgcn_workitem_id_y:
3608 case Intrinsic::amdgcn_workitem_id_z:
3613#if LLVM_VERSION_MAJOR < 22
3614 case Intrinsic::nvvm_barrier0_popc:
3615 case Intrinsic::nvvm_barrier0_and:
3616 case Intrinsic::nvvm_barrier0_or:
3618 case Intrinsic::nvvm_barrier_cta_red_and_aligned_all:
3619 case Intrinsic::nvvm_barrier_cta_red_and_aligned_count:
3620 case Intrinsic::nvvm_barrier_cta_red_or_aligned_all:
3621 case Intrinsic::nvvm_barrier_cta_red_or_aligned_count:
3622 case Intrinsic::nvvm_barrier_cta_red_popc_aligned_all:
3623 case Intrinsic::nvvm_barrier_cta_red_popc_aligned_count:
3631 case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col:
3632 case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride:
3633 case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row:
3634 case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride:
3635 case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col:
3636 case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride:
3637 case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row:
3638 case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride:
3639 case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col:
3640 case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride:
3641 case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row:
3642 case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride: {
3645 TT.
insert({-1, 0}, Type::getFloatTy(I.getContext()));
3647 for (
int i = 1; i <= 9; i++)
3655 case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col:
3656 case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride:
3657 case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row:
3658 case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride:
3659 case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col:
3660 case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride:
3661 case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row:
3662 case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride:
3663 case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col:
3664 case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride:
3665 case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row:
3666 case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride: {
3669 TT.
insert({-1, 0}, Type::getHalfTy(I.getContext()));
3671 for (
int i = 1; i <= 9; i++)
3679 case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col:
3680 case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride:
3681 case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row:
3682 case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride:
3683 case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col:
3684 case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride:
3685 case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row:
3686 case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride:
3687 case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col:
3688 case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride:
3689 case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row:
3690 case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride: {
3693 TT.
insert({-1, 0}, Type::getFloatTy(I.getContext()));
3702 case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col:
3703 case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride:
3704 case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row:
3705 case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride:
3706 case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col:
3707 case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride:
3708 case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row:
3709 case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride:
3710 case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col:
3711 case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride:
3712 case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row:
3713 case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride:
3714 case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col:
3715 case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride:
3716 case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row:
3717 case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride:
3718 case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col:
3719 case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride:
3720 case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row:
3721 case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride:
3722 case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col:
3723 case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride:
3724 case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row:
3725 case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride:
3726 case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col:
3727 case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride:
3728 case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row:
3729 case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride:
3730 case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col:
3731 case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride:
3732 case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row:
3733 case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride:
3734 case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col:
3735 case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride:
3736 case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row:
3737 case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride: {
3740 TT.
insert({-1, 0}, Type::getHalfTy(I.getContext()));
3749 case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col:
3750 case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col_stride:
3751 case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col:
3752 case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col_stride:
3753 case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col_stride:
3754 case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col:
3755 case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row:
3756 case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row_stride:
3757 case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row_stride:
3758 case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row:
3759 case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col:
3760 case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col_stride:
3761 case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col_stride:
3762 case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col:
3763 case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row:
3764 case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row_stride:
3765 case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row_stride:
3766 case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row:
3767 case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row:
3768 case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row_stride:
3769 case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col:
3770 case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col_stride:
3771 case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col_stride:
3772 case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col:
3773 case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row:
3774 case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row_stride:
3775 case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row_stride:
3776 case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row:
3777 case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col:
3778 case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col_stride:
3779 case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col_stride:
3780 case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col:
3781 case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row:
3782 case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row_stride:
3783 case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row_stride:
3784 case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row:
3785 case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col:
3786 case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col_stride:
3787 case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row:
3788 case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row_stride:
3789 case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col:
3790 case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col_stride:
3791 case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col_stride:
3792 case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col:
3793 case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row:
3794 case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row_stride:
3795 case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row_stride:
3796 case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row:
3797 case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col:
3798 case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col_stride:
3799 case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col_stride:
3800 case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col:
3801 case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row:
3802 case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row_stride:
3803 case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row_stride:
3804 case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row:
3805 case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col:
3806 case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col_stride:
3807 case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row:
3808 case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row_stride:
3809 case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row:
3810 case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row_stride:
3811 case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col:
3812 case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col_stride:
3813 case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col:
3814 case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col_stride:
3815 case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row:
3816 case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row_stride:
3817 case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row:
3818 case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row_stride:
3819 case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row_stride:
3820 case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row:
3821 case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col:
3822 case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col_stride:
3823 case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col_stride:
3824 case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col:
3825 case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col:
3826 case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col_stride:
3827 case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row:
3828 case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row_stride: {
3833 case Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_f16_f16:
3834 case Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_f16_f16:
3835 case Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_f16_f16:
3836 case Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_f16_f16:
3837 case Intrinsic::nvvm_wmma_m32n8k16_mma_col_col_f16_f16:
3838 case Intrinsic::nvvm_wmma_m32n8k16_mma_col_row_f16_f16:
3839 case Intrinsic::nvvm_wmma_m32n8k16_mma_row_col_f16_f16:
3840 case Intrinsic::nvvm_wmma_m32n8k16_mma_row_row_f16_f16:
3841 case Intrinsic::nvvm_wmma_m8n32k16_mma_col_col_f16_f16:
3842 case Intrinsic::nvvm_wmma_m8n32k16_mma_col_row_f16_f16:
3843 case Intrinsic::nvvm_wmma_m8n32k16_mma_row_col_f16_f16:
3844 case Intrinsic::nvvm_wmma_m8n32k16_mma_row_row_f16_f16: {
3845 for (
int i = 0; i < 16; i++)
3850 for (
int i = 16; i < 16 + 8; i++)
3862 case Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_f16_f32:
3863 case Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_f16_f32:
3864 case Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_f16_f32:
3865 case Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_f16_f32:
3866 case Intrinsic::nvvm_wmma_m32n8k16_mma_col_col_f16_f32:
3867 case Intrinsic::nvvm_wmma_m32n8k16_mma_col_row_f16_f32:
3868 case Intrinsic::nvvm_wmma_m32n8k16_mma_row_col_f16_f32:
3869 case Intrinsic::nvvm_wmma_m32n8k16_mma_row_row_f16_f32:
3870 case Intrinsic::nvvm_wmma_m8n32k16_mma_col_col_f16_f32:
3871 case Intrinsic::nvvm_wmma_m8n32k16_mma_col_row_f16_f32:
3872 case Intrinsic::nvvm_wmma_m8n32k16_mma_row_col_f16_f32:
3873 case Intrinsic::nvvm_wmma_m8n32k16_mma_row_row_f16_f32: {
3874 for (
int i = 0; i < 16; i++)
3879 for (
int i = 16; i < 16 + 8; i++)
3891 case Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_f32_f16:
3892 case Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_f32_f16:
3893 case Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_f32_f16:
3894 case Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_f32_f16:
3895 case Intrinsic::nvvm_wmma_m32n8k16_mma_col_col_f32_f16:
3896 case Intrinsic::nvvm_wmma_m32n8k16_mma_col_row_f32_f16:
3897 case Intrinsic::nvvm_wmma_m32n8k16_mma_row_col_f32_f16:
3898 case Intrinsic::nvvm_wmma_m32n8k16_mma_row_row_f32_f16:
3899 case Intrinsic::nvvm_wmma_m8n32k16_mma_col_col_f32_f16:
3900 case Intrinsic::nvvm_wmma_m8n32k16_mma_col_row_f32_f16:
3901 case Intrinsic::nvvm_wmma_m8n32k16_mma_row_col_f32_f16:
3902 case Intrinsic::nvvm_wmma_m8n32k16_mma_row_row_f32_f16: {
3903 for (
int i = 0; i < 16; i++)
3908 for (
int i = 16; i < 16 + 8; i++)
3920 case Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_f32_f32:
3921 case Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_f32_f32:
3922 case Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_f32_f32:
3923 case Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_f32_f32:
3924 case Intrinsic::nvvm_wmma_m32n8k16_mma_col_col_f32_f32:
3925 case Intrinsic::nvvm_wmma_m32n8k16_mma_col_row_f32_f32:
3926 case Intrinsic::nvvm_wmma_m32n8k16_mma_row_col_f32_f32:
3927 case Intrinsic::nvvm_wmma_m32n8k16_mma_row_row_f32_f32:
3928 case Intrinsic::nvvm_wmma_m8n32k16_mma_col_col_f32_f32:
3929 case Intrinsic::nvvm_wmma_m8n32k16_mma_col_row_f32_f32:
3930 case Intrinsic::nvvm_wmma_m8n32k16_mma_row_col_f32_f32:
3931 case Intrinsic::nvvm_wmma_m8n32k16_mma_row_row_f32_f32: {
3932 for (
int i = 0; i < 16; i++)
3937 for (
int i = 16; i < 16 + 8; i++)
3949#if LLVM_VERSION_MAJOR < 20
3950 case Intrinsic::nvvm_ldg_global_i:
3951 case Intrinsic::nvvm_ldg_global_p:
3952 case Intrinsic::nvvm_ldg_global_f:
3954 case Intrinsic::nvvm_ldu_global_i:
3955 case Intrinsic::nvvm_ldu_global_p:
3956 case Intrinsic::nvvm_ldu_global_f: {
3957 auto &DL = I.getParent()->getParent()->getParent()->getDataLayout();
3958 auto LoadSize = (DL.getTypeSizeInBits(I.getType()) + 7) / 8;
3963 DL, 0, LoadSize, 0);
3971 case Intrinsic::log:
3972 case Intrinsic::log2:
3973 case Intrinsic::log10:
3974 case Intrinsic::exp:
3975 case Intrinsic::exp2:
3976 case Intrinsic::sin:
3977 case Intrinsic::cos:
3978#if LLVM_VERSION_MAJOR >= 19
3979 case Intrinsic::sinh:
3980 case Intrinsic::cosh:
3981 case Intrinsic::tanh:
3983 case Intrinsic::floor:
3984 case Intrinsic::ceil:
3985 case Intrinsic::trunc:
3986 case Intrinsic::rint:
3987 case Intrinsic::nearbyint:
3988 case Intrinsic::round:
3989 case Intrinsic::sqrt:
3990#if LLVM_VERSION_MAJOR >= 21
3991 case Intrinsic::nvvm_fabs:
3992 case Intrinsic::nvvm_fabs_ftz:
3994 case Intrinsic::nvvm_fabs_f:
3995 case Intrinsic::nvvm_fabs_d:
3996 case Intrinsic::nvvm_fabs_ftz_f:
3998 case Intrinsic::fabs:
4011 case Intrinsic::fmuladd:
4012 case Intrinsic::fma:
4037 case Intrinsic::powi:
4053#if LLVM_VERSION_MAJOR >= 12
4054 case Intrinsic::vector_reduce_fadd:
4055 case Intrinsic::vector_reduce_fmul:
4057 case Intrinsic::experimental_vector_reduce_v2_fadd:
4058 case Intrinsic::experimental_vector_reduce_v2_fmul:
4060 case Intrinsic::copysign:
4061 case Intrinsic::maxnum:
4062 case Intrinsic::minnum:
4063#if LLVM_VERSION_MAJOR >= 15
4064 case Intrinsic::maximum:
4065 case Intrinsic::minimum:
4067 case Intrinsic::nvvm_fmax_f:
4068 case Intrinsic::nvvm_fmax_d:
4069 case Intrinsic::nvvm_fmax_ftz_f:
4070 case Intrinsic::nvvm_fmin_f:
4071 case Intrinsic::nvvm_fmin_d:
4072 case Intrinsic::nvvm_fmin_ftz_f:
4073 case Intrinsic::pow:
4091#if LLVM_VERSION_MAJOR >= 12
4092 case Intrinsic::smax:
4093 case Intrinsic::smin:
4094 case Intrinsic::umax:
4095 case Intrinsic::umin:
4104 auto opType0 =
getAnalysis(I.getOperand(0))[{-1}];
4105 auto opType1 =
getAnalysis(I.getOperand(1))[{-1}];
4106 if (opType0 == opType1 &&
4119 case Intrinsic::umul_with_overflow:
4120 case Intrinsic::smul_with_overflow:
4121 case Intrinsic::ssub_with_overflow:
4122 case Intrinsic::usub_with_overflow:
4123 case Intrinsic::sadd_with_overflow:
4124 case Intrinsic::uadd_with_overflow: {
4128 BinaryOperator::BinaryOps opcode;
4130 switch (I.getIntrinsicID()) {
4131 case Intrinsic::ssub_with_overflow:
4132 case Intrinsic::usub_with_overflow: {
4137 opcode = BinaryOperator::Sub;
4141 case Intrinsic::smul_with_overflow:
4142 case Intrinsic::umul_with_overflow: {
4143 opcode = BinaryOperator::Mul;
4148 case Intrinsic::sadd_with_overflow:
4149 case Intrinsic::uadd_with_overflow: {
4150 opcode = BinaryOperator::Add;
4156 llvm_unreachable(
"unknown binary operator");
4170 raw_string_ostream ss(
str);
4176 ss <<
"Illegal updateBinopIntr Analysis " << I <<
"\n";
4177 ss <<
"Illegal binopIn(intr): " << I <<
" lhs: " << vd.
str()
4184 EmitFailure(
"IllegalUpdateAnalysis", I.getDebugLoc(), &I, ss.str());
4185 report_fatal_error(
"Performed illegal updateAnalysis");
4187 auto &dl = I.getParent()->getParent()->getParent()->getDataLayout();
4188 int sz = (dl.getTypeSizeInBits(I.getOperand(0)->getType()) + 7) / 8;
4191 int sz2 = (dl.getTypeSizeInBits(I.getType()) + 7) / 8;
4539 if (
auto iasm = dyn_cast<InlineAsm>(call.getCalledOperand())) {
4541 if (StringRef(iasm->getAsmString()).contains(
"cpuid")) {
4543#if LLVM_VERSION_MAJOR >= 14
4544 for (
auto &arg : call.args())
4546 for (
auto &arg : call.arg_operands())
4554 if (call.hasFnAttr(
"enzyme_ta_norecur"))
4560 if (ci->getAttributes().hasAttribute(AttributeList::FunctionIndex,
4561 "enzyme_ta_norecur"))
4569#include "BlasTA.inc"
4573 const char* NoTARecurStartsWith[] = {
4574 "std::__u::basic_ostream<wchar_t, std::__u::char_traits<wchar_t>>& std::__u::operator<<",
4578 std::string demangledName = llvm::demangle(funcName.str());
4581 while ((start = demangledName.find(
"> >", start)) != std::string::npos) {
4582 demangledName.replace(start, 3,
">>");
4584 for (
auto Name : NoTARecurStartsWith)
4597 if (
startsWith(funcName,
"llvm.intel.subscript")) {
4598 assert(isa<IntrinsicInst>(call));
4603#define CONSIDER(fn) \
4604 if (funcName == #fn) { \
4605 analyzeFuncTypes(::fn, call, *this); \
4609#define CONSIDER2(fn, ...) \
4610 if (funcName == #fn) { \
4611 analyzeFuncTypesNoFn<__VA_ARGS__>(call, *this); \
4618 SmallVector<TypeTree, 4> args;
4619 SmallVector<std::set<int64_t>, 4> knownValues;
4620#if LLVM_VERSION_MAJOR >= 14
4621 for (
auto &arg : call.args())
4623 for (
auto &arg : call.arg_operands())
4627 knownValues.push_back(
4631 bool err = customrule->second(
direction, returnAnalysis, args,
4632 knownValues, &call,
this);
4639#if LLVM_VERSION_MAJOR >= 14
4640 for (
auto &arg : call.args())
4642 for (
auto &arg : call.arg_operands())
4654 if (
startsWith(funcName,
"_ZN3std2io5stdio6_print") ||
4659 if (funcName ==
"dgemm_64" || funcName ==
"dgemm_64_" ||
4660 funcName ==
"dgemm" || funcName ==
"dgemm_") {
4665 for (
int i : {0, 1, 2, 3, 4, 7, 9, 12})
4670 ptrdbl.
insert({-1, 0}, Type::getDoubleTy(call.getContext()));
4673 for (
int i : {5, 6, 8, 10, 11})
4678 if (funcName ==
"__kmpc_fork_call") {
4679 Function *fn = dyn_cast<Function>(call.getArgOperand(2));
4681 if (
auto castinst = dyn_cast<ConstantExpr>(call.getArgOperand(2)))
4682 if (castinst->isCast())
4683 fn = dyn_cast<Function>(castinst->getOperand(0));
4686#if LLVM_VERSION_MAJOR >= 14
4687 if (call.arg_size() - 3 != fn->getFunctionType()->getNumParams() - 2)
4690 if (call.getNumArgOperands() - 3 !=
4691 fn->getFunctionType()->getNumParams() - 2)
4703 for (
auto &arg : fn->args()) {
4706 std::pair<Argument *, TypeTree>(&arg, IntPtr));
4708 std::pair<Argument *, std::set<int64_t>>(&arg, {0}));
4710 typeInfo.
Arguments.insert(std::pair<Argument *, TypeTree>(
4711 &arg,
getAnalysis(call.getArgOperand(argnum - 2 + 3))));
4712 std::set<int64_t> bounded;
4714 call.getArgOperand(argnum - 2 + 3),
DT, intseen,
SE)) {
4720 std::pair<Argument *, std::set<int64_t>>(&arg, bounded));
4727 llvm::errs() <<
" starting omp IPO of ";
4728 call.print(llvm::errs(), *
MST);
4729 llvm::errs() <<
"\n";
4732 auto a = fn->arg_begin();
4736#if LLVM_VERSION_MAJOR >= 14
4737 for (
unsigned i = 3; i < call.arg_size(); ++i)
4739 for (
unsigned i = 3; i < call.getNumArgOperands(); ++i)
4742 auto dt = STR.
query(a);
4750 if (funcName ==
"__kmpc_for_static_init_4" ||
4751 funcName ==
"__kmpc_for_static_init_4u" ||
4752 funcName ==
"__kmpc_for_static_init_8" ||
4753 funcName ==
"__kmpc_for_static_init_8u") {
4756 size_t numBytes = 4;
4757 if (funcName ==
"__kmpc_for_static_init_8" ||
4758 funcName ==
"__kmpc_for_static_init_8u")
4760 for (
size_t i = 0; i < numBytes; i++)
4772 if (funcName ==
"omp_get_max_threads" || funcName ==
"omp_get_thread_num" ||
4773 funcName ==
"omp_get_num_threads" ||
4774 funcName ==
"__kmpc_global_thread_num") {
4778 if (funcName ==
"_ZNSt6localeC1Ev") {
4786 if (
startsWith(funcName,
"_ZNKSt3__14hash")) {
4791 if (
startsWith(funcName,
"_ZNKSt3__112basic_string") ||
4792 startsWith(funcName,
"_ZNSt3__112basic_string") ||
4793 startsWith(funcName,
"_ZNSt3__112__hash_table") ||
4794 startsWith(funcName,
"_ZNKSt3__115basic_stringbuf")) {
4798 if (funcName ==
"__dynamic_cast" ||
4799 funcName ==
"_ZSt18_Rb_tree_decrementPKSt18_Rb_tree_node_base" ||
4800 funcName ==
"_ZSt18_Rb_tree_incrementPKSt18_Rb_tree_node_base" ||
4801 funcName ==
"_ZSt18_Rb_tree_decrementPSt18_Rb_tree_node_base" ||
4802 funcName ==
"_ZSt18_Rb_tree_incrementPSt18_Rb_tree_node_base") {
4806 if (funcName ==
"memcmp") {
4818 if (funcName ==
"cuDeviceGet") {
4827 if (funcName ==
"cuDeviceGetName") {
4836 if (funcName ==
"cudaRuntimeGetVersion" ||
4837 funcName ==
"cuDriverGetVersion" || funcName ==
"cuDeviceGetCount") {
4846 if (funcName ==
"cuMemGetInfo_v2") {
4856 if (funcName ==
"cuDevicePrimaryCtxRetain" ||
4857 funcName ==
"cuCtxGetCurrent") {
4864 if (funcName ==
"cuStreamQuery") {
4869 if (funcName ==
"cuMemAllocAsync" || funcName ==
"cuMemAlloc" ||
4870 funcName ==
"cuMemAlloc_v2" || funcName ==
"cudaMalloc" ||
4871 funcName ==
"cudaMallocAsync" || funcName ==
"cudaMallocHost" ||
4872 funcName ==
"cudaMallocFromPoolAsync") {
4882 if (funcName ==
"jl_hrtime" || funcName ==
"ijl_hrtime") {
4886 if (funcName ==
"jl_get_task_tid" || funcName ==
"ijl_get_task_tid") {
4890 if (funcName ==
"jl_get_binding_or_error" ||
4891 funcName ==
"ijl_get_binding_or_error") {
4895 if (funcName ==
"julia.gc_loaded") {
4902 if (funcName ==
"julia.pointer_from_objref") {
4909 if (funcName ==
"_ZNSt6chrono3_V212steady_clock3nowEv") {
4916 funcName = funcName.substr(1);
4917 if (funcName ==
"MPI_Init") {
4930 if (funcName ==
"MPI_Comm_size" || funcName ==
"MPI_Comm_rank" ||
4931 funcName ==
"MPI_Get_processor_name") {
4939 if (funcName ==
"MPI_Barrier" || funcName ==
"MPI_Finalize") {
4943 if (funcName ==
"MPI_Send" || funcName ==
"MPI_Ssend" ||
4944 funcName ==
"MPI_Bsend" || funcName ==
"MPI_Recv" ||
4945 funcName ==
"MPI_Brecv" || funcName ==
"PMPI_Send" ||
4946 funcName ==
"PMPI_Ssend" || funcName ==
"PMPI_Bsend" ||
4947 funcName ==
"PMPI_Recv" || funcName ==
"PMPI_Brecv") {
4950 if (
Constant *C = dyn_cast<Constant>(call.getOperand(2))) {
4951 while (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
4952 C = CE->getOperand(0);
4954 if (
auto GV = dyn_cast<GlobalVariable>(C)) {
4955 if (GV->getName() ==
"ompi_mpi_double") {
4956 buf.
insert({0}, Type::getDoubleTy(C->getContext()));
4957 }
else if (GV->getName() ==
"ompi_mpi_float") {
4958 buf.
insert({0}, Type::getFloatTy(C->getContext()));
4959 }
else if (GV->getName() ==
"ompi_mpi_cxx_bool") {
4962 }
else if (
auto CI = dyn_cast<ConstantInt>(C)) {
4964 if (CI->getValue() == 1275070475) {
4965 buf.
insert({0}, Type::getDoubleTy(C->getContext()));
4966 }
else if (CI->getValue() == 1275069450) {
4967 buf.
insert({0}, Type::getFloatTy(C->getContext()));
4981 if (funcName ==
"MPI_Isend" || funcName ==
"MPI_Irecv" ||
4982 funcName ==
"PMPI_Isend" || funcName ==
"PMPI_Irecv") {
4985 if (
Constant *C = dyn_cast<Constant>(call.getOperand(2))) {
4986 while (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
4987 C = CE->getOperand(0);
4989 if (
auto GV = dyn_cast<GlobalVariable>(C)) {
4990 if (GV->getName() ==
"ompi_mpi_double") {
4991 buf.
insert({0}, Type::getDoubleTy(C->getContext()));
4992 }
else if (GV->getName() ==
"ompi_mpi_float") {
4993 buf.
insert({0}, Type::getFloatTy(C->getContext()));
4994 }
else if (GV->getName() ==
"ompi_mpi_cxx_bool") {
4997 }
else if (
auto CI = dyn_cast<ConstantInt>(C)) {
4999 if (CI->getValue() == 1275070475) {
5000 buf.
insert({0}, Type::getDoubleTy(C->getContext()));
5001 }
else if (CI->getValue() == 1275069450) {
5002 buf.
insert({0}, Type::getFloatTy(C->getContext()));
5018 if (funcName ==
"MPI_Wait") {
5026 if (funcName ==
"MPI_Waitany") {
5038 if (funcName ==
"MPI_Waitall") {
5048 if (funcName ==
"MPI_Bcast") {
5058 if (funcName ==
"MPI_Reduce" || funcName ==
"PMPI_Reduce") {
5061 if (
Constant *C = dyn_cast<Constant>(call.getOperand(3))) {
5062 while (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
5063 C = CE->getOperand(0);
5065 if (
auto GV = dyn_cast<GlobalVariable>(C)) {
5066 if (GV->getName() ==
"ompi_mpi_double") {
5067 buf.
insert({0}, Type::getDoubleTy(C->getContext()));
5068 }
else if (GV->getName() ==
"ompi_mpi_float") {
5069 buf.
insert({0}, Type::getFloatTy(C->getContext()));
5070 }
else if (GV->getName() ==
"ompi_mpi_cxx_bool") {
5073 }
else if (
auto CI = dyn_cast<ConstantInt>(C)) {
5075 if (CI->getValue() == 1275070475) {
5076 buf.
insert({0}, Type::getDoubleTy(C->getContext()));
5077 }
else if (CI->getValue() == 1275069450) {
5078 buf.
insert({0}, Type::getFloatTy(C->getContext()));
5099 if (funcName ==
"MPI_Allreduce" || funcName ==
"PMPI_Allreduce") {
5102 if (
Constant *C = dyn_cast<Constant>(call.getOperand(3))) {
5103 while (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
5104 C = CE->getOperand(0);
5106 if (
auto GV = dyn_cast<GlobalVariable>(C)) {
5107 if (GV->getName() ==
"ompi_mpi_double") {
5108 buf.
insert({0}, Type::getDoubleTy(C->getContext()));
5109 }
else if (GV->getName() ==
"ompi_mpi_float") {
5110 buf.
insert({0}, Type::getFloatTy(C->getContext()));
5111 }
else if (GV->getName() ==
"ompi_mpi_cxx_bool") {
5114 }
else if (
auto CI = dyn_cast<ConstantInt>(C)) {
5116 if (CI->getValue() == 1275070475) {
5117 buf.
insert({0}, Type::getDoubleTy(C->getContext()));
5118 }
else if (CI->getValue() == 1275069450) {
5119 buf.
insert({0}, Type::getFloatTy(C->getContext()));
5139 if (funcName ==
"MPI_Sendrecv_replace") {
5157 if (funcName ==
"MPI_Sendrecv") {
5181 if (funcName ==
"MPI_Gather" || funcName ==
"MPI_Scatter") {
5195 if (funcName ==
"MPI_Allgather") {
5210 if (ci->hasFnAttribute(
"enzyme_notypeanalysis")) {
5214 if (funcName ==
"memcpy" || funcName ==
"memmove") {
5219 if (funcName ==
"posix_memalign") {
5231 if (funcName ==
"calloc") {
5241 unsigned index = (size_t)*opidx;
5242 if (
auto CI = dyn_cast<ConstantInt>(call.getOperand(index))) {
5243 auto &DL = call.getParent()->getParent()->getParent()->getDataLayout();
5244 auto LoadSize = CI->getZExtValue();
5254 if (funcName ==
"malloc") {
5256 if (
auto CI = dyn_cast<ConstantInt>(call.getOperand(0))) {
5257 auto &DL = call.getParent()->getParent()->getParent()->getDataLayout();
5258 auto LoadSize = CI->getZExtValue();
5268 if (funcName ==
"__size_returning_new_experiment") {
5270 auto &DL = call.getParent()->getParent()->getParent()->getDataLayout();
5271 if (
auto CI = dyn_cast<ConstantInt>(call.getOperand(0))) {
5272 auto LoadSize = CI->getZExtValue();
5277 ptr = ptr.Only(0, &call);
5284 if (funcName ==
"julia.gc_alloc_obj" || funcName ==
"jl_gc_alloc_typed" ||
5285 funcName ==
"ijl_gc_alloc_typed") {
5287 if (
auto CI = dyn_cast<ConstantInt>(call.getOperand(1))) {
5288 auto &DL = call.getParent()->getParent()->getParent()->getDataLayout();
5289 auto LoadSize = CI->getZExtValue();
5299 if (funcName ==
"julia.except_enter" || funcName ==
"ijl_excstack_state" ||
5300 funcName ==
"jl_excstack_state") {
5304 if (funcName ==
"jl_array_copy" || funcName ==
"ijl_array_copy" ||
5305 funcName ==
"jl_inactive_inout" ||
5306 funcName ==
"jl_genericmemory_copy_slice" ||
5307 funcName ==
"ijl_genericmemory_copy_slice") {
5317 for (
auto &Arg : ci->args()) {
5318 if (Arg.getType()->isIntegerTy()) {
5324 assert(ci->getReturnType()->isPointerTy());
5328 if (funcName ==
"malloc_usable_size" || funcName ==
"malloc_size" ||
5329 funcName ==
"_msize") {
5335 if (funcName ==
"realloc") {
5340 sz =
max(sz, (
size_t)val);
5344 auto &dl = call.getParent()->getParent()->getParent()->getDataLayout();
5352 res.
orIn(res2,
false);
5354 res = res.
Only(-1, &call);
5363 if (funcName ==
"sigaction") {
5373 if (funcName ==
"mmap") {
5389 if (funcName ==
"munmap") {
5397 if (funcName ==
"pthread_mutex_lock" ||
5398 funcName ==
"pthread_mutex_trylock" ||
5399 funcName ==
"pthread_rwlock_rdlock" ||
5400 funcName ==
"pthread_rwlock_unlock" ||
5401 funcName ==
"pthread_attr_init" || funcName ==
"pthread_attr_destroy" ||
5402 funcName ==
"pthread_rwlock_unlock" ||
5403 funcName ==
"pthread_mutex_unlock") {
5411 for (
auto &Arg : ci->args()) {
5412 if (Arg.getType()->isIntegerTy()) {
5416 if (Arg.getType()->isPointerTy()) {
5422 if (!ci->getReturnType()->isVoidTy()) {
5427 assert(ci->getReturnType()->isVoidTy());
5430 if (funcName ==
"memchr" || funcName ==
"memrchr") {
5438 if (funcName ==
"strlen") {
5444 if (funcName ==
"strcmp") {
5452 if (funcName ==
"bcmp") {
5462 if (funcName ==
"getcwd") {
5470 if (funcName ==
"sysconf") {
5476 if (funcName ==
"dladdr") {
5484 if (funcName ==
"__errno_location") {
5491 if (funcName ==
"getenv") {
5500 if (funcName ==
"getcwd") {
5508 if (funcName ==
"mprotect") {
5518 if (funcName ==
"memcmp") {
5528 if (funcName ==
"signal") {
5536 if (funcName ==
"write" || funcName ==
"read" || funcName ==
"writev" ||
5537 funcName ==
"readv") {
5549 if (funcName ==
"gsl_sf_legendre_array_e") {
5560 CONSIDER2(modf,
double,
double,
double *)
5564 CONSIDER2(remquo,
double,
double,
double,
int *)
5569#if LLVM_VERSION_MAJOR >= 14
5570 for (
size_t i = 0; i < call.arg_size(); ++i)
5572 for (
size_t i = 0; i < call.getNumArgOperands(); ++i)
5575 Type *T = call.getArgOperand(i)->getType();
5576 if (T->isFloatingPointTy()) {
5578 call.getArgOperand(i),
5580 call.getArgOperand(i)->getType()->getScalarType()))
5583 }
else if (T->isIntegerTy()) {
5586 }
else if (
auto ST = dyn_cast<StructType>(T)) {
5587 assert(ST->getNumElements() >= 1);
5588 for (
size_t i = 1; i < ST->getNumElements(); ++i) {
5589 assert(ST->getTypeAtIndex((
unsigned)0) == ST->getTypeAtIndex(i));
5591 if (ST->getTypeAtIndex((
unsigned)0)->isFloatingPointTy())
5593 call.getArgOperand(i),
5595 ST->getTypeAtIndex((
unsigned)0)->getScalarType()))
5598 else if (ST->getTypeAtIndex((
unsigned)0)->isIntegerTy()) {
5602 llvm::errs() << *T <<
" - " << call <<
"\n";
5603 llvm_unreachable(
"Unknown type for libm");
5605 }
else if (
auto AT = dyn_cast<ArrayType>(T)) {
5606 assert(AT->getNumElements() >= 1);
5607 if (AT->getElementType()->isFloatingPointTy())
5609 call.getArgOperand(i),
5613 else if (AT->getElementType()->isIntegerTy()) {
5617 llvm::errs() << *T <<
" - " << call <<
"\n";
5618 llvm_unreachable(
"Unknown type for libm");
5621 llvm::errs() << *T <<
" - " << call <<
"\n";
5622 llvm_unreachable(
"Unknown type for libm");
5625 Type *T = call.getType();
5626 if (T->isFloatingPointTy()) {
5631 }
else if (T->isIntegerTy()) {
5634 }
else if (T->isVoidTy()) {
5635 }
else if (
auto ST = dyn_cast<StructType>(T)) {
5636 assert(ST->getNumElements() >= 1);
5638 auto &DL = call.getParent()->getParent()->getParent()->getDataLayout();
5639 for (
size_t i = 0; i < ST->getNumElements(); ++i) {
5640 auto T = ST->getTypeAtIndex(i);
5644 ConstantInt::get(Type::getInt64Ty(call.getContext()), 0),
5645 ConstantInt::get(Type::getInt32Ty(call.getContext()), i)};
5646 auto ud = UndefValue::get(
getUnqual(ST));
5647 auto g2 = GetElementPtrInst::Create(ST, ud, vec);
5648 APInt ai(DL.getIndexSizeInBits(0), 0);
5649 g2->accumulateConstantOffset(DL, ai);
5651 size_t Offset = ai.getZExtValue();
5654 if (i + 1 == ST->getNumElements())
5655 nextOffset = (DL.getTypeSizeInBits(ST) + 7) / 8;
5658 ConstantInt::get(Type::getInt64Ty(call.getContext()), 0),
5659 ConstantInt::get(Type::getInt32Ty(call.getContext()), i + 1)};
5660 auto ud = UndefValue::get(
getUnqual(ST));
5661 auto g2 = GetElementPtrInst::Create(ST, ud, vec);
5662 APInt ai(DL.getIndexSizeInBits(0), 0);
5663 g2->accumulateConstantOffset(DL, ai);
5665 nextOffset = ai.getZExtValue();
5668 if (T->isFloatingPointTy()) {
5670 }
else if (T->isIntegerTy()) {
5676 nextOffset - Offset,
5680 auto Size = (DL.getTypeSizeInBits(ST) + 7) / 8;
5683 }
else if (
auto AT = dyn_cast<ArrayType>(T)) {
5684 assert(AT->getNumElements() >= 1);
5685 if (AT->getElementType()->isFloatingPointTy())
5692 llvm::errs() << *T <<
" - " << call <<
"\n";
5693 llvm_unreachable(
"Unknown type for libm");
5696 llvm::errs() << *T <<
" - " << call <<
"\n";
5697 llvm_unreachable(
"Unknown type for libm");
5701 if (funcName ==
"__lgamma_r_finite") {
5703 call.getArgOperand(0),
5716 if (funcName ==
"__fd_sincos_1" || funcName ==
"__fd_sincos_1f" ||
5717 funcName ==
"__fd_sincos_1l") {
5727 if (funcName ==
"frexp" || funcName ==
"frexpf" || funcName ==
"frexpl") {
5737#if LLVM_VERSION_MAJOR < 17
5739 objSize = DL.getTypeSizeInBits(
5740 call.getOperand(1)->getType()->getPointerElementType()) /
5743 for (
size_t i = 0; i < objSize; ++i) {
5750 if (funcName ==
"__cxa_guard_acquire" || funcName ==
"printf" ||
5751 funcName ==
"vprintf" || funcName ==
"puts" || funcName ==
"fputc" ||
5752 funcName ==
"fprintf") {
5759 if (!ci->empty() && !
hasMetadata(ci,
"enzyme_gradient") &&