36 cl::desc(
"Place 8 bools together in a single byte"));
40 cl::desc(
"Zero initialize the cache"));
44 cl::desc(
"Enable Enzyme to print performance info"));
47 "enzyme-max-cache", cl::init(
false), cl::Hidden,
49 "Avoid reallocs when possible by potentially overallocating cache"));
63 if (
auto AI = dyn_cast<AllocaInst>(I)) {
69 SE.eraseValueFromMap(I);
71 if (!I->use_empty()) {
73 raw_string_ostream ss(
str);
74 ss <<
"Erased value with a use:\n";
75 ss << *
newFunc->getParent() <<
"\n";
80 nullptr,
nullptr,
nullptr);
82 EmitFailure(
"GetIndexError", I->getDebugLoc(), I, ss.str());
84 I->replaceAllUsesWith(UndefValue::get(I->getType()));
86 assert(I->use_empty());
96 llvm::AllocaInst *cache = found->second.first;
98 assert(isa<Instruction>(B));
101 SmallVector<Instruction *, 3> tmpInstructions(stfound->second.begin(),
102 stfound->second.end());
104 for (
auto st : tmpInstructions)
105 cast<StoreInst>(&*st)->eraseFromParent();
106 MDNode *TBAA =
nullptr;
107 if (
auto I = dyn_cast<Instruction>(A))
108 TBAA = I->getMetadata(LLVMContext::MD_tbaa);
116 A->replaceAllUsesWith(B);
121std::pair<PHINode *, Instruction *>
126 BasicBlock *Header = L->getHeader();
128 IRBuilder<> B(Header, Header->begin());
129 PHINode *CanonicalIV = B.CreatePHI(Ty, 1, Name);
131 B.SetInsertPoint(Header->getFirstNonPHIOrDbg());
132 Instruction *Inc = cast<Instruction>(
133 B.CreateAdd(CanonicalIV, ConstantInt::get(Ty, 1), Name +
".next",
136 for (BasicBlock *Pred : predecessors(Header)) {
138 if (L->contains(Pred)) {
139 CanonicalIV->addIncoming(Inc, Pred);
141 CanonicalIV->addIncoming(ConstantInt::get(Ty, 0), Pred);
144 assert(L->getCanonicalInductionVariable() == CanonicalIV);
145 return std::pair<PHINode *, Instruction *>(CanonicalIV, Inc);
154 BasicBlock *Header = L->getHeader();
156 for (BasicBlock::iterator II = Header->begin(); isa<PHINode>(II); ++II) {
157 PHINode *PN = cast<PHINode>(II);
158 if (PN->getType() != Ty)
161 Instruction *Inc =
nullptr;
163 for (BasicBlock *Pred : predecessors(Header)) {
165 if (L->contains(Pred)) {
167 dyn_cast<BinaryOperator>(PN->getIncomingValueForBlock(Pred));
168 if (!Inc2 || Inc2->getOpcode() != Instruction::Add ||
169 Inc2->getOperand(0) != PN) {
173 auto CI = dyn_cast<ConstantInt>(Inc2->getOperand(1));
174 if (!CI || !CI->isOne()) {
186 auto CI = dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Pred));
187 if (!CI || !CI->isZero()) {
199 return std::make_pair(PN, Inc);
201 llvm::errs() << *Header <<
"\n";
202 assert(0 &&
"Could not find canonical IV");
203 return std::pair<PHINode *, Instruction *>(
nullptr,
nullptr);
209 BasicBlock *Header, PHINode *CanonicalIV, Instruction *Increment,
211 llvm::function_ref<
void(Instruction *, Value *)> replacer,
212 llvm::function_ref<
void(Instruction *)> eraser) {
215 SmallVector<Instruction *, 8> IVsToRemove;
217 auto CanonicalSCEV = SE.getSCEV(CanonicalIV);
219 for (BasicBlock::iterator II = Header->begin(); isa<PHINode>(II);) {
220 PHINode *PN = cast<PHINode>(II);
222 if (PN == CanonicalIV)
224 if (!SE.isSCEVable(PN->getType()))
226 const SCEV *S = SE.getSCEV(PN);
227 if (SE.getCouldNotCompute() == S || isa<SCEVUnknown>(S))
231 if (!SE.dominates(S, Header))
234 if (S == CanonicalSCEV) {
235 replacer(PN, CanonicalIV);
241 auto Tmp = B.CreatePHI(PN->getType(), 0);
242 for (
auto Pred : predecessors(Header))
243 Tmp->addIncoming(UndefValue::get(Tmp->getType()), Pred);
249#if LLVM_VERSION_MAJOR >= 22
250 SCEVExpander Exp(SE,
"enzyme");
252 SCEVExpander Exp(SE, Header->getParent()->getParent()->getDataLayout(),
259 Exp.expandCodeFor(S, Tmp->getType(), Header->getFirstNonPHI());
264 if (
auto addrec = dyn_cast<SCEVAddRecExpr>(S)) {
265 if (addrec->getLoop()->getHeader() == Header) {
266 if (
auto add_or_mul = dyn_cast<BinaryOperator>(NewIV)) {
267#if LLVM_VERSION_MAJOR >= 23
268 if (any(addrec->getNoWrapFlags(llvm::SCEV::FlagNUW)))
269 add_or_mul->setHasNoUnsignedWrap(
true);
270 if (any(addrec->getNoWrapFlags(llvm::SCEV::FlagNSW)))
271 add_or_mul->setHasNoSignedWrap(
true);
273 if (addrec->getNoWrapFlags(llvm::SCEV::FlagNUW))
274 add_or_mul->setHasNoUnsignedWrap(
true);
275 if (addrec->getNoWrapFlags(llvm::SCEV::FlagNSW))
276 add_or_mul->setHasNoSignedWrap(
true);
281 replacer(Tmp, NewIV);
286 Increment->moveAfter(CanonicalIV->getParent()->getFirstNonPHI());
287 SmallVector<Instruction *, 1> toErase;
288 for (
auto use : CanonicalIV->users()) {
289 auto BO = dyn_cast<BinaryOperator>(use);
292 if (BO->getOpcode() != BinaryOperator::Add)
294 if (use == Increment)
297 Value *toadd =
nullptr;
298 if (BO->getOperand(0) == CanonicalIV) {
299 toadd = BO->getOperand(1);
301 assert(BO->getOperand(1) == CanonicalIV);
302 toadd = BO->getOperand(0);
304 if (
auto CI = dyn_cast<ConstantInt>(toadd)) {
307 BO->replaceAllUsesWith(Increment);
308 toErase.push_back(BO);
313 for (
auto BO : toErase)
318 BasicBlock *Preheader, PHINode *CanonicalIV,
320 Instruction *Increment,
321 ArrayRef<BasicBlock *> latches) {
323 if (latches.size() == 1 && isa<BranchInst>(latches[0]->getTerminator()) &&
324 cast<BranchInst>(latches[0]->getTerminator())->isConditional())
325 for (
auto use : CanonicalIV->users()) {
326 if (
auto cmp = dyn_cast<ICmpInst>(use)) {
327 if (cast<BranchInst>(latches[0]->getTerminator())->getCondition() !=
331 if (cmp->getOperand(0) != CanonicalIV) {
335 assert(cmp->getOperand(0) == CanonicalIV);
337 auto scv = SE.getSCEVAtScope(cmp->getOperand(1), L);
338 if (cmp->isUnsigned() ||
339 (scv != SE.getCouldNotCompute() && SE.isKnownNonNegative(scv))) {
345 if (cmp->getPredicate() == ICmpInst::ICMP_ULT ||
346 cmp->getPredicate() == ICmpInst::ICMP_SLT) {
347 cmp->setPredicate(ICmpInst::ICMP_NE);
353 if (cmp->getPredicate() == ICmpInst::ICMP_ULE ||
354 cmp->getPredicate() == ICmpInst::ICMP_SLE) {
355 IRBuilder<> builder(Preheader->getTerminator());
356 if (
auto inst = dyn_cast<Instruction>(cmp->getOperand(1))) {
357 builder.SetInsertPoint(inst->getNextNode());
361 builder.CreateNUWAdd(
363 ConstantInt::get(cmp->getOperand(1)->getType(), 1,
false)));
364 cmp->setPredicate(ICmpInst::ICMP_NE);
369 if (cmp->getPredicate() == ICmpInst::ICMP_UGE ||
370 cmp->getPredicate() == ICmpInst::ICMP_SGE) {
371 cmp->setPredicate(ICmpInst::ICMP_EQ);
377 if (cmp->getPredicate() == ICmpInst::ICMP_UGT ||
378 cmp->getPredicate() == ICmpInst::ICMP_SGT) {
379 IRBuilder<> builder(Preheader->getTerminator());
380 if (
auto inst = dyn_cast<Instruction>(cmp->getOperand(1))) {
381 builder.SetInsertPoint(inst->getNextNode());
385 builder.CreateNUWAdd(
387 ConstantInt::get(cmp->getOperand(1)->getType(), 1,
false)));
388 cmp->setPredicate(ICmpInst::ICMP_EQ);
393 if (cmp->getPredicate() == ICmpInst::ICMP_NE) {
400 Increment->moveAfter(CanonicalIV->getParent()->getFirstNonPHI());
402 if (latches.size() == 1 && isa<BranchInst>(latches[0]->getTerminator()) &&
403 cast<BranchInst>(latches[0]->getTerminator())->isConditional())
404 for (
auto use : Increment->users()) {
405 if (
auto cmp = dyn_cast<ICmpInst>(use)) {
406 if (cast<BranchInst>(latches[0]->getTerminator())->getCondition() !=
411 if (cmp->getOperand(0) != Increment) {
415 assert(cmp->getOperand(0) == Increment);
417 auto scv = SE.getSCEVAtScope(cmp->getOperand(1), L);
418 if (cmp->isUnsigned() ||
419 (scv != SE.getCouldNotCompute() && SE.isKnownNonNegative(scv))) {
426 if (cmp->getPredicate() == ICmpInst::ICMP_ULT ||
427 cmp->getPredicate() == ICmpInst::ICMP_SLT) {
428 cmp->setPredicate(ICmpInst::ICMP_NE);
434 if (cmp->getPredicate() == ICmpInst::ICMP_ULE ||
435 cmp->getPredicate() == ICmpInst::ICMP_SLE) {
436 cmp->setOperand(0, CanonicalIV);
437 cmp->setPredicate(ICmpInst::ICMP_NE);
443 if (cmp->getPredicate() == ICmpInst::ICMP_UGE ||
444 cmp->getPredicate() == ICmpInst::ICMP_SGE) {
445 cmp->setPredicate(ICmpInst::ICMP_EQ);
451 if (cmp->getPredicate() == ICmpInst::ICMP_UGT ||
452 cmp->getPredicate() == ICmpInst::ICMP_SGT) {
453 cmp->setOperand(0, CanonicalIV);
454 cmp->setPredicate(ICmpInst::ICMP_EQ);
468 assert(found.dynamic);
470 return cast<AllocaInst>(&*found.trueLimit);
473 ReverseLimit ? found.preheader : &
newFunc->getEntryBlock());
474 AllocaInst *LimitVar =
478 for (
auto ExitBlock : found.exitBlocks) {
479 IRBuilder<> B(ExitBlock, ExitBlock->begin());
480 auto Limit = B.CreatePHI(found.var->getType(), 1);
482 for (BasicBlock *Pred : predecessors(ExitBlock)) {
483 if (L->contains(Pred)) {
484 Limit->addIncoming(found.var, Pred);
486 Limit->addIncoming(UndefValue::get(found.var->getType()), Pred);
492 found.trueLimit = LimitVar;
498 assert(BB->getParent() ==
newFunc);
499 Loop *L =
LI.getLoopFor(BB);
507 loopContext = *found;
515 assert(
loopContexts[L].header &&
"loop must have header");
518 if (!L->getLoopPreheader()) {
519 llvm::errs() <<
"fn: " << *L->getHeader()->getParent() <<
"\n";
520 llvm::errs() <<
"L: " << *L <<
"\n";
522 assert(
loopContexts[L].preheader &&
"loop must have preheader");
529 PHINode *CanonicalIV = pair.first;
530 auto incVar = pair.second;
535 CanonicalIV,
SE, *
this, incVar,
539 .CreateAlloca(CanonicalIV->getType(),
nullptr,
540 CanonicalIV->getName() +
"'ac");
542 Align(cast<IntegerType>(CanonicalIV->getType())->getBitWidth() / 8));
544 const SCEV *Limit =
nullptr;
545 const SCEV *MaxIterations =
nullptr;
547 const SCEV *MayExitMaxBECount =
nullptr;
549 SmallVector<BasicBlock *, 8> ExitingBlocks;
550 L->getExitingBlocks(ExitingBlocks);
554 for (
auto &ExitingBlock : ExitingBlocks) {
555 BasicBlock *Exit =
nullptr;
556 for (
auto *SBB : successors(ExitingBlock)) {
557 if (!L->contains(SBB)) {
565 ExitingBlock =
nullptr;
568 std::remove(ExitingBlocks.begin(), ExitingBlocks.end(),
nullptr),
569 ExitingBlocks.end());
573 for (BasicBlock *ExitingBlock : ExitingBlocks) {
574 assert(L->contains(ExitingBlock));
576 ScalarEvolution::ExitLimit EL =
579 bool seenHeaders =
false;
580 SmallPtrSet<BasicBlock *, 4> Seen;
581 std::deque<BasicBlock *> Todo = {ExitingBlock};
582 while (Todo.size()) {
583 auto cur = Todo.front();
587 if (!L->contains(cur))
593 for (
auto S : successors(cur)) {
598 if (MaxIterations ==
nullptr ||
599 MaxIterations ==
SE.getCouldNotCompute()) {
600 MaxIterations = EL.ExactNotTaken;
602 if (MaxIterations !=
SE.getCouldNotCompute()) {
603 if (EL.ExactNotTaken !=
SE.getCouldNotCompute()) {
605 SE.getUMaxFromMismatchedTypes(MaxIterations, EL.ExactNotTaken);
609 if (MayExitMaxBECount ==
nullptr ||
610 EL.ExactNotTaken ==
SE.getCouldNotCompute())
611 MayExitMaxBECount = EL.ExactNotTaken;
613 if (EL.ExactNotTaken != MayExitMaxBECount) {
614 MayExitMaxBECount =
SE.getCouldNotCompute();
618 if (MayExitMaxBECount ==
nullptr) {
619 MayExitMaxBECount =
SE.getCouldNotCompute();
621 if (MaxIterations ==
nullptr) {
622 MaxIterations =
SE.getCouldNotCompute();
624 Limit = MayExitMaxBECount;
627 Value *LimitVar =
nullptr;
629 if (
SE.getCouldNotCompute() != Limit) {
631 if (CanonicalIV ==
nullptr) {
632 report_fatal_error(
"Couldn't get canonical IV.");
635 SmallPtrSet<const SCEV *, 2> PotentialMins;
636 SmallVector<const SCEV *, 2> Todo = {Limit};
637 while (Todo.size()) {
638 auto S = Todo.back();
640 if (
auto SA = dyn_cast<SCEVSMaxExpr>(S)) {
641 for (
auto op : SA->operands())
643 }
else if (
auto SA = dyn_cast<SCEVUMaxExpr>(S)) {
644 for (
auto op : SA->operands())
646 }
else if (
auto SA = dyn_cast<SCEVAddExpr>(S)) {
647 for (
auto op : SA->operands())
650 PotentialMins.insert(S);
652 for (
auto op : PotentialMins) {
653 auto SM = dyn_cast<SCEVMulExpr>(op);
656 if (SM->getNumOperands() != 2)
658 for (
int i = 0; i < 2; i++)
659 if (
auto C = dyn_cast<SCEVConstant>(SM->getOperand(i))) {
661#if LLVM_VERSION_MAJOR > 16
662 if (C->getAPInt().isAllOnes())
664 if (C->getAPInt().isAllOnesValue())
667 const SCEV *prev = SM->getOperand(1 - i);
669 if (
auto ext = dyn_cast<SCEVZeroExtendExpr>(prev)) {
670 prev = ext->getOperand();
673 if (
auto ext = dyn_cast<SCEVSignExtendExpr>(prev)) {
674 prev = ext->getOperand();
679 if (
auto V = dyn_cast<SCEVUnknown>(prev)) {
680 if (
auto omp_lb_post = dyn_cast<LoadInst>(V->getValue())) {
682 dyn_cast<AllocaInst>(omp_lb_post->getPointerOperand());
684 for (
auto u : AI->users()) {
685 CallInst *call = dyn_cast<CallInst>(u);
688 Function *F = call->getCalledFunction();
691 if (F->getName() ==
"__kmpc_for_static_init_4" ||
692 F->getName() ==
"__kmpc_for_static_init_4u" ||
693 F->getName() ==
"__kmpc_for_static_init_8" ||
694 F->getName() ==
"__kmpc_for_static_init_8u") {
696 for (
auto u : call->getArgOperand(4)->users()) {
697 if (
auto si = dyn_cast<StoreInst>(u)) {
698 lb = si->getValueOperand();
704 for (
auto u : call->getArgOperand(5)->users()) {
705 if (
auto si = dyn_cast<StoreInst>(u)) {
706 ub = si->getValueOperand();
711 IRBuilder<> post(omp_lb_post->getNextNode());
713 post.CreateSub(ub, lb), CanonicalIV->getType());
715 post.CreateSub(omp_lb_post, lb,
"",
true,
true),
716 CanonicalIV->getType());
728 if (Limit->getType() != CanonicalIV->getType())
729 Limit =
SE.getZeroExtendExpr(Limit, CanonicalIV->getType());
731#if LLVM_VERSION_MAJOR >= 22
732 SCEVExpander Exp(
SE,
"enzyme");
734 SCEVExpander Exp(
SE, BB->getParent()->getParent()->getDataLayout(),
737 LimitVar = Exp.expandCodeFor(Limit, CanonicalIV->getType(),
744 DebugLoc loc = L->getHeader()->begin()->getDebugLoc();
745 for (
auto &I : *L->getHeader()) {
748 loc = I.getDebugLoc();
751 "SE could not compute loop limit of ",
752 L->getHeader()->getName(),
" of ",
753 L->getHeader()->getParent()->getName(),
"lim: ", *Limit,
754 " maxlim: ", *MaxIterations);
767 SE.getCouldNotCompute() != MaxIterations) {
768 if (MaxIterations->getType() != CanonicalIV->getType())
770 SE.getZeroExtendExpr(MaxIterations, CanonicalIV->getType());
772#if LLVM_VERSION_MAJOR >= 22
773 SCEVExpander Exp(
SE,
"enzyme");
775 SCEVExpander Exp(
SE, BB->getParent()->getParent()->getDataLayout(),
780 Exp.expandCodeFor(MaxIterations, CanonicalIV->getType(),
790 StringRef name,
bool shouldFree,
791 bool allocateInternal,
799 auto i64 = Type::getInt64Ty(T->getContext());
805 SmallVector<Type *, 4> types = {T};
806 SmallVector<PointerType *, 4> malloctypes;
807 bool isi1 = T->isIntegerTy() && cast<IntegerType>(T)->getBitWidth() == 1;
809 types[0] = Type::getInt8Ty(T->getContext());
810 for (
size_t i = 0; i < sublimits.size(); ++i) {
816 auto P = B.CreatePHI(i64, 1);
818 CallInst *malloccall;
824 malloctypes.push_back(cast<PointerType>(malloccall->getType()));
825 for (
auto &I : make_early_inc_range(reverse(*BB)))
828 BB->eraseFromParent();
830 types.push_back(allocType);
835 entryBuilder.setFastMathFlags(
getFast());
837 entryBuilder.CreateAlloca(types.back(),
nullptr, name +
"_cache");
839 ConstantInt *byteSizeOfType = ConstantInt::get(
840 i64,
newFunc->getParent()->getDataLayout().getTypeAllocSizeInBits(
845 alloc->setAlignment(Align(align));
847 if (sublimits.size() == 0) {
849 if (!isa<UndefValue>(val))
853 Value *storeInto = alloc;
858 for (
int i = sublimits.size() - 1; i >= 0; i--) {
859 const auto &containedloops = sublimits[i].second;
861 Type *myType = types[i];
863 ConstantInt *byteSizeOfType = ConstantInt::get(
864 Type::getInt64Ty(T->getContext()),
865 newFunc->getParent()->getDataLayout().getTypeAllocSizeInBits(myType) /
868 unsigned bsize = (unsigned)byteSizeOfType->getZExtValue();
871 CallInst *malloccall =
nullptr;
874 if (allocateInternal) {
876 IRBuilder<> allocationBuilder(
877 &containedloops.back().first.preheader->back());
879 Value *size = sublimits[i].first;
881 size = allocationBuilder.CreateLShr(
882 allocationBuilder.CreateAdd(
883 size, ConstantInt::get(Type::getInt64Ty(T->getContext()), 7),
885 ConstantInt::get(Type::getInt64Ty(T->getContext()), 3));
887 if (extraSize && i == 0) {
888 ValueToValueMapTy available;
889 for (
auto &sl : sublimits) {
890 for (
auto &cl : sl.second) {
892 available[cl.first.var] = cl.first.var;
895 Value *es =
unwrapM(extraSize, allocationBuilder, available,
898 size = allocationBuilder.CreateMul(size, es,
"",
true,
902 StoreInst *storealloc =
nullptr;
904 if (sublimits[i].second.back().first.maxLimit) {
905 Instruction *ZeroInst =
nullptr;
907 allocationBuilder, myType, size, name +
"_malloccache", &malloccall,
911 if (firstallocation != malloccall)
913 cast<Instruction>(firstallocation));
915 for (
auto &actx : sublimits[i].second) {
916 if (actx.first.offset) {
917 malloccall->setMetadata(
"enzyme_ompfor",
918 MDNode::get(malloccall->getContext(), {}));
924 if (ZeroInst->getOperand(0) != malloccall) {
926 cast<Instruction>(ZeroInst->getOperand(0)));
930 storealloc = allocationBuilder.CreateStore(firstallocation, storeInto);
936 if (CachePointerInvariantGroups.find(std::make_pair(
937 (Value *)alloc, i)) == CachePointerInvariantGroups.end()) {
938 MDNode *invgroup = MDNode::getDistinct(alloc->getContext(), {});
939 CachePointerInvariantGroups[std::make_pair((Value *)alloc, i)] =
942 storealloc->setMetadata(
943 LLVMContext::MD_invariant_group,
944 CachePointerInvariantGroups[std::make_pair((Value *)alloc, i)]);
950 llvm::PointerType *allocType = cast<PointerType>(types[i + 1]);
951 llvm::PointerType *mallocType = malloctypes[i];
956 auto zerostore = allocationBuilder.CreateStore(
962 IRBuilder<> build(containedloops.back().first.incvar->getNextNode());
963 Value *allocation = build.CreateLoad(allocType, storeInto);
965 if (allocation->getType() != mallocType) {
967 cast<Instruction>(build.CreateBitCast(allocation, mallocType));
972 CallInst *realloccall =
nullptr;
974 build, allocation, myType, containedloops.back().first.incvar, size,
979 if (reallocation->getType() != allocType) {
981 cast<Instruction>(build.CreateBitCast(reallocation, allocType));
988 storealloc = build.CreateStore(reallocation, storeInto);
1000 storealloc->setAlignment(Align(alignSize));
1005 if (CachePointerInvariantGroups.find(std::make_pair((Value *)alloc, i)) ==
1006 CachePointerInvariantGroups.end()) {
1007 MDNode *invgroup = MDNode::getDistinct(alloc->getContext(), {});
1008 CachePointerInvariantGroups[std::make_pair((Value *)alloc, i)] =
1011 Type *nextType = types[i + 1];
1013 containedloops.back().first.preheader, sublimits, i, alloc, nextType,
1014 byteSizeOfType, storeInto,
1015 CachePointerInvariantGroups[std::make_pair((Value *)alloc, i)]);
1016 if (freecall && malloccall) {
1017 auto ident = MDNode::getDistinct(malloccall->getContext(), {});
1018 malloccall->setMetadata(
"enzyme_cache_alloc",
1019 MDNode::get(malloccall->getContext(), {ident}));
1020 freecall->setMetadata(
"enzyme_cache_free",
1021 MDNode::get(freecall->getContext(), {ident}));
1028 IRBuilder<> v(&sublimits[i - 1].second.back().first.preheader->back());
1030 Value *idx = computeIndexOfChunk(
1031 true, v, containedloops,
1032 ValueToValueMapTy());
1034 storeInto = v.CreateLoad(types[i + 1], storeInto);
1035 cast<LoadInst>(storeInto)->setAlignment(Align(alignSize));
1036 storeInto = v.CreateGEP(types[i], storeInto, idx);
1037 cast<GetElementPtrInst>(storeInto)->setIsInBounds(
true);
1043Value *CacheUtility::computeIndexOfChunk(
1044 bool inForwardPass, IRBuilder<> &v,
1045 ArrayRef<std::pair<LoopContext, llvm::Value *>> containedloops,
1046 const ValueToValueMapTy &available) {
1048 SmallVector<Value *, 3> indices;
1051 SmallVector<Value *, 3> limits;
1054 for (
size_t i = 0; i < containedloops.size(); ++i) {
1055 const auto &pair = containedloops[i];
1057 const auto &idx = pair.first;
1058 Value *var = idx.var;
1062 if (idx.var ==
nullptr)
1063 var = ConstantInt::get(Type::getInt64Ty(
newFunc->getContext()), 0);
1064 else if (available.count(var)) {
1065 var = available.find(var)->second;
1066 }
else if (!inForwardPass) {
1067 var = v.CreateLoad(idx.var->getType(), idx.antivaralloc);
1072 var = v.CreateAdd(var,
lookupM(idx.offset, v),
"",
true,
1076 indices.push_back(var);
1077 Value *lim = pair.second;
1079 if (limits.size() == 0) {
1080 limits.push_back(lim);
1082 limits.push_back(v.CreateMul(limits.back(), lim,
"",
1087 assert(indices.size() > 0);
1090 Value *idx = indices[0];
1091 for (
unsigned ind = 1; ind < indices.size(); ++ind) {
1092 idx = v.CreateAdd(idx,
1093 v.CreateMul(indices[ind], limits[ind - 1],
"",
1114 SmallVector<LoopContext, 4> contexts;
1123 auto subctx = ctx.
Block;
1124 auto zero = ConstantInt::get(Type::getInt64Ty(
newFunc->getContext()), 0);
1138 contexts.push_back(idx);
1141 for (BasicBlock *blk = ctx.
Block; blk !=
nullptr;) {
1146 contexts.emplace_back(std::move(idx));
1151 SmallVector<BasicBlock *, 4> allocationPreheaders(contexts.size(),
nullptr);
1153 SmallVector<Value *, 4> limits(contexts.size(),
nullptr);
1156 for (
int i = contexts.size() - 1; i >= 0; --i) {
1159 if ((
unsigned)i == contexts.size() - 1) {
1160 allocationPreheaders[i] = contexts[i].preheader;
1161 }
else if (!contexts[i].maxLimit) {
1164 allocationPreheaders[i] = contexts[i].preheader;
1168 allocationPreheaders[i] = allocationPreheaders[i + 1];
1174 if (!contexts[i].maxLimit) {
1176 ConstantInt::get(Type::getInt64Ty(ctx.
Block->getContext()), 1);
1180 ValueToValueMapTy prevMap;
1183 for (
int j = contexts.size() - 1;; --j) {
1189 if (allocationPreheaders[i] != contexts[j].preheader) {
1190 prevMap[contexts[j].var] = contexts[j].var;
1196 IRBuilder<> allocationBuilder(&allocationPreheaders[i]->back());
1197 Value *limitMinus1 =
nullptr;
1199 Value *limit = contexts[i].maxLimit;
1200 if (contexts[i].allocLimit)
1201 limit = contexts[i].allocLimit;
1205 limitMinus1 =
unwrapM(limit, allocationBuilder, prevMap,
1212 if (limitMinus1 ==
nullptr) {
1213 EmitWarning(
"NoOuterLimit", *cast<Instruction>(&*limit),
1214 "Could not compute outermost loop limit by moving value ",
1215 *limit,
" computed at block", contexts[i].header->getName(),
1216 " function ", contexts[i].header->getParent()->getName());
1217 allocationPreheaders[i] = contexts[i].preheader;
1218 allocationBuilder.SetInsertPoint(&allocationPreheaders[i]->back());
1219 limitMinus1 =
unwrapM(limit, allocationBuilder, prevMap,
1221 if (limitMinus1 ==
nullptr) {
1222 llvm::errs() << *contexts[i].preheader->getParent() <<
"\n";
1223 llvm::errs() <<
"block: " << *allocationPreheaders[i] <<
"\n";
1224 llvm::errs() <<
"limit: " << *limit <<
"\n";
1226 assert(limitMinus1 !=
nullptr);
1227 }
else if (i == 0 && extraSize &&
1228 unwrapM(extraSize, allocationBuilder, prevMap,
1231 "NoOuterLimit", *cast<Instruction>(extraSize),
newFunc,
1232 cast<Instruction>(extraSize)->getParent(),
1233 "Could not compute outermost loop limit by moving extraSize value ",
1234 *extraSize,
" computed at block", contexts[i].header->getName(),
1235 " function ", contexts[i].header->getParent()->getName());
1236 allocationPreheaders[i] = contexts[i].preheader;
1237 allocationBuilder.SetInsertPoint(&allocationPreheaders[i]->back());
1239 assert(limitMinus1 !=
nullptr);
1241 ValueToValueMapTy reverseMap;
1243 for (
int j = contexts.size() - 1;; --j) {
1249 if (allocationPreheaders[i] != contexts[j].preheader) {
1250 if (!inForwardPass) {
1251 reverseMap[contexts[j].var] = RB->CreateLoad(
1252 contexts[j].var->getType(), contexts[j].antivaralloc);
1261 if (inForwardPass) {
1265 auto &map = LimitCache[limitMinus1];
1266 auto found = map.find(allocationPreheaders[i]);
1267 if (found != map.end() && found->second !=
nullptr) {
1268 limits[i] = found->second;
1270 limits[i] = map[allocationPreheaders[i]] =
1271 allocationBuilder.CreateNUWAdd(
1272 limitMinus1, ConstantInt::get(limitMinus1->getType(), 1));
1275 Value *lim =
unwrapM(limitMinus1, *RB, reverseMap,
1277 allocationPreheaders[i]);
1279 llvm::errs() << *
newFunc <<
"\n";
1280 llvm::errs() << *limitMinus1 <<
"\n";
1283 limits[i] = RB->CreateNUWAdd(lim, ConstantInt::get(lim->getType(), 1));
1291 Value *size =
nullptr;
1293 SmallVector<std::pair<LoopContext, Value *>, 3> lims;
1296 for (
unsigned i = 0; i < contexts.size(); ++i) {
1297 IRBuilder<> allocationBuilder(&allocationPreheaders[i]->back());
1298 lims.push_back(std::make_pair(contexts[i], limits[i]));
1300 if (size ==
nullptr) {
1303 }
else if (!inForwardPass) {
1304 size = RB->CreateMul(size, limits[i],
"",
1308 auto cidx = std::make_tuple(size, limits[i], allocationPreheaders[i]);
1309 if (SizeCache.find(cidx) == SizeCache.end()) {
1311 allocationBuilder.CreateMul(size, limits[i],
"",
1314 size = SizeCache[cidx];
1319 if ((i + 1 < contexts.size()) &&
1320 (allocationPreheaders[i] != allocationPreheaders[i + 1])) {
1321 sublimits.push_back(std::make_pair(size, lims));
1328 if (size !=
nullptr) {
1329 sublimits.push_back(std::make_pair(size, lims));
1339 IRBuilder<> &BuilderM, Value *val,
1340 AllocaInst *cache, MDNode *TBAA) {
1341 assert(BuilderM.GetInsertBlock()->getParent() ==
newFunc);
1343 if (
auto inst = dyn_cast<Instruction>(val))
1344 assert(inst->getParent()->getParent() ==
newFunc);
1346 IRBuilder<> v(BuilderM.GetInsertBlock());
1347 v.SetInsertPoint(BuilderM.GetInsertBlock(), BuilderM.GetInsertPoint());
1348 v.setFastMathFlags(
getFast());
1359 if (BuilderM.GetInsertPoint() != BuilderM.GetInsertBlock()->end()) {
1360 for (
auto I = BuilderM.GetInsertBlock()->rbegin(),
1361 E = BuilderM.GetInsertBlock()->rend();
1363 if (&*I == &*BuilderM.GetInsertPoint())
1365 if (
auto si = dyn_cast<StoreInst>(&*I)) {
1367 if (ni !=
nullptr) {
1368 v.SetInsertPoint(ni);
1370 v.SetInsertPoint(si->getParent());
1376 bool isi1 = val->getType()->isIntegerTy() &&
1377 cast<IntegerType>(val->getType())->getBitWidth() == 1;
1379 true, v, ctx, cache,
1381 llvm::ValueToValueMapTy(),
1384 Value *tostore = val;
1390 if (
auto gep = dyn_cast<GetElementPtrInst>(loc)) {
1391 auto bo = cast<BinaryOperator>(*gep->idx_begin());
1392 assert(bo->getOpcode() == BinaryOperator::LShr);
1393 auto subidx = v.CreateAnd(
1394 v.CreateTrunc(bo->getOperand(0),
1395 Type::getInt8Ty(cache->getContext())),
1396 ConstantInt::get(Type::getInt8Ty(cache->getContext()), 7));
1397 auto mask = v.CreateNot(v.CreateShl(
1398 ConstantInt::get(Type::getInt8Ty(cache->getContext()), 1), subidx));
1400 Value *loadChunk = v.CreateLoad(mask->getType(), loc);
1401 auto cleared = v.CreateAnd(loadChunk, mask);
1403 auto toset = v.CreateShl(
1404 v.CreateZExt(val, Type::getInt8Ty(cache->getContext())), subidx);
1405 tostore = v.CreateOr(cleared, toset);
1406 assert(tostore->getType() == mask->getType());
1410#if LLVM_VERSION_MAJOR < 17
1411 if (tostore->getContext().supportsTypedPointers()) {
1412 if (tostore->getType() != loc->getType()->getPointerElementType()) {
1413 llvm::errs() <<
"val: " << *val <<
"\n";
1414 llvm::errs() <<
"tostore: " << *tostore <<
"\n";
1415 llvm::errs() <<
"loc: " << *loc <<
"\n";
1417 assert(tostore->getType() == loc->getType()->getPointerElementType());
1421 StoreInst *storeinst = v.CreateStore(tostore, loc);
1425 if (tostore == val) {
1426 if (ValueInvariantGroups.find(cache) == ValueInvariantGroups.end()) {
1427 MDNode *invgroup = MDNode::getDistinct(cache->getContext(), {});
1428 ValueInvariantGroups[cache] = invgroup;
1430 storeinst->setMetadata(LLVMContext::MD_invariant_group,
1431 ValueInvariantGroups[cache]);
1435 ConstantInt *byteSizeOfType =
1436 ConstantInt::get(Type::getInt64Ty(cache->getContext()),
1437 ctx.Block->getParent()
1440 .getTypeAllocSizeInBits(val->getType()) /
1443 storeinst->setMetadata(LLVMContext::MD_tbaa, TBAA);
1444 storeinst->setAlignment(Align(align));
1454 llvm::Instruction *inst,
1455 llvm::AllocaInst *cache,
1456 llvm::MDNode *TBAA) {
1462 IRBuilder<> v(inst->getParent());
1465 if (&*inst->getParent()->rbegin() != inst) {
1466 auto pn = dyn_cast<PHINode>(inst);
1467 Instruction *putafter = (pn && pn->getNumIncomingValues() > 0)
1468 ? (inst->getParent()->getFirstNonPHI())
1471 v.SetInsertPoint(putafter);
1473 v.setFastMathFlags(
getFast());
1483 Value *cache,
bool storeInInstructionsMap,
1484 const ValueToValueMapTy &available,
1488 auto sublimits =
getSubLimits(inForwardPass, &BuilderM, ctx, extraSize);
1490 Value *next = cache;
1491 assert(next->getType()->isPointerTy());
1493 SmallVector<Type *, 4> types = {T};
1494 bool isi1 = T->isIntegerTy() && cast<IntegerType>(T)->getBitWidth() == 1;
1496 types[0] = Type::getInt8Ty(T->getContext());
1497 auto i64 = Type::getInt64Ty(T->getContext());
1498 for (
size_t i = 0; i < sublimits.size(); ++i) {
1504 auto P = B.CreatePHI(i64, 1);
1506 CallInst *malloccall;
1512 for (
auto &I : make_early_inc_range(reverse(*BB)))
1513 I.eraseFromParent();
1515 BB->eraseFromParent();
1517 types.push_back(allocType);
1521 for (
int i = sublimits.size() - 1; i >= 0; i--) {
1523 next = BuilderM.CreateLoad(types[i + 1], next);
1524 if (storeInInstructionsMap && isa<AllocaInst>(cache))
1526 cast<Instruction>(next));
1528 if (!next->getType()->isPointerTy()) {
1529 llvm::errs() << *
newFunc <<
"\n";
1530 llvm::errs() <<
"cache: " << *cache <<
"\n";
1531 llvm::errs() <<
"next: " << *next <<
"\n";
1532 assert(next->getType()->isPointerTy());
1536 if (CachePointerInvariantGroups.find(std::make_pair(cache, i)) ==
1537 CachePointerInvariantGroups.end()) {
1538 MDNode *invgroup = MDNode::getDistinct(cache->getContext(), {});
1539 CachePointerInvariantGroups[std::make_pair(cache, i)] = invgroup;
1541 cast<LoadInst>(next)->setMetadata(
1542 LLVMContext::MD_invariant_group,
1543 CachePointerInvariantGroups[std::make_pair(cache, i)]);
1546 ConstantInt *byteSizeOfType = ConstantInt::get(
1547 Type::getInt64Ty(cache->getContext()),
1548 newFunc->getParent()->getDataLayout().getTypeAllocSizeInBits(
1551 cast<LoadInst>(next)->setMetadata(
1552 LLVMContext::MD_dereferenceable,
1554 cache->getContext(),
1555 ArrayRef<Metadata *>(ConstantAsMetadata::get(byteSizeOfType))));
1558 cast<LoadInst>(next)->setAlignment(Align(align));
1560 const auto &containedloops = sublimits[i].second;
1562 if (containedloops.size() > 0) {
1563 Value *idx = computeIndexOfChunk(inForwardPass, BuilderM, containedloops,
1566 idx = BuilderM.CreateLShr(
1567 idx, ConstantInt::get(Type::getInt64Ty(
newFunc->getContext()), 3));
1568 if (i == 0 && extraSize) {
1569 Value *es =
lookupM(extraSize, BuilderM);
1571 idx = BuilderM.CreateMul(idx, es,
"",
true,
true);
1573 next = BuilderM.CreateGEP(types[i], next, idx);
1574 cast<GetElementPtrInst>(next)->setIsInBounds(
true);
1575 if (storeInInstructionsMap && isa<AllocaInst>(cache))
1577 cast<Instruction>(next));
1579 assert(next->getType()->isPointerTy());
1587 llvm::IRBuilder<> &BuilderM,
1589 llvm::Value *cache) {
1591 auto result = BuilderM.CreateLoad(T, cptr);
1594 if (ValueInvariantGroups.find(cache) == ValueInvariantGroups.end()) {
1595 MDNode *invgroup = MDNode::getDistinct(cache->getContext(), {});
1596 ValueInvariantGroups[cache] = invgroup;
1599 result->setMetadata(LLVMContext::MD_invariant_group,
1600 ValueInvariantGroups[cache]);
1601 ConstantInt *byteSizeOfType = ConstantInt::get(
1602 Type::getInt64Ty(cache->getContext()),
1603 newFunc->getParent()->getDataLayout().getTypeAllocSizeInBits(
1604 result->getType()) /
1607 result->setAlignment(Align(align));
1615 Type *T,
bool inForwardPass, IRBuilder<> &BuilderM,
LimitContext ctx,
1616 Value *cache,
bool isi1,
const ValueToValueMapTy &available,
1617 Value *extraSize, Value *extraOffset) {
1621 false, available, extraSize);
1625 cptr = BuilderM.CreateGEP(T, cptr, extraOffset);
1626 cast<GetElementPtrInst>(cptr)->setIsInBounds(
true);
1634 if (
auto gep = dyn_cast<GetElementPtrInst>(cptr)) {
1635 auto bo = cast<BinaryOperator>(*gep->idx_begin());
1636 assert(bo->getOpcode() == BinaryOperator::LShr);
1637 Value *res = BuilderM.CreateLShr(
1640 BuilderM.CreateTrunc(bo->getOperand(0),
1641 Type::getInt8Ty(cache->getContext())),
1642 ConstantInt::get(Type::getInt8Ty(cache->getContext()), 7)));
1643 return BuilderM.CreateTrunc(res, Type::getInt1Ty(result->getContext()));
llvm::cl::opt< bool > EfficientMaxCache("enzyme-max-cache", cl::init(false), cl::Hidden, cl::desc("Avoid reallocs when possible by potentially overallocating cache"))
std::pair< PHINode *, Instruction * > InsertNewCanonicalIV(Loop *L, Type *Ty, const llvm::Twine &Name)
void CanonicalizeLatches(const Loop *L, BasicBlock *Header, BasicBlock *Preheader, PHINode *CanonicalIV, MustExitScalarEvolution &SE, CacheUtility &gutils, Instruction *Increment, ArrayRef< BasicBlock * > latches)
void RemoveRedundantIVs(BasicBlock *Header, PHINode *CanonicalIV, Instruction *Increment, MustExitScalarEvolution &SE, llvm::function_ref< void(Instruction *, Value *)> replacer, llvm::function_ref< void(Instruction *)> eraser)
std::pair< PHINode *, Instruction * > FindCanonicalIV(Loop *L, Type *Ty)
llvm::cl::opt< bool > EnzymeZeroCache
llvm::cl::opt< bool > EfficientBoolCache
Pack 8 bools together in a single byte.
@ AttemptFullUnwrapWithLookup
static void getExitBlocks(const llvm::Loop *L, llvm::SmallPtrSetImpl< llvm::BasicBlock * > &ExitBlocks)
static llvm::SmallVector< llvm::BasicBlock *, 3 > getLatches(const llvm::Loop *L, const llvm::SmallPtrSetImpl< llvm::BasicBlock * > &ExitBlocks)
static std::string str(AugmentedStruct c)
llvm::Value * CreateReAllocation(llvm::IRBuilder<> &B, llvm::Value *prev, llvm::Type *T, llvm::Value *OuterCount, llvm::Value *InnerCount, const llvm::Twine &Name, llvm::CallInst **caller, bool ZeroMem)
llvm::SmallVector< llvm::Instruction *, 2 > PostCacheStore(llvm::StoreInst *SI, llvm::IRBuilder<> &B)
Value * CreateAllocation(IRBuilder<> &Builder, llvm::Type *T, Value *Count, const Twine &Name, CallInst **caller, Instruction **ZeroMem, bool isDefault)
LLVMValueRef(* CustomErrorHandler)(const char *, LLVMValueRef, ErrorType, const void *, LLVMValueRef, LLVMBuilderRef)
llvm::FastMathFlags getFast()
Get LLVM fast math flags.
llvm::Constant * getUndefinedValueForType(llvm::Module &M, llvm::Type *T, bool forceZero)
static llvm::Instruction * getNextNonDebugInstructionOrNull(llvm::Instruction *Z)
Get the next non-debug instruction, if one exists.
static V * findInMap(std::map< K, V > &map, K key)
void EmitFailure(llvm::StringRef RemarkName, const llvm::DiagnosticLocation &Loc, const llvm::Instruction *CodeRegion, Args &...args)
static std::map< K, V >::iterator insert_or_assign2(std::map< K, V > &map, K key, V val)
Insert into a map.
void EmitWarning(llvm::StringRef RemarkName, const llvm::DiagnosticLocation &Loc, const llvm::BasicBlock *BB, const Args &...args)
static llvm::Instruction * getNextNonDebugInstruction(llvm::Instruction *Z)
Get the next non-debug instruction, erring if none exists.
llvm::cl::opt< bool > EnzymePrintPerf
Print additional debug info relevant to performance.
static llvm::Instruction * getFirstNonPHIOrDbg(llvm::BasicBlock *B)
llvm::Function *const newFunc
The function whose instructions we are caching.
MustExitScalarEvolution SE
virtual llvm::Value * unwrapM(llvm::Value *const val, llvm::IRBuilder<> &BuilderM, const llvm::ValueToValueMapTy &available, UnwrapMode mode, llvm::BasicBlock *scope=nullptr, bool permitCache=true)=0
High-level utility to "unwrap" an instruction at a new location specified by BuilderM.
virtual llvm::CallInst * freeCache(llvm::BasicBlock *forwardPreheader, const SubLimitType &antimap, int i, llvm::AllocaInst *alloc, llvm::Type *myType, llvm::ConstantInt *byteSizeOfType, llvm::Value *storeInto, llvm::MDNode *InvariantMD)
If an allocation is requested to be freed, this subclass will be called to choose how and where to fre...
std::map< llvm::AllocaInst *, llvm::SmallVector< llvm::AssertingVH< llvm::CallInst >, 4 > > scopeAllocs
A map of allocations to a set of instructions which allocate memory as part of the cache.
llvm::AllocaInst * createCacheForScope(LimitContext ctx, llvm::Type *T, llvm::StringRef name, bool shouldFree, bool allocateInternal=true, llvm::Value *extraSize=nullptr)
Create a cache of Type T at the given LimitContext.
virtual void erase(llvm::Instruction *I)
Erase this instruction both from LLVM modules and any local data-structures.
llvm::Value * getCachePointer(llvm::Type *T, bool inForwardPass, llvm::IRBuilder<> &BuilderM, LimitContext ctx, llvm::Value *cache, bool storeInInstructionsMap, const llvm::ValueToValueMapTy &available, llvm::Value *extraSize)
Given an allocation specified by the LimitContext ctx and cache, compute a pointer that can hold the ...
llvm::Value * lookupValueFromCache(llvm::Type *T, bool inForwardPass, llvm::IRBuilder<> &BuilderM, LimitContext ctx, llvm::Value *cache, bool isi1, const llvm::ValueToValueMapTy &available, llvm::Value *extraSize=nullptr, llvm::Value *extraOffset=nullptr)
Given an allocation specified by the LimitContext ctx and cache, lookup the underlying cached value.
bool getContext(llvm::BasicBlock *BB, LoopContext &loopContext, bool ReverseLimit)
Given a BasicBlock BB in newFunc, set loopContext to the relevant contained loop and return true.
llvm::Value * loadFromCachePointer(llvm::Type *T, llvm::IRBuilder<> &BuilderM, llvm::Value *cptr, llvm::Value *cache)
Perform the final load from the cache, applying requisite invariant group and alignment.
std::map< llvm::AllocaInst *, std::set< llvm::AssertingVH< llvm::CallInst > > > scopeFrees
A map of allocations to a set of instructions which free memory as part of the cache.
std::map< llvm::Loop *, LoopContext > loopContexts
Map of Loop to requisite loop information needed for AD (forward/reverse induction/etc)
std::map< llvm::AllocaInst *, llvm::SmallVector< llvm::AssertingVH< llvm::Instruction >, 4 > > scopeInstructions
A map of allocations to a vector of instruction used to create by the allocation Keeping track of the...
llvm::BasicBlock * inversionAllocs
std::map< llvm::Value *, std::pair< llvm::AssertingVH< llvm::AllocaInst >, LimitContext > > scopeMap
A map of values being cached to their underlying allocation/limit context.
llvm::SmallVector< std::pair< llvm::Value *, llvm::SmallVector< std::pair< LoopContext, llvm::Value * >, 4 > >, 0 > SubLimitType
Given a LimitContext ctx, representing a location inside a loop nest, break each of the loops up into...
virtual void replaceAWithB(llvm::Value *A, llvm::Value *B, bool storeInCache=false)
Replace this instruction both in LLVM modules and any local data-structures.
unsigned getCacheAlignment(unsigned bsize) const
llvm::AllocaInst * getDynamicLoopLimit(llvm::Loop *L, bool ReverseLimit=true)
llvm::SmallPtrSet< llvm::LoadInst *, 10 > CacheLookups
SubLimitType getSubLimits(bool inForwardPass, llvm::IRBuilder<> *RB, LimitContext ctx, llvm::Value *extraSize=nullptr)
Given a LimitContext ctx, representing a location inside a loop nest, break each of the loops up into...
void storeInstructionInCache(LimitContext ctx, llvm::IRBuilder<> &BuilderM, llvm::Value *val, llvm::AllocaInst *cache, llvm::MDNode *TBAA=nullptr)
Given an allocation defined at a particular ctx, store the value val in the cache at the location def...
virtual llvm::Value * lookupM(llvm::Value *val, llvm::IRBuilder<> &BuilderM, const llvm::ValueToValueMapTy &incoming_availalble=llvm::ValueToValueMapTy(), bool tryLegalityCheck=true, llvm::BasicBlock *scope=nullptr)=0
High-level utility to get the value an instruction at a new location specified by BuilderM.
virtual bool assumeDynamicLoopOfSizeOne(llvm::Loop *L) const =0
llvm::SmallPtrSet< llvm::BasicBlock *, 4 > GuaranteedUnreachable
ScalarEvolution::ExitLimit computeExitLimit(const llvm::Loop *L, llvm::BasicBlock *ExitingBlock, bool AllowPredicates)
bool ForceSingleIteration
Container for all loop information to synthesize gradients.
llvm::Loop * parent
Parent loop of this loop.
llvm::BasicBlock * header
Header of this loop.
llvm::AssertingVH< llvm::Instruction > incvar
Increment of the induction.
bool dynamic
Whether this loop has a statically analyzable number of iterations.
llvm::SmallPtrSet< llvm::BasicBlock *, 8 > exitBlocks
All blocks this loop exits to.
llvm::AssertingVH< llvm::AllocaInst > antivaralloc
Allocation of induction variable of reverse pass.
llvm::AssertingVH< llvm::PHINode > var
Canonical induction variable of the loop.
AssertingReplacingVH offset
An offset to add to the index when getting the cache pointer.
llvm::BasicBlock * preheader
Preheader of this loop.
AssertingReplacingVH maxLimit
limit is last value of a canonical induction variable iters is number of times loop is run (thus iter...
AssertingReplacingVH allocLimit
An overriding allocation limit size.
AssertingReplacingVH trueLimit