Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
ad86aa8
[MLIR][OpenMP] Remove Generic-SPMD early detection
skatrak May 21, 2025
d3c02e9
[OpenMP][OMPIRBuilder] Add device shared memory allocation support
skatrak Jul 8, 2025
8d3d190
[MLIR][OpenMP] Support allocations of device shared memory
skatrak Jun 27, 2025
c340791
[OpenMP][OMPIRBuilder] Use device shared memory for arg structures
skatrak Jul 3, 2025
8c1f771
[OpenMP][OMPIRBuilder] Support parallel in Generic kernels
skatrak Jul 4, 2025
d1929e3
[OpenMPOpt] Make parallel regions reachable from new DeviceRTL loop f…
skatrak Jul 25, 2025
1135d95
[OMPIRBuilder] Add support for explicit deallocation points
skatrak Aug 21, 2025
2c3db47
[MLIR][OpenMP] Refactor omp.target_allocmem to allow reuse, NFC
skatrak Sep 12, 2025
bda01ae
[Flang][MLIR][OpenMP] Add explicit shared memory (de-)allocation ops
skatrak Sep 12, 2025
4ebdc72
[Flang][OpenMP] Add pass to replace allocas with device shared memory
skatrak Sep 16, 2025
58abac4
[MLIR][OpenMP][OMPIRBuilder] Improve shared memory checks
skatrak Sep 16, 2025
4af004b
Address test failures: enable passing test and fix omp.target private…
skatrak Oct 14, 2025
424886a
[Flang][MLIR][OpenMP] Support passing local values to device functions
skatrak Oct 20, 2025
64b584b
delay stack to shared pass to process all llvm.mlir.allocas
skatrak Oct 22, 2025
09520da
move stack-to-shared pass from Flang to the OpenMP dialect
skatrak Oct 22, 2025
9bbee8f
Unify device shared memory logic for fix-up pass and MLIR to LLVMIR t…
skatrak Oct 23, 2025
4c1c46a
Simplify omp.alloc_shared_mem
skatrak Oct 24, 2025
afd271b
Make omp-stack-to-shared pass available to the compiler and move test…
skatrak Oct 24, 2025
9f113a4
remove spurious diffs
skatrak Oct 27, 2025
270a889
Remove overly restrictive verifier check for omp.target_freemem
skatrak Oct 28, 2025
609eda1
Fix issue with non-pointer parallel region inputs
skatrak Oct 31, 2025
f264642
Support other map-like clauses
skatrak Nov 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions clang/lib/CodeGen/CGOpenMPRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11354,8 +11354,8 @@ void CGOpenMPRuntime::emitTargetDataCalls(
llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail(OMPBuilder.createTargetData(
OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
CustomMapperCB,
OmpLoc, AllocaIP, CodeGenIP, /*DeallocIPs=*/{}, DeviceID, IfCondVal,
Info, GenMapInfoCB, CustomMapperCB,
/*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
CGF.Builder.restoreIP(AfterIP);
}
Expand Down
77 changes: 43 additions & 34 deletions clang/lib/CodeGen/CGStmtOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2238,20 +2238,21 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();

auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
InsertPointTy CodeGenIP) {
auto BodyGenCB = [&, this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
ArrayRef<InsertPointTy> DeallocIPs) {
OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
*this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel");
*this, ParallelRegionBodyStmt, AllocIP, CodeGenIP, "parallel");
return llvm::Error::success();
};

CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
IfCond, NumThreads, ProcBind, S.hasCancel()));
llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail(OMPBuilder.createParallel(
Builder, AllocaIP, /*DeallocIPs=*/{}, BodyGenCB, PrivCB, FiniCB,
IfCond, NumThreads, ProcBind, S.hasCancel()));
Builder.restoreIP(AfterIP);
return;
}
Expand Down Expand Up @@ -4936,21 +4937,23 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
if (CS) {
for (const Stmt *SubStmt : CS->children()) {
auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
InsertPointTy CodeGenIP) {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
*this, SubStmt, AllocaIP, CodeGenIP, "section");
auto SectionCB = [this, SubStmt](InsertPointTy AllocIP,
InsertPointTy CodeGenIP,
ArrayRef<InsertPointTy> DeallocIPs) {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(*this, SubStmt, AllocIP,
CodeGenIP, "section");
return llvm::Error::success();
};
SectionCBVector.push_back(SectionCB);
}
} else {
auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
InsertPointTy CodeGenIP) {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
*this, CapturedStmt, AllocaIP, CodeGenIP, "section");
return llvm::Error::success();
};
auto SectionCB =
[this, CapturedStmt](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
ArrayRef<InsertPointTy> DeallocIPs) {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
*this, CapturedStmt, AllocIP, CodeGenIP, "section");
return llvm::Error::success();
};
SectionCBVector.push_back(SectionCB);
}

Expand Down Expand Up @@ -5004,10 +5007,11 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
return llvm::Error::success();
};

auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
InsertPointTy CodeGenIP) {
auto BodyGenCB = [SectionRegionBodyStmt,
this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
ArrayRef<InsertPointTy> DeallocIPs) {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
*this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section");
*this, SectionRegionBodyStmt, AllocIP, CodeGenIP, "section");
return llvm::Error::success();
};

Expand Down Expand Up @@ -5089,10 +5093,11 @@ void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
return llvm::Error::success();
};

auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
InsertPointTy CodeGenIP) {
auto BodyGenCB = [MasterRegionBodyStmt,
this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
ArrayRef<InsertPointTy> DeallocIPs) {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
*this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master");
*this, MasterRegionBodyStmt, AllocIP, CodeGenIP, "master");
return llvm::Error::success();
};

Expand Down Expand Up @@ -5139,10 +5144,11 @@ void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
return llvm::Error::success();
};

auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
InsertPointTy CodeGenIP) {
auto BodyGenCB = [MaskedRegionBodyStmt,
this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
ArrayRef<InsertPointTy> DeallocIPs) {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
*this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked");
*this, MaskedRegionBodyStmt, AllocIP, CodeGenIP, "masked");
return llvm::Error::success();
};

Expand Down Expand Up @@ -5182,10 +5188,11 @@ void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
return llvm::Error::success();
};

auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
InsertPointTy CodeGenIP) {
auto BodyGenCB = [CriticalRegionBodyStmt,
this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
ArrayRef<InsertPointTy> DeallocIPs) {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
*this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical");
*this, CriticalRegionBodyStmt, AllocIP, CodeGenIP, "critical");
return llvm::Error::success();
};

Expand Down Expand Up @@ -6152,8 +6159,8 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(
InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
AllocaInsertPt->getIterator());

auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
InsertPointTy CodeGenIP) {
auto BodyGenCB = [&, this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
ArrayRef<InsertPointTy> DeallocIPs) {
Builder.restoreIP(CodeGenIP);
EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
return llvm::Error::success();
Expand All @@ -6162,7 +6169,8 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(
if (!CapturedStmtInfo)
CapturedStmtInfo = &CapStmtInfo;
llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB));
cantFail(OMPBuilder.createTaskgroup(Builder, AllocaIP,
/*DeallocIPs=*/{}, BodyGenCB));
Builder.restoreIP(AfterIP);
return;
}
Expand Down Expand Up @@ -6879,8 +6887,9 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
return llvm::Error::success();
};

auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
InsertPointTy CodeGenIP) {
auto BodyGenCB = [&S, C, this](InsertPointTy AllocIP,
InsertPointTy CodeGenIP,
ArrayRef<InsertPointTy> DeallocIPs) {
Builder.restoreIP(CodeGenIP);

const CapturedStmt *CS = S.getInnermostCapturedStmt();
Expand All @@ -6898,7 +6907,7 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
OutlinedFn, CapturedVars);
} else {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
*this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered");
*this, CS->getCapturedStmt(), AllocIP, CodeGenIP, "ordered");
}
return llvm::Error::success();
};
Expand Down
2 changes: 2 additions & 0 deletions flang/include/flang/Optimizer/Support/InitFIR.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
#include "mlir/Dialect/OpenACC/Transforms/Passes.h"
#include "mlir/Dialect/OpenMP/Transforms/Passes.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/SCF/Transforms/Passes.h"
#include "mlir/InitAllDialects.h"
Expand Down Expand Up @@ -106,6 +107,7 @@ inline void loadDialects(mlir::MLIRContext &context) {
/// but is a smaller set since we aren't using many of the passes found there.
inline void registerMLIRPassesForFortranTools() {
mlir::acc::registerOpenACCPasses();
mlir::omp::registerOpenMPPasses();
mlir::registerCanonicalizerPass();
mlir::registerCSEPass();
mlir::affine::registerAffineLoopFusionPass();
Expand Down
3 changes: 1 addition & 2 deletions flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,7 @@ struct TargetAllocMemOpConversion
size = mlir::LLVM::MulOp::create(rewriter, loc, ity, size, scaleSize);
for (mlir::Value opnd : adaptor.getOperands().drop_front())
size = mlir::LLVM::MulOp::create(
rewriter, loc, ity, size,
integerCast(lowerTy(), loc, rewriter, ity, opnd));
rewriter, loc, ity, size, integerCast(lowerTy(), loc, rewriter, ity, opnd));
auto mallocTyWidth = lowerTy().getIndexTypeBitwidth();
auto mallocTy =
mlir::IntegerType::get(rewriter.getContext(), mallocTyWidth);
Expand Down
4 changes: 4 additions & 0 deletions flang/lib/Optimizer/Passes/Pipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
/// common to flang and the test tools.

#include "flang/Optimizer/Passes/Pipelines.h"
#include "mlir/Dialect/OpenMP/Transforms/Passes.h"
#include "llvm/Support/CommandLine.h"

/// Force setting the no-alias attribute on function arguments when possible.
Expand Down Expand Up @@ -408,6 +409,9 @@ void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm,
}

fir::addFIRToLLVMPass(pm, config);

if (config.EnableOpenMP && !config.EnableOpenMPSimd)
pm.addPass(mlir::omp::createStackToSharedPass());
}

/// Create a pass pipeline for lowering from MLIR to LLVM IR
Expand Down
2 changes: 2 additions & 0 deletions flang/test/Fir/basic-program.fir
Original file line number Diff line number Diff line change
Expand Up @@ -161,5 +161,7 @@ func.func @_QQmain() {
// PASSES-NEXT: LowerNontemporalPass
// PASSES-NEXT: FIRToLLVMLowering
// PASSES-NEXT: ReconcileUnrealizedCasts
// PASSES-NEXT: 'llvm.func' Pipeline
// PASSES-NEXT: StackToSharedPass
// PASSES-NEXT: PrepareForOMPOffloadPrivatizationPass
// PASSES-NEXT: LLVMIRLoweringPass
14 changes: 6 additions & 8 deletions flang/test/Integration/OpenMP/threadprivate-target-device.f90
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,14 @@
! target code in the same function.

! CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}(ptr %{{.*}}, ptr %[[ARG1:.*]], ptr %[[ARG2:.*]]) #{{[0-9]+}} {
! CHECK: %[[ALLOCA_X:.*]] = alloca ptr, align 8, addrspace(5)
! CHECK: %[[ASCAST_X:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_X]] to ptr
! CHECK: store ptr %[[ARG1]], ptr %[[ASCAST_X]], align 8
! CHECK: %[[ALLOC_N:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
! CHECK: store ptr %[[ARG2]], ptr %[[ALLOC_N]], align 8

! CHECK: %[[ALLOCA_N:.*]] = alloca ptr, align 8, addrspace(5)
! CHECK: %[[ASCAST_N:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_N]] to ptr
! CHECK: store ptr %[[ARG2]], ptr %[[ASCAST_N]], align 8
! CHECK: %[[ALLOC_X:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
! CHECK: store ptr %[[ARG1]], ptr %[[ALLOC_X]], align 8

! CHECK: %[[LOAD_X:.*]] = load ptr, ptr %[[ASCAST_X]], align 8
! CHECK: call void @bar_(ptr %[[LOAD_X]], ptr %[[ASCAST_N]])
! CHECK: %[[LOAD_X:.*]] = load ptr, ptr %[[ALLOC_X]], align 8
! CHECK: call void @bar_(ptr %[[LOAD_X]], ptr %[[ALLOC_N]])

module test
implicit none
Expand Down
Loading