Skip to content

Commit

Permalink
Add support for KA for GPUs (#34)
Browse files Browse the repository at this point in the history
  • Loading branch information
michel2323 committed Jun 27, 2022
1 parent cec89e6 commit 18dcaee
Show file tree
Hide file tree
Showing 69 changed files with 2,246 additions and 126 deletions.
10 changes: 7 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
name = "ExaAdmm"
uuid = "4d6a948c-1075-4240-a564-361a5d4e22a2"
authors = ["Youngdae Kim <[email protected]>", "Kibaek Kim <[email protected]>", "Weiqi Zhang <[email protected]>", "François Pacaud <[email protected]>", "Michel Schanen <[email protected]>"]
version = "0.1.3"
version = "0.2.0"

[deps]
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
ExaTron = "28b18bf8-76f9-41ea-81fa-0f922810b349"
FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Expand All @@ -16,8 +18,10 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[compat]
AMDGPU = "0.3"
CUDA = "3.4"
ExaTron = "1"
ExaTron = "2"
FileIO = "1.14"
julia = "1.7"
KernelAbstractions = "0.8"
MPI = "0.19"
julia = "1.7"
51 changes: 37 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,52 @@ ExaAdmm.jl implements the two-level alternating direction method of multipliers
The package can be installed in the Julia REPL with the command below:

```julia
] ExaAdmm
] add ExaAdmm
```

Running the algorithms on GPU requires Nvidia GPUs with `CUDA.jl`.
Running the algorithms on the GPU requires either NVIDIA GPUs with [`CUDA.jl`](https://github.com/JuliaGPU/CUDA.jl) or [`KernelAbstractions.jl`](https://github.com/JuliaGPU/KernelAbstractions.jl) (KA) with the respective device support (e.g., [`AMDGPU.jl`](https://github.com/JuliaGPU/AMDGPU.jl) and `ROCKernels.jl`). Currently, only the ACOPF problem is supported using KA.

## How to run

Currently, `ExaAdmm.jl` supports electrical grid files in the MATLAB format. You can download them from [here](https://github.com/MATPOWER/matpower).
Below shows an example of solving `case1354pegase.m` using `ExaAdmm.jl` on GPUs.
The example below solves `case1354pegase.m` using `ExaAdmm.jl` on an NVIDIA GPU:

```julia
env, mod = ExaAdmm.solve_acopf(
"case1354pegase.m";
rho_pq=1e1,
rho_va=1e3,
outer_iterlim=20,
inner_iterlim=20,
scale=1e-4,
tight_factor=0.99,
use_gpu=true
using ExaAdmm

env, mod = solve_acopf(
"case1354pegase.m";
rho_pq=1e1,
rho_va=1e3,
outer_iterlim=20,
inner_iterlim=20,
scale=1e-4,
tight_factor=0.99,
use_gpu=true,
verbose=1
);
```

The same example can be run on an AMD GPU:
```julia
# Load ExaAdmm together with the AMD GPU packages used by this example.
using ExaAdmm
using AMDGPU
using ROCKernels

# Add a `KAArray{T}` method for `ROCDevice` that returns an uninitialized
# `ROCArray{T}` of length `n`.
ExaAdmm.KAArray{T}(n::Int, ::ROCDevice) where {T} = ROCArray{T}(undef, n)

# Solve the case with `use_gpu=true` and a `ka_device`; this combination
# requests the KernelAbstractions (KA) kernels rather than the CUDA.jl ones.
env, mod = solve_acopf(
"case1354pegase.m";
rho_pq=1e1,
rho_va=1e3,
outer_iterlim=20,
inner_iterlim=20,
scale=1e-4,
tight_factor=0.99,
use_gpu=true,
ka_device = ROCDevice(),
verbose=1
)
```
The following table shows parameter values we used for solving pegase and ACTIVSg data.

Data | rho_pq | rho_va | scale | obj_scale
Expand All @@ -49,7 +72,7 @@ We have used the same `tight_factor=0.99`, `outer_iterlim=20`, and `inner_iterli
- Youngdae Kim and Kibaek Kim. "Accelerated Computation and Tracking of AC Optimal Power Flow Solutions using GPUs" arXiv preprint arXiv:2110.06879, 2021
- Youngdae Kim, François Pacaud, Kibaek Kim, and Mihai Anitescu. "Leveraging GPU batching for scalable nonlinear programming through massive lagrangian decomposition" arXiv preprint arXiv:2106.14995, 2021

## Acknowledgements
## Acknowledgments

This research was supported by the Exascale Computing Project (17-SC-20-SC), a collaborative effort of the U.S. Department of Energy Office of Science and the National Nuclear Security Administration.
This material is based upon work supported by the U.S. Department of Energy, Office of Science, under contract number DE-AC02-06CH11357.
Expand Down
28 changes: 26 additions & 2 deletions src/ExaAdmm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,24 @@ using LinearAlgebra
using SparseArrays
using MPI
using CUDA
import AMDGPU: ROCArray, has_rocm_gpu
using KernelAbstractions
using ExaTron
using Random

const KA = KernelAbstractions

export solve_acopf

struct KAArray{T} end

include("utils/parse_matpower.jl")
include("utils/opfdata.jl")
include("utils/environment.jl")
include("utils/grid_data.jl")
include("utils/print_statistics.jl")
include("utils/utilities_gpu.jl")
include("utils/utilities_ka.jl")

include("algorithms/admm_two_level.jl")

Expand Down Expand Up @@ -44,7 +53,7 @@ include("models/acopf/acopf_admm_update_residual_cpu.jl")
include("models/acopf/acopf_admm_update_lz_cpu.jl")
include("models/acopf/acopf_admm_prepoststep_cpu.jl")

# GPU specific implementation
# CUDA specific implementation
include("models/acopf/acopf_init_solution_gpu.jl")
include("models/acopf/acopf_generator_kernel_gpu.jl")
include("models/acopf/acopf_eval_linelimit_kernel_gpu.jl")
Expand All @@ -59,9 +68,25 @@ include("models/acopf/acopf_admm_update_residual_gpu.jl")
include("models/acopf/acopf_admm_update_lz_gpu.jl")
include("models/acopf/acopf_admm_prepoststep_gpu.jl")

# KA specific implementation
include("models/acopf/acopf_init_solution_ka.jl")
include("models/acopf/acopf_generator_kernel_ka.jl")
include("models/acopf/acopf_eval_linelimit_kernel_ka.jl")
include("models/acopf/acopf_tron_linelimit_kernel_ka.jl")
include("models/acopf/acopf_auglag_linelimit_kernel_ka.jl")
include("models/acopf/acopf_bus_kernel_ka.jl")
include("models/acopf/acopf_admm_update_x_ka.jl")
include("models/acopf/acopf_admm_update_xbar_ka.jl")
include("models/acopf/acopf_admm_update_z_ka.jl")
include("models/acopf/acopf_admm_update_l_ka.jl")
include("models/acopf/acopf_admm_update_residual_ka.jl")
include("models/acopf/acopf_admm_update_lz_ka.jl")
include("models/acopf/acopf_admm_prepoststep_ka.jl")

# Rolling horizon
include("models/acopf/acopf_admm_rolling_cpu.jl")
include("models/acopf/acopf_admm_rolling_gpu.jl")
include("models/acopf/acopf_admm_rolling_ka.jl")

# ----------------------------------------
# Multi-period ACOPF implementation
Expand Down Expand Up @@ -133,5 +158,4 @@ include("models/mpec/mpec_admm_update_residual_gpu.jl")
include("models/mpec/mpec_admm_update_lz_gpu.jl")
include("models/mpec/mpec_admm_prepoststep_gpu.jl")
=#

end # module
26 changes: 13 additions & 13 deletions src/algorithms/admm_two_level.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
function admm_two_level(
env::AdmmEnv, mod::AbstractOPFModel
env::AdmmEnv, mod::AbstractOPFModel, device::Union{Nothing,KA.GPU}=nothing
)
par = env.params
info = mod.info
Expand All @@ -13,7 +13,7 @@ function admm_two_level(
par.beta = par.initial_beta

if par.verbose > 0
admm_update_residual(env, mod)
admm_update_residual(env, mod, device)
@printf("%8s %8s %10s %10s %10s %10s %10s %10s %10s %10s %10s\n",
"Outer", "Inner", "Objval", "AugLag", "PrimRes", "EpsPrimRes",
"DualRes", "||z||", "||Ax+By||", "OuterTol", "Beta")
Expand All @@ -27,19 +27,19 @@ function admm_two_level(

overall_time = @timed begin
while info.outer < par.outer_iterlim
admm_increment_outer(env, mod)
admm_outer_prestep(env, mod)
admm_increment_outer(env, mod, device)
admm_outer_prestep(env, mod, device)

admm_increment_reset_inner(env, mod)
admm_increment_reset_inner(env, mod, device)
while info.inner < par.inner_iterlim
admm_increment_inner(env, mod)
admm_inner_prestep(env, mod)
admm_inner_prestep(env, mod, device)

admm_update_x(env, mod)
admm_update_xbar(env, mod)
admm_update_z(env, mod)
admm_update_l(env, mod)
admm_update_residual(env, mod)
admm_update_x(env, mod, device)
admm_update_xbar(env, mod, device)
admm_update_z(env, mod, device)
admm_update_l(env, mod, device)
admm_update_residual(env, mod, device)

info.eps_pri = sqrt_d / (2500*info.outer)

Expand All @@ -65,7 +65,7 @@ function admm_two_level(
break
end

admm_update_lz(env, mod)
admm_update_lz(env, mod, device)

if info.norm_z_curr > par.theta*info.norm_z_prev
par.beta = min(par.inc_c*par.beta, 1e24)
Expand All @@ -74,7 +74,7 @@ function admm_two_level(
end # @timed

info.time_overall = overall_time.time
admm_poststep(env, mod)
admm_poststep(env, mod, device)

if par.verbose > 0
print_statistics(env, mod)
Expand Down
26 changes: 21 additions & 5 deletions src/interface/solve_acopf.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,33 @@ function solve_acopf(case::String;
case_format="matpower",
outer_iterlim=20, inner_iterlim=1000, rho_pq=400.0, rho_va=40000.0,
obj_scale=1.0, scale=1e-4, storage_ratio=0.0, storage_charge_max=1.0,
use_gpu=false, use_linelimit=true, use_projection=false, tight_factor=1.0,
use_gpu=false, ka_device=nothing, use_linelimit=true, use_projection=false, tight_factor=1.0,
outer_eps=2*1e-4, gpu_no=0, verbose=1
)
T = Float64; TD = Array{Float64,1}; TI = Array{Int,1}; TM = Array{Float64,2}
if use_gpu
T = Float64
# 1. ka_device = nothing and use_gpu = false, CPU version of the code is used
# 2. ka_device = KA.CPU() and use_gpu = false, CPU version of the code is used, NOT the KA.CPU kernels
# due to nested kernels limitations and no added benefit
# 3. ka_device = nothing and use_gpu = true, use original CUDA.jl kernels
# 4. ka_device is a KA.GPU and use_gpu = true, use KA kernels
if !use_gpu && (isa(ka_device, Nothing) || isa(ka_device, KA.CPU))
TD = Array{Float64,1}; TI = Array{Int,1}; TM = Array{Float64,2}
ka_device = nothing
elseif use_gpu && isa(ka_device, Nothing)
CUDA.device!(gpu_no)
TD = CuArray{Float64,1}; TI = CuArray{Int,1}; TM = CuArray{Float64,2}
elseif use_gpu && isa(ka_device, KA.Device)
if has_cuda_gpu()
TD = CuArray{Float64,1}; TI = CuArray{Int,1}; TM = CuArray{Float64,2}
elseif has_rocm_gpu()
TD = ROCArray{Float64,1}; TI = ROCArray{Int,1}; TM = ROCArray{Float64,2}
end
else
error("Inconsistent device selection use_gpu=$use_gpu and ka_device=$(typeof(ka_device))")
end

env = AdmmEnv{T,TD,TI,TM}(case, rho_pq, rho_va; case_format=case_format,
use_gpu=use_gpu, use_linelimit=use_linelimit,
use_gpu=use_gpu, ka_device=ka_device, use_linelimit=use_linelimit,
use_projection=use_projection, tight_factor=tight_factor, gpu_no=gpu_no,
storage_ratio=storage_ratio, storage_charge_max=storage_charge_max,
verbose=verbose)
Expand All @@ -24,7 +40,7 @@ function solve_acopf(case::String;
env.params.outer_iterlim = outer_iterlim
env.params.inner_iterlim = inner_iterlim

admm_two_level(env, mod)
admm_two_level(env, mod, isa(ka_device, KA.CPU) ? nothing : ka_device)

return env, mod
end
9 changes: 6 additions & 3 deletions src/models/acopf/acopf_admm_increment.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ Increment outer iteration counter by one.
"""
function admm_increment_outer(
env::AdmmEnv,
mod::AbstractOPFModel
mod::AbstractOPFModel,
device=nothing
)
mod.info.outer += 1
return
Expand All @@ -14,7 +15,8 @@ Reset inner iteration counter to zero.
"""
function admm_increment_reset_inner(
env::AdmmEnv,
mod::AbstractOPFModel
mod::AbstractOPFModel,
device=nothing
)
mod.info.inner = 0
return
Expand All @@ -25,7 +27,8 @@ Increment inner iteration counter by one.
"""
function admm_increment_inner(
env::AdmmEnv,
mod::AbstractOPFModel
mod::AbstractOPFModel,
device=nothing
)
mod.info.inner += 1
mod.info.cumul += 1
Expand Down
9 changes: 6 additions & 3 deletions src/models/acopf/acopf_admm_prepoststep_cpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ Implement any algorithmic steps required before each outer iteration.
"""
function admm_outer_prestep(
env::AdmmEnv{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}},
mod::AbstractOPFModel{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}}
mod::AbstractOPFModel{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}},
device::Nothing=nothing
)
sol, info = mod.solution, mod.info
info.norm_z_prev = norm(sol.z_curr)
Expand All @@ -15,7 +16,8 @@ Implement any algorithmic steps required before each inner iteration.
"""
function admm_inner_prestep(
env::AdmmEnv{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}},
mod::AbstractOPFModel{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}}
mod::AbstractOPFModel{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}},
device::Nothing=nothing
)
sol = mod.solution
sol.z_prev .= sol.z_curr
Expand All @@ -27,7 +29,8 @@ Implement any steps required after the algorithm terminates.
"""
function admm_poststep(
env::AdmmEnv{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}},
mod::AbstractOPFModel{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}}
mod::AbstractOPFModel{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}},
device::Nothing=nothing
)
data, sol, info, grid_data = env.data, mod.solution, mod.info, mod.grid_data

Expand Down
9 changes: 6 additions & 3 deletions src/models/acopf/acopf_admm_prepoststep_gpu.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
function admm_outer_prestep(
env::AdmmEnv{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}},
mod::AbstractOPFModel{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}}
mod::AbstractOPFModel{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}},
device::Nothing=nothing
)
sol, info = mod.solution, mod.info
info.norm_z_prev = CUDA.norm(sol.z_curr)
Expand All @@ -9,7 +10,8 @@ end

function admm_inner_prestep(
env::AdmmEnv{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}},
mod::AbstractOPFModel{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}}
mod::AbstractOPFModel{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}},
device::Nothing=nothing
)
sol = mod.solution
@cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) copy_data_kernel(mod.nvar, sol.z_prev, sol.z_curr)
Expand All @@ -20,7 +22,8 @@ end

function admm_poststep(
env::AdmmEnv{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}},
mod::AbstractOPFModel{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}}
mod::AbstractOPFModel{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}},
device::Nothing=nothing
)
data, sol, info, grid_data = env.data, mod.solution, mod.info, mod.grid_data

Expand Down
Loading

0 comments on commit 18dcaee

Please sign in to comment.