diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 165bbe3..11f6028 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,7 +35,6 @@ jobs: env: CUDA_VISIBLE_DEVICES: 1 JULIA_DEPOT_PATH: /scratch/github-actions/julia_depot_exaadmmm - JULIA_CUDA_USE_BINARYBUILDER: true runs-on: self-hosted strategy: matrix: diff --git a/.gitignore b/.gitignore index 90ed0f8..8789fd2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ Manifest.toml docs/build docs/Manifest.toml +.vscode/settings.json +Manifest.toml diff --git a/Artifacts.toml b/Artifacts.toml new file mode 100644 index 0000000..c12cd5c --- /dev/null +++ b/Artifacts.toml @@ -0,0 +1,6 @@ +[ExaData] +git-tree-sha1 = "df547b1e5d707fdb88721eee9bf84f9211e1fb50" +lazy = true + [[ExaData.download]] + url = "https://web.cels.anl.gov/~mschanen/ExaData-e1b6123.tar.gz" + sha256 = "f6e22f26fbf273e97625a25b1b55f07d0eb0e15283483c545dda6eda84c48fdd" diff --git a/Project.toml b/Project.toml index fe0338d..4e10004 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ExaAdmm" uuid = "4d6a948c-1075-4240-a564-361a5d4e22a2" -authors = ["Youngdae Kim ", "Kibaek Kim ", "Weiqi Zhang ", "François Pacaud ", "Michel Schanen "] -version = "0.4.0" +authors = ["Youngdae Kim ", "Kibaek Kim ", "Weiqi Zhang ", "Bowen Li ", "François Pacaud ", "Michel Schanen "] +version = "0.4.1" [deps] AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" @@ -19,7 +19,7 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] AMDGPU = "0.4" -CUDA = "4" +CUDA = "4.1" ExaTron = "3" FileIO = "1.14" KernelAbstractions = "0.9" diff --git a/src/ExaAdmm.jl b/src/ExaAdmm.jl index 535caec..5897a09 100644 --- a/src/ExaAdmm.jl +++ b/src/ExaAdmm.jl @@ -27,6 +27,7 @@ include("utils/utilities_gpu.jl") include("utils/utilities_ka.jl") include("algorithms/admm_two_level.jl") +include("algorithms/admm_one_level.jl") # ---------------------------------------- # A single period ACOPF implementation @@ -158,4 +159,37 @@ include("models/mpec/mpec_admm_update_residual_gpu.jl") include("models/mpec/mpec_admm_update_lz_gpu.jl") include("models/mpec/mpec_admm_prepoststep_gpu.jl") =# + +# Interface to use ADMM solving QPsub. +include("interface/solve_qpsub.jl") + +# Define "struct ModelAcopf" for encapsulating an ACOPF model. 
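# The QPsub files included below specialize the one-level ADMM update steps for
# ModelQpsub. A schematic sketch of the calls made by admm_one_level(env, mod)
# on each inner iteration (signatures abbreviated; see the individual files):
#
#   admm_update_x(env, mod)         # branch/generator subproblems (ExaTron ALM)
#   admm_update_xbar(env, mod)      # bus consensus update
#   admm_update_l_single(env, mod)  # multiplier update l <- l + rho .* (u - v)
#   admm_update_residual(env, mod)  # primal/dual residuals, objective, auglag
#
# CPU and GPU methods are selected by the array types (Array vs CuArray) carried
# in AdmmEnv and ModelQpsub. The QPsub model struct itself is defined next: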
+include("models/qpsub/qpsub_model.jl") + +# CPU +include("models/qpsub/qpsub_init_solution_cpu.jl") +include("models/qpsub/qpsub_generator_kernel_cpu.jl") +include("models/qpsub/qpsub_admm_update_x_cpu.jl") +include("models/qpsub/qpsub_admm_update_xbar_cpu.jl") +include("models/qpsub/qpsub_admm_update_l_single_cpu.jl") +include("models/qpsub/qpsub_admm_update_residual_cpu.jl") +include("models/qpsub/qpsub_admm_prepoststep_cpu.jl") +include("models/qpsub/qpsub_eval_Ab_linelimit_kernel_cpu.jl") +include("models/qpsub/qpsub_auglag_Ab_linelimit_kernel_red_cpu.jl") +include("models/qpsub/qpsub_auglag_tron_linelimit_kernel_cpu.jl") + +# GPU +include("models/qpsub/qpsub_init_solution_gpu.jl") +include("models/qpsub/qpsub_generator_kernel_gpu.jl") +include("models/qpsub/qpsub_admm_update_x_gpu.jl") +include("models/qpsub/qpsub_admm_update_xbar_gpu.jl") +include("models/qpsub/qpsub_admm_update_l_single_gpu.jl") +include("models/qpsub/qpsub_admm_update_residual_gpu.jl") +include("models/qpsub/qpsub_eval_Ab_linelimit_kernel_gpu.jl") +include("models/qpsub/qpsub_auglag_Ab_linelimit_kernel_red_gpu.jl") +include("models/qpsub/qpsub_tron_linelimit_kernel.jl") +include("models/qpsub/qpsub_admm_prepoststep_gpu.jl") + + + end # module diff --git a/src/algorithms/admm_one_level.jl b/src/algorithms/admm_one_level.jl new file mode 100644 index 0000000..97cf6d1 --- /dev/null +++ b/src/algorithms/admm_one_level.jl @@ -0,0 +1,80 @@ +function admm_one_level( + env::AdmmEnv, mod::AbstractOPFModel +) + par = env.params + info = mod.info + sol = mod.solution + + sqrt_d = sqrt(mod.nvar) + OUTER_TOL = sqrt_d*(par.outer_eps) #adjusted outer loop tolerance + + fill!(info, 0) + info.mismatch = Inf + + #eliminate second level + info.norm_z_prev = info.norm_z_curr = 0 + par.initial_beta = 0 + par.beta = 0 + sol.lz .= 0 + sol.z_curr .= 0 + sol.z_prev .= 0 + par.inner_iterlim = 1 + + if par.verbose > 0 + admm_update_residual(env, mod) + @printf("%8s %10s %10s %10s %10s %10s %10s\n", + "Iter", "Objval", "Auglag", "PrimRes", "PrimTol", "DualRes", "DualTol") + + @printf("%8d %10.3e %10.3e %10.3e %10.3e %10.3e %10.3e\n", + info.outer, info.objval, info.auglag, info.mismatch, OUTER_TOL, info.dualres, OUTER_TOL*norm(sol.rho)/sqrt_d) + end + + info.status = :IterationLimit + + overall_time = @timed begin + while info.outer < par.outer_iterlim + admm_increment_outer(env, mod) + + + admm_increment_reset_inner(env, mod) + while info.inner < par.inner_iterlim + admm_increment_inner(env, mod) + + admm_update_x(env, mod) + + admm_update_xbar(env, mod) + + admm_update_l_single(env, mod) + + admm_update_residual(env, mod) + + if par.verbose > 0 + if (info.cumul % 50) == 0 + @printf("%8s %10s %10s %10s %10s %10s %10s\n", + "Iter", "Objval", "Auglag", "PrimRes", "PrimTol", "DualRes", "DualTol") + end + + @printf("%8d %10.3e %10.3e %10.3e %10.3e %10.3e %10.3e\n", + info.outer, info.objval, info.auglag, info.mismatch, OUTER_TOL, info.dualres, OUTER_TOL*norm(sol.rho)/sqrt_d) + end + + end # while inner + + # mismatch: x-xbar + if info.mismatch <= OUTER_TOL && info.dualres <= OUTER_TOL*norm(sol.rho)/sqrt_d + info.status = :Solved + break + end + + end # while outer + end # @timed + + info.time_overall = overall_time.time + admm_poststep(env, mod) + + if par.verbose > 0 + print_statistics(env, mod) + end + + return +end diff --git a/src/algorithms/admm_two_level.jl b/src/algorithms/admm_two_level.jl index 2f6d26f..559ffbc 100644 --- a/src/algorithms/admm_two_level.jl +++ b/src/algorithms/admm_two_level.jl @@ -5,7 +5,7 @@ function admm_two_level( 
info = mod.info sqrt_d = sqrt(mod.nvar) - OUTER_TOL = sqrt_d*(par.outer_eps) + OUTER_TOL = sqrt_d*(par.outer_eps) #adjusted outer loop tolerance fill!(info, 0) info.mismatch = Inf @@ -41,6 +41,7 @@ function admm_two_level( admm_update_l(env, mod, device) admm_update_residual(env, mod, device) + # an adjusting termination criteria for inner loop (i.e., inner loop is not solved to exact) info.eps_pri = sqrt_d / (2500*info.outer) if par.verbose > 0 @@ -55,11 +56,13 @@ function admm_two_level( info.dualres, info.norm_z_curr, info.mismatch, OUTER_TOL, par.beta) end + # primres: x-xbar+z_curr if info.primres <= info.eps_pri #|| info.dualres <= par.DUAL_TOL break end end # while inner + # mismatch: x-xbar if info.mismatch <= OUTER_TOL info.status = :Solved break @@ -67,6 +70,7 @@ function admm_two_level( admm_update_lz(env, mod, device) + # if z_curr too large vs z_prev, increase penalty if info.norm_z_curr > par.theta*info.norm_z_prev par.beta = min(par.inc_c*par.beta, 1e24) end @@ -81,4 +85,4 @@ function admm_two_level( end return -end \ No newline at end of file +end diff --git a/src/interface/solve_acopf.jl b/src/interface/solve_acopf.jl index d1b4e28..2ec036a 100644 --- a/src/interface/solve_acopf.jl +++ b/src/interface/solve_acopf.jl @@ -37,6 +37,8 @@ function solve_acopf(case::String; env.params.outer_eps = outer_eps env.params.outer_iterlim = outer_iterlim env.params.inner_iterlim = inner_iterlim + #memory allocation for CuDynamicSharedArray + env.params.shmem_size = sizeof(Float64)*(14*mod.n+3*mod.n^2) + sizeof(Int)*(4*mod.n) admm_two_level(env, mod, isa(ka_device, KA.CPU) ? nothing : ka_device) diff --git a/src/interface/solve_pf.jl b/src/interface/solve_pf.jl index e761590..dea6741 100644 --- a/src/interface/solve_pf.jl +++ b/src/interface/solve_pf.jl @@ -70,4 +70,4 @@ function solve_pf(pf::PowerFlow; tol=1e-6, max_iter=50) @printf("Elapsed time (secs). . . 
%8.2f\n", timed.time) return pf -end \ No newline at end of file +end diff --git a/src/interface/solve_qpsub.jl b/src/interface/solve_qpsub.jl new file mode 100644 index 0000000..f4ede73 --- /dev/null +++ b/src/interface/solve_qpsub.jl @@ -0,0 +1,109 @@ +function solve_qpsub( + case::String, + Hs, + LH_1h, + RH_1h, + LH_1i, + RH_1i, + LH_1j, + RH_1j, + LH_1k, + RH_1k, + ls, + us, + pgmax, + pgmin, + qgmax, + qgmin, + c1, + c2, + Pd, + Qd, + initial_beta; + case_format = "matpower", + outer_iterlim = 20, + inner_iterlim = 1000, + rho_pq = 400.0, + rho_va = 40000.0, + obj_scale = 1.0, + scale = 1e-4, + storage_ratio = 0.0, + storage_charge_max = 1.0, + use_gpu = false, + use_linelimit = true, + use_projection = false, + tight_factor = 1.0, + outer_eps = 2 * 1e-4, + gpu_no = 0, + verbose = 1, + onelevel = true, +) + T = Float64 + TD = Array{Float64,1} + TI = Array{Int,1} + TM = Array{Float64,2} + if use_gpu + CUDA.device!(gpu_no) + TD = CuArray{Float64,1} + TI = CuArray{Int,1} + TM = CuArray{Float64,2} + end + + env = AdmmEnv{T,TD,TI,TM}( + case, + rho_pq, + rho_va; + case_format = case_format, + use_gpu = use_gpu, + use_linelimit = use_linelimit, + use_projection = use_projection, + tight_factor = tight_factor, + gpu_no = gpu_no, + storage_ratio = storage_ratio, + storage_charge_max = storage_charge_max, + verbose = verbose, + ) + mod = ModelQpsub{T,TD,TI,TM}(env) + + data = mod.grid_data + + mod.Hs = copy(Hs) + mod.LH_1h = copy(LH_1h) + mod.RH_1h = copy(RH_1h) + mod.LH_1i = copy(LH_1i) + mod.RH_1i = copy(RH_1i) + mod.LH_1j = copy(LH_1j) + mod.RH_1j = copy(RH_1j) + mod.LH_1k = copy(LH_1k) + mod.RH_1k = copy(RH_1k) + mod.ls = copy(ls) + mod.us = copy(us) + + mod.qpsub_pgmax = copy(pgmax) + mod.qpsub_pgmin = copy(pgmin) + mod.qpsub_qgmax = copy(qgmax) + mod.qpsub_qgmin = copy(qgmin) + + mod.qpsub_c1 = copy(c1) + mod.qpsub_c2 = copy(c2) + mod.qpsub_Pd = copy(Pd) + mod.qpsub_Qd = copy(Qd) + + env.params.scale = scale + env.params.obj_scale = obj_scale + env.params.outer_eps = outer_eps + env.params.outer_iterlim = outer_iterlim + env.params.inner_iterlim = inner_iterlim + env.params.shmem_size = sizeof(Float64) * (16 * mod.n + 4 * mod.n^2 + 178) + sizeof(Int) * (4 * mod.n) + env.params.initial_beta = initial_beta #use my initial beta + + init_solution!(mod, mod.solution, env.initial_rho_pq, env.initial_rho_va) + + #one-level or two-level admm + if onelevel + admm_one_level(env, mod) + else + @warn "two-level ADMM is not implemented in QPsub" + end + return env, mod +end diff --git a/src/models/qpsub/qpsub_admm_increment.jl b/src/models/qpsub/qpsub_admm_increment.jl new file mode 100644 index 0000000..69947af --- /dev/null +++ b/src/models/qpsub/qpsub_admm_increment.jl @@ -0,0 +1,44 @@ +""" + admm_increment_outer() + +increment outer iteration +""" + +function admm_increment_outer( + env::AdmmEnv, + mod::ModelQpsub +) + mod.info.outer += 1 + return +end + + +""" + admm_increment_reset_inner() + +reset inner iteration when outer iteration just incremented +""" + +function admm_increment_reset_inner( + env::AdmmEnv, + mod::ModelQpsub +) + mod.info.inner = 0 + return +end + + +""" + admm_increment_inner() + +increment inner iteration +""" + +function admm_increment_inner( + env::AdmmEnv, + mod::ModelQpsub +) + mod.info.inner += 1 + mod.info.cumul += 1 + return +end diff --git a/src/models/qpsub/qpsub_admm_prepoststep_cpu.jl b/src/models/qpsub/qpsub_admm_prepoststep_cpu.jl new file mode 100644 index 0000000..6a0a834 --- /dev/null +++ b/src/models/qpsub/qpsub_admm_prepoststep_cpu.jl @@ -0,0 +1,83 @@ 
+""" + admm_poststep() + +- after admm termination, fix solution pf_projection() and record time mod.info.time_projection +- update info.objval and info.auglag with projected solution +- feed step solution, multipliers and KKT error info to SQOPF +""" + +function admm_poststep( + env::AdmmEnv{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}}, + mod::ModelQpsub{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}} +) + par, data, sol, info, grid_data = env.params, env.data, mod.solution, mod.info, mod.grid_data + + if env.use_projection + time_projection = @timed pf_projection(env, mod) + mod.info.time_projection = time_projection.time + end + + #final objective and auglag + info.objval = sum(mod.qpsub_c2[g]*(grid_data.baseMVA*sol.u_curr[mod.gen_start+2*(g-1)])^2 + + mod.qpsub_c1[g]*(grid_data.baseMVA*sol.u_curr[mod.gen_start+2*(g-1)]) + for g in 1:grid_data.ngen) + + sum(0.5*dot(mod.sqp_line[:,l],mod.Hs[6*(l-1)+1:6*l,1:6],mod.sqp_line[:,l]) for l=1:grid_data.nline) + + info.auglag = info.objval + sum(sol.lz[i]*sol.z_curr[i] for i=1:mod.nvar) + + 0.5*par.beta*sum(sol.z_curr[i]^2 for i=1:mod.nvar) + + sum(sol.l_curr[i]*sol.rp[i] for i=1:mod.nvar) + + 0.5*sum(sol.rho[i]*(sol.rp[i])^2 for i=1:mod.nvar) + + + #find dual infeas kkt for SQP integration + pg_dual_infeas = [2*mod.qpsub_c2[g]*(grid_data.baseMVA)^2*sol.u_curr[mod.gen_start+2*(g-1)] for g = 1:grid_data.ngen] + line_dual_infeas = vcat([mod.Hs[6*(l-1)+1:6*l,1:6] * mod.sqp_line[:,l] for l = 1:grid_data.nline]...) + mod.dual_infeas = vcat(pg_dual_infeas, line_dual_infeas) #unscale + + #assign value to step variable; due to inexact steps, other options include use u_curr, v_curr alone or average + #generation + @inbounds begin + for g = 1: grid_data.ngen + pg_idx = mod.gen_start + 2*(g-1) + qg_idx = mod.gen_start + 2*(g-1) + 1 + mod.dpg_sol[g] = sol.u_curr[pg_idx] + mod.dqg_sol[g] = sol.u_curr[qg_idx] + end + + mod.dline_var = copy(mod.sqp_line) + + for l = 1:grid_data.nline + shift_idx = mod.line_start + 8*(l-1) + mod.dline_fl[1,l] = sol.u_curr[shift_idx] #pij + mod.dline_fl[2,l] = sol.u_curr[shift_idx + 1] #qij + mod.dline_fl[3,l] = sol.u_curr[shift_idx + 2] #pji + mod.dline_fl[4,l] = sol.u_curr[shift_idx + 3] #qji + end + + for b = 1: grid_data.nbus + dw_ct = 0 + dw_sum = 0 + dt_sum = 0 + dt_ct = 0 + if grid_data.FrStart[b] < grid_data.FrStart[b+1] + for k = grid_data.FrStart[b]:grid_data.FrStart[b+1]-1 + dw_sum += mod.dline_var[3, grid_data.FrIdx[k]] #wi(ij) + dw_ct += 1 + dt_sum += mod.dline_var[5, grid_data.FrIdx[k]] #ti(ij) + dt_ct += 1 + end + end + if grid_data.ToStart[b] < grid_data.ToStart[b+1] + for k = grid_data.ToStart[b]:grid_data.ToStart[b+1]-1 + dw_sum += mod.dline_var[4, grid_data.ToIdx[k]] #wj(ji) + dt_sum += mod.dline_var[6, grid_data.ToIdx[k]] #tj(ji) + dw_ct += 1 + dt_ct += 1 + end + end + mod.dw_sol[b] = dw_sum/dw_ct #average + mod.dtheta_sol[b] = dt_sum/dt_ct #average + end + end #inbound + return +end diff --git a/src/models/qpsub/qpsub_admm_prepoststep_gpu.jl b/src/models/qpsub/qpsub_admm_prepoststep_gpu.jl new file mode 100644 index 0000000..56d9899 --- /dev/null +++ b/src/models/qpsub/qpsub_admm_prepoststep_gpu.jl @@ -0,0 +1,105 @@ +""" + admm_poststep() + +- after admm termination, fix solution pf_projection() and record time mod.info.time_projection +- update info.objval and info.auglag with projected solution +- feed step solution, multipliers and KKT error info to SQOPF +""" + +function admm_poststep( + env::AdmmEnv{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}}, + 
mod::ModelQpsub{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}} +) + par, data, sol, info, grid_data = env.params, env.data, mod.solution, mod.info, mod.grid_data + + if env.use_projection + time_projection = @timed pf_projection(env, mod) + mod.info.time_projection = time_projection.time + end + + u_curr = zeros(mod.nvar) + copyto!(u_curr, sol.u_curr) + + l_curr = Array(sol.l_curr) + rho = Array(sol.rho) + rp = Array(sol.rp) + + c2 = zeros(grid_data.ngen) + copyto!(c2, mod.qpsub_c2) + + c1 = zeros(grid_data.ngen) + copyto!(c1, mod.qpsub_c1) + + sqp_line = Array(mod.sqp_line) + + FrStart = Array(grid_data.FrStart) + FrIdx = Array(grid_data.FrIdx) + + ToStart = Array(grid_data.ToStart) + ToIdx = Array(grid_data.ToIdx) + + Hs = Array(mod.Hs) + + #final objective and auglag + info.objval = sum(c2[g]*(grid_data.baseMVA*u_curr[mod.gen_start+2*(g-1)])^2 + + c1[g]*(grid_data.baseMVA*u_curr[mod.gen_start+2*(g-1)]) + for g in 1:grid_data.ngen) + + sum(0.5*dot(sqp_line[:,l],Hs[6*(l-1)+1:6*l,1:6],sqp_line[:,l]) for l=1:grid_data.nline) + + info.auglag = info.objval + + sum(l_curr[i]*rp[i] for i=1:mod.nvar) + + 0.5*sum(rho[i]*(rp[i])^2 for i=1:mod.nvar) + + + #find dual infeas kkt for SQP integration + pg_dual_infeas = [2*c2[g]*(grid_data.baseMVA)^2*u_curr[mod.gen_start+2*(g-1)] for g = 1:grid_data.ngen] + line_dual_infeas = vcat([Hs[6*(l-1)+1:6*l,1:6] * sqp_line[:,l] for l = 1:grid_data.nline]...) + mod.dual_infeas = vcat(pg_dual_infeas, line_dual_infeas) #unscale + + #assign value to step variable; due to inexact steps, other options include use u_curr, v_curr alone or average + #generation + @inbounds begin + for g = 1: grid_data.ngen + pg_idx = mod.gen_start + 2*(g-1) + qg_idx = mod.gen_start + 2*(g-1) + 1 + mod.dpg_sol[g] = u_curr[pg_idx] + mod.dqg_sol[g] = u_curr[qg_idx] + end + + copyto!(mod.dline_var, sqp_line) + + for l = 1:grid_data.nline + shift_idx = mod.line_start + 8*(l-1) + mod.dline_fl[1,l] = u_curr[shift_idx] #pij + mod.dline_fl[2,l] = u_curr[shift_idx + 1] #qij + mod.dline_fl[3,l] = u_curr[shift_idx + 2] #pji + mod.dline_fl[4,l] = u_curr[shift_idx + 3] #qji + end + + for b = 1: grid_data.nbus + dw_ct = 0 + dw_sum = 0 + dt_sum = 0 + dt_ct = 0 + if FrStart[b] < FrStart[b+1] + for k = FrStart[b]:FrStart[b+1]-1 + dw_sum += sqp_line[3, FrIdx[k]] #wi(ij) + dw_ct += 1 + dt_sum += sqp_line[5, FrIdx[k]] #ti(ij) + dt_ct += 1 + end + end + if ToStart[b] < ToStart[b+1] + for k = ToStart[b]:ToStart[b+1]-1 + dw_sum += sqp_line[4, ToIdx[k]] #wj(ji) + dt_sum += sqp_line[6, ToIdx[k]] #tj(ji) + dw_ct += 1 + dt_ct += 1 + end + end + mod.dw_sol[b] = dw_sum/dw_ct #average + mod.dtheta_sol[b] = dt_sum/dt_ct #average + end + end #inbound + return +end diff --git a/src/models/qpsub/qpsub_admm_update_l_single_cpu.jl b/src/models/qpsub/qpsub_admm_update_l_single_cpu.jl new file mode 100644 index 0000000..9de9c21 --- /dev/null +++ b/src/models/qpsub/qpsub_admm_update_l_single_cpu.jl @@ -0,0 +1,18 @@ +""" + admm_update_l() + +- update l +- record time info.time_l_update +- only used in one-level ADMM +""" + +function admm_update_l_single( + env::AdmmEnv{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}}, + mod::ModelQpsub{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}} +) + par, sol, info = env.params, mod.solution, mod.info + sol.l_prev = sol.l_curr + ltime = @timed sol.l_curr .= sol.l_prev + sol.rho .* (sol.u_curr-sol.v_curr) + info.time_l_update += ltime.time + return +end diff --git a/src/models/qpsub/qpsub_admm_update_l_single_gpu.jl 
b/src/models/qpsub/qpsub_admm_update_l_single_gpu.jl new file mode 100644 index 0000000..9d07703 --- /dev/null +++ b/src/models/qpsub/qpsub_admm_update_l_single_gpu.jl @@ -0,0 +1,37 @@ +""" + admm_update_l() + +- update l +- record time info.time_l_update +- only used in one-level ADMM +- GPU kernel: update_l_kernel_single +""" + + +function update_l_kernel_single( + n::Int, l_curr::CuDeviceArray{Float64,1}, l_prev::CuDeviceArray{Float64,1}, u::CuDeviceArray{Float64,1}, + v::CuDeviceArray{Float64,1}, rho::CuDeviceArray{Float64,1} + ) + tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) + + if tx <= n + @inbounds begin + l_curr[tx] = l_prev[tx] + rho[tx] * (u[tx] - v[tx]) + end + end + + return +end + + + +function admm_update_l_single( + env::AdmmEnv{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}}, + mod::ModelQpsub{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}} + ) + par, sol, info = env.params, mod.solution, mod.info + sol.l_prev = sol.l_curr + ltime = CUDA.@timed @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) update_l_kernel_single(mod.nvar, sol.l_curr, sol.l_prev, sol.u_curr, sol.v_curr, sol.rho) + info.time_l_update += ltime.time + return +end diff --git a/src/models/qpsub/qpsub_admm_update_residual_cpu.jl b/src/models/qpsub/qpsub_admm_update_residual_cpu.jl new file mode 100644 index 0000000..0c91a0f --- /dev/null +++ b/src/models/qpsub/qpsub_admm_update_residual_cpu.jl @@ -0,0 +1,39 @@ +""" + admm_update_residual() + +- compute termination errors and other info +- update info.primres, info.dualres, info.mismatch, info.objval, info.auglag +- update sol.rp, sol.rd, sol.Ax_plus_By +- only used in one-level ADMM +""" + +function admm_update_residual( + env::AdmmEnv{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}}, + mod::ModelQpsub{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}} +) + sol, info, data, par, grid_data = mod.solution, mod.info, env.data, env.params, mod.grid_data + + + sol.rp .= sol.u_curr .- sol.v_curr #u-v + sol.rd .= sol.rho .* (sol.v_curr - mod.v_prev)#from Boyd's single-level admm + sol.Ax_plus_By .= sol.rp #x-xbar + + info.primres = norm(sol.rp) + info.dualres = norm(sol.rd) + info.mismatch = norm(sol.Ax_plus_By) + + + info.objval = sum(mod.qpsub_c2[g]*(grid_data.baseMVA*sol.u_curr[mod.gen_start+2*(g-1)])^2 + + mod.qpsub_c1[g]*(grid_data.baseMVA*sol.u_curr[mod.gen_start+2*(g-1)]) + for g in 1:grid_data.ngen) + + sum(0.5*dot(mod.sqp_line[:,l],mod.Hs[6*(l-1)+1:6*l,1:6],mod.sqp_line[:,l]) for l=1:grid_data.nline) + + info.auglag = info.objval + sum(sol.lz[i]*sol.z_curr[i] for i=1:mod.nvar) + + 0.5*par.beta*sum(sol.z_curr[i]^2 for i=1:mod.nvar) + + sum(sol.l_curr[i]*sol.rp[i] for i=1:mod.nvar) + + 0.5*sum(sol.rho[i]*(sol.rp[i])^2 for i=1:mod.nvar) + + + + return +end diff --git a/src/models/qpsub/qpsub_admm_update_residual_gpu.jl b/src/models/qpsub/qpsub_admm_update_residual_gpu.jl new file mode 100644 index 0000000..f59192c --- /dev/null +++ b/src/models/qpsub/qpsub_admm_update_residual_gpu.jl @@ -0,0 +1,52 @@ +""" + admm_update_residual() + +- compute termination errors and other info +- update info.primres, info.dualres, info.mismatch +- update sol.rp, sol.rd, sol.Ax_plus_By +- only used in one-level ADMM +- GPU kernel: compute_primal_residual_kernel_qpsub, compute_dual_residual_kernel_qpsub, copy_data_kernel +""" + +function compute_primal_residual_kernel_qpsub(n::Int, rp::CuDeviceArray{Float64,1}, u::CuDeviceArray{Float64,1}, v::CuDeviceArray{Float64,1}) + tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) + + 
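    # tx is the 1-based global index of this thread over the nvar solution
    # entries; the kernel is launched with threads=64 and blocks=div(nvar-1,64)+1,
    # so the `if tx <= n` guard that follows drops the excess threads in the
    # last block.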
if tx <= n + rp[tx] = u[tx] - v[tx] #u-v or x-xbar + end + + return +end + +function compute_dual_residual_kernel_qpsub(n::Int, rd::CuDeviceArray{Float64,1}, v_curr::CuDeviceArray{Float64,1}, v_prev::CuDeviceArray{Float64,1}, rho) + tx = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) + + if tx <= n + rd[tx] = rho[tx] * (v_curr[tx] - v_prev[tx]) #from Boyd's single-level admm + end + + return +end + + +function admm_update_residual( + env::AdmmEnv{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}}, + mod::ModelQpsub{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}} +) + sol, info, data, par, grid_data = mod.solution, mod.info, env.data, env.params, mod.grid_data + + @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) compute_primal_residual_kernel_qpsub(mod.nvar, sol.rp, sol.u_curr, sol.v_curr) + + @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) compute_dual_residual_kernel_qpsub(mod.nvar, sol.rd, sol.v_curr, mod.v_prev, sol.rho) + + @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) copy_data_kernel(mod.nvar, sol.Ax_plus_By, sol.rp) # from gpu utility + + + info.primres = CUDA.norm(sol.rp) + + info.dualres = CUDA.norm(sol.rd) + + info.mismatch = CUDA.norm(sol.Ax_plus_By) + + return +end diff --git a/src/models/qpsub/qpsub_admm_update_x_cpu.jl b/src/models/qpsub/qpsub_admm_update_x_cpu.jl new file mode 100644 index 0000000..d2d1b4b --- /dev/null +++ b/src/models/qpsub/qpsub_admm_update_x_cpu.jl @@ -0,0 +1,51 @@ +""" + acopf_admm_update_x_line() + +- update xline: call auglag_linelimit_two_level_alternative_qpsub() = update sol.x[pij_idx] +- record run time info.user.time_branches, info.time_x_update +""" + +function acopf_admm_update_x_line( + env::AdmmEnv{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}}, + mod::ModelQpsub{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}} + ) + par, sol, info, data = env.params, mod.solution, mod.info, mod.grid_data + +@inbounds begin + for i = 1 : mod.grid_data.nline + shift_idx = mod.line_start + 8*(i-1) + A_ipopt = eval_A_branch_kernel_cpu_qpsub(mod.Hs[6*(i-1)+1:6*i,1:6], sol.l_curr[shift_idx : shift_idx + 7], + sol.rho[shift_idx : shift_idx + 7], sol.v_curr[shift_idx : shift_idx + 7], + sol.z_curr[shift_idx : shift_idx + 7], + mod.grid_data.YffR[i], mod.grid_data.YffI[i], + mod.grid_data.YftR[i], mod.grid_data.YftI[i], + mod.grid_data.YttR[i], mod.grid_data.YttI[i], + mod.grid_data.YtfR[i], mod.grid_data.YtfI[i]) + + b_ipopt = eval_b_branch_kernel_cpu_qpsub(sol.l_curr[shift_idx : shift_idx + 7], + sol.rho[shift_idx : shift_idx + 7], sol.v_curr[shift_idx : shift_idx + 7], + sol.z_curr[shift_idx : shift_idx + 7], + mod.grid_data.YffR[i], mod.grid_data.YffI[i], + mod.grid_data.YftR[i], mod.grid_data.YftI[i], + mod.grid_data.YttR[i], mod.grid_data.YttI[i], + mod.grid_data.YtfR[i], mod.grid_data.YtfI[i], mod.line_res[:,i]) + + #call reduced branch kernel + time_br = @timed tronx, tronf = auglag_Ab_linelimit_two_level_alternative_qpsub_ij_red(info.inner, par.max_auglag, par.mu_max, par.scale, A_ipopt, b_ipopt, mod.ls[i,:], mod.us[i,:], mod.sqp_line, sol.l_curr[shift_idx : shift_idx + 7], + sol.rho[shift_idx : shift_idx + 7], sol.u_curr, shift_idx, sol.v_curr[shift_idx : shift_idx + 7], + sol.z_curr[shift_idx : shift_idx + 7], mod.qpsub_membuf,i, + mod.grid_data.YffR[i], mod.grid_data.YffI[i], + mod.grid_data.YftR[i], mod.grid_data.YftI[i], + mod.grid_data.YttR[i], mod.grid_data.YttI[i], + mod.grid_data.YtfR[i], mod.grid_data.YtfI[i], + mod.LH_1h[i,:], mod.RH_1h[i], mod.LH_1i[i,:], mod.RH_1i[i], mod.LH_1j[i,:], mod.RH_1j[i], 
mod.LH_1k[i,:], mod.RH_1k[i], mod.lambda, mod.line_res[:,i]) + + + + info.user.time_branches += time_br.time + info.time_x_update += time_br.time + end +end #@inbounds + +return +end diff --git a/src/models/qpsub/qpsub_admm_update_x_gpu.jl b/src/models/qpsub/qpsub_admm_update_x_gpu.jl new file mode 100644 index 0000000..61701af --- /dev/null +++ b/src/models/qpsub/qpsub_admm_update_x_gpu.jl @@ -0,0 +1,26 @@ +""" + acopf_admm_update_x_line() + +- update xline: call auglag_linelimit_qpsub() = update sol.x[pij_idx] +- record run time info.user.time_branches, info.time_x_update +""" + +function acopf_admm_update_x_line( + env::AdmmEnv{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}}, + mod::ModelQpsub{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}} + ) + par, sol, info, data = env.params, mod.solution, mod.info, mod.grid_data + + shmem_size = env.params.shmem_size + + time_br = CUDA.@timed @cuda threads=32 blocks=data.nline shmem=shmem_size auglag_linelimit_qpsub(mod.Hs, sol.l_curr, sol.rho, sol.u_curr, sol.v_curr, sol.z_curr, mod.grid_data.YffR, mod.grid_data.YffI, + mod.grid_data.YftR, mod.grid_data.YftI, + mod.grid_data.YttR, mod.grid_data.YttI, + mod.grid_data.YtfR, mod.grid_data.YtfI, info.inner, par.max_auglag, par.mu_max, par.scale, mod.ls, mod.us, mod.sqp_line, + mod.qpsub_membuf, mod.LH_1h, mod.RH_1h, mod.LH_1i, mod.RH_1i, mod.LH_1j, mod.RH_1j, mod.LH_1k, mod.RH_1k, mod.lambda, mod.line_start, mod.grid_data.nline, mod.supY, mod.line_res) + + info.user.time_branches += time_br.time + info.time_x_update += time_br.time + +return +end diff --git a/src/models/qpsub/qpsub_admm_update_xbar_cpu.jl b/src/models/qpsub/qpsub_admm_update_xbar_cpu.jl new file mode 100644 index 0000000..6a48073 --- /dev/null +++ b/src/models/qpsub/qpsub_admm_update_xbar_cpu.jl @@ -0,0 +1,23 @@ +""" + admm_update_xbar() + +- update xbar: call bus_kernel_two_level_alternative_qpsub() = update sol.v (full) +- record run time info.time_xbar_update, info.user.time_buses +""" + + + +function admm_update_xbar( + env::AdmmEnv{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}}, + mod::ModelQpsub{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}} +) +sol, info, data = mod.solution, mod.info, mod.grid_data +mod.v_prev .= sol.v_curr +bus_time = @timed bus_kernel_two_level_alternative(data.baseMVA, data.nbus, mod.gen_start, mod.line_start, + data.FrStart, data.FrIdx, data.ToStart, data.ToIdx, data.GenStart, + data.GenIdx, mod.qpsub_Pd, mod.qpsub_Qd, sol.u_curr, sol.v_curr, sol.z_curr, + sol.l_curr, sol.rho, data.YshR, data.YshI) +info.time_xbar_update += bus_time.time + info.user.time_buses += bus_time.time + return +end diff --git a/src/models/qpsub/qpsub_admm_update_xbar_gpu.jl b/src/models/qpsub/qpsub_admm_update_xbar_gpu.jl new file mode 100644 index 0000000..8b6d40b --- /dev/null +++ b/src/models/qpsub/qpsub_admm_update_xbar_gpu.jl @@ -0,0 +1,27 @@ +""" + admm_update_xbar() + +- update xbar: call bus_kernel_two_level_alternative_qpsub() = update sol.v (full) +- record run time info.time_xbar_update, info.user.time_buses +""" + + + +function admm_update_xbar( + env::AdmmEnv{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}}, + mod::ModelQpsub{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}} + ) + sol, info, data = mod.solution, mod.info, mod.grid_data + + nblk_bus = div(data.nbus, 32, RoundUp) + + @cuda threads=64 blocks=(div(mod.nvar-1, 64)+1) copy_data_kernel(mod.nvar, mod.v_prev, sol.v_curr) + + bus_time = CUDA.@timed @cuda threads=32 blocks=nblk_bus 
bus_kernel_two_level_alternative(data.baseMVA, data.nbus, mod.gen_start, mod.line_start, + data.FrStart, data.FrIdx, data.ToStart, data.ToIdx, data.GenStart, + data.GenIdx, mod.qpsub_Pd, mod.qpsub_Qd, sol.u_curr, sol.v_curr, sol.z_curr, + sol.l_curr, sol.rho, data.YshR, data.YshI) + info.time_xbar_update += bus_time.time + info.user.time_buses += bus_time.time + return +end diff --git a/src/models/qpsub/qpsub_auglag_Ab_linelimit_kernel_red_cpu.jl b/src/models/qpsub/qpsub_auglag_Ab_linelimit_kernel_red_cpu.jl new file mode 100644 index 0000000..c1603f7 --- /dev/null +++ b/src/models/qpsub/qpsub_auglag_Ab_linelimit_kernel_red_cpu.jl @@ -0,0 +1,157 @@ +""" + auglag_linelimit_two_level_alternative_qpsub_ij_red() + +- for certain line (i,j), update sol.u[pij_idx] +- use Exatron, eval_A_auglag_branch_kernel_cpu_qpsub, eval_b_auglag_branch_kernel_cpu_qpsub, build_QP_DS +- LANCELOT ALM algorithm +- with elimination of w_ijR and w_ijI (v2 in overleaf) +- with multiplier output +""" + + +""" + Internal Solution Structure for branch + +- branch structure from u (8*nline): + - |p_ij | q_ij | p_ji | q_ji | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji) | + +- branch structure for Exatron (8*nline): + - | t_ij(linelimit) | t_ji(linelimit) | w_ijR | w_ijI | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji) + +- branch structure for Exatron (6*nline): eliminate w_ijR, wij_I + - | t_ij(linelimit) | t_ji(linelimit) | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji) + +- Hessian inherited from SQP ie sqp_line (6*nline): + - |w_ijR | w_ijI | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji)| +""" + + +function auglag_Ab_linelimit_two_level_alternative_qpsub_ij_red( + major_iter::Int, max_auglag::Int, mu_max::Float64, scale::Float64, + Hbr::Array{Float64,2}, bbr::Array{Float64,1}, lqp::Array{Float64,1}, uqp::Array{Float64,1},sqp_line::Array{Float64,2}, + l::Array{Float64,1}, rho::Array{Float64,1}, + u::Array{Float64,1}, shift_idx::Int, v::Array{Float64,1}, z_curr::Array{Float64,1}, membuf::Array{Float64,2},lineidx::Int, + YffR::Float64, YffI::Float64, + YftR::Float64, YftI::Float64, + YttR::Float64, YttI::Float64, + YtfR::Float64, YtfI::Float64, + LH_1h::Array{Float64,1}, RH_1h::Float64, + LH_1i::Array{Float64,1}, RH_1i::Float64, LH_1j::Array{Float64,1},RH_1j::Float64, LH_1k::Array{Float64,1},RH_1k::Float64, lambda::Array{Float64,2}, line_res::Array{Float64,1}) + + + # variable wrt branch structure wrt Exatron + x = [0.0; 0.0; sqp_line[3:6,lineidx]] #initialization + f = 0.0 + xl = [0.0; 0.0; lqp[3:6]] + xu = [200000.0; 200000.0; uqp[3:6]] + trg = zeros(6) #hold multiplier + + Ctron = zeros(8,6) + dtron = zeros(8) + + # initialization on penalty + if major_iter == 1 #info.inner = 1 (first inner iteration) + membuf[5,lineidx] = 10.0 #reset ρ_1h = ρ_1i = ρ_1j = ρ_1k (let ρ the same for all AL terms) + mu = 10.0 # set mu = initial ρ_* + else + mu = membuf[5,lineidx] #inherit mu = ρ from the previous inner iteration + end + + + # set internal parameters eta omega mu to guide ALM convergence + eta = 1 / mu^0.1 + + + + it = 0 #iteration count + terminate = false #termination status + + #pij qij pji qji wrt branch structure ExaTron + supY = [0 0 YftR YftI YffR 0 0 0; + 0 0 -YftI YftR -YffI 0 0 0; + 0 0 YtfR -YtfI 0 YttR 0 0; + 0 0 -YtfI -YtfR 0 -YttI 0 0] + + #ALM on equality constraint wrt branch structure ExaTron + vec_1j = [1, 0, 0, 0, 0, 0, 0, 0] + LH_1j[1]* supY[1,:] + LH_1j[2]* supY[2,:] #1j with t_ij + vec_1k = [0, 1, 0, 0, 0, 0, 0, 0] + LH_1k[1]* supY[3,:] + LH_1k[2]* supY[4,:] #1k with t_ji + + + while !terminate + it += 1 + + #create 
QP parameters SCALED + Atron, Atron_red = eval_A_auglag_branch_kernel_cpu_qpsub_red(Hbr,l, rho, v, z_curr, membuf[:,lineidx], + YffR, YffI, YftR, YftI, YttR, YttI, YtfR, YtfI, + LH_1h, RH_1h, LH_1i, RH_1i, LH_1j, RH_1j, LH_1k, RH_1k, scale) + Ctron, dtron, btron_red = eval_b_auglag_branch_kernel_cpu_qpsub_red(Atron, bbr,l, rho, v, z_curr, membuf[:,lineidx], + YffR, YffI, YftR, YftI, YttR, YttI, YtfR, YtfI, + LH_1h, RH_1h, LH_1i, RH_1i, LH_1j, RH_1j, LH_1k, RH_1k, scale) + + + tron = build_QP_DS(Atron_red,btron_red,xl,xu) + tron.x .= x #initialization on tron + + # Solve the branch problem. + status = ExaTron.solveProblem(tron) + x .= tron.x + sqp_line[:,lineidx] .= (Ctron * x + dtron)[3:8] #write to sqp_line + f = tron.f #wont match with IPOPT obj because constant terms are ignored here + trg = tron.g + + + + # violation on 1h,1i,1j,1k + cviol3 = dot(vec_1j, Ctron * x + dtron) - RH_1j + cviol4 = dot(vec_1k, Ctron * x + dtron) - RH_1k + + cnorm = max(abs(cviol3), abs(cviol4)) #worst violation + + if cnorm <= eta + if cnorm <= 1e-6 + terminate = true + else + membuf[3,lineidx] += mu*cviol3 #λ_1j + membuf[4,lineidx] += mu*cviol4 #λ_1k + + eta = eta / mu^0.9 + end + else + mu = min(mu_max, mu*10) #increase penalty + eta = 1 / mu^0.1 + membuf[5,lineidx] = mu #save penalty for current inner iteration + end + + if it >= max_auglag && cnorm > 1e-6 #maximum iteration for auglag + println() + println("max_auglag reached for line with cnorm = ", cnorm, " for line = ", lineidx, " with mu ", mu) + + terminate = true + end + end #end while ALM + + + + #save variables + u[shift_idx] = dot(supY[1,:],Ctron * x + dtron) + line_res[1]#pij + u[shift_idx + 1] = dot(supY[2,:],Ctron * x + dtron) + line_res[2]#qij + u[shift_idx + 2] = dot(supY[3,:],Ctron * x + dtron) + line_res[3]#pji + u[shift_idx + 3] = dot(supY[4,:],Ctron * x + dtron) + line_res[4]#qji + u[shift_idx + 4] = x[3] #wi + u[shift_idx + 5] = x[4] #wj + u[shift_idx + 6] = x[5] #thetai + u[shift_idx + 7] = x[6] #thetaj + + #get multiplier + tmpH = inv([LH_1h[1] LH_1i[1]; LH_1h[2] LH_1i[2]]) + tmp14_i = [2*u[shift_idx]*YftR + 2*u[shift_idx + 1]*(-YftI), 2*u[shift_idx]*YftI + 2*u[shift_idx + 1]*YftR] + tmp14_h = [2*u[shift_idx + 2]*YtfR + 2*u[shift_idx + 3]*(-YtfI), 2*u[shift_idx + 2]*-YtfI + 2*u[shift_idx + 3]*(-YtfR)] + #14h 14i + lambda[1:2,lineidx] = -tmpH*(trg[1]*tmp14_i + trg[2]*tmp14_h + Hbr[1:2,1:2]*sqp_line[1:2,lineidx] + Hbr[1:2,3:6]*sqp_line[3:6,lineidx] + bbr[1:2]) + #14j + lambda[3,lineidx] = -abs(trg[1]) #<=0 one_side ineq + #14k + lambda[4,lineidx] = -abs(trg[2]) #<=0 one-side ineq + + return Ctron * x + dtron, f +end diff --git a/src/models/qpsub/qpsub_auglag_Ab_linelimit_kernel_red_gpu.jl b/src/models/qpsub/qpsub_auglag_Ab_linelimit_kernel_red_gpu.jl new file mode 100644 index 0000000..ce5e00b --- /dev/null +++ b/src/models/qpsub/qpsub_auglag_Ab_linelimit_kernel_red_gpu.jl @@ -0,0 +1,281 @@ +""" + auglag_linelimit_qpsub() + +- for certain line (i,j), update sol.u[pij_idx] +- use Exatron, eval_A_b_branch_kernel_gpu_qpsub, eval_A_b_auglag_branch_kernel_gpu_qpsub_red, tron_gpu_test +- LANCELOT ALM algorithm +- with elimination of w_ijR and w_ijI (v2 in overleaf) +- with multiplier output +""" + + +""" + Internal Solution Structure for branch + +- branch structure from u (8*nline): + - |p_ij | q_ij | p_ji | q_ji | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji) | + +- branch structure for Exatron (8*nline): + - | t_ij(linelimit) | t_ji(linelimit) | w_ijR | w_ijI | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji) + +- branch structure for Exatron 
(6*nline): eliminate w_ijR, wij_I + - | t_ij(linelimit) | t_ji(linelimit) | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji) + +- Hessian inherited from SQP ie sqp_line (6*nline): + - |w_ijR | w_ijI | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji)| +""" + + +function auglag_linelimit_qpsub(Hs, l_curr, rho, u_curr, v_curr, z_curr, YffR, YffI, + YftR, YftI, YttR, YttI, YtfR, YtfI, inner, max_auglag, mu_max, scale, lqp, uqp, sqp_line, + membuf, LH_1h, RH_1h, LH_1i, RH_1i, LH_1j, RH_1j, LH_1k, RH_1k, lambda, line_start, nline, supY, line_res) + + tx = threadIdx().x + lineidx = blockIdx().x + shift_idx = line_start + 8*(lineidx-1) + + + n = 6 + + x = CuDynamicSharedArray(Float64, n) #memory allocation + xl = CuDynamicSharedArray(Float64, n, n*sizeof(Float64)) + xu = CuDynamicSharedArray(Float64, n, (2*n)*sizeof(Float64)) + trg = CuDynamicSharedArray(Float64, n, (3*n)*sizeof(Float64)) + Hbr = CuDynamicSharedArray(Float64, (n,n), (4*n)*sizeof(Float64)) + bbr = CuDynamicSharedArray(Float64, n, (4*n + n^2)*sizeof(Float64)) + vec_1j = CuDynamicSharedArray(Float64, 8, (5*n + n^2)*sizeof(Float64)) + vec_1k = CuDynamicSharedArray(Float64, 8, (5*n + n^2 + 8)*sizeof(Float64)) + Ctron = CuDynamicSharedArray(Float64, (8,6), (5*n + n^2 + 16)*sizeof(Float64)) + dtron = CuDynamicSharedArray(Float64, 8, (5*n + n^2 + 64)*sizeof(Float64)) + A_aug = CuDynamicSharedArray(Float64, (8,8), (5*n + n^2 + 72)*sizeof(Float64)) + Atron = CuDynamicSharedArray(Float64, (6,6), (5*n + n^2 + 136)*sizeof(Float64)) + btron = CuDynamicSharedArray(Float64, 6, (5*n + n^2 + 172)*sizeof(Float64)) + + + #initialization: variable wrt branch structure wrt Exatron + x[1] = 0.0 + x[2] = 0.0 + x[3] = sqp_line[3,lineidx] + x[4] = sqp_line[4,lineidx] + x[5] = sqp_line[5,lineidx] + x[6] = sqp_line[6,lineidx] + + xl[1] = 0.0 + xl[2] = 0.0 + xl[3] = lqp[lineidx,3] + xl[4] = lqp[lineidx,4] + xl[5] = lqp[lineidx,5] + xl[6] = lqp[lineidx,6] + + xu[1] = 200000.0 + xu[2] = 200000.0 + xu[3] = uqp[lineidx,3] + xu[4] = uqp[lineidx,4] + xu[5] = uqp[lineidx,5] + xu[6] = uqp[lineidx,6] + + trg[1] = 0.0 + trg[2] = 0.0 + trg[3] = 0.0 + trg[4] = 0.0 + trg[5] = 0.0 + trg[6] = 0.0 + + eval_A_b_branch_kernel_gpu_qpsub( + Hs, l_curr, rho, v_curr, z_curr, Hbr, bbr, lineidx, shift_idx, supY, line_res) + + vec_1j[1] = 1 + LH_1j[lineidx,1]*supY[4*(lineidx - 1)+1,1] + LH_1j[lineidx,2]*supY[4*(lineidx - 1)+2,1] + vec_1j[2] = LH_1j[lineidx,1]*supY[4*(lineidx - 1)+1,2] + LH_1j[lineidx,2]*supY[4*(lineidx - 1)+2,2] + vec_1j[3] = LH_1j[lineidx,1]*supY[4*(lineidx - 1)+1,3] + LH_1j[lineidx,2]*supY[4*(lineidx - 1)+2,3] + vec_1j[4] = LH_1j[lineidx,1]*supY[4*(lineidx - 1)+1,4] + LH_1j[lineidx,2]*supY[4*(lineidx - 1)+2,4] + vec_1j[5] = LH_1j[lineidx,1]*supY[4*(lineidx - 1)+1,5] + LH_1j[lineidx,2]*supY[4*(lineidx - 1)+2,5] + vec_1j[6] = LH_1j[lineidx,1]*supY[4*(lineidx - 1)+1,6] + LH_1j[lineidx,2]*supY[4*(lineidx - 1)+2,6] + vec_1j[7] = LH_1j[lineidx,1]*supY[4*(lineidx - 1)+1,7] + LH_1j[lineidx,2]*supY[4*(lineidx - 1)+2,7] + vec_1j[8] = LH_1j[lineidx,1]*supY[4*(lineidx - 1)+1,8] + LH_1j[lineidx,2]*supY[4*(lineidx - 1)+2,8] + + vec_1k[1] = LH_1k[lineidx,1]*supY[4*(lineidx - 1)+3,1] + LH_1k[lineidx,2]*supY[4*(lineidx - 1)+4,1] + vec_1k[2] = 1 + LH_1k[lineidx,1]*supY[4*(lineidx - 1)+3,2] + LH_1k[lineidx,2]*supY[4*(lineidx - 1)+4,2] + vec_1k[3] = LH_1k[lineidx,1]*supY[4*(lineidx - 1)+3,3] + LH_1k[lineidx,2]*supY[4*(lineidx - 1)+4,3] + vec_1k[4] = LH_1k[lineidx,1]*supY[4*(lineidx - 1)+3,4] + LH_1k[lineidx,2]*supY[4*(lineidx - 1)+4,4] + vec_1k[5] = LH_1k[lineidx,1]*supY[4*(lineidx - 1)+3,5] + 
LH_1k[lineidx,2]*supY[4*(lineidx - 1)+4,5] + vec_1k[6] = LH_1k[lineidx,1]*supY[4*(lineidx - 1)+3,6] + LH_1k[lineidx,2]*supY[4*(lineidx - 1)+4,6] + vec_1k[7] = LH_1k[lineidx,1]*supY[4*(lineidx - 1)+3,7] + LH_1k[lineidx,2]*supY[4*(lineidx - 1)+4,7] + vec_1k[8] = LH_1k[lineidx,1]*supY[4*(lineidx - 1)+3,8] + LH_1k[lineidx,2]*supY[4*(lineidx - 1)+4,8] + + + prod = LH_1h[lineidx,1]*LH_1i[lineidx,2]-LH_1h[lineidx,2]*LH_1i[lineidx,1] + inv11 = LH_1i[lineidx,2]/prod + inv12 = -LH_1h[lineidx,2]/prod + inv21 = -LH_1i[lineidx,1]/prod + inv22 = LH_1h[lineidx,1]/prod + fill!(Ctron,0.0) + Ctron[1,1] = 1.0 + Ctron[2,2] = 1.0 + Ctron[3,1] = 0.0 + Ctron[3,2] = 0.0 + Ctron[3,3] = -inv11*LH_1h[lineidx,3] + Ctron[3,4] = -inv11*LH_1h[lineidx,4] + Ctron[3,5] = -inv12*LH_1i[lineidx,3] + Ctron[3,6] = -inv12*LH_1i[lineidx,4] + Ctron[4,1] = 0.0 + Ctron[4,2] = 0.0 + Ctron[4,3] = -inv21*LH_1h[lineidx,3] + Ctron[4,4] = -inv21*LH_1h[lineidx,4] + Ctron[4,5] = -inv22*LH_1i[lineidx,3] + Ctron[4,6] = -inv22*LH_1i[lineidx,4] + Ctron[5,3] = 1.0 + Ctron[6,4] = 1.0 + Ctron[7,5] = 1.0 + Ctron[8,6] = 1.0 + + + dtron[1] = 0.0 + dtron[2] = 0.0 + dtron[3] = inv11*RH_1h[lineidx] + inv12*RH_1i[lineidx] + dtron[4] = inv21*RH_1h[lineidx] + inv22*RH_1i[lineidx] + dtron[5] = 0.0 + dtron[6] = 0.0 + dtron[7] = 0.0 + dtron[8] = 0.0 + + + # initialization on penalty + if inner == 1 #info.inner = 1 (first inner iteration) + membuf[5,lineidx] = 10.0 #reset ρ_1h = ρ_1i = ρ_1j = ρ_1k (let ρ the same for all AL terms) + mu = 10.0 # set mu = initial ρ_* + else + mu = membuf[5,lineidx] #inherit mu = ρ from the previous inner iteration + end + + + + CUDA.sync_threads() + # set internal parameters eta omega mu to guide ALM convergence + eta = 1 / mu^0.1 + + + it = 0 #iteration count + terminate = false #termination status + + + + + + while !terminate + it += 1 + + eval_A_b_auglag_branch_kernel_gpu_qpsub_red(Hbr, bbr, A_aug, Atron, btron, scale,vec_1j,vec_1k,membuf,lineidx, Ctron,dtron,RH_1j,RH_1k) + + status, minor_iter = tron_gpu_test(n,Atron,btron,x,xl,xu) + + sqp0 = Ctron[1,1] * x[1] + Ctron[1,2]*x[2] + Ctron[1,3]*x[3] + Ctron[1,4]*x[4] + + Ctron[1,5]*x[5] + Ctron[1,6]*x[6] + dtron[1] + sqp1 = Ctron[2,1] * x[1] + Ctron[2,2]*x[2] + Ctron[2,3]*x[3] + Ctron[2,4]*x[4] + + Ctron[2,5]*x[5] + Ctron[2,6]*x[6] + dtron[2] + + sqp_line[1,lineidx] = Ctron[3,1] * x[1] + Ctron[3,2]*x[2] + Ctron[3,3]*x[3] + Ctron[3,4]*x[4] + + Ctron[3,5]*x[5] + Ctron[3,6]*x[6] + dtron[3] + sqp_line[2,lineidx] = Ctron[4,1] * x[1] + Ctron[4,2]*x[2] + Ctron[4,3]*x[3] + Ctron[4,4]*x[4] + + Ctron[4,5]*x[5] + Ctron[4,6]*x[6] + dtron[4] + sqp_line[3,lineidx] = Ctron[5,1] * x[1] + Ctron[5,2]*x[2] + Ctron[5,3]*x[3] + Ctron[5,4]*x[4] + + Ctron[5,5]*x[5] + Ctron[5,6]*x[6] + dtron[5] + sqp_line[4,lineidx] = Ctron[6,1] * x[1] + Ctron[6,2]*x[2] + Ctron[6,3]*x[3] + Ctron[6,4]*x[4] + + Ctron[6,5]*x[5] + Ctron[6,6]*x[6] + dtron[6] + sqp_line[5,lineidx] = Ctron[7,1] * x[1] + Ctron[7,2]*x[2] + Ctron[7,3]*x[3] + Ctron[7,4]*x[4] + + Ctron[7,5]*x[5] + Ctron[7,6]*x[6] + dtron[7] + sqp_line[6,lineidx] = Ctron[8,1] * x[1] + Ctron[8,2]*x[2] + Ctron[8,3]*x[3] + Ctron[8,4]*x[4] + + Ctron[8,5]*x[5] + Ctron[8,6]*x[6] + dtron[8] + + + + cviol3 = vec_1j[1]*sqp0 + vec_1j[2]*sqp1 + vec_1j[3]*sqp_line[1,lineidx] + vec_1j[4]*sqp_line[2,lineidx] + vec_1j[5]*sqp_line[3,lineidx] + + vec_1j[6]*sqp_line[4,lineidx] + vec_1j[7]*sqp_line[5,lineidx] + vec_1j[8]*sqp_line[6,lineidx] - RH_1j[lineidx] + + cviol4 = vec_1k[1]*sqp0 + vec_1k[2]*sqp1 + vec_1k[3]*sqp_line[1,lineidx] + vec_1k[4]*sqp_line[2,lineidx] + 
vec_1k[5]*sqp_line[3,lineidx] + + vec_1k[6]*sqp_line[4,lineidx] + vec_1k[7]*sqp_line[5,lineidx] + vec_1k[8]*sqp_line[6,lineidx] - RH_1k[lineidx] + + cnorm = max(abs(cviol3), abs(cviol4)) #worst violation + + if cnorm <= eta + if cnorm <= 1e-6 + terminate = true + else + if tx == 1 + membuf[3,lineidx] += mu*cviol3 #λ_1j + membuf[4,lineidx] += mu*cviol4 #λ_1k + end + eta = eta / mu^0.9 + + end + else + mu = min(mu_max, mu*10) #increase penalty + eta = 1 / mu^0.1 + membuf[5,lineidx] = mu #save penalty for current inner iteration + end + + if it >= max_auglag #maximum iteration for auglag + if tx == 1 + @cuprintln("max iteration reach at block = ",lineidx, "and threads = ",tx) + end + terminate = true + end + + CUDA.sync_threads() + end #end while ALM + + + + # #save variables + u_curr[shift_idx] = supY[4*(lineidx - 1) + 1,3]*sqp_line[1,lineidx] + supY[4*(lineidx - 1) + 1,4]*sqp_line[2,lineidx] + supY[4*(lineidx - 1) + 1,5]*sqp_line[3,lineidx] + + supY[4*(lineidx - 1) + 1,6]*sqp_line[4,lineidx] + supY[4*(lineidx - 1) + 1,7]*sqp_line[5,lineidx] + supY[4*(lineidx - 1) + 1,8]*sqp_line[6,lineidx] + line_res[1,lineidx] + + u_curr[shift_idx + 1] = supY[4*(lineidx - 1) + 2,3]*sqp_line[1,lineidx] + supY[4*(lineidx - 1) + 2,4]*sqp_line[2,lineidx] + supY[4*(lineidx - 1) + 2,5]*sqp_line[3,lineidx] + + supY[4*(lineidx - 1) + 2,6]*sqp_line[4,lineidx] + supY[4*(lineidx - 1) + 2,7]*sqp_line[5,lineidx] + supY[4*(lineidx - 1) + 2,8]*sqp_line[6,lineidx] + line_res[2,lineidx] + + u_curr[shift_idx + 2] = supY[4*(lineidx - 1) + 3,3]*sqp_line[1,lineidx] + supY[4*(lineidx - 1) + 3,4]*sqp_line[2,lineidx] + supY[4*(lineidx - 1) + 3,5]*sqp_line[3,lineidx] + + supY[4*(lineidx - 1) + 3,6]*sqp_line[4,lineidx] + supY[4*(lineidx - 1) + 3,7]*sqp_line[5,lineidx] + supY[4*(lineidx - 1) + 3,8]*sqp_line[6,lineidx] + line_res[3,lineidx] + + u_curr[shift_idx + 3] = supY[4*(lineidx - 1) + 4,3]*sqp_line[1,lineidx] + supY[4*(lineidx - 1) + 4,4]*sqp_line[2,lineidx] + supY[4*(lineidx - 1) + 4,5]*sqp_line[3,lineidx] + + supY[4*(lineidx - 1) + 4,6]*sqp_line[4,lineidx] + supY[4*(lineidx - 1) + 4,7]*sqp_line[5,lineidx] + supY[4*(lineidx - 1) + 4,8]*sqp_line[6,lineidx] + line_res[4,lineidx] + + u_curr[shift_idx + 4] = x[3] #wi + u_curr[shift_idx + 5] = x[4] #wj + u_curr[shift_idx + 6] = x[5] #thetai + u_curr[shift_idx + 7] = x[6] #thetaj + + + #get multiplier + tmpH11 = inv11 + tmpH12 = inv21 + tmpH21 = inv12 + tmpH22 = inv22 + + tmp14i_1 = 2*u_curr[shift_idx]*YftR[lineidx] + 2*u_curr[shift_idx + 1]*(-YftI[lineidx]) + tmp14i_2 = 2*u_curr[shift_idx]*YftI[lineidx] + 2*u_curr[shift_idx + 1]*YftR[lineidx] + + tmp14h_1 = 2*u_curr[shift_idx + 2]*YtfR[lineidx] + 2*u_curr[shift_idx + 3]*(-YtfI[lineidx]) + tmp14h_2 = 2*u_curr[shift_idx + 2]*(-YtfI[lineidx]) + 2*u_curr[shift_idx + 3]*(-YtfR[lineidx]) + + trg[1] = Atron[1,1] * x[1] + Atron[1,2]*x[2] + Atron[1,3]*x[3] + Atron[1,4]*x[4] + + Atron[1,5]*x[5] + Atron[1,6]*x[6] + btron[1] + + trg[2] = Atron[2,1] * x[1] + Atron[2,2]*x[2] + Atron[2,3]*x[3] + Atron[2,4]*x[4] + + Atron[2,5]*x[5] + Atron[2,6]*x[6] + btron[2] + + lambda[3,lineidx] = -abs(trg[1]) + lambda[4,lineidx] = -abs(trg[2]) + + rhs_1 = trg[1]*tmp14i_1 + trg[2]*tmp14h_1 + Hbr[1,1]*sqp_line[1,lineidx] + Hbr[1,2]*sqp_line[2,lineidx] + + Hbr[1,3]*sqp_line[3,lineidx] + Hbr[1,4]*sqp_line[4,lineidx] + Hbr[1,5]*sqp_line[5,lineidx] + + Hbr[1,6]*sqp_line[6,lineidx] + bbr[1] + + rhs_2 = trg[1]*tmp14i_2 + trg[2]*tmp14h_2 + Hbr[2,1]*sqp_line[1,lineidx] + Hbr[2,2]*sqp_line[2,lineidx] + + Hbr[2,3]*sqp_line[3,lineidx] + Hbr[2,4]*sqp_line[4,lineidx] + 
Hbr[2,5]*sqp_line[5,lineidx] + + Hbr[2,6]*sqp_line[6,lineidx] + bbr[2] + + lambda[1,lineidx] = -tmpH11*rhs_1 - tmpH12*rhs_2 + lambda[2,lineidx] = -tmpH21*rhs_1 - tmpH22*rhs_2 + + CUDA.sync_threads() + return +end diff --git a/src/models/qpsub/qpsub_auglag_tron_linelimit_kernel_cpu.jl b/src/models/qpsub/qpsub_auglag_tron_linelimit_kernel_cpu.jl new file mode 100644 index 0000000..f39ae96 --- /dev/null +++ b/src/models/qpsub/qpsub_auglag_tron_linelimit_kernel_cpu.jl @@ -0,0 +1,72 @@ +""" + build_QP_*() + +- build any box-constrained QP with Exatron.createproblem() +- implement eval_f, eval_g, eval_h callback functions +""" + +function build_QP_SP(A::Matrix{Float64}, b::Array{Float64, 1}, l::Array{Float64, 1}, u::Array{Float64, 1}) + n=length(b) + P = sparse(A) #make A sparse to use nnz() and findnz() + q = b + Iz, Jz, vals = findnz(P) #does not see symmetric record n*n + + eval_f(x) = 0.5 * dot(x, P, x) + dot(q, x) #obj watch out for 1/2 + + function eval_g(x, g) + fill!(g, 0) + mul!(g, P, x) + g .+= q + end + + #eval_h store all n*n entries + function eval_h(x, mode, rows, cols, obj_factor, lambda, values) + if mode == :Structure + for i in 1:nnz(P) #does not symmetric + rows[i] = Iz[i] + cols[i] = Jz[i] + end + else + copy!(values, vals) + end + end + + return ExaTron.createProblem(n, l, u, nnz(P), eval_f, eval_g, eval_h) #original without param +end + + +function build_QP_DS(A::Matrix{Float64}, b::Array{Float64, 1}, l::Array{Float64, 1}, u::Array{Float64, 1}) + n=length(b) + + + eval_f(x) = 0.5 * dot(x, A, x) + dot(b, x) #obj watch out for 1/2 + + function eval_g(x, g) + fill!(g, 0) + mul!(g, A, x) + g .+= b + end + + function eval_h(x, mode, rows, cols, obj_factor, lambda, values) + if mode == :Structure + nz = 1 + for j=1:n + for i in j:n + rows[nz] = i + cols[nz] = j + nz += 1 + end + end + else + nz = 1 + for j=1:n + for i in j:n + values[nz] = A[i, j] + nz += 1 + end + end + end + end + #with Youngdae's Param + return ExaTron.createProblem(n, l, u, Int64((n+1)*n/2), eval_f, eval_g, eval_h; :tol=> 1e-6, :matrix_type => :Dense, :max_minor => 200, :frtol => 1e-12) +end diff --git a/src/models/qpsub/qpsub_eval_Ab_linelimit_kernel_cpu.jl b/src/models/qpsub/qpsub_eval_Ab_linelimit_kernel_cpu.jl new file mode 100644 index 0000000..22b5e34 --- /dev/null +++ b/src/models/qpsub/qpsub_eval_Ab_linelimit_kernel_cpu.jl @@ -0,0 +1,254 @@ +""" + eval_A_*(), eval_b_*() + +- prepare call backs for build_QP_DS and IPOPT benchmark (solve branch kernel directly) +- use mod.membuf +""" + + +""" + Internal Solution Structure for branch + +- branch structure from u (8*nline): + - |p_ij | q_ij | p_ji | q_ji | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji) | + +- branch structure for Exatron (8*nline): + - | t_ij(linelimit) | t_ji(linelimit) | w_ijR | w_ijI | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji) + +- branch structure for Exatron (6*nline): eliminate w_ijR, wij_I + - | t_ij(linelimit) | t_ji(linelimit) | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji) + +- Hessian inherited from SQP (6*nline): + - |w_ijR | w_ijI | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji)| +""" + + + +function eval_A_branch_kernel_cpu_qpsub( + H::Array{Float64,2},l::Array{Float64,1}, rho::Array{Float64,1}, v::Array{Float64,1}, z_curr::Array{Float64,1}, + YffR::Float64, YffI::Float64, + YftR::Float64, YftI::Float64, + YttR::Float64, YttI::Float64, + YtfR::Float64, YtfI::Float64) + + + + #linear transform pij qij pji qji wrt Hessian inherited structure + supY = [YftR YftI YffR 0 0 0; + -YftI YftR -YffI 0 0 0; + YtfR -YtfI 0 YttR 0 0; + -YtfI -YtfR 0 
-YttI 0 0] + + H_new = H + @inbounds begin + + H_new .+= rho[1]*supY[1,:]*transpose(supY[1,:]) #pij + H_new .+= rho[2]*supY[2,:]*transpose(supY[2,:]) #qij + H_new .+= rho[3]*supY[3,:]*transpose(supY[3,:]) #pji + H_new .+= rho[4]*supY[4,:]*transpose(supY[4,:]) #qji + + H_new[3,3] += rho[5] #wi(ij) + H_new[4,4] += rho[6] #wj(ji) + H_new[5,5] += rho[7] #thetai(ij) + H_new[6,6] += rho[8] #thetaj(ji) + end + + return H #6*6 #not scale +end + +function eval_A_auglag_branch_kernel_cpu_qpsub( + Hbr::Array{Float64,2},l::Array{Float64,1}, rho::Array{Float64,1}, + v::Array{Float64,1}, z_curr::Array{Float64,1}, membuf::Array{Float64,1}, + YffR::Float64, YffI::Float64, + YftR::Float64, YftI::Float64, + YttR::Float64, YttI::Float64, + YtfR::Float64, YtfI::Float64, + LH_1h::Array{Float64,1}, RH_1h::Float64, + LH_1i::Array{Float64,1}, RH_1i::Float64, LH_1j::Array{Float64,1},RH_1j::Float64, LH_1k::Array{Float64,1},RH_1k::Float64,scale::Float64) + + A = zeros(8,8) + A[3:8,3:8] = Hbr + + + #pij qij pji qji wrt branch structure ExaTron + supY = [0 0 YftR YftI YffR 0 0 0; + 0 0 -YftI YftR -YffI 0 0 0; + 0 0 YtfR -YtfI 0 YttR 0 0; + 0 0 -YtfI -YtfR 0 -YttI 0 0] + + #ALM on equality constraint wrt branch structure ExaTron + vec_1h = [0, 0, LH_1h[1], LH_1h[2], LH_1h[3], LH_1h[4], 0, 0 ] #1h + vec_1i = [0, 0, LH_1i[1], LH_1i[2], 0, 0, LH_1i[3], LH_1i[4]] #1i + vec_1j = [1, 0, 0, 0, 0, 0, 0, 0] + LH_1j[1]* supY[1,:] + LH_1j[2]* supY[2,:] #1j with t_ij + vec_1k = [0, 1, 0, 0, 0, 0, 0, 0] + LH_1k[1]* supY[3,:] + LH_1k[2]* supY[4,:] #1k with t_ji + + @inbounds begin + A .+= membuf[5]*vec_1h*transpose(vec_1h) #add auglag for 1h + + A .+= membuf[5]*vec_1i*transpose(vec_1i) #add auglag for 1i + + A .+= membuf[5]*vec_1j*transpose(vec_1j) #add auglag for 1j + + A .+= membuf[5]*vec_1k*transpose(vec_1k) #add auglag for 1k + end + + return A*scale #dim = 8*8 scaled for non-red +end + +function eval_A_auglag_branch_kernel_cpu_qpsub_red( + Hbr::Array{Float64,2},l::Array{Float64,1}, rho::Array{Float64,1}, + v::Array{Float64,1}, z_curr::Array{Float64,1}, membuf::Array{Float64,1}, + YffR::Float64, YffI::Float64, + YftR::Float64, YftI::Float64, + YttR::Float64, YttI::Float64, + YtfR::Float64, YtfI::Float64, + LH_1h::Array{Float64,1}, RH_1h::Float64, + LH_1i::Array{Float64,1}, RH_1i::Float64, LH_1j::Array{Float64,1},RH_1j::Float64, LH_1k::Array{Float64,1},RH_1k::Float64,scale::Float64) + + A = zeros(8,8) + A[3:8,3:8] = Hbr + + + #pij qij pji qji wrt branch structure ExaTron + supY = [0 0 YftR YftI YffR 0 0 0; + 0 0 -YftI YftR -YffI 0 0 0; + 0 0 YtfR -YtfI 0 YttR 0 0; + 0 0 -YtfI -YtfR 0 -YttI 0 0] + + #ALM on equality constraint wrt branch structure ExaTron + vec_1j = [1, 0, 0, 0, 0, 0, 0, 0] + LH_1j[1]* supY[1,:] + LH_1j[2]* supY[2,:] #1j with t_ij + vec_1k = [0, 1, 0, 0, 0, 0, 0, 0] + LH_1k[1]* supY[3,:] + LH_1k[2]* supY[4,:] #1k with t_ji + + @inbounds begin + A .+= membuf[5]*vec_1j*transpose(vec_1j) #add auglag for 1j + + A .+= membuf[5]*vec_1k*transpose(vec_1k) #add auglag for 1k + end + + + + inv_ij = inv([LH_1h[1] LH_1h[2]; LH_1i[1] LH_1i[2]]) + C_ij = -inv_ij * [0 0 LH_1h[3] LH_1h[4] 0 0; 0 0 0 0 LH_1i[3] LH_1i[4]] + C = zeros(8,6) + C[1,1] = C[2,2] = 1 + C[3:4,:] .= C_ij + C[5,3] = 1 + C[6,4] = 1 + C[7,5] = 1 + C[8,6] = 1 + + return A, transpose(C)*A*C*scale #dim 8*8 no scale + dim = 6*6 scaled for red +end + +function eval_b_branch_kernel_cpu_qpsub( + l::Array{Float64,1}, rho::Array{Float64,1}, v::Array{Float64,1}, z::Array{Float64,1}, + YffR::Float64, YffI::Float64, + YftR::Float64, YftI::Float64, + YttR::Float64, 
YttI::Float64, + YtfR::Float64, YtfI::Float64, res::Array{Float64,1}) + + + supY = [YftR YftI YffR 0 0 0; + -YftI YftR -YffI 0 0 0; + YtfR -YtfI 0 YttR 0 0; + -YtfI -YtfR 0 -YttI 0 0] + + b = zeros(6) + + @inbounds begin + b .+= (l[1] - rho[1]*(v[1]-z[1]-res[1])) * supY[1,:] #pij + b .+= (l[2] - rho[2]*(v[2]-z[2]-res[2])) * supY[2,:] #qij + b .+= (l[3] - rho[3]*(v[3]-z[3]-res[3])) * supY[3,:] #pji + b .+= (l[4] - rho[4]*(v[4]-z[4]-res[4])) * supY[4,:] #qji + b[3] += (l[5] - rho[5]*(v[5]-z[5])) #wi(ij) + b[4] += (l[6] - rho[6]*(v[6]-z[6])) #wj(ji) + b[5] += (l[7] - rho[7]*(v[7]-z[7])) #thetai(ij) + b[6] += (l[8] - rho[8]*(v[8]-z[8])) #thetaj(ji) + end + + return b #size 6 noscale +end + +function eval_b_auglag_branch_kernel_cpu_qpsub( + bbr::Array{Float64,1}, l::Array{Float64,1}, rho::Array{Float64,1}, + v::Array{Float64,1}, z_curr::Array{Float64,1}, membuf::Array{Float64,1}, + YffR::Float64, YffI::Float64, + YftR::Float64, YftI::Float64, + YttR::Float64, YttI::Float64, + YtfR::Float64, YtfI::Float64, LH_1h::Array{Float64,1}, RH_1h::Float64, + LH_1i::Array{Float64,1}, RH_1i::Float64, LH_1j::Array{Float64,1},RH_1j::Float64, LH_1k::Array{Float64,1},RH_1k::Float64, scale::Float64) + + b = zeros(8) + b[3:8] = bbr + + + #pij qij pji qji wrt branch structure ExaTron + supY = [0 0 YftR YftI YffR 0 0 0; + 0 0 -YftI YftR -YffI 0 0 0; + 0 0 YtfR -YtfI 0 YttR 0 0; + 0 0 -YtfI -YtfR 0 -YttI 0 0] + + #ALM on equality constraint wrt branch structure ExaTron + vec_1h = [0, 0, LH_1h[1], LH_1h[2], LH_1h[3], LH_1h[4], 0, 0 ] #1h + vec_1i = [0, 0, LH_1i[1], LH_1i[2], 0, 0, LH_1i[3], LH_1i[4]] #1i + vec_1j = [1, 0, 0, 0, 0, 0, 0, 0] + LH_1j[1]* supY[1,:] + LH_1j[2]* supY[2,:] #1j with t_ij + vec_1k = [0, 1, 0, 0, 0, 0, 0, 0] + LH_1k[1]* supY[3,:] + LH_1k[2]* supY[4,:] #1k with t_ji + + @inbounds begin + b .+= (membuf[1] - membuf[5]*RH_1h)*vec_1h #1h + b .+= (membuf[2] - membuf[5]*RH_1i)*vec_1i #1i + b .+= (membuf[3] - membuf[5]*RH_1j)*vec_1j #1j + b .+= (membuf[4] - membuf[5]*RH_1k)*vec_1k #1k + end + + return b*scale #dim = 8 scaled +end + +function eval_b_auglag_branch_kernel_cpu_qpsub_red( + A_aug::Array{Float64,2}, bbr::Array{Float64,1}, l::Array{Float64,1}, rho::Array{Float64,1}, + v::Array{Float64,1}, z_curr::Array{Float64,1}, membuf::Array{Float64,1}, + YffR::Float64, YffI::Float64, + YftR::Float64, YftI::Float64, + YttR::Float64, YttI::Float64, + YtfR::Float64, YtfI::Float64, LH_1h::Array{Float64,1}, RH_1h::Float64, + LH_1i::Array{Float64,1}, RH_1i::Float64, LH_1j::Array{Float64,1},RH_1j::Float64, LH_1k::Array{Float64,1},RH_1k::Float64, scale::Float64) + + b = zeros(8) + b[3:8] = bbr + + + #pij qij pji qji wrt branch structure ExaTron + supY = [0 0 YftR YftI YffR 0 0 0; + 0 0 -YftI YftR -YffI 0 0 0; + 0 0 YtfR -YtfI 0 YttR 0 0; + 0 0 -YtfI -YtfR 0 -YttI 0 0] + + #ALM on equality constraint wrt branch structure ExaTron + vec_1j = [1, 0, 0, 0, 0, 0, 0, 0] + LH_1j[1]* supY[1,:] + LH_1j[2]* supY[2,:] #1j with t_ij + vec_1k = [0, 1, 0, 0, 0, 0, 0, 0] + LH_1k[1]* supY[3,:] + LH_1k[2]* supY[4,:] #1k with t_ji + + @inbounds begin + b .+= (membuf[3] - membuf[5]*RH_1j)*vec_1j #1j + b .+= (membuf[4] - membuf[5]*RH_1k)*vec_1k #1k + end + + inv_ij = inv([LH_1h[1] LH_1h[2]; LH_1i[1] LH_1i[2]]) + C_ij = -inv_ij * [0 0 LH_1h[3] LH_1h[4] 0 0; 0 0 0 0 LH_1i[3] LH_1i[4]] + d_ij = inv_ij * [RH_1h; RH_1i] + C = zeros(8,6) + C[1,1] = C[2,2] = 1 + C[3:4,:] .= C_ij + C[5,3] = 1 + C[6,4] = 1 + C[7,5] = 1 + C[8,6] = 1 + + d=zeros(8) + d[3:4] .= d_ij + + + + + + return C, d, transpose(C) * (A_aug * d + b)*scale #dims = 8*6, 8, 
6, coeff, coeff, scaled +end diff --git a/src/models/qpsub/qpsub_eval_Ab_linelimit_kernel_gpu.jl b/src/models/qpsub/qpsub_eval_Ab_linelimit_kernel_gpu.jl new file mode 100644 index 0000000..9b7dba1 --- /dev/null +++ b/src/models/qpsub/qpsub_eval_Ab_linelimit_kernel_gpu.jl @@ -0,0 +1,180 @@ +""" + Internal Solution Structure for branch + +- branch structure from u (8*nline): + - |p_ij | q_ij | p_ji | q_ji | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji) | + +- branch structure for Exatron (8*nline): + - | t_ij(linelimit) | t_ji(linelimit) | w_ijR | w_ijI | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji) + +- branch structure for Exatron (6*nline): eliminate w_ijR, wij_I + - | t_ij(linelimit) | t_ji(linelimit) | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji) + +- Hessian inherited from SQP (6*nline): + - |w_ijR | w_ijI | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji)| +""" + + +""" + eval_A_b_branch_kernel_gpu_qpsub() + +- prepare QP parameter of ADMM branch kernel (before ALM) +""" + + +function eval_A_b_branch_kernel_gpu_qpsub( + H, l, rho, v, z, A_ipopt, b_ipopt, id_line, shift_idx, supY, line_res) + tx = threadIdx().x + if tx == 1 + @inbounds begin + for i = 1:6 + for j = 1:6 + A_ipopt[i, j] = H[6*(id_line - 1) + i, j] + + rho[shift_idx]*supY[4*(id_line - 1) + 1,i+2]*supY[4*(id_line - 1) + 1,j+2] + + rho[shift_idx + 1]*supY[4*(id_line - 1) + 2,i+2]*supY[4*(id_line - 1) + 2,j+2] + + rho[shift_idx + 2]*supY[4*(id_line - 1) + 3,i+2]*supY[4*(id_line - 1) + 3,j+2] + + rho[shift_idx + 3]*supY[4*(id_line - 1) + 4,i+2]*supY[4*(id_line - 1) + 4,j+2] + end + end + + for j = 1:6 + b_ipopt[j] = (l[shift_idx] - rho[shift_idx]*(v[shift_idx] - z[shift_idx] - line_res[1, id_line]))*supY[4*(id_line - 1) + 1,j+2] + + (l[shift_idx+1] - rho[shift_idx+1]*(v[shift_idx+1] - z[shift_idx+1] - line_res[2, id_line]))*supY[4*(id_line - 1) + 2,j+2] + + (l[shift_idx+2] - rho[shift_idx+2]*(v[shift_idx+2] - z[shift_idx+2] - line_res[3, id_line]))*supY[4*(id_line - 1) + 3,j+2] + + (l[shift_idx+3] - rho[shift_idx+3]*(v[shift_idx+3] - z[shift_idx+3] - line_res[4, id_line]))*supY[4*(id_line - 1) + 4,j+2] + end + + b_ipopt[3] += l[shift_idx+4] - rho[shift_idx+4]*(v[shift_idx+4] - z[shift_idx+4]) + b_ipopt[4] += l[shift_idx+5] - rho[shift_idx+5]*(v[shift_idx+5] - z[shift_idx+5]) + b_ipopt[5] += l[shift_idx+6] - rho[shift_idx+6]*(v[shift_idx+6] - z[shift_idx+6]) + b_ipopt[6] += l[shift_idx+7] - rho[shift_idx+7]*(v[shift_idx+7] - z[shift_idx+7]) + + A_ipopt[3,3] += rho[shift_idx + 4] #wi(ij) + A_ipopt[4,4] += rho[shift_idx + 5] #wj(ji) + A_ipopt[5,5] += rho[shift_idx + 6] #thetai(ij) + A_ipopt[6,6] += rho[shift_idx + 7] #thetaj(ji) + end #@inbounds + end + + CUDA.sync_threads() + return #6*6 #not scale +end + + + +""" + eval_A_b_auglag_branch_kernel_gpu_qpsub_red() + +- prepare QP parameter of ADMM branch kernel (after model reduction and ALM) +- Input for Tron GPU +- use mod.membuf +""" + +function eval_A_b_auglag_branch_kernel_gpu_qpsub_red(Hbr, bbr, A_aug, Atron, btron, scale,vec_1j,vec_1k,membuf,lineidx, Ctron,dtron,RH_1j,RH_1k) + tx = threadIdx().x + if tx == 1 + fill!(A_aug, 0.0) + fill!(Atron, 0.0) + + @inbounds begin + for i = 3:8 + for j = 3:8 + A_aug[i,j] = Hbr[i-2,j-2] + end + end + + + + for i=1:8 + for j=1:8 + A_aug[i,j] += membuf[5,lineidx]*vec_1j[i]*vec_1j[j] + A_aug[i,j] += membuf[5,lineidx]*vec_1k[i]*vec_1k[j] + end + end + + + for i=1:6 + for j=1:6 + c1 = A_aug[1,1]*Ctron[1,j] + A_aug[1,2]*Ctron[2,j] + A_aug[1,3]*Ctron[3,j] + A_aug[1,4]*Ctron[4,j] + + A_aug[1,5]*Ctron[5,j] + A_aug[1,6]*Ctron[6,j] + A_aug[1,7]*Ctron[7,j] + 
A_aug[1,8]*Ctron[8,j] + + c2 = A_aug[2,1]*Ctron[1,j] + A_aug[2,2]*Ctron[2,j] + A_aug[2,3]*Ctron[3,j] + A_aug[2,4]*Ctron[4,j] + + A_aug[2,5]*Ctron[5,j] + A_aug[2,6]*Ctron[6,j] + A_aug[2,7]*Ctron[7,j] + A_aug[2,8]*Ctron[8,j] + + c3 = A_aug[3,1]*Ctron[1,j] + A_aug[3,2]*Ctron[2,j] + A_aug[3,3]*Ctron[3,j] + A_aug[3,4]*Ctron[4,j] + + A_aug[3,5]*Ctron[5,j] + A_aug[3,6]*Ctron[6,j] + A_aug[3,7]*Ctron[7,j] + A_aug[3,8]*Ctron[8,j] + + c4 = A_aug[4,1]*Ctron[1,j] + A_aug[4,2]*Ctron[2,j] + A_aug[4,3]*Ctron[3,j] + A_aug[4,4]*Ctron[4,j] + + A_aug[4,5]*Ctron[5,j] + A_aug[4,6]*Ctron[6,j] + A_aug[4,7]*Ctron[7,j] + A_aug[4,8]*Ctron[8,j] + + c5 = A_aug[5,1]*Ctron[1,j] + A_aug[5,2]*Ctron[2,j] + A_aug[5,3]*Ctron[3,j] + A_aug[5,4]*Ctron[4,j] + + A_aug[5,5]*Ctron[5,j] + A_aug[5,6]*Ctron[6,j] + A_aug[5,7]*Ctron[7,j] + A_aug[5,8]*Ctron[8,j] + + c6 = A_aug[6,1]*Ctron[1,j] + A_aug[6,2]*Ctron[2,j] + A_aug[6,3]*Ctron[3,j] + A_aug[6,4]*Ctron[4,j] + + A_aug[6,5]*Ctron[5,j] + A_aug[6,6]*Ctron[6,j] + A_aug[6,7]*Ctron[7,j] + A_aug[6,8]*Ctron[8,j] + + c7 = A_aug[7,1]*Ctron[1,j] + A_aug[7,2]*Ctron[2,j] + A_aug[7,3]*Ctron[3,j] + A_aug[7,4]*Ctron[4,j] + + A_aug[7,5]*Ctron[5,j] + A_aug[7,6]*Ctron[6,j] + A_aug[7,7]*Ctron[7,j] + A_aug[7,8]*Ctron[8,j] + + c8 = A_aug[8,1]*Ctron[1,j] + A_aug[8,2]*Ctron[2,j] + A_aug[8,3]*Ctron[3,j] + A_aug[8,4]*Ctron[4,j] + + A_aug[8,5]*Ctron[5,j] + A_aug[8,6]*Ctron[6,j] + A_aug[8,7]*Ctron[7,j] + A_aug[8,8]*Ctron[8,j] + + Atron[i,j] = scale*(Ctron[1,i]*c1 + Ctron[2,i]*c2 + Ctron[3,i]*c3 + Ctron[4,i]*c4 + Ctron[5,i]*c5 + Ctron[6,i]*c6 + + Ctron[7,i]*c7 + Ctron[8,i]*c8) + + end + end + + b1 = A_aug[1,1]*dtron[1] + A_aug[1,2]*dtron[2] + A_aug[1,3]*dtron[3] + A_aug[1,4]*dtron[4] + + A_aug[1,5]*dtron[5] + A_aug[1,6]*dtron[6] + A_aug[1,7]*dtron[7] + A_aug[1,8]*dtron[8] + + (membuf[3,lineidx] - membuf[5,lineidx]*RH_1j[lineidx])*vec_1j[1] + + (membuf[4,lineidx] - membuf[5,lineidx]*RH_1k[lineidx])*vec_1k[1] + + b2 = A_aug[2,1]*dtron[1] + A_aug[2,2]*dtron[2] + A_aug[2,3]*dtron[3] + A_aug[2,4]*dtron[4] + + A_aug[2,5]*dtron[5] + A_aug[2,6]*dtron[6] + A_aug[2,7]*dtron[7] + A_aug[2,8]*dtron[8] + + (membuf[3,lineidx] - membuf[5,lineidx]*RH_1j[lineidx])*vec_1j[2] + + (membuf[4,lineidx] - membuf[5,lineidx]*RH_1k[lineidx])*vec_1k[2] + + b3 = A_aug[3,1]*dtron[1] + A_aug[3,2]*dtron[2] + A_aug[3,3]*dtron[3] + A_aug[3,4]*dtron[4] + + A_aug[3,5]*dtron[5] + A_aug[3,6]*dtron[6] + A_aug[3,7]*dtron[7] + A_aug[3,8]*dtron[8] + + (membuf[3,lineidx] - membuf[5,lineidx]*RH_1j[lineidx])*vec_1j[3] + + (membuf[4,lineidx] - membuf[5,lineidx]*RH_1k[lineidx])*vec_1k[3] + bbr[1] + + b4 = A_aug[4,1]*dtron[1] + A_aug[4,2]*dtron[2] + A_aug[4,3]*dtron[3] + A_aug[4,4]*dtron[4] + + A_aug[4,5]*dtron[5] + A_aug[4,6]*dtron[6] + A_aug[4,7]*dtron[7] + A_aug[4,8]*dtron[8] + + (membuf[3,lineidx] - membuf[5,lineidx]*RH_1j[lineidx])*vec_1j[4] + + (membuf[4,lineidx] - membuf[5,lineidx]*RH_1k[lineidx])*vec_1k[4] + bbr[2] + + b5 = A_aug[5,1]*dtron[1] + A_aug[5,2]*dtron[2] + A_aug[5,3]*dtron[3] + A_aug[5,4]*dtron[4] + + A_aug[5,5]*dtron[5] + A_aug[5,6]*dtron[6] + A_aug[5,7]*dtron[7] + A_aug[5,8]*dtron[8] + + (membuf[3,lineidx] - membuf[5,lineidx]*RH_1j[lineidx])*vec_1j[5] + + (membuf[4,lineidx] - membuf[5,lineidx]*RH_1k[lineidx])*vec_1k[5] +bbr[3] + + b6 = A_aug[6,1]*dtron[1] + A_aug[6,2]*dtron[2] + A_aug[6,3]*dtron[3] + A_aug[6,4]*dtron[4] + + A_aug[6,5]*dtron[5] + A_aug[6,6]*dtron[6] + A_aug[6,7]*dtron[7] + A_aug[6,8]*dtron[8] + + (membuf[3,lineidx] - membuf[5,lineidx]*RH_1j[lineidx])*vec_1j[6] + + (membuf[4,lineidx] - 
membuf[5,lineidx]*RH_1k[lineidx])*vec_1k[6] + bbr[4] + + b7 = A_aug[7,1]*dtron[1] + A_aug[7,2]*dtron[2] + A_aug[7,3]*dtron[3] + A_aug[7,4]*dtron[4] + + A_aug[7,5]*dtron[5] + A_aug[7,6]*dtron[6] + A_aug[7,7]*dtron[7] + A_aug[7,8]*dtron[8] + + (membuf[3,lineidx] - membuf[5,lineidx]*RH_1j[lineidx])*vec_1j[7] + + (membuf[4,lineidx] - membuf[5,lineidx]*RH_1k[lineidx])*vec_1k[7] + bbr[5] + + b8 = A_aug[8,1]*dtron[1] + A_aug[8,2]*dtron[2] + A_aug[8,3]*dtron[3] + A_aug[8,4]*dtron[4] + + A_aug[8,5]*dtron[5] + A_aug[8,6]*dtron[6] + A_aug[8,7]*dtron[7] + A_aug[8,8]*dtron[8] + + (membuf[3,lineidx] - membuf[5,lineidx]*RH_1j[lineidx])*vec_1j[8] + + (membuf[4,lineidx] - membuf[5,lineidx]*RH_1k[lineidx])*vec_1k[8] + bbr[6] + + + for i = 1:6 + btron[i] = scale*(Ctron[1,i]*b1 + Ctron[2,i]*b2 + Ctron[3,i]*b3 + Ctron[4,i]*b4 + Ctron[5,i]*b5 + Ctron[6,i]*b6 + + Ctron[7,i]*b7 + Ctron[8,i]*b8) + end + + + + end #inbounds + end #tx + CUDA.sync_threads() + return +end diff --git a/src/models/qpsub/qpsub_generator_kernel_cpu.jl b/src/models/qpsub/qpsub_generator_kernel_cpu.jl new file mode 100644 index 0000000..8fdc354 --- /dev/null +++ b/src/models/qpsub/qpsub_generator_kernel_cpu.jl @@ -0,0 +1,15 @@ +""" + generator_kernel_two_level_qpsub() + +record cpu time: return tcpu +""" + +function generator_kernel_two_level( + model::ModelQpsub{Float64,Array{Float64,1},Array{Int,1}}, + baseMVA::Float64, u, xbar, zu, lu, rho_u +) +tcpu = @timed generator_kernel_two_level(baseMVA, model.grid_data.ngen, model.gen_start, +u, xbar, zu, lu, rho_u, model.qpsub_pgmin, model.qpsub_pgmax, model.qpsub_qgmin, model.qpsub_qgmax, +model.qpsub_c2, model.qpsub_c1) +return tcpu +end diff --git a/src/models/qpsub/qpsub_generator_kernel_gpu.jl b/src/models/qpsub/qpsub_generator_kernel_gpu.jl new file mode 100644 index 0000000..c9b060f --- /dev/null +++ b/src/models/qpsub/qpsub_generator_kernel_gpu.jl @@ -0,0 +1,16 @@ +""" + generator_kernel_two_level_qpsub() + +record cpu time: return tcpu +""" + +function generator_kernel_two_level( + model::ModelQpsub{Float64,CuArray{Float64,1},CuArray{Int,1}}, + baseMVA::Float64, u, xbar, zu, lu, rho_u + ) + nblk = div(model.grid_data.ngen, 32, RoundUp) + tgpu = CUDA.@timed @cuda threads=32 blocks=nblk generator_kernel_two_level(baseMVA, model.grid_data.ngen, model.gen_start, + u, xbar, zu, lu, rho_u, model.qpsub_pgmin, model.qpsub_pgmax, model.qpsub_qgmin, model.qpsub_qgmax, + model.qpsub_c2, model.qpsub_c1) + return tgpu +end diff --git a/src/models/qpsub/qpsub_init_solution_cpu.jl b/src/models/qpsub/qpsub_init_solution_cpu.jl new file mode 100644 index 0000000..898c97c --- /dev/null +++ b/src/models/qpsub/qpsub_init_solution_cpu.jl @@ -0,0 +1,66 @@ +""" + init_solution() + +- initialize sol.v_curr and sol.rho for all coupling +- initialize sqp_line, supY +""" + +function init_solution!( + model::ModelQpsub{Float64,Array{Float64,1},Array{Int,1},Array{Float64,2}}, + sol::Solution{Float64,Array{Float64,1}}, + rho_pq::Float64, rho_va::Float64 + ) + + ngen = model.grid_data.ngen + nline = model.grid_data.nline + nbus = model.grid_data.nbus + + sqp_line = model.sqp_line + ls = model.ls + us = model.us + + brBusIdx = model.grid_data.brBusIdx + Vmax = model.grid_data.Vmax; Vmin = model.grid_data.Vmin + YffR = model.grid_data.YffR; YffI = model.grid_data.YffI + YttR = model.grid_data.YttR; YttI = model.grid_data.YttI + YftR = model.grid_data.YftR; YftI = model.grid_data.YftI + YtfR = model.grid_data.YtfR; YtfI = model.grid_data.YtfI + + fill!(sol, 0.0) + fill!(model.lambda, 0.0) + + #qpsub var + sol.rho .= 
rho_pq + + for g=1:ngen + pg_idx = model.gen_start + 2*(g-1) + sol.v_curr[pg_idx] = 0.5*(model.qpsub_pgmin[g] + model.qpsub_pgmax[g]) + sol.v_curr[pg_idx+1] = 0.5*(model.qpsub_qgmin[g] + model.qpsub_qgmax[g]) + end + + for l=1:nline + + pij_idx = model.line_start + 8*(l-1) + + supY = [YftR[l] YftI[l] YffR[l] 0 0 0; + -YftI[l] YftR[l] -YffI[l] 0 0 0; + YtfR[l] -YtfI[l] 0 YttR[l] 0 0; + -YtfI[l] -YtfR[l] 0 -YttI[l] 0 0] #wijR, wijI, wi, wj, theta_i, theta_j + + #initialize sqp_line + sqp_line[:,l] = (ls[l,:] + us[l,:])/2 # order |w_ijR | w_ijI | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji)| + + sol.v_curr[pij_idx] = dot(supY[1,:],sqp_line[:,l]) #p_ij + sol.v_curr[pij_idx+1] = dot(supY[2,:],sqp_line[:,l]) #q_ij + sol.v_curr[pij_idx+2] = dot(supY[3,:],sqp_line[:,l]) #p_ji + sol.v_curr[pij_idx+3] = dot(supY[4,:],sqp_line[:,l]) #q_ji + sol.v_curr[pij_idx+4] = sqp_line[3,l] #w_i + sol.v_curr[pij_idx+5] = sqp_line[4,l] #w_j + sol.v_curr[pij_idx+6] = sqp_line[5,l] #theta_i + sol.v_curr[pij_idx+7] = sqp_line[6,l] #theta_j + + sol.rho[pij_idx:pij_idx+7] .= rho_va + end + + return +end diff --git a/src/models/qpsub/qpsub_init_solution_gpu.jl b/src/models/qpsub/qpsub_init_solution_gpu.jl new file mode 100644 index 0000000..3104682 --- /dev/null +++ b/src/models/qpsub/qpsub_init_solution_gpu.jl @@ -0,0 +1,97 @@ +""" + init_solution() + +- initialize sol.v_curr and sol.rho for all coupling +- initialize sqp_line, supY +""" + +function init_generator_kernel_qpsub(n::Int, gen_start::Int, + pgmax::CuDeviceArray{Float64,1}, pgmin::CuDeviceArray{Float64,1}, + qgmax::CuDeviceArray{Float64,1}, qgmin::CuDeviceArray{Float64,1}, + v::CuDeviceArray{Float64,1} +) + g = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) + if g <= n + v[gen_start + 2*(g-1)] = 0.5*(pgmin[g] + pgmax[g]) + v[gen_start + 2*(g-1)+1] = 0.5*(qgmin[g] + qgmax[g]) + end + + return +end + + +function init_branch_bus_kernel_qpsub(n::Int, line_start::Int, rho_va::Float64, + YffR::CuDeviceArray{Float64,1}, YffI::CuDeviceArray{Float64,1}, + YftR::CuDeviceArray{Float64,1}, YftI::CuDeviceArray{Float64,1}, + YtfR::CuDeviceArray{Float64,1}, YtfI::CuDeviceArray{Float64,1}, + YttR::CuDeviceArray{Float64,1}, YttI::CuDeviceArray{Float64,1}, + us::CuDeviceArray{Float64,2}, ls::CuDeviceArray{Float64,2}, sqp_line::CuDeviceArray{Float64,2}, + v::CuDeviceArray{Float64,1}, rho::CuDeviceArray{Float64,1}, supY::CuDeviceArray{Float64,2} +) + l = threadIdx().x + (blockDim().x * (blockIdx().x - 1)) + if l <= n + sqp_line[1,l] = (ls[l,1] + us[l,1])/2 # order |w_ijR | w_ijI | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji)| + sqp_line[2,l] = (ls[l,2] + us[l,2])/2 + sqp_line[3,l] = (ls[l,3] + us[l,3])/2 + sqp_line[4,l] = (ls[l,4] + us[l,4])/2 + sqp_line[5,l] = (ls[l,5] + us[l,5])/2 + sqp_line[6,l] = (ls[l,6] + us[l,6])/2 + + pij_idx = line_start + 8*(l-1) + + supY[4*(l-1) + 1,3] = YftR[l] + supY[4*(l-1) + 1,4] = YftI[l] + supY[4*(l-1) + 1,5] = YffR[l] + supY[4*(l-1) + 2,3] = -YftI[l] + supY[4*(l-1) + 2,4] = YftR[l] + supY[4*(l-1) + 2,5] = -YffI[l] + supY[4*(l-1) + 3,3] = YtfR[l] + supY[4*(l-1) + 3,4] = -YtfI[l] + supY[4*(l-1) + 3,6] = YttR[l] + supY[4*(l-1) + 4,3] = -YtfI[l] + supY[4*(l-1) + 4,4] = -YtfR[l] + supY[4*(l-1) + 4,6] = -YttI[l] + + + v[pij_idx] = YftR[l]*sqp_line[1,l] + YftI[l]*sqp_line[2,l] + YffR[l]*sqp_line[3,l] #CUBLAS.dot(4, supY[1,:],sqp_line[:,l]) #p_ij + v[pij_idx+1] = -YftI[l]*sqp_line[1,l] + YftR[l]*sqp_line[2,l] - YffI[l]*sqp_line[3,l] #CUBLAS.dot(4, supY[2,:],sqp_line[:,l]) #q_ij + v[pij_idx+2] = YtfR[l]*sqp_line[1,l] - YtfI[l]*sqp_line[2,l] + 
YttR[l]*sqp_line[4,l] #CUBLAS.dot(4, supY[3,:],sqp_line[:,l]) #p_ji + v[pij_idx+3] = -YtfI[l]*sqp_line[1,l] - YtfR[l]*sqp_line[2,l] - YttI[l]*sqp_line[4,l] #CUBLAS.dot(4, supY[4,:],sqp_line[:,l]) #q_ji + v[pij_idx+4] = sqp_line[3,l] #w_i + v[pij_idx+5] = sqp_line[4,l] #w_j + v[pij_idx+6] = sqp_line[5,l] #theta_i + v[pij_idx+7] = sqp_line[6,l] #theta_j + + rho[pij_idx:pij_idx+7] .= rho_va + + + end + + return +end + +function init_solution!( + model::ModelQpsub{Float64,CuArray{Float64,1},CuArray{Int,1},CuArray{Float64,2}}, + sol::Solution{Float64,CuArray{Float64,1}}, + rho_pq::Float64, rho_va::Float64 + ) + + data = model.grid_data + + fill!(sol, 0.0) + fill!(model.lambda, 0.0) + + #qpsub var + sol.rho .= rho_pq + + + @cuda threads=64 blocks=(div(data.ngen-1,64)+1) init_generator_kernel_qpsub(data.ngen, model.gen_start, + model.qpsub_pgmax, model.qpsub_pgmin, model.qpsub_qgmax, model.qpsub_qgmin, sol.v_curr) + + @cuda threads=64 blocks=(div(data.nline-1,64)+1) init_branch_bus_kernel_qpsub(data.nline, model.line_start, rho_va, + data.YffR, data.YffI, data.YftR, data.YftI, + data.YtfR, data.YtfI, data.YttR, data.YttI, model.us, model.ls, model.sqp_line, sol.v_curr, sol.rho, model.supY) + CUDA.synchronize() + + return +end diff --git a/src/models/qpsub/qpsub_model.jl b/src/models/qpsub/qpsub_model.jl new file mode 100644 index 0000000..9e85e86 --- /dev/null +++ b/src/models/qpsub/qpsub_model.jl @@ -0,0 +1,384 @@ +""" + Model{T,TD,TI,TM} + +This contains the parameters specific to QPSUB model instance. +""" + + +""" + Solution Structure: +- solution.u contain variables for generator and branch kernel +- solution.v contains variables for bus kernel +- Summary Table: + +| dimension | ngen | ngen | nline | nline | nline | nline | nline | nline | nline | nline | +|:--------------:|:-------:| :----: |:----: |:----: |:----: |:----: |:----: |:----: |:----: |:----: | +|structure for u | pg | qg | p_ij | q_ij | p_ji | q_ji | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji) | +|structure for v | pg(i) | qg(i) | p_ij(i) | q_ij(i)| p_ji(j) | q_ji(j) | wi | wj | thetai | thetaj | + +- structure for l and ρ is wrt all element of [x - xbar + z] with same dimension + +- structure for sqp_line: 6*nline + |w_ijR | w_ijI | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji)| + +- Note: line has shared nodes => xbar contain duplications. + For example line(1,2) and line(2,3): w2 and theta2 exist twice in v. + +- qpsub_membuf structure (dim = 5, used in auglag): + - |λ_1h | λ_1i | λ_1j| λ_1k | ρ_{1h,1i,1j,1k}| + - For c(x) = 0, ALM = λ*c(x) + (ρ/2)c(x)^2 + - For 1j and 1k, introduce slack t_ij and t_ji (see internal branch structure for Exatron) + +""" +mutable struct ModelQpsub{T,TD,TI,TM} <: AbstractOPFModel{T,TD,TI,TM} + info::IterationInformation + solution::AbstractSolution{T,TD} + + # Used for multiple dispatch for multi-period case. 
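+    # For the single-period qpsub model this field is only a placeholder; the constructor below sets it to EmptyGeneratorSolution.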
+ gen_solution::AbstractSolution{T,TD} + + n::Int + nvar::Int + + gen_start::Int + line_start::Int + + pgmin_curr::TD # taking ramping into account for rolling horizon + pgmax_curr::TD # taking ramping into account for rolling horizon + + grid_data::GridData{T,TD,TI,TM} + + membuf::TM # memory buffer for line kernel + gen_membuf::TM # memory buffer for generator kernel + + v_prev::TD + + + + #qpsub_construct + Hs::TM # Hessian information for all lines 6*nline x 6: |w_ijR | w_ijI | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji)| + + #QP coefficient for orignal branch kernel QP problem + LH_1h::TM # nline * 4 : w_ijR w_ijI wi(ij) wj(ji) + RH_1h::TD # nline + LH_1i::TM # nline * 4 : w_ijR w_ijI thetai(ij) thetaj(ji) + RH_1i::TD # nline + + LH_1j::TM #nline * 2 : pij qij + RH_1j::TD #nline + LH_1k::TM #nline * 2 : pji qji + RH_1k::TD #nline + + ls::TM # nline * 6 + us::TM # nline * 6 + + is_HS_sym::Array{Bool,1} #nline + is_HS_PSD::Array{Bool,1} #nline + + line_res::TM #4*nline + + qpsub_c1::TD #ngen + qpsub_c2::TD #ngen + + qpsub_pgmax::TD #ngen + qpsub_pgmin::TD #ngen + qpsub_qgmax::TD #ngen + qpsub_qgmin::TD #ngen + + qpsub_Pd::TD #nbus + qpsub_Qd::TD #nbus + + + + #qpsub_solve + qpsub_membuf::TM #memory buffer for qpsub 5*nline + sqp_line::TM #6 * nline + + #collect qpsub solution + dpg_sol::Array{Float64,1} #ngen + dqg_sol::Array{Float64,1} #ngen + + dline_var::Array{Float64,2} #6*nline: w_ijR, w_ijI, w_i, w_j, theta_i, theta_j + dline_fl::Array{Float64,2} #4*nline: p_ij, q_ij, p_ji, q_ji + + dtheta_sol::Array{Float64,1} #nbus consensus with line_var + dw_sol::Array{Float64,1} #nbus consensus with line_var + + # Two-Level ADMM + nvar_u::Int + nvar_v::Int + bus_start::Int # this is for varibles of type v. + + # Padded sizes for MPI + nline_padded::Int + nvar_u_padded::Int + nvar_padded::Int + + # for integration + dual_infeas::Array{Float64,1} #kkt error vector |pg |w_ijR | w_ijI | wi(ij) | wj(ji) | thetai(ij) | thetaj(ji)| + lambda::TM #14h i j k multiplier + + # additional memory allocation for branch kernel (GPU) + # NOTE: added by bowen + supY::TM #in gpu initialization + + + function ModelQpsub{T,TD,TI,TM}() where {T, TD<:AbstractArray{T}, TI<:AbstractArray{Int}, TM<:AbstractArray{T,2}} + return new{T,TD,TI,TM}() + end + + function ModelQpsub{T,TD,TI,TM}(env::AdmmEnv{T,TD,TI,TM}; ramp_ratio=0.02) where {T, TD<:AbstractArray{T}, TI<:AbstractArray{Int}, TM<:AbstractArray{T,2}} + model = new{T,TD,TI,TM}() + + model.grid_data = GridData{T,TD,TI,TM}(env) + + model.n = (env.use_linelimit == true) ? 6 : 4 # branch kernel size (use linelimt or not) + model.nline_padded = model.grid_data.nline + + # Memory space is padded for the lines as a multiple of # processes. 
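+    # Illustrative example (hypothetical numbers): with nprocs = 4 MPI ranks and nline = 9,
+    # nline_padded = 4 * div(9, 4, RoundUp) = 4 * 3 = 12, so every rank is assigned the same number of (possibly padded) lines.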
+ if env.use_mpi + nprocs = MPI.Comm_size(env.comm) + model.nline_padded = nprocs * div(model.grid_data.nline, nprocs, RoundUp) + end + + model.nvar = 2*model.grid_data.ngen + 8*model.grid_data.nline + model.nvar_padded = model.nvar + 8*(model.nline_padded - model.grid_data.nline) #useless + model.gen_start = 1 #location starting generator variables + model.line_start = 2*model.grid_data.ngen + 1 #location starting branch variables + + + model.pgmin_curr = TD(undef, model.grid_data.ngen) + model.pgmax_curr = TD(undef, model.grid_data.ngen) + copyto!(model.pgmin_curr, model.grid_data.pgmin) + copyto!(model.pgmax_curr, model.grid_data.pgmax) + + model.grid_data.ramp_rate = TD(undef, model.grid_data.ngen) + model.grid_data.ramp_rate .= ramp_ratio.*model.grid_data.pgmax + + #scale the obj params with obj_scale + if env.params.obj_scale != 1.0 + model.grid_data.c2 .*= env.params.obj_scale + model.grid_data.c1 .*= env.params.obj_scale + model.grid_data.c0 .*= env.params.obj_scale + model.Hs .*=env.params.obj_scale + end + + # These are only for two-level ADMM. + model.nvar_u = 2*model.grid_data.ngen + 8*model.grid_data.nline + model.nvar_u_padded = model.nvar_u + 8*(model.nline_padded - model.grid_data.nline) + model.nvar_v = 2*model.grid_data.ngen + 4*model.grid_data.nline + 2*model.grid_data.nbus + model.bus_start = 2*model.grid_data.ngen + 4*model.grid_data.nline + 1 + + # Memory space is allocated based on the padded size. + model.solution = Solution{T,TD}(model.nvar_padded) + + model.gen_solution = EmptyGeneratorSolution{T,TD}() + + #old memory buffer used in the auglag_linelimit with Tron + model.membuf = TM(undef, (31, model.grid_data.nline)) + fill!(model.membuf, 0.0) + model.membuf[29,:] .= model.grid_data.rateA + + model.info = IterationInformation{ComponentInformation}() + + #new solution structure for tron (Hessian inherited from SQP: 6*nline) + model.sqp_line = TM(undef, (6,model.grid_data.nline)) + fill!(model.sqp_line, 0.0) + + #new v_prev for dual reshape + model.v_prev = TD(undef, model.nvar) + fill!(model.v_prev, 0.0) + + #new memory buffer used in the new auglag_Ab with Tron + model.qpsub_membuf = TM(undef, (5,model.grid_data.nline)) + fill!(model.qpsub_membuf, 0.0) + + #new qpsub parameters + model.Hs = TM(undef,(6*model.grid_data.nline,6)) + fill!(model.Hs, 0.0) + + model.line_res = TM(undef,(4,model.grid_data.nline)) + fill!(model.line_res, 0.0) + + #1h + model.LH_1h = TM(undef,(model.grid_data.nline,4)) + fill!(model.LH_1h, 0.0) + + model.RH_1h = TD(undef,model.grid_data.nline) + fill!(model.RH_1h, 0.0) + + #1i + model.LH_1i = TM(undef,(model.grid_data.nline,4)) + fill!(model.LH_1i, 0.0) + + model.RH_1i = TD(undef,model.grid_data.nline) + fill!(model.RH_1i, 0.0) + + #1j + model.LH_1j = TM(undef,(model.grid_data.nline,2)) + fill!(model.LH_1j, 0.0) + + model.RH_1j = TD(undef,model.grid_data.nline) + fill!(model.RH_1j, 0.0) + + #1k + model.LH_1k = TM(undef,(model.grid_data.nline,2)) + fill!(model.LH_1k, 0.0) + + model.RH_1k = TD(undef,model.grid_data.nline) + fill!(model.RH_1k, 0.0) + + # l and u + model.ls = TM(undef,(model.grid_data.nline,6)) + fill!(model.ls, 0.0) + + model.us = TM(undef,(model.grid_data.nline,6)) + fill!(model.us, 0.0) + + model.is_HS_sym = Array{Bool,1}(undef, model.grid_data.nline) + fill!(model.is_HS_sym, true) + + model.is_HS_PSD = Array{Bool,1}(undef, model.grid_data.nline) + fill!(model.is_HS_PSD, true) + + model.qpsub_c1 = TD(undef, model.grid_data.ngen) + fill!(model.qpsub_c1, 0.0) + + model.qpsub_c2 = TD(undef, model.grid_data.ngen) + 
fill!(model.qpsub_c2, 0.0) + + model.qpsub_pgmax = TD(undef, model.grid_data.ngen) + fill!(model.qpsub_pgmax, 0.0) + + model.qpsub_pgmin = TD(undef, model.grid_data.ngen) + fill!(model.qpsub_pgmin, 0.0) + + model.qpsub_qgmax = TD(undef, model.grid_data.ngen) + fill!(model.qpsub_qgmax, 0.0) + + model.qpsub_qgmin = TD(undef, model.grid_data.ngen) + fill!(model.qpsub_qgmin, 0.0) + + model.qpsub_Pd = TD(undef, model.grid_data.nbus) + fill!(model.qpsub_Pd, 0.0) + + model.qpsub_Qd = TD(undef, model.grid_data.nbus) + fill!(model.qpsub_Qd, 0.0) + + # qpsub solution + model.dpg_sol = Array{Float64,1}(undef, model.grid_data.ngen) + fill!(model.dpg_sol, 0.0) + + model.dqg_sol = Array{Float64,1}(undef, model.grid_data.ngen) + fill!(model.dqg_sol, 0.0) + + model.dline_var = Array{Float64,2}(undef,(6, model.grid_data.nline)) + fill!(model.dline_var, 0.0) + + model.dline_fl = Array{Float64,2}(undef,(4, model.grid_data.nline)) + fill!(model.dline_fl, 0.0) + + model.dtheta_sol = Array{Float64,1}(undef, model.grid_data.nbus) + fill!(model.dtheta_sol, 0.0) + + model.dw_sol = Array{Float64,1}(undef, model.grid_data.nbus) + fill!(model.dw_sol, 0.0) + + # for SQP + model.dual_infeas = Array{Float64,1}(undef, model.grid_data.ngen + 6*model.grid_data.nline) + fill!(model.dual_infeas, 1000.0) + + model.lambda = TM(undef, (4, model.grid_data.nline)) + fill!(model.lambda, 0.0) + + #additional options + model.supY = TM(undef, (4*model.grid_data.nline, 8)) #see supY def in eval_A_b_branch_kernel_gpu_qpsub + fill!(model.supY, 0.0) + + + return model + end +end + +""" +This is to share power network data between models. Some fields that could be modified are deeply copied. +""" +function Base.copy(ref::ModelQpsub{T,TD,TI,TM}) where {T, TD<:AbstractArray{T}, TI<:AbstractArray{Int}, TM<:AbstractArray{T,2}} + model = ModelQpsub{T,TD,TI,TM}() + + model.solution = copy(ref.solution) + model.gen_solution = copy(ref.gen_solution) + model.info = copy(ref.info) + model.grid_data = copy(ref.grid_data) + + model.n = ref.n + model.nvar = ref.nvar + + model.gen_start = ref.gen_start + model.line_start = ref.line_start + + model.pgmin_curr = copy(ref.pgmin_curr) + model.pgmax_curr = copy(ref.pgmax_curr) + + model.membuf = copy(ref.membuf) + model.gen_membuf = copy(ref.gen_membuf) + model.qpsub_membuf = copy(ref.qpsub_membuf) + + model.nvar_u = ref.nvar_u + model.nvar_v = ref.nvar_v + model.bus_start = ref.bus_start + + model.nline_padded = ref.nline_padded + model.nvar_padded = ref.nvar_padded + model.nvar_u_padded = ref.nvar_u_padded + + model.Hs = copy(ref.Hs) + model.LH_1h = copy(ref.LH_1h) + model.RH_1h = copy(ref.RH_1h) + + model.LH_1i = copy(ref.LH_1i) + model.RH_1i = copy(ref.RH_1i) + + model.LH_1j = copy(ref.LH_1j) + model.RH_1j = copy(ref.RH_1j) + + model.LH_1k = copy(ref.LH_1k) + model.RH_1k = copy(ref.RH_1k) + + model.ls = copy(ref.ls) + model.us = copy(ref.us) + + model.line_res = copy(ref.line_res) + + model.sqp_line = copy(ref.sqp_line) + + model.dpg_sol = copy(ref.dpg_sol) + model.dqg_sol = copy(ref.dqg_sol) + model.dline_var = copy(ref.dline_var) + model.dline_fl = copy(ref.dline_fl) + model.dtheta_sol = copy(ref.dtheta_sol) + model.dw_sol = copy(ref.dw_sol) + + model.is_HS_sym = copy(ref.is_HS_sym) + model.is_HS_PSD = copy(ref.is_HS_PSD) + + model.qpsub_c1 = copy(ref.qpsub_c1) + model.qpsub_c2 = copy(ref.qpsub_c2) + + model.qpsub_pgmax = copy(ref.qpsub_pgmax) + model.qpsub_pgmin = copy(ref.qpsub_pgmin) + model.qpsub_qgmax = copy(ref.qpsub_qgmax) + model.qpsub_qgmin = copy(ref.qpsub_qgmin) + + model.qpsub_Pd = 
copy(ref.qpsub_Pd) + model.qpsub_Qd = copy(ref.qpsub_Qd) + + # for SQP + model.dual_infeas = copy(ref.dual_infeas) + model.lambda = copy(ref.lambda) + + #additional parameters + model.supY = copy(ref.supY) + + return model +end diff --git a/src/models/qpsub/qpsub_tron_linelimit_kernel.jl b/src/models/qpsub/qpsub_tron_linelimit_kernel.jl new file mode 100644 index 0000000..67c0150 --- /dev/null +++ b/src/models/qpsub/qpsub_tron_linelimit_kernel.jl @@ -0,0 +1,170 @@ +""" +Driver to run TRON on GPU. This should be called from a kernel. + + tron_gpu_test() +- solves bounded QP: 1/2 x^THx + b*x s.t xl <= x <= xu +- includes eval_f_kernel(), eval_g_kernel(), and eval_h_kernel +""" +@inline function tron_gpu_test(n::Int, H::CuDeviceArray{Float64,2}, b::CuDeviceArray{Float64,1}, x::CuDeviceArray{Float64,1}, xl::CuDeviceArray{Float64,1}, xu::CuDeviceArray{Float64,1}) + tx = threadIdx().x + + + g = CuDynamicSharedArray(Float64, n, (3*n + (2*n + n^2 + 178))*sizeof(Float64)) + xc = CuDynamicSharedArray(Float64, n, (4*n + (2*n + n^2 + 178))*sizeof(Float64)) + s = CuDynamicSharedArray(Float64, n, (5*n + (2*n + n^2 + 178))*sizeof(Float64)) + wa = CuDynamicSharedArray(Float64, n, (6*n + (2*n + n^2 + 178))*sizeof(Float64)) + wa1 = CuDynamicSharedArray(Float64, n, (7*n + (2*n + n^2 + 178))*sizeof(Float64)) + wa2 = CuDynamicSharedArray(Float64, n, (8*n + (2*n + n^2 + 178))*sizeof(Float64)) + wa3 = CuDynamicSharedArray(Float64, n, (9*n + (2*n + n^2 + 178))*sizeof(Float64)) + wa4 = CuDynamicSharedArray(Float64, n, (10*n + (2*n + n^2 + 178))*sizeof(Float64)) + wa5 = CuDynamicSharedArray(Float64, n, (11*n + (2*n + n^2 + 178))*sizeof(Float64)) + gfree = CuDynamicSharedArray(Float64, n, (12*n + (2*n + n^2 + 178))*sizeof(Float64)) + dsave = CuDynamicSharedArray(Float64, n, (13*n + (2*n + n^2 + 178))*sizeof(Float64)) + indfree = CuDynamicSharedArray(Int, n, (14*n + (2*n + n^2 + 178))*sizeof(Float64)) + iwa = CuDynamicSharedArray(Int, 2*n, n*sizeof(Int) + (14*n + (2*n + n^2 + 178))*sizeof(Float64)) + isave = CuDynamicSharedArray(Int, n, (3*n)*sizeof(Int) + (14*n + (2*n + n^2 + 178))*sizeof(Float64)) + + + A = CuDynamicSharedArray(Float64, (n,n), (14*n + (2*n + n^2 + 178))*sizeof(Float64)+(4*n)*sizeof(Int)) + B = CuDynamicSharedArray(Float64, (n,n), (14*n+n^2 + (2*n + n^2 + 178))*sizeof(Float64)+(4*n)*sizeof(Int)) + L = CuDynamicSharedArray(Float64, (n,n), (14*n+2*n^2 + (2*n + n^2 + 178))*sizeof(Float64)+(4*n)*sizeof(Int)) + + if tx <= n + @inbounds for j=1:n + A[tx,j] = 0.0 + B[tx,j] = 0.0 + L[tx,j] = 0.0 + end + end + CUDA.sync_threads() + + max_feval = 500 + max_minor = 200 + gtol = 1e-6 + + task = 0 + status = 0 + + delta = 0.0 + fatol = 0.0 + frtol = 1e-12 + fmin = -1e32 + cgtol = 0.1 + cg_itermax = n + + f = 0.0 + nfev = 0 + ngev = 0 + nhev = 0 + minor_iter = 0 + search = true + + while search + + if task == 0 || task == 1 + + f = eval_f_kernel(x,H,b) + nfev += 1 + if nfev >= max_feval + search = false + end + end + + # [2] G or H: Evaluate gradient and Hessian. + + if task == 0 || task == 2 + eval_g_kernel(x,g,H,b) + eval_h_kernel(A,H) + ngev += 1 + nhev += 1 + minor_iter += 1 + end + + # Initialize the trust region bound. + + if task == 0 + gnorm0 = ExaAdmm.ExaTron.dnrm2(n, g, 1) + delta = gnorm0 + end + + if search + delta, task = ExaAdmm.ExaTron.dtron(n, x, xl, xu, f, g, A, frtol, fatol, fmin, cgtol, + cg_itermax, delta, task, B, L, xc, s, indfree, gfree, + isave, dsave, wa, iwa, wa1, wa2, wa3, wa4, wa5) + end + + # [3] NEWX: a new point was computed. 
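+        # At a new point, the projected gradient norm is compared against gtol to declare convergence (task = 4),
+        # and the search stops once the minor-iteration budget max_minor is exhausted.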
+ + if task == 3 + gnorm_inf = ExaAdmm.ExaTron.dgpnorm(n, x, xl, xu, g) + + if gnorm_inf <= gtol + task = 4 + end + + if minor_iter >= max_minor + status = 1 + search = false + end + end + + # [4] CONV: convergence was achieved. + # [10] : warning fval is less than fmin + + if task == 4 || task == 10 + search = false + end + end # end while + + + CUDA.sync_threads() + + return status, minor_iter +end + +@inline function eval_f_kernel(x::CuDeviceArray{Float64,1},A::CuDeviceArray{Float64,2},b::CuDeviceArray{Float64,1}) #f gpu + f = 0.0 + @inbounds begin + for i = 1:6 + for j = 1:6 + f += 0.5*x[i]*A[i,j]*x[j] + end + f += b[i]*x[i] + end + end + CUDA.sync_threads() + return f +end + +@inline function eval_g_kernel(x::CuDeviceArray{Float64,1}, g::CuDeviceArray{Float64,1}, A::CuDeviceArray{Float64,2}, b::CuDeviceArray{Float64,1}) + tx = threadIdx().x + g1 = A[1,1]*x[1] + A[1,2]*x[2] + A[1,3]*x[3] + A[1,4]*x[4] + A[1,5]*x[5] + A[1,6]*x[6] + b[1] + g2 = A[2,1]*x[1] + A[2,2]*x[2] + A[2,3]*x[3] + A[2,4]*x[4] + A[2,5]*x[5] + A[2,6]*x[6] + b[2] + g3 = A[3,1]*x[1] + A[3,2]*x[2] + A[3,3]*x[3] + A[3,4]*x[4] + A[3,5]*x[5] + A[3,6]*x[6] + b[3] + g4 = A[4,1]*x[1] + A[4,2]*x[2] + A[4,3]*x[3] + A[4,4]*x[4] + A[4,5]*x[5] + A[4,6]*x[6] + b[4] + g5 = A[5,1]*x[1] + A[5,2]*x[2] + A[5,3]*x[3] + A[5,4]*x[4] + A[5,5]*x[5] + A[5,6]*x[6] + b[5] + g6 = A[6,1]*x[1] + A[6,2]*x[2] + A[6,3]*x[3] + A[6,4]*x[4] + A[6,5]*x[5] + A[6,6]*x[6] + b[6] + if tx == 1 + g[1] = g1 + g[2] = g2 + g[3] = g3 + g[4] = g4 + g[5] = g5 + g[6] = g6 + end + CUDA.sync_threads() +end + +@inline function eval_h_kernel(A::CuDeviceArray{Float64,2}, H::CuDeviceArray{Float64,2}) + tx = threadIdx().x + if tx == 1 + @inbounds begin + for i = 1:6 + for j = 1:6 + A[i,j] = H[i,j] + end + end + end + end + CUDA.sync_threads() +end diff --git a/src/not_used/check_violations.jl b/src/not_used/check_violations.jl deleted file mode 100644 index e5ba462..0000000 --- a/src/not_used/check_violations.jl +++ /dev/null @@ -1,440 +0,0 @@ -function check_generator_bounds(model::Model, v::Vector{Float64}) - ngen = model.ngen - gen_start = model.gen_start - - pgmax = model.pgmax_curr; pgmin = model.pgmin_curr - qgmax = model.qgmax; qgmin = model.qgmin - - max_viol_real = 0.0 - max_viol_reactive = 0.0 - - for g=1:ngen - pidx = gen_start + 2*(g-1) - qidx = gen_start + 2*(g-1) + 1 - - real_err = max(max(0.0, v[pidx] - pgmax[g]), max(0.0, pgmin[g] - v[pidx])) - reactive_err = max(max(0.0, v[qidx] - qgmax[g]), max(0.0, qgmin[g] - v[qidx])) - - max_viol_real = (max_viol_real < real_err) ? real_err : max_viol_real - max_viol_reactive = (max_viol_reactive < reactive_err) ? reactive_err : max_viol_reactive - end - - return max_viol_real, max_viol_reactive -end - -function check_voltage_bounds_alternative(model::Model, v::Vector{Float64}) - max_viol = 0.0 - - for b=1:model.nbus - if model.FrStart[b] < model.FrStart[b+1] - l = model.FrIdx[model.FrStart[b]] - wi = v[model.line_start + 8*(l-1) + 4] - elseif model.ToStart[b] < model.ToStart[b+1] - l = model.ToIdx[model.ToStart[b]] - wi = v[model.line_start + 8*(l-1) + 5] - else - println("No lines connected to bus ", b) - end - - err = max(max(0.0, wi - model.Vmax[b]^2), max(0.0, model.Vmin[b]^2 - wi)) - max_viol = (max_viol < err) ? 
err : max_viol - end - - return max_viol -end - -function check_power_balance_alternative(model::Model, u::Vector{Float64}, v::Vector{Float64}) - baseMVA = model.baseMVA - nbus = model.nbus - gen_start, line_start = model.gen_start, model.line_start - - YffR, YffI = model.YffR, model.YffI - YftR, YftI = model.YftR, model.YftI - YtfR, YtfI = model.YtfR, model.YtfI - YttR, YttI = model.YttR, model.YttI - YshR, YshI = model.YshR, model.YshI - - vm = [0.0 for i=1:nbus] - va = [0.0 for i=1:nbus] - - for b=1:nbus - count = 0 - for k=model.FrStart[b]:model.FrStart[b+1]-1 - l = model.FrIdx[k] - vm[b] = sqrt(v[line_start + 8*(l-1) + 4]) - va[b] = v[line_start + 8*(l-1) + 6] - count += 1 - end - for k=model.ToStart[b]:model.ToStart[b+1]-1 - l = model.ToIdx[k] - vm[b] = sqrt(v[line_start + 8*(l-1) + 5]) - va[b] = v[line_start + 8*(l-1) + 7] - count += 1 - end - #vm[b] /= count - #va[b] /= count - end - - max_viol_real = 0.0 - max_viol_reactive = 0.0 - max_viol_reactive_idx = -1 - for b=1:nbus - real_err = 0.0 - reactive_err = 0.0 - for k=model.GenStart[b]:model.GenStart[b+1]-1 - g = model.GenIdx[k] - real_err += v[gen_start + 2*(g-1)] - reactive_err += v[gen_start + 2*(g-1)+1] - end - - real_err -= (model.Pd[b] / baseMVA) - reactive_err -= (model.Qd[b] / baseMVA) - - for k=model.FrStart[b]:model.FrStart[b+1]-1 - l = model.FrIdx[k] - i = model.brBusIdx[1+2*(l-1)] - j = model.brBusIdx[1+2*(l-1)+1] - pij = YffR[l]*vm[i]^2 + YftR[l]*vm[i]*vm[j]*cos(va[i] - va[j]) + YftI[l]*vm[i]*vm[j]*sin(va[i] - va[j]) - qij = -YffI[l]*vm[i]^2 - YftI[l]*vm[i]*vm[j]*cos(va[i] - va[j]) + YftR[l]*vm[i]*vm[j]*sin(va[i] - va[j]) - real_err -= pij - reactive_err -= qij - end - - for k=model.ToStart[b]:model.ToStart[b+1]-1 - l = model.ToIdx[k] - i = model.brBusIdx[1+2*(l-1)] - j = model.brBusIdx[1+2*(l-1)+1] - pji = YttR[l]*vm[j]^2 + YtfR[l]*vm[i]*vm[j]*cos(va[i] - va[j]) - YtfI[l]*vm[i]*vm[j]*sin(va[i] - va[j]) - qji = -YttI[l]*vm[j]^2 - YtfI[l]*vm[i]*vm[j]*cos(va[i] - va[j]) - YtfR[l]*vm[i]*vm[j]*sin(va[i] - va[j]) - real_err -= pji - reactive_err -= qji - end - - real_err -= YshR[b] * vm[b]^2 - reactive_err += YshI[b] * vm[b]^2 - - if max_viol_reactive < abs(reactive_err) - max_viol_reactive_idx = b - end - max_viol_real = (max_viol_real < abs(real_err)) ? abs(real_err) : max_viol_real - max_viol_reactive = (max_viol_reactive < abs(reactive_err)) ? 
abs(reactive_err) : max_viol_reactive - end - - return max_viol_real, max_viol_reactive -end - -function check_linelimit_violation(model::Model, data::OPFData, v::Vector{Float64}) - lines = data.lines - nbus = length(data.buses) - nline = length(data.lines) - line_start = 2*length(data.generators) + 1 - - YffR, YffI = model.YffR, model.YffI - YftR, YftI = model.YftR, model.YftI - YtfR, YtfI = model.YtfR, model.YtfI - YttR, YttI = model.YttR, model.YttI - - rateA_nviols = 0 - rateA_maxviol = 0.0 - - vm = [0.0 for i=1:nbus] - va = [0.0 for i=1:nbus] - - for b=1:nbus - count = 0 - for k=model.FrStart[b]:model.FrStart[b+1]-1 - l = model.FrIdx[k] - vm[b] = sqrt(v[line_start + 8*(l-1) + 4]) - va[b] = v[line_start + 8*(l-1) + 6] - count += 1 - end - for k=model.ToStart[b]:model.ToStart[b+1]-1 - l = model.ToIdx[k] - vm[b] = sqrt(v[line_start + 8*(l-1) + 5]) - va[b] = v[line_start + 8*(l-1) + 7] - count += 1 - end - #vm[b] /= count - #va[b] /= count - end - - for l=1:nline - i = model.brBusIdx[1+2*(l-1)] - j = model.brBusIdx[1+2*(l-1)+1] - pij = YffR[l]*vm[i]^2 + YftR[l]*vm[i]*vm[j]*cos(va[i] - va[j]) + YftI[l]*vm[i]*vm[j]*sin(va[i] - va[j]) - qij = -YffI[l]*vm[i]^2 - YftI[l]*vm[i]*vm[j]*cos(va[i] - va[j]) + YftR[l]*vm[i]*vm[j]*sin(va[i] - va[j]) - pji = YttR[l]*vm[j]^2 + YtfR[l]*vm[i]*vm[j]*cos(va[i] - va[j]) - YtfI[l]*vm[i]*vm[j]*sin(va[i] - va[j]) - qji = -YttI[l]*vm[j]^2 - YtfI[l]*vm[i]*vm[j]*cos(va[i] - va[j]) - YtfR[l]*vm[i]*vm[j]*sin(va[i] - va[j]) - - ij_val = sqrt(pij^2 + qij^2) - ji_val = sqrt(pji^2 + qji^2) - - limit = lines[l].rateA / data.baseMVA - if limit > 0 - if ij_val > limit || ji_val > limit - rateA_nviols += 1 - rateA_maxviol = max(rateA_maxviol, max(ij_val - limit, ji_val - limit)) - end - end - end - - return rateA_nviols, rateA_maxviol -end - -function check_generator_bounds(model::ComplementarityModel, v::Vector{Float64}) - grid = model.grid - ngen = grid.ngen - gen_start = model.gen_start - - pgmax = model.pgmax_curr; pgmin = model.pgmin_curr - qgmax = grid.qgmax; qgmin = grid.qgmin - - max_viol_real = 0.0 - max_viol_reactive = 0.0 - - for g=1:ngen - pidx = gen_start + 2*(g-1) - qidx = gen_start + 2*(g-1) + 1 - - real_err = max(max(0.0, v[pidx] - pgmax[g]), max(0.0, pgmin[g] - v[pidx])) - reactive_err = max(max(0.0, v[qidx] - qgmax[g]), max(0.0, qgmin[g] - v[qidx])) - - max_viol_real = (max_viol_real < real_err) ? real_err : max_viol_real - max_viol_reactive = (max_viol_reactive < reactive_err) ? reactive_err : max_viol_reactive - end - - return max_viol_real, max_viol_reactive -end - -function check_voltage_bounds_alternative(model::ComplementarityModel, v::Vector{Float64}) - grid = model.grid - max_viol = 0.0 - - for b=1:grid.nbus - if grid.FrStart[b] < grid.FrStart[b+1] - l = grid.FrIdx[grid.FrStart[b]] - wi = v[model.line_start + 8*(l-1) + 4] - elseif grid.ToStart[b] < grid.ToStart[b+1] - l = grid.ToIdx[grid.ToStart[b]] - wi = v[model.line_start + 8*(l-1) + 5] - else - println("No lines connected to bus ", b) - end - - err = max(max(0.0, wi - grid.Vmax[b]^2), max(0.0, grid.Vmin[b]^2 - wi)) - max_viol = (max_viol < err) ? 
err : max_viol - end - - return max_viol -end - -function check_power_balance_alternative(model::ComplementarityModel, u::Vector{Float64}, v::Vector{Float64}) - grid = model.grid - baseMVA = grid.baseMVA - nbus = grid.nbus - gen_start, line_start = model.gen_start, model.line_start - - YffR, YffI = grid.YffR, grid.YffI - YftR, YftI = grid.YftR, grid.YftI - YtfR, YtfI = grid.YtfR, grid.YtfI - YttR, YttI = grid.YttR, grid.YttI - YshR, YshI = grid.YshR, grid.YshI - - vm = [0.0 for i=1:nbus] - va = [0.0 for i=1:nbus] - - for b=1:nbus - count = 0 - for k=grid.FrStart[b]:grid.FrStart[b+1]-1 - l = grid.FrIdx[k] - vm[b] = sqrt(v[line_start + 8*(l-1) + 4]) - va[b] = v[line_start + 8*(l-1) + 6] - count += 1 - end - for k=grid.ToStart[b]:grid.ToStart[b+1]-1 - l = grid.ToIdx[k] - vm[b] = sqrt(v[line_start + 8*(l-1) + 5]) - va[b] = v[line_start + 8*(l-1) + 7] - count += 1 - end - #vm[b] /= count - #va[b] /= count - end - - max_viol_real = 0.0 - max_viol_reactive = 0.0 - max_viol_reactive_idx = -1 - for b=1:nbus - real_err = 0.0 - reactive_err = 0.0 - for k=grid.GenStart[b]:grid.GenStart[b+1]-1 - g = grid.GenIdx[k] - real_err += v[gen_start + 2*(g-1)] - reactive_err += v[gen_start + 2*(g-1)+1] - end - - real_err -= (grid.Pd[b] / baseMVA) - reactive_err -= (grid.Qd[b] / baseMVA) - - for k=grid.FrStart[b]:grid.FrStart[b+1]-1 - l = grid.FrIdx[k] - i = grid.brBusIdx[1+2*(l-1)] - j = grid.brBusIdx[1+2*(l-1)+1] - pij = YffR[l]*vm[i]^2 + YftR[l]*vm[i]*vm[j]*cos(va[i] - va[j]) + YftI[l]*vm[i]*vm[j]*sin(va[i] - va[j]) - qij = -YffI[l]*vm[i]^2 - YftI[l]*vm[i]*vm[j]*cos(va[i] - va[j]) + YftR[l]*vm[i]*vm[j]*sin(va[i] - va[j]) - real_err -= pij - reactive_err -= qij - end - - for k=grid.ToStart[b]:grid.ToStart[b+1]-1 - l = grid.ToIdx[k] - i = grid.brBusIdx[1+2*(l-1)] - j = grid.brBusIdx[1+2*(l-1)+1] - pji = YttR[l]*vm[j]^2 + YtfR[l]*vm[i]*vm[j]*cos(va[i] - va[j]) - YtfI[l]*vm[i]*vm[j]*sin(va[i] - va[j]) - qji = -YttI[l]*vm[j]^2 - YtfI[l]*vm[i]*vm[j]*cos(va[i] - va[j]) - YtfR[l]*vm[i]*vm[j]*sin(va[i] - va[j]) - real_err -= pji - reactive_err -= qji - end - - real_err -= YshR[b] * vm[b]^2 - reactive_err += YshI[b] * vm[b]^2 - - if max_viol_reactive < abs(reactive_err) - max_viol_reactive_idx = b - end - max_viol_real = (max_viol_real < abs(real_err)) ? abs(real_err) : max_viol_real - max_viol_reactive = (max_viol_reactive < abs(reactive_err)) ? 
abs(reactive_err) : max_viol_reactive - end - - return max_viol_real, max_viol_reactive -end - -function check_linelimit_violation(model::ComplementarityModel, data::OPFData, v::Vector{Float64}) - grid = model.grid - lines = data.lines - nbus = grid.nbus - nline = grid.nline - line_start = model.line_start - - YffR, YffI = grid.YffR, grid.YffI - YftR, YftI = grid.YftR, grid.YftI - YtfR, YtfI = grid.YtfR, grid.YtfI - YttR, YttI = grid.YttR, grid.YttI - - rateA_nviols = 0 - rateA_maxviol = 0.0 - - vm = [0.0 for i=1:nbus] - va = [0.0 for i=1:nbus] - - for b=1:nbus - count = 0 - for k=grid.FrStart[b]:grid.FrStart[b+1]-1 - l = grid.FrIdx[k] - vm[b] += sqrt(v[line_start + 8*(l-1) + 4]) - va[b] += v[line_start + 8*(l-1) + 6] - count += 1 - end - for k=grid.ToStart[b]:grid.ToStart[b+1]-1 - l = grid.ToIdx[k] - vm[b] += sqrt(v[line_start + 8*(l-1) + 5]) - va[b] += v[line_start + 8*(l-1) + 7] - count += 1 - end - vm[b] /= count - va[b] /= count - end - - for l=1:nline - i = grid.brBusIdx[1+2*(l-1)] - j = grid.brBusIdx[1+2*(l-1)+1] - pij = YffR[l]*vm[i]^2 + YftR[l]*vm[i]*vm[j]*cos(va[i] - va[j]) + YftI[l]*vm[i]*vm[j]*sin(va[i] - va[j]) - qij = -YffI[l]*vm[i]^2 - YftI[l]*vm[i]*vm[j]*cos(va[i] - va[j]) + YftR[l]*vm[i]*vm[j]*sin(va[i] - va[j]) - pji = YttR[l]*vm[j]^2 + YtfR[l]*vm[i]*vm[j]*cos(va[i] - va[j]) - YtfI[l]*vm[i]*vm[j]*sin(va[i] - va[j]) - qji = -YttI[l]*vm[j]^2 - YtfI[l]*vm[i]*vm[j]*cos(va[i] - va[j]) - YtfR[l]*vm[i]*vm[j]*sin(va[i] - va[j]) - - ij_val = sqrt(pij^2 + qij^2) - ji_val = sqrt(pji^2 + qji^2) - - limit = lines[l].rateA / data.baseMVA - if limit > 0 - if ij_val > limit || ji_val > limit - rateA_nviols += 1 - rateA_maxviol = max(rateA_maxviol, max(ij_val - limit, ji_val - limit)) - end - end - end - - return rateA_nviols, rateA_maxviol -end - -#= -function check_power_balance_alternative(model::Model, u::Vector{Float64}, v::Vector{Float64}) - baseMVA = model.baseMVA - nbus = model.nbus - gen_start, line_start, YshR, YshI = model.gen_start, model.line_start, model.YshR, model.YshI - - max_viol_real = 0.0 - max_viol_reactive = 0.0 - for b=1:nbus - real_err = 0.0 - reactive_err = 0.0 - for k=model.GenStart[b]:model.GenStart[b+1]-1 - g = model.GenIdx[k] - real_err += u[gen_start + 2*(g-1)] - reactive_err += u[gen_start + 2*(g-1)+1] - end - - real_err -= (model.Pd[b] / baseMVA) - reactive_err -= (model.Qd[b] / baseMVA) - - wi = 0 - for k=model.FrStart[b]:model.FrStart[b+1]-1 - l = model.FrIdx[k] - real_err -= v[line_start + 8*(l-1)] - reactive_err -= v[line_start + 8*(l-1) + 1] - wi = v[line_start + 8*(l-1) + 4] - end - - for k=model.ToStart[b]:model.ToStart[b+1]-1 - l = model.ToIdx[k] - real_err -= v[line_start + 8*(l-1) + 2] - reactive_err -= v[line_start + 8*(l-1) + 3] - wi = v[line_start + 8*(l-1) + 5] - end - - real_err -= YshR[b] * wi - reactive_err += YshI[b] * wi - - max_viol_real = (max_viol_real < abs(real_err)) ? abs(real_err) : max_viol_real - max_viol_reactive = (max_viol_reactive < abs(reactive_err)) ? 
abs(reactive_err) : max_viol_reactive - end - - return max_viol_real, max_viol_reactive -end - -function check_linelimit_violation(data::OPFData, v::Vector{Float64}) - lines = data.lines - nline = length(data.lines) - line_start = 2*length(data.generators) + 1 - - rateA_nviols = 0 - rateA_maxviol = 0.0 - - for l=1:nline - pij_idx = line_start + 8*(l-1) - ij_val = sqrt(v[pij_idx]^2 + v[pij_idx+1]^2) - ji_val = sqrt(v[pij_idx+2]^2 + v[pij_idx+3]^2) - - limit = lines[l].rateA / data.baseMVA - if limit > 0 - if ij_val > limit || ji_val > limit - rateA_nviols += 1 - rateA_maxviol = max(rateA_maxviol, max(ij_val - limit, ji_val - limit)) - end - end - end - - return rateA_nviols, rateA_maxviol -end -=# diff --git a/src/not_used/mpacopf_check_violations.jl b/src/not_used/mpacopf_check_violations.jl deleted file mode 100644 index 3fa8475..0000000 --- a/src/not_used/mpacopf_check_violations.jl +++ /dev/null @@ -1,8 +0,0 @@ -function check_ramp_violations(mod::Model, u_curr::Vector{Float64}, u_prev::Vector{Float64}, ramp_rate::Vector{Float64}) - max_viol = 0.0 - for g=1:mod.grid_data.ngen - pg_idx = mod.gen_start + 2*(g-1) - max_viol = max(max_viol, max(0.0, -(ramp_rate[g]-abs(u_curr[pg_idx]-u_prev[pg_idx])))) - end - return max_viol -end \ No newline at end of file diff --git a/src/utils/environment.jl b/src/utils/environment.jl index adbf4f5..20908d0 100644 --- a/src/utils/environment.jl +++ b/src/utils/environment.jl @@ -9,34 +9,36 @@ mutable struct Parameters ABSTOL::Float64 RELTOL::Float64 - rho_max::Float64 # TODO: not used - rho_min_pq::Float64 # TODO: not used - rho_min_w::Float64 # TODO: not used - eps_rp::Float64 # TODO: not used - eps_rp_min::Float64 # TODO: not used - rt_inc::Float64 # TODO: not used - rt_dec::Float64 # TODO: not used - eta::Float64 # TODO: not used + rho_max::Float64 #? not used + rho_min_pq::Float64 #? not used + rho_min_w::Float64 #? not used + eps_rp::Float64 #? not used + eps_rp_min::Float64 #? not used + rt_inc::Float64 #? not used + rt_dec::Float64 #? not used + eta::Float64 #? not used verbose::Int # MPI implementation shift_lines::Int # Two-Level ADMM - initial_beta::Float64 - beta::Float64 - inc_c::Float64 - theta::Float64 + initial_beta::Float64 #initial penalty for z when outer_iter = 1 + beta::Float64 #updated penalty for z + inc_c::Float64 #increase rate on beta + theta::Float64 #dynamic tolerance rate for norm_z_curr outer_eps::Float64 - Kf::Int # TODO: not used - Kf_mean::Int # TODO: not used + shmem_size::Int + gen_shmem_size::Int + Kf::Int #? not used + Kf_mean::Int #? 
not used MAX_MULTIPLIER::Float64 DUAL_TOL::Float64 outer_iterlim::Int inner_iterlim::Int - scale::Float64 - obj_scale::Float64 + scale::Float64 #scale Exatron eval_f, eval_g, eval_h + obj_scale::Float64 #scale obj c2, c1, c0 function Parameters() par = new() @@ -475,7 +477,7 @@ mutable struct ComplementarityModel{T,TD,TI,TM} <: AbstractOPFModel{T,TD,TI,TM} model.line_start = 4*model.grid.ngen + model.grid.nstorage + 1 # model.line_start = 3*model.grid.ngen + 1 - model.solution = SolutionOneLevel{T,TD}(model.nvar_padded) + model.solution = Solution{T,TD}(model.nvar_padded) init_solution!(model, model.solution, env.initial_rho_pq, env.initial_rho_va) model.gen_solution = EmptyGeneratorSolution{T,TD}() diff --git a/test/algorithms/qpsub_update_cpu.jl b/test/algorithms/qpsub_update_cpu.jl new file mode 100644 index 0000000..f01acca --- /dev/null +++ b/test/algorithms/qpsub_update_cpu.jl @@ -0,0 +1,239 @@ + + +case = joinpath(INSTANCES_DIR, "case9.m") +rho_pq = 20.0 #for two level +rho_va = 20.0 #for two level +initial_beta = 100000.0 #for two level +verbose = 0 + + +# Initialize an qpsub model with default options as shell for qpsub. + T = Float64; TD = Array{Float64,1}; TI = Array{Int,1}; TM = Array{Float64,2} + + env1 = ExaAdmm.AdmmEnv{T,TD,TI,TM}(case, rho_pq, rho_va; verbose=verbose) + mod1 = ExaAdmm.ModelQpsub{T,TD,TI,TM}(env1) + sol = mod1.solution + par = env1.params + data = mod1.grid_data + + pg = [0.8409840263442788, 1.3754683725459356, 0.9683515492257173] + qg = [0.1339605333132542, 0.004939066051572686, -0.22522034650172912] + line_var = [1.202283876496575 1.1854600895233969 1.1882407994325652 1.1939274133312217 1.197851907882065 1.1976278981260697 1.2041729663674776 1.1724433539605357 1.1711057984763673; 0.048440679917430465 0.02887729733428878 -0.1001598280647882 0.05674540078462704 0.03747916294088837 -0.0440601695432392 -0.08596677328412097 0.11837782697248565 -0.04146540783240968; 1.2100000032154183 1.1965562109975751 1.1751637818852145 1.1807295010262204 1.2100000106478777 1.1869866679607968 1.2100000106803914 1.2100000106803914 1.1476336495105626; 1.1965562109975751 1.1751637818852145 1.2100000106478777 1.2100000106478777 1.1869866679607968 1.2100000106803914 1.204481657995701 1.1476336495105626 1.1965562109975751; 3.25491739361945e-19 -0.04026877067245247 -0.064623523567375 0.06696282911866715 0.01947021797594214 -0.011808222565670412 0.024964724778954814 0.024964724778954814 -0.07566104101514581; -0.04026877067245247 -0.064623523567375 0.01947021797594214 0.01947021797594214 -0.011808222565670412 0.024964724778954814 0.0962345286951661 -0.07566104101514581 -0.04026877067245247] + line_fl = [0.8409840263442788 0.3250708808197873 -0.5764825106130851 0.9683515492257173 0.3807383869887371 -0.6207434318226953 -1.3754683725459356 0.7519258919133632 -0.5132124444249647; 0.1339605333132542 -0.03398537001400827 -0.1550260795071022 -0.22522034650172912 -0.05087639039200361 -0.1629431594683535 0.09323270900662127 -0.10130995083516688 -0.3167567555281643; -0.8409840263442788 -0.323517489386915 0.5876131622369802 -0.9683515492257173 -0.37925656817730474 0.6235424806325723 1.3754683725459356 -0.7367875555750353 0.5159131455244915; -0.09943863713541591 -0.14497392049289778 -0.2234000143290868 0.2742764047210904 -0.18705684053164653 0.008077241828545615 0.004939066051572686 -0.1832432444718357 0.13342400714942418] + pgb = [0.8409840263442788, 1.3754683725459356, 0.9683515492257173, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + pft = [0.8409840263442788, -3.009265538105056e-36, 0.9683515492257173, 
0.3250708808197873, -0.5764825106130851, 0.3807383869887371, -0.6207434318226953, -0.6235424806325723, -0.5132124444249647] + ptf = [0.0, 1.3754683725459356, 0.0, -0.3250708808197873, -0.323517489386915, -0.3807383869887371, -0.37925656817730474, 0.6235424806325723, -0.7367875555750353] + qgb = [0.1339605333132542, 0.004939066051572686, -0.22522034650172912, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + qft = [0.1339605333132542, 0.0, -0.22522034650172912, -0.03398537001400827, -0.1550260795071022, -0.05087639039200361, -0.1629431594683535, -0.008077241828545615, -0.3167567555281643] + qtf = [0.0, 0.004939066051572686, 0.0, 0.03398537001400827, -0.14497392049289778, 0.05087639039200361, -0.18705684053164653, 0.008077241828545615, -0.1832432444718357] + bus_w = [1.2100000032154183, 1.204481657995701, 1.1807295010262204, 1.1965562109975751, 1.1751637818852145, 1.2100000106478777, 1.1869866679607968, 1.2100000106803914, 1.1476336495105626] + + # save variable to Hs, 1h, 1j, 1i, 1k, new bound, new cost + @inbounds begin + pi_14 = -ones(4,data.nline) #set multiplier for the hessian evaluation 14h 14i 14j 14k + is_Hs_sym = zeros(data.nline) #is Hs symmetric + is_Hs_PSD = zeros(data.nline) #is Hs positive semidefinite + + + #gen bound + mod1.qpsub_pgmax .= data.pgmax - (pg) + mod1.qpsub_pgmin .= data.pgmin - (pg) + mod1.qpsub_qgmax .= data.qgmax - (qg) + mod1.qpsub_qgmin .= data.qgmin - (qg) + + #new cost coeff + mod1.qpsub_c1 = data.c1 + 2*data.c2.*(pg) + mod1.qpsub_c2 = data.c2 + + #w theta bound + for l = 1: data.nline + mod1.ls[l,1] = -2*data.FrVmBound[2*l]*data.ToVmBound[2*l] #wijR lb + mod1.us[l,1] = 2*data.FrVmBound[2*l]*data.ToVmBound[2*l] #wijR ub + mod1.ls[l,2] = -2*data.FrVmBound[2*l]*data.ToVmBound[2*l] #wijI lb + mod1.us[l,2] = 2*data.FrVmBound[2*l]*data.ToVmBound[2*l] #wijI ub + + mod1.ls[l,3] = data.FrVmBound[1+2*(l-1)]^2 - (line_var)[3,l] #wi lb + mod1.us[l,3] = data.FrVmBound[2*l]^2 - (line_var)[3,l] #wi ub + mod1.ls[l,4] = data.ToVmBound[1+2*(l-1)]^2 - (line_var)[4,l] #wj lb + mod1.us[l,4] = data.ToVmBound[2*l]^2 - (line_var)[4,l] #wj ub + + mod1.ls[l,5] = data.FrVaBound[1+2*(l-1)] - (line_var)[5,l] #ti lb + mod1.us[l,5] = data.FrVaBound[2*l] - (line_var)[5,l] #ti ub + mod1.ls[l,6] = data.ToVaBound[1+2*(l-1)] - (line_var)[6,l] #tj lb + mod1.us[l,6] = data.ToVaBound[2*l] - (line_var)[6,l] #tj ub + end + + for b = 1:data.nbus + mod1.qpsub_Pd[b] = data.baseMVA * (data.Pd[b]/data.baseMVA - ((pgb)[b] - (pft)[b] - (ptf)[b] - data.YshR[b]*(bus_w)[b])) + mod1.qpsub_Qd[b] = data.baseMVA * (data.Qd[b]/data.baseMVA - ((qgb)[b] - (qft)[b] - (qtf)[b] + data.YshI[b]*(bus_w)[b])) + end + + for l = 1: data.nline + + #Hs:(6,6) #w_ijR, w_ijI, w_i, w_j, theta_i, theta_j + Hs = zeros(6,6) + + Hs_14h = zeros(6,6) + Hs_14h[1,1] = 2*pi_14[1,l] + Hs_14h[2,2] = 2*pi_14[1,l] + Hs_14h[3,4] = -pi_14[1,l] + Hs_14h[4,3] = -pi_14[1,l] + + Hs_14i = zeros(6,6) + cons_1 = pi_14[2,l]*cos((line_var)[5,l] - (line_var)[6,l]) + cons_2 = pi_14[2,l]*sin((line_var)[5,l] - (line_var)[6,l]) + cons_3 = pi_14[2,l]*(-(line_var)[1,l]*sin((line_var)[5,l] - (line_var)[6,l]) + (line_var)[1,2]*cos((line_var)[5,l] - (line_var)[6,l])) + + Hs_14i[1,5] = cons_1 #wijR theta_i + Hs_14i[5,1] = cons_1 #wijR theta_i + Hs_14i[1,6] = -cons_1 #wijR theta_j + Hs_14i[6,1] = -cons_1 #wijR theta_j + + Hs_14i[2,5] = cons_2 #wijR theta_i + Hs_14i[5,2] = cons_2 #wijR theta_i + Hs_14i[2,6] = -cons_2 #wijR theta_j + Hs_14i[6,2] = -cons_2 #wijR theta_j + + Hs_14i[5,5] = cons_3 #thetai thetai + Hs_14i[6,6] = cons_3 #thetaj thetaj + Hs_14i[5,6] = -cons_3 #thetai 
thetaj + Hs_14i[6,5] = -cons_3 #thetaj thetai + + supY = [data.YftR[l] data.YftI[l] data.YffR[l] 0 0 0; + -data.YftI[l] data.YftR[l] -data.YffI[l] 0 0 0; + data.YtfR[l] -data.YtfI[l] 0 data.YttR[l] 0 0; + -data.YtfI[l] -data.YtfR[l] 0 -data.YttI[l] 0 0] + Hs_14j = -2*pi_14[3,l]*(supY[1,:]*transpose(supY[1,:]) + supY[2,:]*transpose(supY[2,:]) ) + Hs_14k = -2*pi_14[4,l]*(supY[3,:]*transpose(supY[3,:]) + supY[4,:]*transpose(supY[4,:]) ) + Hs .= Hs_14h + Hs_14i + Hs_14j + Hs_14k + UniformScaling(4) + mod1.Hs[6*(l-1)+1:6*l,1:6] .= Hs + + is_Hs_sym[l] = maximum(abs.(Hs - transpose(Hs))) + @assert is_Hs_sym[l] <= 1e-6 + eival, eivec = eigen(Hs) + is_Hs_PSD[l] = minimum(eival) + @assert is_Hs_PSD[l] >= 0.0 + + #inherit structure of Linear Constraint (overleaf): ignore 1h and 1i with zero assignment in ipopt benchmark + LH_1h = [2*(line_var[1,l]), 2*(line_var[2,l]), -(line_var[4,l]), -(line_var[3,l])] #LH * x = RH + mod1.LH_1h[l,:] .= LH_1h + RH_1h = -((line_var)[1,l])^2 - ((line_var)[2,l])^2 + (line_var)[3,l]*(line_var)[4,l] + mod1.RH_1h[l] = RH_1h + + LH_1i = [sin((line_var)[5,l] - (line_var)[6,l]), -cos((line_var)[5,l] - (line_var)[6,l]), + (line_var)[1,l]*cos((line_var)[5,l] - (line_var)[6,l]) + (line_var)[2,l]*sin((line_var)[5,l] - (line_var)[6,l]), + -(line_var)[1,l]*cos((line_var)[5,l] - (line_var)[6,l]) - (line_var)[2,l]*sin((line_var)[5,l] - (line_var)[6,l])] #Lf * x = RH + mod1.LH_1i[l,:] .= LH_1i + RH_1i = -(line_var)[1,l]*sin((line_var)[5,l] - (line_var)[6,l]) + (line_var)[2,l]*cos((line_var)[5,l] - (line_var)[6,l]) + mod1.RH_1i[l] = RH_1i + + #inherit structure line limit constraint (overleaf) + LH_1j = [2*(line_fl)[1,l], 2*(line_fl)[2,l]] #rand(2) + mod1.LH_1j[l,:] .= LH_1j + RH_1j = -(((line_fl)[1,l])^2 + ((line_fl)[2,l])^2 - data.rateA[l]) + mod1.RH_1j[l] = RH_1j + + LH_1k = [2*(line_fl)[3,l], 2*(line_fl)[4,l]] #zeros(2) #rand(2) + mod1.LH_1k[l,:] .= LH_1k + RH_1k = -(((line_fl)[3,l])^2 + ((line_fl)[4,l])^2 - data.rateA[l]) + mod1.RH_1k[l] = RH_1k + end + end #inbound + +# test one-iteration update +@testset "Testing [x,xbar,l,residual] updates" begin + atol = 2e-6 + env2 = ExaAdmm.AdmmEnv{T,TD,TI,TM}(case, rho_pq, rho_va; verbose=verbose) + mod2 = ExaAdmm.ModelQpsub{T,TD,TI,TM}(env2) + sol2 = mod2.solution + par2 = env2.params + data2 = mod2.grid_data + info2 = mod2.info + + env2.params.scale = 1e-4 + env2.params.initial_beta = 1e3 + env2.params.beta = 1e3 + + mod2.Hs .= mod1.Hs + mod2.LH_1h .= mod1.LH_1h + mod2.RH_1h .= mod1.RH_1h + mod2.LH_1i .= mod1.LH_1i + mod2.RH_1i .= mod1.RH_1i + mod2.LH_1j .= mod1.LH_1j + mod2.RH_1j .= mod1.RH_1j + mod2.LH_1k .= mod1.LH_1k + mod2.RH_1k .= mod1.RH_1k + mod2.ls .= mod1.ls + mod2.us .= mod1.us + + mod2.qpsub_pgmax .= mod1.qpsub_pgmax + mod2.qpsub_pgmin .= mod1.qpsub_pgmin + mod2.qpsub_qgmax .= mod1.qpsub_qgmax + mod2.qpsub_qgmin .= mod1.qpsub_qgmin + + mod2.qpsub_c1 .= mod1.qpsub_c1 + mod2.qpsub_c2 .= mod1.qpsub_c2 + mod2.qpsub_Pd .= mod1.qpsub_Pd + mod2.qpsub_Qd .= mod1.qpsub_Qd + + ExaAdmm.init_solution!(mod2, mod2.solution, env2.initial_rho_pq, env2.initial_rho_va) + + info2.norm_z_prev = info2.norm_z_curr = 0 + par2.initial_beta = 0 + par2.beta = 0 + sol2.lz .= 0 + sol2.z_curr .= 0 + sol2.z_prev .= 0 + par2.inner_iterlim = 1 + + + ExaAdmm.admm_increment_outer(env2, mod2) + ExaAdmm.admm_outer_prestep(env2, mod2) + ExaAdmm.admm_increment_reset_inner(env2, mod2) + ExaAdmm.admm_increment_inner(env2, mod2) + ExaAdmm.admm_inner_prestep(env2, mod2) + + + ExaAdmm.admm_update_x(env2, mod2) + U_SOL = [-0.229424, -0.1339605, -0.0813327, -0.0049391, 
-0.0465958, 0.2252203, -0.1236772, -0.1271249, 0.1236772, 0.1189746, -0.1182455, -0.1040701, -0.0, 0.0022044, -0.0630247, -0.1092983, 0.0622891, 0.1082608, -0.0297814, -0.0074735, 0.0422504, 0.0451598, 0.0984247, 0.0700557, -0.102193, -0.0905333, 0.0294319, -0.0067947, 0.0250279, 0.0125998, -0.1368894, 0.2542204, 0.1368894, -0.2698603, -0.0770438, -0.1077549, -0.0375397, -0.0344878, -0.0665263, -0.1146067, 0.0658612, 0.1071325, -0.0032826, 0.0208988, -0.0055371, -0.0008479, 0.1049601, 0.1480269, -0.1061072, -0.1593811, 0.0230133, -0.0010432, -0.0026878, -0.0082759, 0.2045771, -0.05922, -0.2045771, 0.0340703, -0.0553384, -0.0495078, -0.0467405, -0.0542589, -0.1322638, -0.1856806, 0.1264451, 0.1533923, -0.0223818, 0.0420754, 0.0141644, 0.0280826, 0.0937455, 0.2743344, -0.0957246, -0.2938647, 0.0406687, -0.0099012, 0.0507601, 0.0458481] + @test norm( sol2.u_curr .- U_SOL, Inf) <= atol + + + + ExaAdmm.admm_update_xbar(env2, mod2) + V_SOL = [-0.1765506, -0.1305427, -0.1429549, 0.0145656, -0.0917426, 0.2397204, -0.1765506, -0.1305427, 0.1353679, 0.2137041, -0.1182455, -0.0479176, -0.0, 0.030101, -0.051334, -0.0145688, -0.0180678, 0.0191025, -0.0479176, 0.0109792, 0.030101, 0.0350939, 0.0180678, -0.0191025, -0.091583, 0.0678001, 0.0109792, -0.0392774, 0.0350939, -0.0091417, -0.0917426, 0.2397204, 0.1474993, -0.1115269, -0.0770438, -0.0392774, -0.0375397, -0.0091417, -0.0559163, 0.0437267, -0.0195494, -0.0204472, -0.0392774, 0.0219561, -0.0091417, -0.0017678, 0.0195494, 0.0204472, -0.0948426, -0.0246205, 0.0219561, -0.0262545, -0.0017678, -0.0136173, 0.2158417, 0.0755405, -0.1429549, 0.0145656, -0.0262545, -0.0495078, -0.0136173, -0.0542589, -0.1209991, -0.05092, 0.0163498, -0.0604711, -0.0262545, 0.041372, -0.0136173, 0.0394213, -0.0163498, 0.0604711, -0.0840339, -0.1991353, 0.041372, -0.0479176, 0.0394213, 0.030101] + @test norm( sol2.v_curr .- V_SOL, Inf) <= atol + + + + + ExaAdmm.admm_update_l_single(env2, mod2) + L_SOL = [-1.057468, -0.0683565, 1.2324431, -0.390094, 0.9029359, -0.2900002, 1.057468, 0.0683565, -0.2338139, -1.8945894, 0.0, -1.1230512, 0.0, -0.5579311, -0.2338139, -1.8945894, 1.6071386, 1.7831649, 0.362723, -0.3690544, 0.2429884, 0.201319, 1.6071386, 1.7831649, -0.2121989, -3.166669, 0.3690544, 0.6496544, -0.201319, 0.4348308, -0.9029359, 0.2900002, -0.2121989, -3.166669, 0.0, -1.3695503, 0.0, -0.5069225, -0.2121989, -3.166669, 1.7082129, 2.5515935, 0.7198958, -0.0211449, 0.0720918, 0.0183997, 1.7082129, 2.5515935, -0.2252933, -2.6952112, 0.0211449, 0.5042264, -0.0183997, 0.1068284, -0.2252933, -2.6952112, -1.2324431, 0.390094, -0.5816791, 0.0, -0.6624625, 0.0, -0.2252933, -2.6952112, 2.2019063, 4.277267, 0.0774527, 0.0140663, 0.5556341, -0.2267749, 2.2019063, 4.277267, -0.2338139, -1.8945894, -0.0140663, 0.7603282, 0.2267749, 0.3149427] + @test norm( sol2.l_curr .- L_SOL, Inf) <= atol + + + + ExaAdmm.admm_update_residual(env2, mod2) + RP_SOL = [-0.0528734, -0.0034178, 0.0616222, -0.0195047, 0.0451468, -0.0145, 0.0528734, 0.0034178, -0.0116907, -0.0947295, 0.0, -0.0561526, 0.0, -0.0278966, -0.0116907, -0.0947295, 0.0803569, 0.0891582, 0.0181362, -0.0184527, 0.0121494, 0.0100659, 0.0803569, 0.0891582, -0.0106099, -0.1583335, 0.0184527, 0.0324827, -0.0100659, 0.0217415, -0.0451468, 0.0145, -0.0106099, -0.1583335, 0.0, -0.0684775, 0.0, -0.0253461, -0.0106099, -0.1583335, 0.0854106, 0.1275797, 0.0359948, -0.0010572, 0.0036046, 0.00092, 0.0854106, 0.1275797, -0.0112647, -0.1347606, 0.0010572, 0.0252113, -0.00092, 0.0053414, -0.0112647, -0.1347606, -0.0616222, 0.0195047, -0.029084, 
0.0, -0.0331231, 0.0, -0.0112647, -0.1347606, 0.1100953, 0.2138633, 0.0038726, 0.0007033, 0.0277817, -0.0113387, 0.1100953, 0.2138633, -0.0116907, -0.0947295, -0.0007033, 0.0380164, 0.0113387, 0.0157471] + RD_SOL = [-12.7113319, 0.0683565, -6.3497307, 0.390094, -10.4678211, 0.2900002, -3.5310125, 66.8335931, 2.7073584, 69.0505462, 1.6350909, 2.7727729, 0.0, -0.2033556, 6.2198775, 38.6305254, 6.0542368, 34.8407725, 2.7727729, 3.5228599, -0.2033556, -0.5905927, 4.596186, 17.4861768, 3.2963767, 22.9929842, 3.5228599, 3.2144525, -0.5905927, 0.2065699, -1.8348521, 63.0638615, 2.9499869, 66.0288518, 1.8737146, 3.2144525, 0.5884628, 0.2065699, 3.5020235, 39.5936187, 3.6977135, 33.8548621, 3.2144525, 3.9788551, 0.2065699, -0.2715214, 6.1151706, 48.6324263, 4.5716387, 54.0015076, 3.9788551, 3.4749108, -0.2715214, 0.2269477, 4.3168345, 65.510814, -2.8590981, 62.5254434, 3.4749108, 2.8994775, 0.2269477, 0.8395123, 2.3304349, 22.2701391, 3.5960831, 14.8170115, 3.4749108, 3.580114, 0.2269477, -0.7247947, 3.4309189, 32.9094673, 3.4130062, 38.9852785, 3.580114, 2.7727729, -0.7247947, -0.2033556] + + @test norm( sol2.rp .- RP_SOL, Inf) <= atol + @test norm( sol2.rd .- RD_SOL, Inf) <= atol + + +end #@testset + + +#test solution +@testset "Qpsub ADMM (CPU) vs IPOPT" begin + env3, mod3 = ExaAdmm.solve_qpsub(case, mod1.Hs, mod1.LH_1h, mod1.RH_1h, + mod1.LH_1i, mod1.RH_1i, mod1.LH_1j, mod1.RH_1j, mod1.LH_1k, mod1.RH_1k, mod1.ls, mod1.us, mod1.qpsub_pgmax, mod1.qpsub_pgmin, mod1.qpsub_qgmax, mod1.qpsub_qgmin, mod1.qpsub_c1, mod1.qpsub_c2, mod1.qpsub_Pd, mod1.qpsub_Qd, + initial_beta; + outer_iterlim=10000, inner_iterlim=1, scale = 1e-4, obj_scale = 1, rho_pq = 4000.0, rho_va = 4000.0, verbose=0, outer_eps=2*1e-6, onelevel = true) + + + @test mod3.info.status == :Solved + @test mod3.info.outer == 5107 + @test mod3.info.cumul == 5107 + @test isapprox(mod3.info.objval, -21.92744641968529; atol=1e-6) +end \ No newline at end of file diff --git a/test/algorithms/qpsub_update_gpu.jl b/test/algorithms/qpsub_update_gpu.jl new file mode 100644 index 0000000..49a7ae4 --- /dev/null +++ b/test/algorithms/qpsub_update_gpu.jl @@ -0,0 +1,358 @@ +@testset "Testing [x,xbar,l,res] updates and a solve for ACOPF" begin + +rho_pq = 20.0 #for two level +rho_va = 20.0 #for two level +initial_beta = 100000.0 #for two level +atol = 2e-6 +verbose = 0 +use_gpu = true + +case = joinpath(INSTANCES_DIR, "case9.m") + +# Initialize an cpu qpsub model with default options as shell for qpsub. 
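+# The model built next with plain Array storage is only a CPU "shell" holding the QP
+# subproblem data (Hs, the 1h-1k linearizations, the bounds ls/us, and the adjusted
+# generator cost/limit data); the device model constructed further below uses CuArray
+# storage and receives copies of these arrays. A minimal sketch of how the storage
+# types pick the backend for that second model (illustrative only, assuming a working
+# CUDA device and the use_gpu flag defined above):
+#   TD = use_gpu ? CuArray{Float64,1} : Array{Float64,1}
+#   TI = use_gpu ? CuArray{Int,1}     : Array{Int,1}
+#   TM = use_gpu ? CuArray{Float64,2} : Array{Float64,2}
+#   env = ExaAdmm.AdmmEnv{Float64,TD,TI,TM}(case, rho_pq, rho_va; use_gpu=use_gpu, verbose=verbose)
+#   mod = ExaAdmm.ModelQpsub{Float64,TD,TI,TM}(env)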
+ T = Float64; TD = Array{Float64,1}; TI = Array{Int,1}; TM = Array{Float64,2} + + env1 = ExaAdmm.AdmmEnv{T,TD,TI,TM}(case, rho_pq, rho_va; verbose=verbose) + mod1 = ExaAdmm.ModelQpsub{T,TD,TI,TM}(env1) + sol = mod1.solution + par = env1.params + data = mod1.grid_data + pg = [0.8409840263442788, 1.3754683725459356, 0.9683515492257173] + qg = [0.1339605333132542, 0.004939066051572686, -0.22522034650172912] + line_var = [1.202283876496575 1.1854600895233969 1.1882407994325652 1.1939274133312217 1.197851907882065 1.1976278981260697 1.2041729663674776 1.1724433539605357 1.1711057984763673; 0.048440679917430465 0.02887729733428878 -0.1001598280647882 0.05674540078462704 0.03747916294088837 -0.0440601695432392 -0.08596677328412097 0.11837782697248565 -0.04146540783240968; 1.2100000032154183 1.1965562109975751 1.1751637818852145 1.1807295010262204 1.2100000106478777 1.1869866679607968 1.2100000106803914 1.2100000106803914 1.1476336495105626; 1.1965562109975751 1.1751637818852145 1.2100000106478777 1.2100000106478777 1.1869866679607968 1.2100000106803914 1.204481657995701 1.1476336495105626 1.1965562109975751; 3.25491739361945e-19 -0.04026877067245247 -0.064623523567375 0.06696282911866715 0.01947021797594214 -0.011808222565670412 0.024964724778954814 0.024964724778954814 -0.07566104101514581; -0.04026877067245247 -0.064623523567375 0.01947021797594214 0.01947021797594214 -0.011808222565670412 0.024964724778954814 0.0962345286951661 -0.07566104101514581 -0.04026877067245247] + line_fl = [0.8409840263442788 0.3250708808197873 -0.5764825106130851 0.9683515492257173 0.3807383869887371 -0.6207434318226953 -1.3754683725459356 0.7519258919133632 -0.5132124444249647; 0.1339605333132542 -0.03398537001400827 -0.1550260795071022 -0.22522034650172912 -0.05087639039200361 -0.1629431594683535 0.09323270900662127 -0.10130995083516688 -0.3167567555281643; -0.8409840263442788 -0.323517489386915 0.5876131622369802 -0.9683515492257173 -0.37925656817730474 0.6235424806325723 1.3754683725459356 -0.7367875555750353 0.5159131455244915; -0.09943863713541591 -0.14497392049289778 -0.2234000143290868 0.2742764047210904 -0.18705684053164653 0.008077241828545615 0.004939066051572686 -0.1832432444718357 0.13342400714942418] + pgb = [0.8409840263442788, 1.3754683725459356, 0.9683515492257173, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + pft = [0.8409840263442788, -3.009265538105056e-36, 0.9683515492257173, 0.3250708808197873, -0.5764825106130851, 0.3807383869887371, -0.6207434318226953, -0.6235424806325723, -0.5132124444249647] + ptf = [0.0, 1.3754683725459356, 0.0, -0.3250708808197873, -0.323517489386915, -0.3807383869887371, -0.37925656817730474, 0.6235424806325723, -0.7367875555750353] + qgb = [0.1339605333132542, 0.004939066051572686, -0.22522034650172912, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + qft = [0.1339605333132542, 0.0, -0.22522034650172912, -0.03398537001400827, -0.1550260795071022, -0.05087639039200361, -0.1629431594683535, -0.008077241828545615, -0.3167567555281643] + qtf = [0.0, 0.004939066051572686, 0.0, 0.03398537001400827, -0.14497392049289778, 0.05087639039200361, -0.18705684053164653, 0.008077241828545615, -0.1832432444718357] + bus_w = [1.2100000032154183, 1.204481657995701, 1.1807295010262204, 1.1965562109975751, 1.1751637818852145, 1.2100000106478777, 1.1869866679607968, 1.2100000106803914, 1.1476336495105626] + + + # save variable to Hs, 1h, 1j, 1i, 1k, new bound, new cost + @inbounds begin + pi_14 = -ones(4,data.nline) #set multiplier for the hessian evaluation 14h 14i 14j 14k + is_Hs_sym = zeros(data.nline) #is Hs 
symmetric + is_Hs_PSD = zeros(data.nline) #is Hs positive semidefinite + + + #gen bound + mod1.qpsub_pgmax .= data.pgmax - (pg) + mod1.qpsub_pgmin .= data.pgmin - (pg) + mod1.qpsub_qgmax .= data.qgmax - (qg) + mod1.qpsub_qgmin .= data.qgmin - (qg) + + #new cost coeff + mod1.qpsub_c1 = data.c1 + 2*data.c2.*(pg) + mod1.qpsub_c2 = data.c2 + + #w theta bound + for l = 1: data.nline + mod1.ls[l,1] = -2*data.FrVmBound[2*l]*data.ToVmBound[2*l] #wijR lb + mod1.us[l,1] = 2*data.FrVmBound[2*l]*data.ToVmBound[2*l] #wijR ub + mod1.ls[l,2] = -2*data.FrVmBound[2*l]*data.ToVmBound[2*l] #wijI lb + mod1.us[l,2] = 2*data.FrVmBound[2*l]*data.ToVmBound[2*l] #wijI ub + + mod1.ls[l,3] = data.FrVmBound[1+2*(l-1)]^2 - (line_var)[3,l] #wi lb + mod1.us[l,3] = data.FrVmBound[2*l]^2 - (line_var)[3,l] #wi ub + mod1.ls[l,4] = data.ToVmBound[1+2*(l-1)]^2 - (line_var)[4,l] #wj lb + mod1.us[l,4] = data.ToVmBound[2*l]^2 - (line_var)[4,l] #wj ub + + mod1.ls[l,5] = data.FrVaBound[1+2*(l-1)] - (line_var)[5,l] #ti lb + mod1.us[l,5] = data.FrVaBound[2*l] - (line_var)[5,l] #ti ub + mod1.ls[l,6] = data.ToVaBound[1+2*(l-1)] - (line_var)[6,l] #tj lb + mod1.us[l,6] = data.ToVaBound[2*l] - (line_var)[6,l] #tj ub + end + + for b = 1:data.nbus + mod1.qpsub_Pd[b] = data.baseMVA * (data.Pd[b]/data.baseMVA - ((pgb)[b] - (pft)[b] - (ptf)[b] - data.YshR[b]*(bus_w)[b])) + mod1.qpsub_Qd[b] = data.baseMVA * (data.Qd[b]/data.baseMVA - ((qgb)[b] - (qft)[b] - (qtf)[b] + data.YshI[b]*(bus_w)[b])) + end + + for l = 1: data.nline + #Hs:(6,6) #w_ijR, w_ijI, w_i, w_j, theta_i, theta_j + Hs = zeros(6,6) + + Hs_14h = zeros(6,6) + Hs_14h[1,1] = 2*pi_14[1,l] + Hs_14h[2,2] = 2*pi_14[1,l] + Hs_14h[3,4] = -pi_14[1,l] + Hs_14h[4,3] = -pi_14[1,l] + + Hs_14i = zeros(6,6) + cons_1 = pi_14[2,l]*cos((line_var)[5,l] - (line_var)[6,l]) + cons_2 = pi_14[2,l]*sin((line_var)[5,l] - (line_var)[6,l]) + cons_3 = pi_14[2,l]*(-(line_var)[1,l]*sin((line_var)[5,l] - (line_var)[6,l]) + (line_var)[1,2]*cos((line_var)[5,l] - (line_var)[6,l])) + + Hs_14i[1,5] = cons_1 #wijR theta_i + Hs_14i[5,1] = cons_1 #wijR theta_i + Hs_14i[1,6] = -cons_1 #wijR theta_j + Hs_14i[6,1] = -cons_1 #wijR theta_j + + Hs_14i[2,5] = cons_2 #wijR theta_i + Hs_14i[5,2] = cons_2 #wijR theta_i + Hs_14i[2,6] = -cons_2 #wijR theta_j + Hs_14i[6,2] = -cons_2 #wijR theta_j + + Hs_14i[5,5] = cons_3 #thetai thetai + Hs_14i[6,6] = cons_3 #thetaj thetaj + Hs_14i[5,6] = -cons_3 #thetai thetaj + Hs_14i[6,5] = -cons_3 #thetaj thetai + + supY = [data.YftR[l] data.YftI[l] data.YffR[l] 0 0 0; + -data.YftI[l] data.YftR[l] -data.YffI[l] 0 0 0; + data.YtfR[l] -data.YtfI[l] 0 data.YttR[l] 0 0; + -data.YtfI[l] -data.YtfR[l] 0 -data.YttI[l] 0 0] + Hs_14j = -2*pi_14[3,l]*(supY[1,:]*transpose(supY[1,:]) + supY[2,:]*transpose(supY[2,:]) ) + Hs_14k = -2*pi_14[4,l]*(supY[3,:]*transpose(supY[3,:]) + supY[4,:]*transpose(supY[4,:]) ) + Hs .= Hs_14h + Hs_14i + Hs_14j + Hs_14k + UniformScaling(4) + mod1.Hs[6*(l-1)+1:6*l,1:6] .= Hs + + is_Hs_sym[l] = maximum(abs.(Hs - transpose(Hs))) + @assert is_Hs_sym[l] <= 1e-6 + eival, eivec = eigen(Hs) + is_Hs_PSD[l] = minimum(eival) + @assert is_Hs_PSD[l] >= 0.0 + + #inherit structure of Linear Constraint (overleaf): ignore 1h and 1i with zero assignment in ipopt benchmark + LH_1h = [2*(line_var[1,l]), 2*(line_var[2,l]), -(line_var[4,l]), -(line_var[3,l])] #LH * x = RH + mod1.LH_1h[l,:] .= LH_1h + RH_1h = -((line_var)[1,l])^2 - ((line_var)[2,l])^2 + (line_var)[3,l]*(line_var)[4,l] + mod1.RH_1h[l] = RH_1h + + LH_1i = [sin((line_var)[5,l] - (line_var)[6,l]), -cos((line_var)[5,l] - (line_var)[6,l]), + 
(line_var)[1,l]*cos((line_var)[5,l] - (line_var)[6,l]) + (line_var)[2,l]*sin((line_var)[5,l] - (line_var)[6,l]), + -(line_var)[1,l]*cos((line_var)[5,l] - (line_var)[6,l]) - (line_var)[2,l]*sin((line_var)[5,l] - (line_var)[6,l])] #Lf * x = RH + mod1.LH_1i[l,:] .= LH_1i + RH_1i = -(line_var)[1,l]*sin((line_var)[5,l] - (line_var)[6,l]) + (line_var)[2,l]*cos((line_var)[5,l] - (line_var)[6,l]) + mod1.RH_1i[l] = RH_1i + + #inherit structure line limit constraint (overleaf) + LH_1j = [2*(line_fl)[1,l], 2*(line_fl)[2,l]] #rand(2) + mod1.LH_1j[l,:] .= LH_1j + RH_1j = -(((line_fl)[1,l])^2 + ((line_fl)[2,l])^2 - data.rateA[l]) + mod1.RH_1j[l] = RH_1j + + LH_1k = [2*(line_fl)[3,l], 2*(line_fl)[4,l]] #zeros(2) #rand(2) + mod1.LH_1k[l,:] .= LH_1k + RH_1k = -(((line_fl)[3,l])^2 + ((line_fl)[4,l])^2 - data.rateA[l]) + mod1.RH_1k[l] = RH_1k + end + end #inbound + + # initialize type + if use_gpu + TD = CuArray{Float64,1}; TI = CuArray{Int,1}; TM = CuArray{Float64,2} + end + + env2 = ExaAdmm.AdmmEnv{T,TD,TI,TM}(case, rho_pq, rho_va; use_gpu = true, verbose=verbose) + mod2 = ExaAdmm.ModelQpsub{T,TD,TI,TM}(env2) + sol2 = mod2.solution + par2 = env2.params + data2 = mod2.grid_data + info2 = mod2.info + + env2.params.scale = 1e-4 + env2.params.initial_beta = 1e3 + env2.params.beta = 1e3 + + mod2.Hs = copy(mod1.Hs) + mod2.LH_1h = copy(mod1.LH_1h) + mod2.RH_1h = copy(mod1.RH_1h) + mod2.LH_1i = copy(mod1.LH_1i) + mod2.RH_1i = copy(mod1.RH_1i) + mod2.LH_1j = copy(mod1.LH_1j) + mod2.RH_1j = copy(mod1.RH_1j) + mod2.LH_1k = copy(mod1.LH_1k) + mod2.RH_1k = copy(mod1.RH_1k) + mod2.ls = copy(mod1.ls) + mod2.us = copy(mod1.us) + + mod2.qpsub_pgmax = copy(mod1.qpsub_pgmax) + mod2.qpsub_pgmin = copy(mod1.qpsub_pgmin) + mod2.qpsub_qgmax = copy(mod1.qpsub_qgmax) + mod2.qpsub_qgmin = copy(mod1.qpsub_qgmin) + + mod2.qpsub_c1 = copy(mod1.qpsub_c1) + mod2.qpsub_c2 = copy(mod1.qpsub_c2) + mod2.qpsub_Pd = copy(mod1.qpsub_Pd) + mod2.qpsub_Qd = copy(mod1.qpsub_Qd) + + ExaAdmm.init_solution!(mod2, mod2.solution, env2.initial_rho_pq, env2.initial_rho_va) + + info2.norm_z_prev = info2.norm_z_curr = 0 + par2.initial_beta = 0 + par2.beta = 0 + sol2.lz .= 0 + sol2.z_curr .= 0 + sol2.z_prev .= 0 + par2.inner_iterlim = 1 + par2.shmem_size = sizeof(Float64)*(16*mod2.n+4*mod2.n^2+178) + sizeof(Int)*(4*mod2.n) + + + ExaAdmm.admm_increment_outer(env2, mod2) + ExaAdmm.admm_increment_reset_inner(env2, mod2) + ExaAdmm.admm_increment_inner(env2, mod2) + + + + ExaAdmm.admm_update_x(env2, mod2) + U_SOL_CPU = [-0.229424, -0.1339605, -0.0813327, -0.0049391, -0.0465958, 0.2252203, -0.1236772, -0.1271249, 0.1236772, 0.1189746, -0.1182455, -0.1040701, -0.0, 0.0022044, -0.0630247, -0.1092983, 0.0622891, 0.1082608, -0.0297814, -0.0074735, 0.0422504, 0.0451598, 0.0984247, 0.0700557, -0.102193, -0.0905333, 0.0294319, -0.0067947, 0.0250279, 0.0125998, -0.1368894, 0.2542204, 0.1368894, -0.2698603, -0.0770438, -0.1077549, -0.0375397, -0.0344878, -0.0665263, -0.1146067, 0.0658612, 0.1071325, -0.0032826, 0.0208988, -0.0055371, -0.0008479, 0.1049601, 0.1480269, -0.1061072, -0.1593811, 0.0230133, -0.0010432, -0.0026878, -0.0082759, 0.2045771, -0.05922, -0.2045771, 0.0340703, -0.0553384, -0.0495078, -0.0467405, -0.0542589, -0.1322638, -0.1856806, 0.1264451, 0.1533923, -0.0223818, 0.0420754, 0.0141644, 0.0280826, 0.0937455, 0.2743344, -0.0957246, -0.2938647, 0.0406687, -0.0099012, 0.0507601, 0.0458481] + @test norm(U_SOL_CPU - Array(sol2.u_curr), Inf) <= atol + + + ExaAdmm.admm_update_xbar(env2, mod2) + V_SOL_CPU = [-0.1765506, -0.1305427, -0.1429549, 0.0145656, 
-0.0917426, 0.2397204, -0.1765506, -0.1305427, 0.1353679, 0.2137041, -0.1182455, -0.0479176, -0.0, 0.030101, -0.051334, -0.0145688, -0.0180678, 0.0191025, -0.0479176, 0.0109792, 0.030101, 0.0350939, 0.0180678, -0.0191025, -0.091583, 0.0678001, 0.0109792, -0.0392774, 0.0350939, -0.0091417, -0.0917426, 0.2397204, 0.1474993, -0.1115269, -0.0770438, -0.0392774, -0.0375397, -0.0091417, -0.0559163, 0.0437267, -0.0195494, -0.0204472, -0.0392774, 0.0219561, -0.0091417, -0.0017678, 0.0195494, 0.0204472, -0.0948426, -0.0246205, 0.0219561, -0.0262545, -0.0017678, -0.0136173, 0.2158417, 0.0755405, -0.1429549, 0.0145656, -0.0262545, -0.0495078, -0.0136173, -0.0542589, -0.1209991, -0.05092, 0.0163498, -0.0604711, -0.0262545, 0.041372, -0.0136173, 0.0394213, -0.0163498, 0.0604711, -0.0840339, -0.1991353, 0.041372, -0.0479176, 0.0394213, 0.030101] + @test norm(V_SOL_CPU - Array(sol2.v_curr), Inf) <= atol + + + + ExaAdmm.admm_update_l_single(env2, mod2) + L_SOL_CPU = [-1.057468, -0.0683565, 1.2324431, -0.390094, 0.9029359, -0.2900002, 1.057468, 0.0683565, -0.2338139, -1.8945894, 0.0, -1.1230512, 0.0, -0.5579311, -0.2338139, -1.8945894, 1.6071386, 1.7831649, 0.362723, -0.3690544, 0.2429884, 0.201319, 1.6071386, 1.7831649, -0.2121989, -3.166669, 0.3690544, 0.6496544, -0.201319, 0.4348308, -0.9029359, 0.2900002, -0.2121989, -3.166669, 0.0, -1.3695503, 0.0, -0.5069225, -0.2121989, -3.166669, 1.7082129, 2.5515935, 0.7198958, -0.0211449, 0.0720918, 0.0183997, 1.7082129, 2.5515935, -0.2252933, -2.6952112, 0.0211449, 0.5042264, -0.0183997, 0.1068284, -0.2252933, -2.6952112, -1.2324431, 0.390094, -0.5816791, 0.0, -0.6624625, 0.0, -0.2252933, -2.6952112, 2.2019063, 4.277267, 0.0774527, 0.0140663, 0.5556341, -0.2267749, 2.2019063, 4.277267, -0.2338139, -1.8945894, -0.0140663, 0.7603282, 0.2267749, 0.3149427] + @test norm(L_SOL_CPU - Array(sol2.l_curr), Inf) <= atol + + + + + ExaAdmm.admm_update_residual(env2, mod2) + RD_SOL_CPU = [-12.7113319, 0.0683565, -6.3497307, 0.390094, -10.4678211, 0.2900002, -3.5310124, 66.8335914, 2.7073584, 69.0505445, 1.6350909, 2.7727729, 0.0, -0.2033556, 6.2198773, 38.6305246, 6.0542367, 34.8407717, 2.7727729, 3.5228599, -0.2033556, -0.5905927, 4.5961859, 17.4861766, 3.2963767, 22.992984, 3.5228599, 3.2144525, -0.5905927, 0.2065699, -1.8348521, 63.0638614, 2.9499869, 66.0288516, 1.8737146, 3.2144525, 0.5884628, 0.2065699, 3.5020235, 39.5936185, 3.6977135, 33.8548619, 3.2144525, 3.9788551, 0.2065699, -0.2715214, 6.1151706, 48.632426, 4.5716386, 54.0015074, 3.9788551, 3.4749108, -0.2715214, 0.2269477, 4.3168345, 65.5108139, -2.8590981, 62.5254432, 3.4749108, 2.8994775, 0.2269477, 0.8395123, 2.3304348, 22.2701389, 3.596083, 14.8170113, 3.4749108, 3.5801139, 0.2269477, -0.7247947, 3.4309188, 32.9094664, 3.4130061, 38.9852777, 3.5801139, 2.7727729, -0.7247947, -0.2033556] + RP_SOL_CPU = [-0.0528734, -0.0034178, 0.0616222, -0.0195047, 0.0451468, -0.0145, 0.0528734, 0.0034178, -0.0116907, -0.0947295, 0.0, -0.0561526, 0.0, -0.0278966, -0.0116907, -0.0947295, 0.0803569, 0.0891582, 0.0181362, -0.0184527, 0.0121494, 0.0100659, 0.0803569, 0.0891582, -0.0106099, -0.1583335, 0.0184527, 0.0324827, -0.0100659, 0.0217415, -0.0451468, 0.0145, -0.0106099, -0.1583335, 0.0, -0.0684775, 0.0, -0.0253461, -0.0106099, -0.1583335, 0.0854106, 0.1275797, 0.0359948, -0.0010572, 0.0036046, 0.00092, 0.0854106, 0.1275797, -0.0112647, -0.1347606, 0.0010572, 0.0252113, -0.00092, 0.0053414, -0.0112647, -0.1347606, -0.0616222, 0.0195047, -0.029084, 0.0, -0.0331231, 0.0, -0.0112647, -0.1347606, 0.1100953, 0.2138633, 
0.0038726, 0.0007033, 0.0277817, -0.0113387, 0.1100953, 0.2138633, -0.0116907, -0.0947295, -0.0007033, 0.0380164, 0.0113387, 0.0157471] + @test norm(RD_SOL_CPU - Array(sol2.rd), Inf) <= atol + @test norm(RP_SOL_CPU - Array(sol2.rp), Inf) <= atol +end + + +@testset "Qpsub ADMM (GPU) vs ADMM (CPU)" begin + env3, mod3 = ExaAdmm.solve_qpsub(case, mod1.Hs, mod1.LH_1h, mod1.RH_1h, + mod1.LH_1i, mod1.RH_1i, mod1.LH_1j, mod1.RH_1j, mod1.LH_1k, mod1.RH_1k, mod1.ls, mod1.us, mod1.qpsub_pgmax, mod1.qpsub_pgmin, mod1.qpsub_qgmax, mod1.qpsub_qgmin, mod1.qpsub_c1, mod1.qpsub_c2, mod1.qpsub_Pd, mod1.qpsub_Qd, + initial_beta; + outer_iterlim=10000, inner_iterlim=1, scale = 1e-4, obj_scale = 1, rho_pq = 4000.0, rho_va = 4000.0, verbose=0, outer_eps=2*1e-6, onelevel = true, use_gpu = true) + + + @test mod3.info.status == :Solved + @test mod3.info.outer == 5107 + @test mod3.info.cumul == 5107 + @test isapprox(mod3.info.objval, -21.92744641968529; atol=1e-6) + + sol3 = mod3.solution + atol = 1e-6 + + dpg_sol_cpu = [-0.11550455128840183 + 0.06549995591228526 + 0.053676513010466456] + + dqg_sol_cpu = [-0.007903958357205233 + 0.019785362536583685 + 0.011746408694041054] + + dline_var_cpu = [-0.00630369 -0.00556137 -0.00333908 0.000404039 9.04974e-6 -8.98924e-6 0.000661247 -0.00373931 -0.00567469 + -0.00665306 -0.00509225 -0.00938981 0.00314544 -0.000313391 -0.000223849 -0.00409375 0.00970327 0.00512255 + -0.00675896 -0.0063758 -0.00500354 0.00109238 -1.06479e-8 -1.48199e-6 -1.06804e-8 -1.06804e-8 -0.00534789 + -0.00637581 -0.00500357 -1.06479e-8 -1.06479e-8 -1.48608e-6 -1.06804e-8 0.00189783 -0.00534788 -0.0063758 + -3.25492e-19 0.00531377 0.00949253 0.0201868 0.0175742 0.0178357 0.0180226 0.0180226 0.00951113 + 0.00531381 0.00949261 0.0175743 0.0175742 0.0178358 0.0180226 0.021366 0.00951121 0.00531385] + + dline_fl_cpu = [ -0.115505 -0.0551048 -0.0546064 0.0536765 -0.00307677 -0.00305414 -0.0655 0.0624192 0.0598887 + -0.00790396 0.00183362 0.00363203 0.0117464 0.000273346 0.000464936 -0.0105801 0.0108191 -0.00273044 + 0.115505 0.0546064 0.0567533 -0.0536765 0.00305414 0.0030808 0.0655 -0.0598887 -0.0603997 + -0.00125214 -0.00363203 0.00662172 -0.00689505 -0.000464924 -0.000239002 0.0197854 0.00273044 -0.000581486] + + dtheta_sol_cpu = [-3.25491739361945e-19 + 0.021365998519708898 + 0.020186769395364127 + 0.005313812154732135 + 0.009492570006911722 + 0.017574220141571076 + 0.017835751182986154 + 0.01802260774972062 + 0.009511171536802466] + + dw_sol_cpu = [-0.006758956828784941 + 0.0018978329222831053 + 0.0010923793242977361 + -0.006375802955645012 + -0.005003557142581212 + -1.0647877468628053e-8 + -1.484035364757708e-6 + -1.068039123808262e-8 + -0.0053478835040742636] + + dual_infeas_1_cpu = [-254.11001283448402 + 111.34992505088496 + 131.50745687564282 + 0.31062229996245705 + -8.03424172781389 + -0.3078548536034032 + -0.07573915941618026 + 0.012603427412195068 + 0.008651826283358158 + 0.03279612499623894] + + dual_infeas_2_cpu = [-2.337593793742112 + -0.2062996739647522 + 0.10994572761041102 + 0.031770554831216935 + 0.02745497289931917 + -0.11873053563714611 + -1.2564033339874354 + -0.12073294817351554 + 0.2121498763329952 + 0.05086202591219009] + + dual_infeas_3_cpu = [0.057405134084524315 + -0.16737701814667477 + 3.670092553395319 + 0.4052710265115234 + -0.2342336895471848 + 0.07724866682808983 + 0.07379529136930803 + 0.004080775509334406 + -0.12229701235980252 + -0.0018175498192630844] + + dual_infeas_4_cpu = [-0.0019510899995607155 + 0.07059765612753019 + 0.07104222896868044 + -0.0061070561232362 + 
-0.17080362240460334 + 0.0027843138173676155 + 0.003450443341928419 + 0.07157328724238488 + 0.07186014778306046 + -0.28991023775276664] + + dual_infeas_5_cpu = [-4.200422692029046 + -0.33666635407314044 + 0.6407231013041463 + 0.0753794333813387 + 0.08217499064640721 + -0.18387848500073856 + 1.4609510208883514 + 0.26889163927860954 + -0.13184568446925882 + 0.065799538338272 + 0.04433557788772256 + 0.06271507036787678 + 2.807937471979002 + 0.0728632119983576 + -0.20915786828282304 + 0.03875039932653859 + 0.020549534704044203] + + dual_infeas_cpu = vcat(dual_infeas_1_cpu,dual_infeas_2_cpu,dual_infeas_3_cpu,dual_infeas_4_cpu,dual_infeas_5_cpu) + + lambda_cpu = [0.488894 435.795 282.155 -0.0489912 248.787 348.203 0.135763 267.164 315.305 + 0.790211 0.88808 0.816501 -0.0653646 0.576725 0.456339 0.0687841 0.210784 0.140033 + -8.67362e-19 -8.67362e-19 -0.0 -0.0 -4.33681e-19 -1.73472e-18 -0.0 -8.67362e-19 -8.67362e-19 + -2.60209e-18 -0.0 -0.0 -1.73472e-18 -8.67362e-19 -8.67362e-19 -8.67362e-19 -0.0 -8.67362e-19] + + @test norm(dpg_sol_cpu - Array(mod3.dpg_sol), Inf) <= atol + @test norm(dqg_sol_cpu - Array(mod3.dqg_sol), Inf) <= atol + @test norm(dline_var_cpu - Array(mod3.dline_var), Inf) <= atol + @test norm(dline_fl_cpu - Array(mod3.dline_fl), Inf) <= atol + @test norm(dtheta_sol_cpu - Array(mod3.dtheta_sol), Inf) <= atol + @test norm(dw_sol_cpu - Array(mod3.dw_sol), Inf) <= atol + @test norm(dual_infeas_cpu - Array(mod3.dual_infeas), Inf) <= atol + @test norm((lambda_cpu - Array(mod3.lambda))./max.(abs.(lambda_cpu),1), Inf) <= 2e-6 +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 858ef53..8f0c818 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,7 +1,6 @@ using Test using LinearAlgebra using Printf -using CUDA using ExaAdmm using LazyArtifacts @@ -10,12 +9,14 @@ using LazyArtifacts const INSTANCES_DIR = joinpath(artifact"ExaData", "ExaData") const MP_DEMAND_DIR = joinpath(INSTANCES_DIR, "mp_demand") + init_time = time() @testset "Testing ExaAdmm" begin @testset "Testing ADMM algorithms on CPUs" begin include("algorithms/acopf_update_cpu.jl") include("algorithms/mpacopf_update_cpu.jl") + include("algorithms/qpsub_update_cpu.jl") end using CUDA @@ -23,6 +24,7 @@ init_time = time() @testset "Testing ADMM algorithms using CUDA.jl" begin include("algorithms/acopf_update_gpu.jl") include("algorithms/mpacopf_update_gpu.jl") + include("algorithms/qpsub_update_gpu.jl") end end
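For reference, the QP subproblem solver exercised by both new test files can also be driven directly through `ExaAdmm.solve_qpsub`. The sketch below mirrors the calls in the tests above; the positional arguments are the QP data assembled into `mod1`, the keyword values are the ones used in the tests, and passing `use_gpu=true` (as in the GPU testset) selects the CUDA path:

    env, mod = ExaAdmm.solve_qpsub(case, mod1.Hs, mod1.LH_1h, mod1.RH_1h,
        mod1.LH_1i, mod1.RH_1i, mod1.LH_1j, mod1.RH_1j, mod1.LH_1k, mod1.RH_1k,
        mod1.ls, mod1.us, mod1.qpsub_pgmax, mod1.qpsub_pgmin,
        mod1.qpsub_qgmax, mod1.qpsub_qgmin, mod1.qpsub_c1, mod1.qpsub_c2,
        mod1.qpsub_Pd, mod1.qpsub_Qd, initial_beta;
        outer_iterlim=10000, inner_iterlim=1, scale=1e-4, obj_scale=1,
        rho_pq=4000.0, rho_va=4000.0, verbose=0, outer_eps=2e-6, onelevel=true)

    mod.info.status   # :Solved once the primal/dual tolerances are met
    mod.info.objval   # QP subproblem objective value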