近端算子求解分布式约束优化问题

· 2021-12-02 · # Optimization

min⁡∑i=1nw12∥pei−fh(θi)−pbi∗∥2+w22∥θi−θi∗∥2s.t.L(pe−pe∗)=0,pei∈Pi,θi∈Θi(1)\begin{aligned} \min &\quad \sum_{i=1}^n\frac{w_1}{2}\|p_{e_i}-f_h(\theta_i)-p_{b_i}^*\|^2+\frac{w_2}{2}\|\theta_i-\theta_i^*\|^2 \\ s.t.&\quad L(p_e-p_e^*)=0,\\ &\quad p_{e_i}\in \mathcal P_i,\theta_i\in\Theta_i \end{aligned}\qquad (1) mins.t.i=1∑n2w1∥pei−fh(θi)−pbi∗∥2+2w2∥θi−θi∗∥2L(pe−pe∗)=0,pei∈Pi,θi∈Θi(1)

prox⁡f(v)=arg⁡min⁡xf(x)+12∥x−v∥2\operatorname{prox}_f(v)=\arg\min_x f(x)+\frac{1}{2}\|x-v\|^2 proxf(v)=argxminf(x)+21∥x−v∥2

可知x=prox⁡f(v)x=\operatorname{prox}_f(v)x=proxf(v)等价于v−x∈∇f(x)v-x\in\nabla f(x)v−x∈∇f(x)。

min⁡f1(pe,θ)+f2(θ)+g1(pe)+g2(θ)s.t.L(pe−pe∗)=0(2)\begin{aligned} \min&\quad f_1(p_e,\theta)+f_2(\theta)+g_1(p_e)+g_2(\theta)\\ s.t.&\quad L(p_e-p_e^*)=0 \end{aligned}\qquad(2) mins.t.f1(pe,θ)+f2(θ)+g1(pe)+g2(θ)L(pe−pe∗)=0(2)

拉格朗日函数

L(pe,θ,λ)=f1(pe,θ)+f2(θ)+λTL(pe−pe∗)+g1(pe)+g2(θ)(3)\mathcal L(p_e,\theta,\lambda)=f_1(p_e,\theta)+f_2(\theta)+\lambda^TL(p_e-p_e^*)+g_1(p_e)+g_2(\theta)\qquad (3) L(pe,θ,λ)=f1(pe,θ)+f2(θ)+λTL(pe−pe∗)+g1(pe)+g2(θ)(3)

根据KKT条件

0∈−∇pef1(p^e,θ^)−Lλ^−∂g1(p^e)0∈−∇θf1(p^e,θ^)−∇f2(θ^)−∂g2(θ^)0=L(p^e−pe∗)(4)\begin{aligned} 0&\in-\nabla_{p_e}f_1(\hat p_e,\hat \theta)-L\hat\lambda-\partial g_1(\hat p_e)\\ 0&\in-\nabla_{\theta} f_1(\hat p_e,\hat \theta)-\nabla f_2(\hat \theta)-\partial g_2(\hat \theta)\\ 0&=L(\hat p_e-p_e^*) \end{aligned}\qquad (4) 000∈−∇pef1(p^e,θ^)−Lλ^−∂g1(p^e)∈−∇θf1(p^e,θ^)−∇f2(θ^)−∂g2(θ^)=L(p^e−pe∗)(4)

0=prox⁡g1(p^e−∇pef1(p^e,θ^)−Lλ^)−p^e0=prox⁡g2(θ^−∇θf1(p^e,θ^)−∇f2(θ^))−θ^0=L(p^e−pe∗)(5)\begin{aligned} 0&=\operatorname{prox}_{g_1}(\hat p_e-\nabla_{p_e}f_1(\hat p_e,\hat \theta)-L\hat\lambda)-\hat p_e\\ 0&=\operatorname{prox}_{g_2}(\hat \theta-\nabla_{\theta} f_1(\hat p_e,\hat \theta)-\nabla f_2(\hat \theta))-\hat \theta\\ 0&=L(\hat p_e-p_e^*) \end{aligned}\qquad (5) 000=proxg1(p^e−∇pef1(p^e,θ^)−Lλ^)−p^e=proxg2(θ^−∇θf1(p^e,θ^)−∇f2(θ^))−θ^=L(p^e−pe∗)(5)

p˙e=prox⁡g1(pe−∇pef1(pe,θ)−Lλ)−peθ˙=prox⁡g2(θ−∇θf1(pe,θ)−∇f2(θ))−θλ˙=L(pe+p˙e−pe∗)(6)\begin{aligned} \dot p_e&=\operatorname{prox}_{g_1}(p_e-\nabla_{p_e}f_1(p_e,\theta)-L\lambda)-p_e\\ \dot \theta&=\operatorname{prox}_{g_2}(\theta-\nabla_{\theta} f_1(p_e,\theta)-\nabla f_2(\theta))-\theta\\ \dot \lambda&=L(p_e+\dot p_e-p_e^*) \end{aligned}\qquad (6) p˙eθ˙λ˙=proxg1(pe−∇pef1(pe,θ)−Lλ)−pe=proxg2(θ−∇θf1(pe,θ)−∇f2(θ))−θ=L(pe+p˙e−pe∗)(6)

定理：假设图无向连通，且Slater条件满足。则

(6)的平衡点李雅普诺夫稳定，解(pe(t),θ(t))(p_e(t),\theta(t))(pe(t),θ(t))有界；

(pe(t),θ(t),λ(t))(p_e(t),\theta(t),\lambda(t))(pe(t),θ(t),λ(t))收敛，且lim⁡t→∞(pe(t),θ(t))\lim_{t\to\infty}(p_e(t),\theta(t))limt→∞(pe(t),θ(t))是问题(2)的解。

证明：1、李雅普诺夫函数

V=12∥pe−p^e∥2+12∥θ−θ^∥2+12∥λ−λ^∥2+f2(θ)−f2(θ^)−(θ−θ^)T∇f2(θ^)+f1(pe,θ)−f1(p^e,θ^)−(pe−p^e)T∇pef1(p^e,θ^)−(θ−θ^)T∇θf1(p^e,θ^)\begin{aligned} V&=\frac{1}{2}\|p_e-\hat p_e\|^2+\frac{1}{2}\|\theta-\hat \theta\|^2+\frac{1}{2}\|\lambda-\hat \lambda\|^2+f_2(\theta)-f_2(\hat\theta)-(\theta-\hat \theta)^T\nabla f_2(\hat \theta)\\ &\quad+f_1(p_e,\theta)-f_1(\hat p_e,\hat\theta)-(p_e-\hat p_e)^T\nabla_{p_e}f_1(\hat p_e,\hat \theta)-(\theta-\hat \theta)^T\nabla_{\theta}f_1(\hat p_e,\hat \theta) \end{aligned} V=21∥pe−p^e∥2+21∥θ−θ^∥2+21∥λ−λ^∥2+f2(θ)−f2(θ^)−(θ−θ^)T∇f2(θ^)+f1(pe,θ)−f1(p^e,θ^)−(pe−p^e)T∇pef1(p^e,θ^)−(θ−θ^)T∇θf1(p^e,θ^)

(6)的前两个等式可以重写为

p˙e+pe=prox⁡g1(pe−∇pef1(pe,θ)−Lλ)θ˙+θ=prox⁡g2(θ−∇θf1(pe,θ)−∇f2(θ))\begin{aligned} \dot p_e+p_e&=\operatorname{prox}_{g_1}(p_e-\nabla_{p_e}f_1(p_e,\theta)-L\lambda)\\ \dot \theta+\theta&=\operatorname{prox}_{g_2}(\theta-\nabla_{\theta} f_1(p_e,\theta)-\nabla f_2(\theta)) \end{aligned} p˙e+peθ˙+θ=proxg1(pe−∇pef1(pe,θ)−Lλ)=proxg2(θ−∇θf1(pe,θ)−∇f2(θ))

由x=prox⁡f(v)x=\operatorname{prox}_f(v)x=proxf(v)等价于v−x∈∇f(x)v-x\in\nabla f(x)v−x∈∇f(x)可得

pe−∇pef1(pe,θ)−Lλ−(p˙e+pe)∈∂g1(p˙e+pe)θ−∇θf1(pe,θ)−∇f2(θ)−(θ˙+θ)∈∂g2(θ˙+θ)(7)\begin{aligned} p_e-\nabla_{p_e}f_1(p_e,\theta)-L\lambda-(\dot p_e+p_e)&\in\partial g_1(\dot p_e+p_e)\\ \theta-\nabla_{\theta} f_1(p_e,\theta)-\nabla f_2(\theta)-(\dot \theta+\theta)&\in\partial g_2(\dot \theta+\theta)\\ \end{aligned}\qquad (7) pe−∇pef1(pe,θ)−Lλ−(p˙e+pe)θ−∇θf1(pe,θ)−∇f2(θ)−(θ˙+θ)∈∂g1(p˙e+pe)∈∂g2(θ˙+θ)(7)

令(p^e,θ^,λ^)(\hat p_e,\hat \theta,\hat \lambda)(p^e,θ^,λ^)为(6)的平衡点。则由(5)可得

−∇pef1(p^e,θ^)−Lλ^∈∂g1(p^e)−∇θf1(p^e,θ^)−∇f2(θ^)∈∂g2(θ^)(8)\begin{aligned} -\nabla_{p_e}f_1(\hat p_e,\hat \theta)-L\hat \lambda&\in\partial g_1(\hat p_e)\\ -\nabla_{\theta} f_1(\hat p_e,\hat \theta)-\nabla f_2(\hat \theta)&\in\partial g_2(\hat \theta)\\ \end{aligned}\qquad (8) −∇pef1(p^e,θ^)−Lλ^−∇θf1(p^e,θ^)−∇f2(θ^)∈∂g1(p^e)∈∂g2(θ^)(8)

因为g1,g2g_1,g_2g1,g2是凸的，所以∂g1,∂g2\partial g_1,\partial g_2∂g1,∂g2单调。结合(7)(8)有

(−(∇pef1(pe,θ)−∇pef1(p^e,θ^))−L(λ−λ^)−p˙e)T(p˙e+pe−p^e)≥0(9)(−(∇θf1(pe,θ)−∇θf1(p^e,θ^))−(∇f2(θ)−∇f2(θ^))−θ˙)T(θ˙+θ−θ^)≥0(10)\begin{aligned} (-(\nabla_{p_e}f_1(p_e,\theta)-\nabla_{p_e}f_1(\hat p_e,\hat \theta))-L(\lambda-\hat \lambda)-\dot p_e)^T(\dot p_e+p_e-\hat p_e)&\geq 0\qquad (9)\\ (-(\nabla_{\theta} f_1(p_e,\theta)-\nabla_{\theta} f_1(\hat p_e,\hat \theta))-(\nabla f_2(\theta)-\nabla f_2(\hat \theta))-\dot \theta)^T(\dot \theta+\theta-\hat \theta)&\geq 0\qquad (10) \end{aligned} (−(∇pef1(pe,θ)−∇pef1(p^e,θ^))−L(λ−λ^)−p˙e)T(p˙e+pe−p^e)(−(∇θf1(pe,θ)−∇θf1(p^e,θ^))−(∇f2(θ)−∇f2(θ^))−θ˙)T(θ˙+θ−θ^)≥0(9)≥0(10)

由平衡点定义(5)，得L(p^e−pe∗)=0L(\hat p_e-p_e^*)=0L(p^e−pe∗)=0。

由(9)得

−(∇pef1(pe,θ)−∇pef1(p^e,θ^))T(pe−p^e)−(λ−λ^)TL(pe+p˙e−pe∗)−p˙eTp˙e≥p˙eT(pe−p^e)+p˙eT(∇pef1(pe,θ)−∇pef1(p^e,θ^))\begin{aligned} &-(\nabla_{p_e}f_1(p_e,\theta)-\nabla_{p_e}f_1(\hat p_e,\hat \theta))^T(p_e-\hat p_e)-(\lambda-\hat \lambda)^TL(p_e+\dot p_e-p_e^*)-\dot p_e^T\dot p_e\\ &\geq \dot p_e^T(p_e-\hat p_e)+\dot p_e^T(\nabla_{p_e}f_1(p_e,\theta)-\nabla_{p_e}f_1(\hat p_e,\hat \theta)) \end{aligned} −(∇pef1(pe,θ)−∇pef1(p^e,θ^))T(pe−p^e)−(λ−λ^)TL(pe+p˙e−pe∗)−p˙eTp˙e≥p˙eT(pe−p^e)+p˙eT(∇pef1(pe,θ)−∇pef1(p^e,θ^))

同理，由(10)得

−(∇θf1(pe,θ)−∇θf1(p^e,θ^))T(θ−θ^)−(∇f2(θ)−∇f2(θ^))T(θ−θ^)−θ˙Tθ˙≥θ˙T(θ−θ^)+θ˙T(∇θf1(pe,θ)−∇θf1(p^e,θ^))+θ˙T(∇f2(θ)−∇f2(θ^))\begin{aligned} &-(\nabla_{\theta} f_1(p_e,\theta)-\nabla_{\theta} f_1(\hat p_e,\hat \theta))^T(\theta-\hat \theta)-(\nabla f_2(\theta)-\nabla f_2(\hat \theta))^T(\theta-\hat \theta)-\dot \theta^T\dot \theta\\ &\geq \dot \theta^T(\theta-\hat \theta)+\dot \theta^T(\nabla_{\theta} f_1(p_e,\theta)-\nabla_{\theta} f_1(\hat p_e,\hat \theta))+\dot \theta^T(\nabla f_2(\theta)-\nabla f_2(\hat \theta)) \end{aligned} −(∇θf1(pe,θ)−∇θf1(p^e,θ^))T(θ−θ^)−(∇f2(θ)−∇f2(θ^))T(θ−θ^)−θ˙Tθ˙≥θ˙T(θ−θ^)+θ˙T(∇θf1(pe,θ)−∇θf1(p^e,θ^))+θ˙T(∇f2(θ)−∇f2(θ^))

λ˙T(λ−λ^)=(λ−λ^)TL(pe+p˙e−pe∗)\dot \lambda^T(\lambda-\hat \lambda)=(\lambda-\hat \lambda)^TL(p_e+\dot p_e-p_e^*) λ˙T(λ−λ^)=(λ−λ^)TL(pe+p˙e−pe∗)

综上所述，李雅普诺夫函数对时间的导数为

V˙=p˙eT(pe−p^e)+θ˙T(θ−θ^)+λ˙T(λ−λ^)+p˙eT(∇pef1(pe,θ)−∇pef1(p^e,θ^))+θ˙T(∇θf1(pe,θ)−∇θf1(p^e,θ^))+θ˙T(∇f2(θ)−∇f2(θ^))≤−p˙eTp˙e−θ˙Tθ˙−(∇pef1(pe,θ)−∇pef1(p^e,θ^))T(pe−p^e)−(∇θf1(pe,θ)−∇θf1(p^e,θ^))T(θ−θ^)−(∇f2(θ)−∇f2(θ^))T(θ−θ^)≤−∥p˙e∥2−∥θ˙∥2≤0\begin{aligned} \dot V&=\dot p_e^T(p_e-\hat p_e)+\dot \theta^T(\theta-\hat \theta)+\dot \lambda^T(\lambda-\hat \lambda)+\dot p_e^T(\nabla_{p_e}f_1(p_e,\theta)-\nabla_{p_e}f_1(\hat p_e,\hat \theta))\\ &\quad +\dot \theta^T(\nabla_{\theta} f_1(p_e,\theta)-\nabla_{\theta} f_1(\hat p_e,\hat \theta))+\dot \theta^T(\nabla f_2(\theta)-\nabla f_2(\hat \theta))\\ &\leq -\dot p_e^T\dot p_e-\dot \theta^T\dot \theta-(\nabla_{p_e}f_1(p_e,\theta)-\nabla_{p_e}f_1(\hat p_e,\hat \theta))^T(p_e-\hat p_e)\\ &\quad-(\nabla_{\theta} f_1(p_e,\theta)-\nabla_{\theta} f_1(\hat p_e,\hat \theta))^T(\theta-\hat \theta)-(\nabla f_2(\theta)-\nabla f_2(\hat \theta))^T(\theta-\hat \theta)\\ &\leq-\|\dot p_e\|^2-\|\dot \theta\|^2\leq 0 \end{aligned} V˙=p˙eT(pe−p^e)+θ˙T(θ−θ^)+λ˙T(λ−λ^)+p˙eT(∇pef1(pe,θ)−∇pef1(p^e,θ^))+θ˙T(∇θf1(pe,θ)−∇θf1(p^e,θ^))+θ˙T(∇f2(θ)−∇f2(θ^))≤−p˙eTp˙e−θ˙Tθ˙−(∇pef1(pe,θ)−∇pef1(p^e,θ^))T(pe−p^e)−(∇θf1(pe,θ)−∇θf1(p^e,θ^))T(θ−θ^)−(∇f2(θ)−∇f2(θ^))T(θ−θ^)≤−∥p˙e∥2−∥θ˙∥2≤0

因为f1,f2f_1,f_2f1,f2是凸的，所以∇f1,∇f2\nabla f_1,\nabla f_2∇f1,∇f2单调，故第二个不等式成立。

因此(pe(t),θ(t),λ(t))→M(p_e(t),\theta(t),\lambda(t))\to \mathcal M(pe(t),θ(t),λ(t))→M，其中M\mathcal MM是{(pe,θ,λ)∣p˙e=0,θ˙=0}\{(p_e,\theta,\lambda)| \dot p_e=0,\dot \theta=0 \}{(pe,θ,λ)∣p˙e=0,θ˙=0}中的最大不变集。对于任何起始于M\mathcal MM的解轨迹(pˉe(t),θˉ(t),λˉ(t))(\bar p_e(t),\bar \theta(t),\bar \lambda(t))(pˉe(t),θˉ(t),λˉ(t))，取(pˉe(0),θˉ(0),λˉ(0))∈M(\bar p_e(0),\bar \theta(0),\bar \lambda(0))\in\mathcal M(pˉe(0),θˉ(0),λˉ(0))∈M，都满足pˉe˙(t)≡0\dot {\bar p_e}(t)\equiv 0pˉe˙(t)≡0，θˉ˙(t)≡0\dot {\bar \theta}(t)\equiv0θˉ˙(t)≡0，代入(5)得

λˉ˙(t)≡L(pˉe−pe∗)\dot {\bar \lambda}(t)\equiv L(\bar p_e-p_e^*) λˉ˙(t)≡L(pˉe−pe∗)

那么λˉ˙(t)≡0\dot {\bar \lambda}(t)\equiv 0λˉ˙(t)≡0，否则λˉ(t)\bar \lambda(t)λˉ(t)趋于无穷大，与稳定性矛盾。因此M⊂{(pe,θ,λ)∣p˙e=0,θ˙=0,λ˙=0}\mathcal M\subset\{(p_e,\theta,\lambda)| \dot p_e=0,\dot \theta=0,\dot \lambda=0 \}M⊂{(pe,θ,λ)∣p˙e=0,θ˙=0,λ˙=0}，即M\mathcal MM里的每一个点都是平衡点。然后又有每个平衡点都李雅普诺夫稳定，故(pe,θ,λ)(p_e,\theta,\lambda)(pe,θ,λ)最终收敛到一个平衡点。

近端算子求解分布式约束优化问题

近端算子求解分布式约束优化问题

Recommend

Issue #1: Project Updates, DBUI, and Interesting Open Source Repositories

产品经理如何做即兴演讲

互联网之子:亚伦·斯沃茨的故事

JCommander：Java外部参数解析利器

Potato dauphinoise

Kanan Rahimov (KenanBek)

萨摩耶云杜亚威：云原生+AI打造决策最优路径

Accessible forms with ARIA live regions

Domain-Driven Design to the rescue

monorepo - AruSeito

About Joyk