Our papers on a nullspace property for subspace-preserving recovery [1] and on the convergence and implicit bias of overparametrized linear networks [2] have been accepted to the International Conference on Machine Learning (ICML), 2021.
[Bibtex] [Abstract] [Download PDF]
Economic dispatch and frequency regulation are typically viewed as fundamentally different problems in power systems and, hence, are typically studied separately. In this paper, we frame and study a joint problem that co-optimizes both slow timescale economic dispatch resources and fast timescale frequency regulation resources. We show how the joint problem can be decomposed without loss of optimality into slow and fast timescale sub-problems that have appealing interpretations as the economic dispatch and frequency regulation problems respectively. We solve the fast timescale sub-problem using a distributed frequency control algorithm that preserves the stability of the network during transients. We solve the slow timescale sub-problem using an efficient market mechanism that coordinates with the fast timescale sub-problem. We investigate the performance of the decomposition on the IEEE 24-bus reliability test system.
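As a rough illustration of the slow-timescale sub-problem described in the abstract above, and not of the paper's market mechanism or distributed frequency controller, the short Python sketch below solves a toy quadratic-cost economic dispatch with a single power-balance constraint in closed form via the equal-incremental-cost condition. All generator cost coefficients and the load value are hypothetical.

# Toy slow-timescale economic dispatch: quadratic generation costs and a
# single power-balance constraint, no generator limits or network model.
# All numbers are hypothetical and only illustrate the sub-problem structure.
import numpy as np

a = np.array([0.10, 0.08, 0.12])   # quadratic cost coefficients (hypothetical)
b = np.array([2.0, 2.5, 1.8])      # linear cost coefficients (hypothetical)
demand = 300.0                     # total load to balance (MW, hypothetical)

# minimize sum_i 0.5*a_i*p_i^2 + b_i*p_i   subject to   sum_i p_i = demand
# Stationarity gives a_i*p_i + b_i = lam for every generator i
# (equal incremental cost), and the balance constraint fixes lam.
lam = (demand + np.sum(b / a)) / np.sum(1.0 / a)
p = (lam - b) / a

print("marginal price lambda:", lam)
print("dispatch p:", p, "  total:", p.sum())

Ignoring generator limits and the network keeps the solution available in closed form; the slow-timescale sub-problem in the paper additionally coordinates with the fast-timescale frequency regulation resources.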
@inproceedings{kcrmv2021icml,
abstract = {Economic dispatch and frequency regulation are typically viewed as fundamentally different problems in power systems and, hence, are typically studied separately. In this paper, we frame and study a joint problem that co-optimizes both slow timescale economic dispatch resources and fast timescale frequency regulation resources. We show how the joint problem can be decomposed without loss of optimality into slow and fast timescale sub-problems that have appealing interpretations as the economic dispatch and frequency regulation problems respectively. We solve the fast timescale sub-problem using a distributed frequency control algorithm that preserves the stability of the network during transients. We solve the slow timescale sub-problem using an efficient market mechanism that coordinates with the fast timescale sub-problem. We investigate the performance of the decomposition on the IEEE 24-bus reliability test system.},
author = {Kaba, Mustafa Devrim and You, Chong and Robinson, Daniel P. and Mallada, Enrique and Vidal, Rene},
booktitle = {International Conference on Machine Learning (ICML)},
grants = {CAREER-1752362;TRIPODS-1934979;CPS-2136324},
month = {7},
note = {(21.5\% acceptance)},
pages = {5180--5188},
publisher = {PMLR},
record = {accepted May 2021},
series = {Proceedings of Machine Learning Research},
title = {Characterization of Subspace-Preserving Recovery by a Nullspace Property},
url = {https://mallada.ece.jhu.edu/pubs/2021-ICML-KCRMV.pdf},
volume = {139},
year = {2021}
}
[Bibtex] [Abstract] [Download PDF]
Neural networks trained via gradient descent with random initialization and without any regularization enjoy good generalization performance in practice despite being highly overparametrized. A promising direction to explain this phenomenon is to study how initialization and overparametrization affect convergence and implicit bias of training algorithms. In this paper, we present a novel analysis of single-hidden-layer linear networks trained under gradient flow, which connects initialization, optimization, and overparametrization. Firstly, we show that the squared loss converges exponentially to its optimum at a rate that depends on the level of imbalance of the initialization. Secondly, we show that proper initialization constrains the dynamics of the network parameters to lie within an invariant set. In turn, minimizing the loss over this set leads to the min-norm solution. Finally, we show that large hidden layer width, together with (properly scaled) random initialization, ensures proximity to such an invariant set during training, allowing us to derive a novel non-asymptotic upper bound on the distance between the trained network and the min-norm solution.
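As a rough numerical sketch of the setting described in the abstract above, and not of the paper's analysis, the Python snippet below runs plain gradient descent, a discretization of the gradient flow studied in the paper, on a single-hidden-layer linear network and compares the learned end-to-end map with the min-norm least-squares solution. The width, step size, iteration count, and 1/sqrt(h)-scaled random initialization are hypothetical choices made only for illustration.

# Single-hidden-layer linear network x -> W2 @ W1 @ x trained on an
# overparametrized (n < d) regression problem with plain gradient descent,
# a discretization of the gradient flow analyzed in the paper.
# Width h, step size, and initialization scale are hypothetical choices.
import numpy as np

rng = np.random.default_rng(0)
n, d, h = 10, 30, 2000                    # n < d: more parameters than data
X = rng.standard_normal((n, d))
y = rng.standard_normal((n, 1))

W1 = rng.standard_normal((h, d)) / np.sqrt(h)   # (properly scaled) random init
W2 = rng.standard_normal((1, h)) / np.sqrt(h)

lr = 2e-3
for _ in range(10000):
    beta = W1.T @ W2.T                    # end-to-end linear map (d x 1)
    g = X.T @ (X @ beta - y)              # gradient of the squared loss in beta
    grad_W1 = W2.T @ g.T                  # chain rule through the factorization
    grad_W2 = g.T @ W1.T
    W1 -= lr * grad_W1
    W2 -= lr * grad_W2

beta = W1.T @ W2.T
beta_min_norm = np.linalg.pinv(X) @ y     # min-norm least-squares solution
print("final squared loss:", 0.5 * np.sum((X @ beta - y) ** 2))
print("relative distance to min-norm solution:",
      np.linalg.norm(beta - beta_min_norm) / np.linalg.norm(beta_min_norm))

At this initialization scale the end-to-end map W2 W1 shrinks as the width h grows, loosely consistent with the abstract's point that large width with properly scaled random initialization keeps the trained network close to the min-norm solution.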
@inproceedings{mtvm2021icml,
abstract = {Neural networks trained via gradient descent with random initialization and without any regularization enjoy good generalization performance in practice despite being highly overparametrized. A promising direction to explain this phenomenon is to study how initialization and overparametrization affect convergence and implicit bias of training algorithms. In this paper, we present a novel analysis of single-hidden-layer linear networks trained under gradient flow, which connects initialization, optimization, and overparametrization. Firstly, we show that the squared loss converges exponentially to its optimum at a rate that depends on the level of imbalance of the initialization. Secondly, we show that proper initialization constrains the dynamics of the network parameters to lie within an invariant set. In turn, minimizing the loss over this set leads to the min-norm solution. Finally, we show that large hidden layer width, together with (properly scaled) random initialization, ensures proximity to such an invariant set during training, allowing us to derive a novel non-asymptotic upper bound on the distance between the trained network and the min-norm solution.},
author = {Min, Hancheng and Tarmoun, Salma and Vidal, Rene and Mallada, Enrique},
booktitle = {International Conference on Machine Learning (ICML)},
grants = {TRIPODS-1934979;CAREER-1752362;AMPS-1736448},
month = {7},
note = {(21.5\% acceptance)},
pages = {7760--7768},
publisher = {PMLR},
record = {accepted May 2021},
series = {Proceedings of Machine Learning Research},
title = {On the Explicit Role of Initialization on the Convergence and Implicit Bias of Overparametrized Linear Networks},
url = {https://mallada.ece.jhu.edu/pubs/2021-ICML-MTVM.pdf},
volume = {139},
year = {2021}
}