BibTeX records: Prashanth L. A.

download as .bib file

% AISTATS 2024 paper (PMLR volume 238). "Newton" is brace-protected against sentence-casing styles.
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@inproceedings{DBLP:conf/aistats/ManiyarAMB24,
  author       = {Mizhaan Prajit Maniyar and
                  Prashanth, L. A. and
                  Akash Mondal and
                  Shalabh Bhatnagar},
  editor       = {Sanjoy Dasgupta and
                  Stephan Mandt and
                  Yingzhen Li},
  title        = {A Cubic-regularized Policy {Newton} Algorithm for Reinforcement Learning},
  booktitle    = {International Conference on Artificial Intelligence and Statistics,
                  2-4 May 2024, Palau de Congressos, Valencia, Spain},
  series       = {Proceedings of Machine Learning Research},
  volume       = {238},
  pages        = {4708--4716},
  publisher    = {{PMLR}},
  year         = {2024},
  url          = {https://proceedings.mlr.press/v238/maniyar24a.html},
  timestamp    = {Fri, 10 May 2024 10:02:41 +0200},
  biburl       = {https://dblp.org/rec/conf/aistats/ManiyarAMB24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, IEEE Transactions on Automatic Control 68(3), 2023. Journal name is written out in full.
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/tac/BhavsarA23,
  author       = {Nirav Bhavsar and
                  Prashanth, L. A.},
  title        = {Nonasymptotic Bounds for Stochastic Optimization With Biased Noisy
                  Gradient Oracles},
  journal      = {{IEEE} Transactions on Automatic Control},
  volume       = {68},
  number       = {3},
  pages        = {1628--1641},
  year         = {2023},
  url          = {https://doi.org/10.1109/TAC.2022.3159748},
  doi          = {10.1109/TAC.2022.3159748},
  timestamp    = {Sat, 11 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/tac/BhavsarA23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AISTATS 2023 paper (PMLR volume 206).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@inproceedings{DBLP:conf/aistats/PatilANP23,
  author       = {Gandharv Patil and
                  Prashanth, L. A. and
                  Dheeraj Nagaraj and
                  Doina Precup},
  editor       = {Francisco J. R. Ruiz and
                  Jennifer G. Dy and
                  Jan{-}Willem van de Meent},
  title        = {Finite time analysis of temporal difference learning with linear function
                  approximation: Tail averaging and regularisation},
  booktitle    = {International Conference on Artificial Intelligence and Statistics,
                  25-27 April 2023, Palau de Congressos, Valencia, Spain},
  series       = {Proceedings of Machine Learning Research},
  volume       = {206},
  pages        = {5438--5448},
  publisher    = {{PMLR}},
  year         = {2023},
  url          = {https://proceedings.mlr.press/v206/patil23a.html},
  timestamp    = {Mon, 19 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aistats/PatilANP23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CISS 2023 conference paper (IEEE).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@inproceedings{DBLP:conf/ciss/BhatnagarA23,
  author       = {Shalabh Bhatnagar and
                  Prashanth, L. A.},
  title        = {Generalized Simultaneous Perturbation Stochastic Approximation with
                  Reduced Estimator Bias},
  booktitle    = {57th Annual Conference on Information Sciences and Systems, {CISS}
                  2023, Baltimore, MD, USA, March 22-24, 2023},
  pages        = {1--6},
  publisher    = {{IEEE}},
  year         = {2023},
  url          = {https://doi.org/10.1109/CISS56502.2023.10089720},
  doi          = {10.1109/CISS56502.2023.10089720},
  timestamp    = {Sat, 22 Apr 2023 16:25:53 +0200},
  biburl       = {https://dblp.org/rec/conf/ciss/BhatnagarA23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% UAI 2023 paper (PMLR volume 216).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@inproceedings{DBLP:conf/uai/VijayanA23,
  author       = {Nithia Vijayan and
                  Prashanth, L. A.},
  editor       = {Robin J. Evans and
                  Ilya Shpitser},
  title        = {A policy gradient approach for optimization of smooth risk measures},
  booktitle    = {Uncertainty in Artificial Intelligence, {UAI} 2023, July 31 - 4 August
                  2023, Pittsburgh, PA, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {216},
  pages        = {2168--2178},
  publisher    = {{PMLR}},
  year         = {2023},
  url          = {https://proceedings.mlr.press/v216/vijayan23a.html},
  timestamp    = {Mon, 28 Aug 2023 17:23:08 +0200},
  biburl       = {https://dblp.org/rec/conf/uai/VijayanA23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2304.10951 (dblp CoRR record). "Newton" is brace-protected against sentence-casing styles.
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-2304-10951,
  author       = {Mizhaan Prajit Maniyar and
                  Akash Mondal and
                  Prashanth, L. A. and
                  Shalabh Bhatnagar},
  title        = {A Cubic-regularized Policy {Newton} Algorithm for Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2304.10951},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2304.10951},
  doi          = {10.48550/ARXIV.2304.10951},
  eprinttype   = {arXiv},
  eprint       = {2304.10951},
  timestamp    = {Tue, 02 May 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2304-10951.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2310.11389 (dblp CoRR record). A stray \textbackslash that printed a literal backslash
% after "VaR" was removed from the title; VaR, CVaR and Markov are brace-protected against sentence-casing styles.
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-2310-11389,
  author       = {Sanjay Bhat and
                  Prashanth, L. A. and
                  Gugan Thoppe},
  title        = {{VaR} and {CVaR} Estimation in a {Markov} Cost Process:
                  Lower and Upper Bounds},
  journal      = {CoRR},
  volume       = {abs/2310.11389},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2310.11389},
  doi          = {10.48550/ARXIV.2310.11389},
  eprinttype   = {arXiv},
  eprint       = {2310.11389},
  timestamp    = {Thu, 26 Oct 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2310-11389.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2310.18743 (dblp CoRR record).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-2310-18743,
  author       = {Sumedh Gupte and
                  Prashanth, L. A. and
                  Sanjay P. Bhat},
  title        = {Optimization of utility-based shortfall risk: {A} non-asymptotic viewpoint},
  journal      = {CoRR},
  volume       = {abs/2310.18743},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2310.18743},
  doi          = {10.48550/ARXIV.2310.18743},
  eprinttype   = {arXiv},
  eprint       = {2310.18743},
  timestamp    = {Thu, 02 Nov 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2310-18743.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Foundations and Trends in Machine Learning 15(5), 2022. Journal name is written out in full.
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
% The citation key is kept as exported by dblp so existing citations keep working.
@article{DBLP:journals/ftml/A022,
  author       = {Prashanth, L. A. and
                  Michael C. Fu},
  title        = {Risk-Sensitive Reinforcement Learning via Policy Gradient Search},
  journal      = {Foundations and Trends in Machine Learning},
  volume       = {15},
  number       = {5},
  pages        = {537--693},
  year         = {2022},
  url          = {https://doi.org/10.1561/2200000091},
  doi          = {10.1561/2200000091},
  timestamp    = {Thu, 14 Jul 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ftml/A022.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Journal of Machine Learning Research 23, 2022. Journal name is written out in full;
% "Wasserstein" is brace-protected against sentence-casing styles.
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/jmlr/AB22,
  author       = {Prashanth, L. A. and
                  Sanjay P. Bhat},
  title        = {A {Wasserstein} Distance Approach for Concentration of Empirical Risk
                  Estimates},
  journal      = {Journal of Machine Learning Research},
  volume       = {23},
  pages        = {238:1--238:61},
  year         = {2022},
  url          = {http://jmlr.org/papers/v23/20-965.html},
  timestamp    = {Wed, 07 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/AB22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IJCAI 2022 paper. Editor is entered as "De Raedt, Luc" because "Luc De Raedt" would parse with surname "Raedt".
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@inproceedings{DBLP:conf/ijcai/TanAJ22,
  author       = {Vincent Y. F. Tan and
                  Prashanth, L. A. and
                  Krishna P. Jagannathan},
  editor       = {De Raedt, Luc},
  title        = {A Survey of Risk-Aware Multi-Armed Bandits},
  booktitle    = {Proceedings of the Thirty-First International Joint Conference on
                  Artificial Intelligence, {IJCAI} 2022, Vienna, Austria, 23-29 July
                  2022},
  pages        = {5623--5629},
  publisher    = {ijcai.org},
  year         = {2022},
  url          = {https://doi.org/10.24963/ijcai.2022/784},
  doi          = {10.24963/IJCAI.2022/784},
  timestamp    = {Wed, 27 Jul 2022 16:43:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ijcai/TanAJ22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2202.11046 (dblp CoRR record).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-2202-11046,
  author       = {Nithia Vijayan and
                  Prashanth, L. A.},
  title        = {Approximate gradient ascent methods for distortion risk measures},
  journal      = {CoRR},
  volume       = {abs/2202.11046},
  year         = {2022},
  url          = {https://arxiv.org/abs/2202.11046},
  eprinttype   = {arXiv},
  eprint       = {2202.11046},
  timestamp    = {Wed, 02 Mar 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2202-11046.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2203.16810 (dblp CoRR record).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-2203-16810,
  author       = {Dipayan Sen and
                  Prashanth, L. A. and
                  Aditya Gopalan},
  title        = {Adaptive Estimation of Random Vectors with Bandit Feedback},
  journal      = {CoRR},
  volume       = {abs/2203.16810},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2203.16810},
  doi          = {10.48550/ARXIV.2203.16810},
  eprinttype   = {arXiv},
  eprint       = {2203.16810},
  timestamp    = {Mon, 04 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2203-16810.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2205.05843 (dblp CoRR record).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-2205-05843,
  author       = {Vincent Y. F. Tan and
                  Prashanth, L. A. and
                  Krishna P. Jagannathan},
  title        = {A Survey of Risk-Aware Multi-Armed Bandits},
  journal      = {CoRR},
  volume       = {abs/2205.05843},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2205.05843},
  doi          = {10.48550/ARXIV.2205.05843},
  eprinttype   = {arXiv},
  eprint       = {2205.05843},
  timestamp    = {Wed, 18 May 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2205-05843.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2208.00290 (dblp CoRR record). "Cauchy" is brace-protected against sentence-casing styles.
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-2208-00290,
  author       = {Akash Mondal and
                  Prashanth, L. A. and
                  Shalabh Bhatnagar},
  title        = {A Gradient Smoothed Functional Algorithm with Truncated {Cauchy} Random
                  Perturbations for Stochastic Optimization},
  journal      = {CoRR},
  volume       = {abs/2208.00290},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2208.00290},
  doi          = {10.48550/ARXIV.2208.00290},
  eprinttype   = {arXiv},
  eprint       = {2208.00290},
  timestamp    = {Wed, 10 Aug 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2208-00290.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2210.05918 (dblp CoRR record).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-2210-05918,
  author       = {Gandharv Patil and
                  Prashanth, L. A. and
                  Dheeraj Nagaraj and
                  Doina Precup},
  title        = {Finite time analysis of temporal difference learning with linear function
                  approximation: Tail averaging and regularisation},
  journal      = {CoRR},
  volume       = {abs/2210.05918},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2210.05918},
  doi          = {10.48550/ARXIV.2210.05918},
  eprinttype   = {arXiv},
  eprint       = {2210.05918},
  timestamp    = {Tue, 18 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2210-05918.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2212.10477 (dblp CoRR record).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-2212-10477,
  author       = {Shalabh Bhatnagar and
                  Prashanth, L. A.},
  title        = {Generalized Simultaneous Perturbation Stochastic Approximation with
                  Reduced Estimator Bias},
  journal      = {CoRR},
  volume       = {abs/2212.10477},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2212.10477},
  doi          = {10.48550/ARXIV.2212.10477},
  eprinttype   = {arXiv},
  eprint       = {2212.10477},
  timestamp    = {Wed, 04 Jan 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2212-10477.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Machine Learning 110(3), 2021. Journal name is written out in full.
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/ml/AKM21,
  author       = {Prashanth, L. A. and
                  Nathaniel Korda and
                  R{\'{e}}mi Munos},
  title        = {Concentration bounds for temporal difference learning with linear
                  function approximation: the case of batch data and uniform sampling},
  journal      = {Machine Learning},
  volume       = {110},
  number       = {3},
  pages        = {559--618},
  year         = {2021},
  url          = {https://doi.org/10.1007/s10994-020-05912-5},
  doi          = {10.1007/S10994-020-05912-5},
  timestamp    = {Wed, 07 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/ml/AKM21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Systems and Control Letters 155, 2021 (pages holds the article number). Journal name is written out in full.
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/scl/VijayanA21,
  author       = {Nithia Vijayan and
                  Prashanth, L. A.},
  title        = {Smoothed functional-based gradient algorithms for off-policy reinforcement
                  learning: {A} non-asymptotic viewpoint},
  journal      = {Systems \& Control Letters},
  volume       = {155},
  pages        = {104988},
  year         = {2021},
  url          = {https://doi.org/10.1016/j.sysconle.2021.104988},
  doi          = {10.1016/J.SYSCONLE.2021.104988},
  timestamp    = {Mon, 28 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/scl/VijayanA21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2021 paper (AAAI Press).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@inproceedings{DBLP:conf/aaai/PandeyAB21,
  author       = {Ajay Kumar Pandey and
                  Prashanth, L. A. and
                  Sanjay P. Bhat},
  title        = {Estimation of Spectral Risk Measures},
  booktitle    = {Thirty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2021, Thirty-Third Conference on Innovative Applications of Artificial
                  Intelligence, {IAAI} 2021, The Eleventh Symposium on Educational Advances
                  in Artificial Intelligence, {EAAI} 2021, Virtual Event, February 2-9,
                  2021},
  pages        = {12166--12173},
  publisher    = {{AAAI} Press},
  year         = {2021},
  url          = {https://doi.org/10.1609/aaai.v35i13.17444},
  doi          = {10.1609/AAAI.V35I13.17444},
  timestamp    = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/PandeyAB21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2101.02137 (dblp CoRR record).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-2101-02137,
  author       = {Nithia Vijayan and
                  Prashanth, L. A.},
  title        = {Smoothed functional-based gradient algorithms for off-policy reinforcement
                  learning},
  journal      = {CoRR},
  volume       = {abs/2101.02137},
  year         = {2021},
  url          = {https://arxiv.org/abs/2101.02137},
  eprinttype   = {arXiv},
  eprint       = {2101.02137},
  timestamp    = {Tue, 30 Mar 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2101-02137.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2107.04422 (dblp CoRR record).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-2107-04422,
  author       = {Nithia Vijayan and
                  Prashanth, L. A.},
  title        = {Likelihood ratio-based policy gradient methods for distorted risk
                  measures: {A} non-asymptotic analysis},
  journal      = {CoRR},
  volume       = {abs/2107.04422},
  year         = {2021},
  url          = {https://arxiv.org/abs/2107.04422},
  eprinttype   = {arXiv},
  eprint       = {2107.04422},
  timestamp    = {Tue, 20 Jul 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2107-04422.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2111.08805 (dblp CoRR record).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-2111-08805,
  author       = {Arvind S. Menon and
                  Prashanth, L. A. and
                  Krishna P. Jagannathan},
  title        = {Online Estimation and Optimization of Utility-Based Shortfall Risk},
  journal      = {CoRR},
  volume       = {abs/2111.08805},
  year         = {2021},
  url          = {https://arxiv.org/abs/2111.08805},
  eprinttype   = {arXiv},
  eprint       = {2111.08805},
  timestamp    = {Mon, 22 Nov 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2111-08805.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, IEEE Transactions on Automatic Control 65(6), 2020. Journal name is written out in full.
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/tac/ABBFM20,
  author       = {Prashanth, L. A. and
                  Shalabh Bhatnagar and
                  Nirav Bhavsar and
                  Michael C. Fu and
                  Steven I. Marcus},
  title        = {Random Directions Stochastic Approximation With Deterministic Perturbations},
  journal      = {{IEEE} Transactions on Automatic Control},
  volume       = {65},
  number       = {6},
  pages        = {2450--2465},
  year         = {2020},
  url          = {https://doi.org/10.1109/TAC.2019.2930821},
  doi          = {10.1109/TAC.2019.2930821},
  timestamp    = {Tue, 16 Jun 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tac/ABBFM20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2020 paper (PMLR volume 119). "CVaR" is brace-protected against sentence-casing styles.
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@inproceedings{DBLP:conf/icml/AJK20,
  author       = {Prashanth, L. A. and
                  Krishna P. Jagannathan and
                  Ravi Kumar Kolla},
  title        = {Concentration bounds for {CVaR} estimation: The cases of light-tailed
                  and heavy-tailed distributions},
  booktitle    = {Proceedings of the 37th International Conference on Machine Learning,
                  {ICML} 2020, 13-18 July 2020, Virtual Event},
  series       = {Proceedings of Machine Learning Research},
  volume       = {119},
  pages        = {5577--5586},
  publisher    = {{PMLR}},
  year         = {2020},
  url          = {http://proceedings.mlr.press/v119/l-a-20a.html},
  timestamp    = {Tue, 15 Dec 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/AJK20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2002.11440 (dblp CoRR record).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-2002-11440,
  author       = {Nirav Bhavsar and
                  Prashanth, L. A.},
  title        = {Non-Asymptotic Bounds for Zeroth-Order Stochastic Optimization},
  journal      = {CoRR},
  volume       = {abs/2002.11440},
  year         = {2020},
  url          = {https://arxiv.org/abs/2002.11440},
  eprinttype   = {arXiv},
  eprint       = {2002.11440},
  timestamp    = {Tue, 03 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2002-11440.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Operations Research Letters 47(1), 2019. Journal name is written out in full.
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/orl/KollaABJ19,
  author       = {Ravi Kumar Kolla and
                  Prashanth, L. A. and
                  Sanjay P. Bhat and
                  Krishna P. Jagannathan},
  title        = {Concentration bounds for empirical conditional value-at-risk: The
                  unbounded case},
  journal      = {Operations Research Letters},
  volume       = {47},
  number       = {1},
  pages        = {16--20},
  year         = {2019},
  url          = {https://doi.org/10.1016/j.orl.2018.11.005},
  doi          = {10.1016/J.ORL.2018.11.005},
  timestamp    = {Wed, 13 Feb 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/orl/KollaABJ19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2019 paper (PMLR volume 97).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@inproceedings{DBLP:conf/icml/BodaA19,
  author       = {Vinay Praneeth Boda and
                  Prashanth, L. A.},
  editor       = {Kamalika Chaudhuri and
                  Ruslan Salakhutdinov},
  title        = {Correlated bandits or: How to minimize mean-squared error online},
  booktitle    = {Proceedings of the 36th International Conference on Machine Learning,
                  {ICML} 2019, 9-15 June 2019, Long Beach, California, {USA}},
  series       = {Proceedings of Machine Learning Research},
  volume       = {97},
  pages        = {686--694},
  publisher    = {{PMLR}},
  year         = {2019},
  url          = {http://proceedings.mlr.press/v97/boda19a.html},
  timestamp    = {Tue, 11 Jun 2019 15:37:38 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/BodaA19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2019 paper. "Wasserstein" (title) and "NeurIPS" (booktitle) are brace-protected against case-changing styles.
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@inproceedings{DBLP:conf/nips/BhatA19,
  author       = {Sanjay P. Bhat and
                  Prashanth, L. A.},
  editor       = {Hanna M. Wallach and
                  Hugo Larochelle and
                  Alina Beygelzimer and
                  Florence d'Alch{\'{e}}{-}Buc and
                  Emily B. Fox and
                  Roman Garnett},
  title        = {Concentration of risk measures: {A} {Wasserstein} distance approach},
  booktitle    = {Advances in Neural Information Processing Systems 32: Annual Conference
                  on Neural Information Processing Systems 2019, {NeurIPS} 2019, December
                  8-14, 2019, Vancouver, BC, Canada},
  pages        = {11739--11748},
  year         = {2019},
  url          = {https://proceedings.neurips.cc/paper/2019/hash/091bc5440296cc0e41dd60ce22fbaf88-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/BhatA19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1901.00997 (dblp CoRR record).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-1901-00997,
  author       = {Ravi Kumar Kolla and
                  Prashanth, L. A. and
                  Krishna P. Jagannathan},
  title        = {Risk-aware Multi-armed Bandits Using Conditional Value-at-Risk},
  journal      = {CoRR},
  volume       = {abs/1901.00997},
  year         = {2019},
  url          = {http://arxiv.org/abs/1901.00997},
  eprinttype   = {arXiv},
  eprint       = {1901.00997},
  timestamp    = {Thu, 31 Jan 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1901-00997.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1902.02953 (dblp CoRR record).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-1902-02953,
  author       = {Vinay Praneeth Boda and
                  Prashanth, L. A.},
  title        = {Correlated bandits or: How to minimize mean-squared error online},
  journal      = {CoRR},
  volume       = {abs/1902.02953},
  year         = {2019},
  url          = {http://arxiv.org/abs/1902.02953},
  eprinttype   = {arXiv},
  eprint       = {1902.02953},
  timestamp    = {Tue, 21 May 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1902-02953.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1902.10709 (dblp CoRR record). "Wasserstein" is brace-protected against sentence-casing styles.
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-1902-10709,
  author       = {Sanjay P. Bhat and
                  Prashanth, L. A.},
  title        = {Improved Concentration Bounds for Conditional Value-at-Risk and Cumulative
                  Prospect Theory using {Wasserstein} distance},
  journal      = {CoRR},
  volume       = {abs/1902.10709},
  year         = {2019},
  url          = {http://arxiv.org/abs/1902.10709},
  eprinttype   = {arXiv},
  eprint       = {1902.10709},
  timestamp    = {Tue, 21 May 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1902-10709.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1912.10398 (dblp CoRR record).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-1912-10398,
  author       = {Ajay Kumar Pandey and
                  Prashanth, L. A. and
                  Sanjay P. Bhat},
  title        = {Estimation of Spectral Risk Measures},
  journal      = {CoRR},
  volume       = {abs/1912.10398},
  year         = {2019},
  url          = {http://arxiv.org/abs/1912.10398},
  eprinttype   = {arXiv},
  eprint       = {1912.10398},
  timestamp    = {Fri, 03 Jan 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1912-10398.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, IEEE Transactions on Automatic Control 63(9), 2018. Journal name is written out in full.
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/tac/JieAFMS18,
  author       = {Cheng Jie and
                  Prashanth, L. A. and
                  Michael C. Fu and
                  Steven I. Marcus and
                  Csaba Szepesv{\'{a}}ri},
  title        = {Stochastic Optimization in a Cumulative Prospect Theory Framework},
  journal      = {{IEEE} Transactions on Automatic Control},
  volume       = {63},
  number       = {9},
  pages        = {2867--2882},
  year         = {2018},
  url          = {https://doi.org/10.1109/TAC.2018.2822658},
  doi          = {10.1109/TAC.2018.2822658},
  timestamp    = {Wed, 20 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tac/JieAFMS18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1808.01739 (dblp CoRR record).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-1808-01739,
  author       = {Ravi Kumar Kolla and
                  Prashanth, L. A. and
                  Sanjay P. Bhat and
                  Krishna P. Jagannathan},
  title        = {Concentration bounds for empirical conditional value-at-risk: The
                  unbounded case},
  journal      = {CoRR},
  volume       = {abs/1808.01739},
  year         = {2018},
  url          = {http://arxiv.org/abs/1808.01739},
  eprinttype   = {arXiv},
  eprint       = {1808.01739},
  timestamp    = {Sun, 02 Sep 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1808-01739.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1808.02871 (dblp CoRR record).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-1808-02871,
  author       = {Prashanth, L. A. and
                  Shalabh Bhatnagar and
                  Nirav Bhavsar and
                  Michael C. Fu and
                  Steven I. Marcus},
  title        = {Random directions stochastic approximation with deterministic perturbations},
  journal      = {CoRR},
  volume       = {abs/1808.02871},
  year         = {2018},
  url          = {http://arxiv.org/abs/1808.02871},
  eprinttype   = {arXiv},
  eprint       = {1808.02871},
  timestamp    = {Wed, 20 Nov 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1808-02871.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1810.09126 (dblp CoRR record).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/corr/abs-1810-09126,
  author       = {Prashanth, L. A. and
                  Michael C. Fu},
  title        = {Risk-Sensitive Reinforcement Learning: {A} Constrained Optimization
                  Viewpoint},
  journal      = {CoRR},
  volume       = {abs/1810.09126},
  year         = {2018},
  url          = {http://arxiv.org/abs/1810.09126},
  eprinttype   = {arXiv},
  eprint       = {1810.09126},
  timestamp    = {Wed, 20 Nov 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1810-09126.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, IEEE Transactions on Automatic Control 62(5), 2017. Journal name is written out in full.
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/tac/ABFM17,
  author       = {Prashanth, L. A. and
                  Shalabh Bhatnagar and
                  Michael C. Fu and
                  Steven I. Marcus},
  title        = {Adaptive System Optimization Using Random Directions Stochastic Approximation},
  journal      = {{IEEE} Transactions on Automatic Control},
  volume       = {62},
  number       = {5},
  pages        = {2223--2238},
  year         = {2017},
  url          = {https://doi.org/10.1109/TAC.2016.2600643},
  doi          = {10.1109/TAC.2016.2600643},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tac/ABFM17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2017 paper (AAAI Press).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@inproceedings{DBLP:conf/aaai/GopalanAFM17,
  author       = {Aditya Gopalan and
                  Prashanth, L. A. and
                  Michael C. Fu and
                  Steven I. Marcus},
  editor       = {Satinder Singh and
                  Shaul Markovitch},
  title        = {Weighted Bandits or: How Bandits Learn Distorted Values That Are Not
                  Expected},
  booktitle    = {Proceedings of the Thirty-First {AAAI} Conference on Artificial Intelligence,
                  February 4-9, 2017, San Francisco, California, {USA}},
  pages        = {1941--1947},
  publisher    = {{AAAI} Press},
  year         = {2017},
  url          = {https://doi.org/10.1609/aaai.v31i1.10922},
  doi          = {10.1609/AAAI.V31I1.10922},
  timestamp    = {Mon, 04 Sep 2023 14:40:32 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/GopalanAFM17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Machine Learning 105(3), 2016. Journal name is written out in full;
% "MDPs" is brace-protected against sentence-casing styles.
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/ml/AG16,
  author       = {Prashanth, L. A. and
                  Mohammad Ghavamzadeh},
  title        = {Variance-constrained actor-critic algorithms for discounted and average
                  reward {MDPs}},
  journal      = {Machine Learning},
  volume       = {105},
  number       = {3},
  pages        = {367--417},
  year         = {2016},
  url          = {https://doi.org/10.1007/s10994-016-5569-5},
  doi          = {10.1007/S10994-016-5569-5},
  timestamp    = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ml/AG16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Systems and Control Letters 92, 2016. Journal name is written out in full.
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@article{DBLP:journals/scl/APBC16,
  author       = {Prashanth, L. A. and
                  H. L. Prasad and
                  Shalabh Bhatnagar and
                  Prakash Chandra},
  title        = {A constrained optimization perspective on actor-critic algorithms
                  and application to network routing},
  journal      = {Systems \& Control Letters},
  volume       = {92},
  pages        = {46--51},
  year         = {2016},
  url          = {https://doi.org/10.1016/j.sysconle.2016.02.020},
  doi          = {10.1016/J.SYSCONLE.2016.02.020},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/scl/APBC16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AISTATS 2016 paper (JMLR Workshop and Conference Proceedings volume 51).
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@inproceedings{DBLP:conf/aistats/HuAGS16,
  author       = {Xiaowei Hu and
                  Prashanth, L. A. and
                  Andr{\'{a}}s Gy{\"{o}}rgy and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Arthur Gretton and
                  Christian C. Robert},
  title        = {(Bandit) Convex Optimization with Biased Noisy Gradient Oracles},
  booktitle    = {Proceedings of the 19th International Conference on Artificial Intelligence
                  and Statistics, {AISTATS} 2016, Cadiz, Spain, May 9-11, 2016},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {51},
  pages        = {819--828},
  publisher    = {JMLR.org},
  year         = {2016},
  url          = {http://proceedings.mlr.press/v51/hu16b.html},
  timestamp    = {Tue, 15 Feb 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/aistats/HuAGS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CDC 2016 conference paper (IEEE). "Hessian" is brace-protected against sentence-casing styles.
% "Prashanth, L. A." is in comma form so "A." is not parsed as the surname (assumes he is cited as "Prashanth" -- confirm).
@inproceedings{DBLP:conf/cdc/ReddyAB16,
  author       = {Sai Koti Reddy Danda and
                  Prashanth, L. A. and
                  Shalabh Bhatnagar},
  title        = {Improved {Hessian} estimation for adaptive random directions stochastic
                  approximation},
  booktitle    = {55th {IEEE} Conference on Decision and Control, {CDC} 2016, Las Vegas,
                  NV, USA, December 12-14, 2016},
  pages        = {3682--3687},
  publisher    = {{IEEE}},
  year         = {2016},
  url          = {https://doi.org/10.1109/CDC.2016.7798823},
  doi          = {10.1109/CDC.2016.7798823},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cdc/ReddyAB16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2016 conference paper (JMLR W&CP vol. 48).
% DBLP:journals/corr/ACFM15 has a closely related title; whether it is the preprint of this paper is not established by this file.
@inproceedings{DBLP:conf/icml/AJFMS16,
  author       = {Prashanth L. A. and
                  Cheng Jie and
                  Michael C. Fu and
                  Steven I. Marcus and
                  Csaba Szepesv{\'{a}}ri},
  editor       = {Maria{-}Florina Balcan and
                  Kilian Q. Weinberger},
  title        = {Cumulative Prospect Theory Meets Reinforcement Learning: Prediction
                  and Control},
  booktitle    = {Proceedings of the 33rd International Conference on Machine Learning,
                  {ICML} 2016, New York City, NY, USA, June 19-24, 2016},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {48},
  pages        = {1406--1415},
  publisher    = {JMLR.org},
  year         = {2016},
  url          = {http://proceedings.mlr.press/v48/la16.html},
  timestamp    = {Wed, 20 Nov 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/AJFMS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1609.07087); DBLP:conf/aistats/HuAGS16 has the same title and authors.
% The first title word is braced: sentence-casing styles keep only the first character, which here is "(".
@article{DBLP:journals/corr/HuAGS16,
  author       = {Xiaowei Hu and
                  Prashanth L. A. and
                  Andr{\'{a}}s Gy{\"{o}}rgy and
                  Csaba Szepesv{\'{a}}ri},
  title        = {({Bandit}) Convex Optimization with Biased Noisy Gradient Oracles},
  journal      = {CoRR},
  volume       = {abs/1609.07087},
  year         = {2016},
  url          = {http://arxiv.org/abs/1609.07087},
  eprinttype   = {arXiv},
  eprint       = {1609.07087},
  timestamp    = {Tue, 15 Feb 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/HuAGS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1611.10283); eprint and eprinttype carry the arXiv identifier.
@article{DBLP:journals/corr/GopalanAFM16,
  author       = {Aditya Gopalan and
                  Prashanth L. A. and
                  Michael C. Fu and
                  Steven I. Marcus},
  title        = {Weighted bandits or: How bandits learn distorted values that are not
                  expected},
  journal      = {CoRR},
  volume       = {abs/1611.10283},
  year         = {2016},
  url          = {http://arxiv.org/abs/1611.10283},
  eprinttype   = {arXiv},
  eprint       = {1611.10283},
  timestamp    = {Wed, 20 Nov 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/GopalanAFM16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, J. Optim. Theory Appl. 164(2), 2015; url is the doi.org resolver form of the doi field.
% Newton is braced so sentence-casing bibliography styles keep the proper noun capitalised.
@article{DBLP:journals/jota/BhatnagarA15,
  author       = {Shalabh Bhatnagar and
                  Prashanth L. A.},
  title        = {Simultaneous Perturbation {Newton} Algorithms for Simulation Optimization},
  journal      = {J. Optim. Theory Appl.},
  volume       = {164},
  number       = {2},
  pages        = {621--643},
  year         = {2015},
  url          = {https://doi.org/10.1007/s10957-013-0507-1},
  doi          = {10.1007/S10957-013-0507-1},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jota/BhatnagarA15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Simul. 91(5), 2015.
% The CoRR record DBLP:journals/corr/PrashanthPDBD13 has the same authors and the same title up to capitalisation.
@article{DBLP:journals/simulation/APDBD15,
  author       = {Prashanth L. A. and
                  H. L. Prasad and
                  Nirmit Desai and
                  Shalabh Bhatnagar and
                  Gargi Dasgupta},
  title        = {Simultaneous perturbation methods for adaptive labor staffing in service
                  systems},
  journal      = {Simul.},
  volume       = {91},
  number       = {5},
  pages        = {432--455},
  year         = {2015},
  url          = {https://doi.org/10.1177/0037549715581198},
  doi          = {10.1177/0037549715581198},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/simulation/APDBD15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2015 conference paper; url is the doi.org resolver form of the doi field (the two differ only in letter case).
@inproceedings{DBLP:conf/aaai/KordaAM15,
  author       = {Nathaniel Korda and
                  Prashanth L. A. and
                  R{\'{e}}mi Munos},
  editor       = {Blai Bonet and
                  Sven Koenig},
  title        = {Fast Gradient Descent for Drifting Least Squares Regression, with
                  Application to Bandits},
  booktitle    = {Proceedings of the Twenty-Ninth {AAAI} Conference on Artificial Intelligence,
                  January 25-30, 2015, Austin, Texas, {USA}},
  pages        = {2708--2714},
  publisher    = {{AAAI} Press},
  year         = {2015},
  url          = {https://doi.org/10.1609/aaai.v29i1.9619},
  doi          = {10.1609/AAAI.V29I1.9619},
  timestamp    = {Mon, 18 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/KordaAM15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AAMAS 2015 conference paper; no doi field, only an ACM Digital Library url.
% Nash is braced so sentence-casing bibliography styles keep the proper noun capitalised.
@inproceedings{DBLP:conf/atal/PrasadAB15,
  author       = {H. L. Prasad and
                  Prashanth L. A. and
                  Shalabh Bhatnagar},
  editor       = {Gerhard Weiss and
                  Pinar Yolum and
                  Rafael H. Bordini and
                  Edith Elkind},
  title        = {Two-Timescale Algorithms for Learning {Nash} Equilibria in General-Sum
                  Stochastic Games},
  booktitle    = {Proceedings of the 2015 International Conference on Autonomous Agents
                  and Multiagent Systems, {AAMAS} 2015, Istanbul, Turkey, May 4-8, 2015},
  pages        = {1371--1379},
  publisher    = {{ACM}},
  year         = {2015},
  url          = {http://dl.acm.org/citation.cfm?id=2773328},
  timestamp    = {Tue, 08 Mar 2022 10:12:47 +0100},
  biburl       = {https://dblp.org/rec/conf/atal/PrasadAB15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2015 conference paper (JMLR W&CP vol. 37); the CoRR record DBLP:journals/corr/KordaP14 has the same title and authors.
@inproceedings{DBLP:conf/icml/KordaA15,
  author       = {Nathaniel Korda and
                  Prashanth L. A.},
  editor       = {Francis R. Bach and
                  David M. Blei},
  title        = {On {TD(0)} with function approximation: Concentration bounds and a
                  centered variant with exponential convergence},
  booktitle    = {Proceedings of the 32nd International Conference on Machine Learning,
                  {ICML} 2015, Lille, France, 6-11 July 2015},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {37},
  pages        = {626--634},
  publisher    = {JMLR.org},
  year         = {2015},
  url          = {http://proceedings.mlr.press/v37/korda15.html},
  timestamp    = {Wed, 29 May 2019 08:41:45 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/KordaA15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1502.05577); eprint and eprinttype carry the arXiv identifier.
@article{DBLP:journals/corr/AB15,
  author       = {Prashanth L. A. and
                  Shalabh Bhatnagar},
  title        = {Adaptive system optimization using (simultaneous) random directions
                  stochastic approximation},
  journal      = {CoRR},
  volume       = {abs/1502.05577},
  year         = {2015},
  url          = {http://arxiv.org/abs/1502.05577},
  eprinttype   = {arXiv},
  eprint       = {1502.05577},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/AB15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1506.02632).
% DBLP:conf/icml/AJFMS16 has a closely related title; whether it is the published version is not established by this file.
@article{DBLP:journals/corr/ACFM15,
  author       = {Prashanth L. A. and
                  Cheng Jie and
                  Michael C. Fu and
                  Steven I. Marcus},
  title        = {Cumulative Prospect Theory Meets Reinforcement Learning: Estimation
                  and Control},
  journal      = {CoRR},
  volume       = {abs/1506.02632},
  year         = {2015},
  url          = {http://arxiv.org/abs/1506.02632},
  eprinttype   = {arXiv},
  eprint       = {1506.02632},
  timestamp    = {Wed, 20 Nov 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/ACFM15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1507.07984); eprint and eprinttype carry the arXiv identifier.
@article{DBLP:journals/corr/APBC15,
  author       = {Prashanth L. A. and
                  H. L. Prasad and
                  Shalabh Bhatnagar and
                  Prakash Chandra},
  title        = {A constrained optimization perspective on actor critic algorithms
                  and application to network routing},
  journal      = {CoRR},
  volume       = {abs/1507.07984},
  year         = {2015},
  url          = {http://arxiv.org/abs/1507.07984},
  eprinttype   = {arXiv},
  eprint       = {1507.07984},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/APBC15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Wirel. Networks 20(8), 2014; url is the doi.org resolver form of the doi field.
% Q-learning is braced so sentence-casing bibliography styles do not lowercase the algorithm name.
@article{DBLP:journals/winet/ACB14,
  author       = {Prashanth L. A. and
                  Abhranil Chatterjee and
                  Shalabh Bhatnagar},
  title        = {Two timescale convergent {Q-learning} for sleep-scheduling in wireless
                  sensor networks},
  journal      = {Wirel. Networks},
  volume       = {20},
  number       = {8},
  pages        = {2589--2604},
  year         = {2014},
  url          = {https://doi.org/10.1007/s11276-014-0762-6},
  doi          = {10.1007/S11276-014-0762-6},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/winet/ACB14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ALT 2014 conference paper (LNCS vol. 8776); the CoRR record DBLP:journals/corr/A14a has the same title and author.
% CVaR and MDPs are braced so sentence-casing bibliography styles keep the acronyms' capitalisation.
@inproceedings{DBLP:conf/alt/A14,
  author       = {Prashanth L. A.},
  editor       = {Peter Auer and
                  Alexander Clark and
                  Thomas Zeugmann and
                  Sandra Zilles},
  title        = {Policy Gradients for {CVaR}-Constrained {MDPs}},
  booktitle    = {Algorithmic Learning Theory - 25th International Conference, {ALT}
                  2014, Bled, Slovenia, October 8-10, 2014. Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {8776},
  pages        = {155--169},
  publisher    = {Springer},
  year         = {2014},
  url          = {https://doi.org/10.1007/978-3-319-11662-4\_12},
  doi          = {10.1007/978-3-319-11662-4\_12},
  timestamp    = {Sun, 25 Oct 2020 22:46:29 +0100},
  biburl       = {https://dblp.org/rec/conf/alt/A14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CDC 2014 conference paper.
% The CoRR record DBLP:journals/corr/FonteneauA14 has the same authors and the same title up to capitalisation.
@inproceedings{DBLP:conf/cdc/FonteneauA14,
  author       = {Raphael Fonteneau and
                  Prashanth L. A.},
  title        = {Simultaneous perturbation algorithms for batch off-policy search},
  booktitle    = {53rd {IEEE} Conference on Decision and Control, {CDC} 2014, Los Angeles,
                  CA, USA, December 15-17, 2014},
  pages        = {2622--2627},
  publisher    = {{IEEE}},
  year         = {2014},
  url          = {https://doi.org/10.1109/CDC.2014.7039790},
  doi          = {10.1109/CDC.2014.7039790},
  timestamp    = {Fri, 04 Mar 2022 13:29:15 +0100},
  biburl       = {https://dblp.org/rec/conf/cdc/FonteneauA14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% COMSNETS 2014 conference paper; url is the doi.org resolver form of the doi field.
% Key suffix ACB14 is shared with DBLP:journals/winet/ACB14 (same authors, different title and venue).
@inproceedings{DBLP:conf/comsnets/ACB14,
  author       = {Prashanth L. A. and
                  Abhranil Chatterjee and
                  Shalabh Bhatnagar},
  title        = {Adaptive sleep-wake control using reinforcement learning in sensor
                  networks},
  booktitle    = {Sixth International Conference on Communication Systems and Networks,
                  {COMSNETS} 2014, Bangalore, India, January 6-10, 2014},
  pages        = {1--8},
  publisher    = {{IEEE}},
  year         = {2014},
  url          = {https://doi.org/10.1109/COMSNETS.2014.6734874},
  doi          = {10.1109/COMSNETS.2014.6734874},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/comsnets/ACB14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ECML PKDD 2014 conference paper (LNCS vol. 8725, Part II).
% The underscore in url and doi is written as a LaTeX escape; it stands for a plain underscore in the actual DOI.
@inproceedings{DBLP:conf/pkdd/AKM14,
  author       = {Prashanth L. A. and
                  Nathaniel Korda and
                  R{\'{e}}mi Munos},
  editor       = {Toon Calders and
                  Floriana Esposito and
                  Eyke H{\"{u}}llermeier and
                  Rosa Meo},
  title        = {Fast {LSTD} Using Stochastic Approximation: Finite Time Analysis and
                  Application to Traffic Control},
  booktitle    = {Machine Learning and Knowledge Discovery in Databases - European Conference,
                  {ECML} {PKDD} 2014, Nancy, France, September 15-19, 2014. Proceedings,
                  Part {II}},
  series       = {Lecture Notes in Computer Science},
  volume       = {8725},
  pages        = {66--81},
  publisher    = {Springer},
  year         = {2014},
  url          = {https://doi.org/10.1007/978-3-662-44851-9\_5},
  doi          = {10.1007/978-3-662-44851-9\_5},
  timestamp    = {Thu, 31 Oct 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/pkdd/AKM14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1401.2086).
% DBLP:conf/atal/PrasadAB15 has the same authors and a closely related title; the relation is not established by this file.
% Nash is braced so sentence-casing bibliography styles keep the proper noun capitalised.
@article{DBLP:journals/corr/PrasadAB14,
  author       = {H. L. Prasad and
                  Prashanth L. A. and
                  Shalabh Bhatnagar},
  title        = {Algorithms for {Nash} Equilibria in General-Sum Stochastic Games},
  journal      = {CoRR},
  volume       = {abs/1401.2086},
  year         = {2014},
  url          = {http://arxiv.org/abs/1401.2086},
  eprinttype   = {arXiv},
  eprint       = {1401.2086},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/PrasadAB14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1403.4514).
% DBLP:conf/cdc/FonteneauA14 has the same authors and the same title up to capitalisation.
@article{DBLP:journals/corr/FonteneauA14,
  author       = {Raphael Fonteneau and
                  Prashanth L. A.},
  title        = {Simultaneous Perturbation Algorithms for Batch Off-Policy Search},
  journal      = {CoRR},
  volume       = {abs/1403.4514},
  year         = {2014},
  url          = {http://arxiv.org/abs/1403.4514},
  eprinttype   = {arXiv},
  eprint       = {1403.4514},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/FonteneauA14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1403.6530); eprint and eprinttype carry the arXiv identifier.
@article{DBLP:journals/corr/AG14,
  author       = {Prashanth L. A. and
                  Mohammad Ghavamzadeh},
  title        = {Actor-Critic Algorithms for Risk-Sensitive Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1403.6530},
  year         = {2014},
  url          = {http://arxiv.org/abs/1403.6530},
  eprinttype   = {arXiv},
  eprint       = {1403.6530},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/AG14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1405.2690); DBLP:conf/alt/A14 has the same title and author.
% CVaR and MDPs are braced so sentence-casing bibliography styles keep the acronyms' capitalisation.
@article{DBLP:journals/corr/A14a,
  author       = {Prashanth L. A.},
  title        = {Policy Gradients for {CVaR}-Constrained {MDPs}},
  journal      = {CoRR},
  volume       = {abs/1405.2690},
  year         = {2014},
  url          = {http://arxiv.org/abs/1405.2690},
  eprinttype   = {arXiv},
  eprint       = {1405.2690},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/A14a.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1411.3224); DBLP:conf/icml/KordaA15 has the same title and authors.
@article{DBLP:journals/corr/KordaP14,
  author       = {Nathaniel Korda and
                  Prashanth L. A.},
  title        = {On {TD(0)} with function approximation: Concentration bounds and a
                  centered variant with exponential convergence},
  journal      = {CoRR},
  volume       = {abs/1411.3224},
  year         = {2014},
  url          = {http://arxiv.org/abs/1411.3224},
  eprinttype   = {arXiv},
  eprint       = {1411.3224},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/KordaP14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AAMAS 2013 conference paper; no doi field, only an ACM Digital Library url.
% The first two authors are given in long name form here; other records in this file abbreviate them
% (Prashanth L. A., H. L. Prasad) - presumably the same people, confirm before merging name variants.
@inproceedings{DBLP:conf/atal/AnanthapadmanabharaoPDB13,
  author       = {Prashanth Lakshmanrao Ananthapadmanabharao and
                  Horabailu Laxminarayana Prasad and
                  Nirmit Desai and
                  Shalabh Bhatnagar},
  editor       = {Maria L. Gini and
                  Onn Shehory and
                  Takayuki Ito and
                  Catholijn M. Jonker},
  title        = {Mechanisms for hostile agents with capacity constraints},
  booktitle    = {International conference on Autonomous Agents and Multi-Agent Systems,
                  {AAMAS} '13, Saint Paul, MN, USA, May 6-10, 2013},
  pages        = {659--666},
  publisher    = {{IFAAMAS}},
  year         = {2013},
  url          = {http://dl.acm.org/citation.cfm?id=2485024},
  timestamp    = {Thu, 09 Sep 2021 16:09:54 +0200},
  biburl       = {https://dblp.org/rec/conf/atal/AnanthapadmanabharaoPDB13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS (NIPS 26) 2013 conference paper; the record has no publisher or doi field.
% MDPs is braced so sentence-casing bibliography styles keep the acronym's capitalisation.
@inproceedings{DBLP:conf/nips/LAG13,
  author       = {Prashanth L. A. and
                  Mohammad Ghavamzadeh},
  editor       = {Christopher J. C. Burges and
                  L{\'{e}}on Bottou and
                  Zoubin Ghahramani and
                  Kilian Q. Weinberger},
  title        = {Actor-Critic Algorithms for Risk-Sensitive {MDPs}},
  booktitle    = {Advances in Neural Information Processing Systems 26: 27th Annual
                  Conference on Neural Information Processing Systems 2013. Proceedings
                  of a meeting held December 5-8, 2013, Lake Tahoe, Nevada, United States},
  pages        = {252--260},
  year         = {2013},
  url          = {https://proceedings.neurips.cc/paper/2013/hash/eb163727917cbba1eea208541a643e74-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/LAG13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1306.2557); eprint and eprinttype carry the arXiv identifier.
@article{DBLP:journals/corr/PrashanthKM13,
  author       = {Prashanth L. A. and
                  Nathaniel Korda and
                  R{\'{e}}mi Munos},
  title        = {Analysis of stochastic approximation for efficient least squares regression
                  and {LSTD}},
  journal      = {CoRR},
  volume       = {abs/1306.2557},
  year         = {2013},
  url          = {http://arxiv.org/abs/1306.2557},
  eprinttype   = {arXiv},
  eprint       = {1306.2557},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/PrashanthKM13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1307.3176); eprint and eprinttype carry the arXiv identifier.
@article{DBLP:journals/corr/KordaPM13,
  author       = {Nathaniel Korda and
                  Prashanth L. A. and
                  R{\'{e}}mi Munos},
  title        = {Online gradient descent for least squares regression: Non-asymptotic
                  bounds and application to bandits},
  journal      = {CoRR},
  volume       = {abs/1307.3176},
  year         = {2013},
  url          = {http://arxiv.org/abs/1307.3176},
  eprinttype   = {arXiv},
  eprint       = {1307.3176},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/KordaPM13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1312.7292).
% The first author is given in long name form here; other records in this file use Prashanth L. A. -
% presumably the same person, confirm before merging name variants.
@article{DBLP:journals/corr/AnanthapadmanabharaoCB13,
  author       = {Prashanth Lakshmanrao Ananthapadmanabharao and
                  Abhranil Chatterjee and
                  Shalabh Bhatnagar},
  title        = {Reinforcement Learning for Sleep-Wake Scheduling in Sensor Networks},
  journal      = {CoRR},
  volume       = {abs/1312.7292},
  year         = {2013},
  url          = {http://arxiv.org/abs/1312.7292},
  eprinttype   = {arXiv},
  eprint       = {1312.7292},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/AnanthapadmanabharaoCB13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1312.7430).
% DBLP:journals/simulation/APDBD15 has the same authors and the same title up to capitalisation.
@article{DBLP:journals/corr/PrashanthPDBD13,
  author       = {Prashanth L. A. and
                  H. L. Prasad and
                  Nirmit Desai and
                  Shalabh Bhatnagar and
                  Gargi Dasgupta},
  title        = {Simultaneous Perturbation Methods for Adaptive Labor Staffing in Service
                  Systems},
  journal      = {CoRR},
  volume       = {abs/1312.7430},
  year         = {2013},
  url          = {http://arxiv.org/abs/1312.7430},
  eprinttype   = {arXiv},
  eprint       = {1312.7430},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/PrashanthPDBD13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, IEEE Trans. Veh. Technol. 61(9), 2012; url is the doi.org resolver form of the doi field.
@article{DBLP:journals/tvt/PrashanthB12,
  author       = {Prashanth L. A. and
                  Shalabh Bhatnagar},
  title        = {Threshold Tuning Using Stochastic Optimization for Graded Signal Control},
  journal      = {{IEEE} Trans. Veh. Technol.},
  volume       = {61},
  number       = {9},
  pages        = {3865--3880},
  year         = {2012},
  url          = {https://doi.org/10.1109/TVT.2012.2209904},
  doi          = {10.1109/TVT.2012.2209904},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tvt/PrashanthB12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, IEEE Trans. Intell. Transp. Syst. 12(2), 2011; url is the doi.org resolver form of the doi field.
@article{DBLP:journals/tits/PrashanthB11,
  author       = {Prashanth L. A. and
                  Shalabh Bhatnagar},
  title        = {Reinforcement Learning With Function Approximation for Traffic Signal
                  Control},
  journal      = {{IEEE} Trans. Intell. Transp. Syst.},
  volume       = {12},
  number       = {2},
  pages        = {412--421},
  year         = {2011},
  url          = {https://doi.org/10.1109/TITS.2010.2091408},
  doi          = {10.1109/TITS.2010.2091408},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/tits/PrashanthB11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICSOC 2011 conference paper (LNCS vol. 7084).
% The underscore in url and doi is written as a LaTeX escape; it stands for a plain underscore in the actual DOI.
@inproceedings{DBLP:conf/icsoc/PrashanthPDBD11,
  author       = {Prashanth L. A. and
                  H. L. Prasad and
                  Nirmit Desai and
                  Shalabh Bhatnagar and
                  Gargi Banerjee Dasgupta},
  editor       = {Gerti Kappel and
                  Zakaria Maamar and
                  Hamid R. Motahari Nezhad},
  title        = {Stochastic Optimization for Adaptive Labor Staffing in Service Systems},
  booktitle    = {Service-Oriented Computing - 9th International Conference, {ICSOC}
                  2011, Paphos, Cyprus, December 5-8, 2011 Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {7084},
  pages        = {487--494},
  publisher    = {Springer},
  year         = {2011},
  url          = {https://doi.org/10.1007/978-3-642-25535-9\_33},
  doi          = {10.1007/978-3-642-25535-9\_33},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icsoc/PrashanthPDBD11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ITSC 2011 conference paper; url is the doi.org resolver form of the doi field.
@inproceedings{DBLP:conf/itsc/AB11,
  author       = {Prashanth L. A. and
                  Shalabh Bhatnagar},
  title        = {Reinforcement learning with average cost for adaptive control of traffic
                  lights at intersections},
  booktitle    = {14th International {IEEE} Conference on Intelligent Transportation
                  Systems, {ITSC} 2011, Washington, DC, USA, October 5-7, 2011},
  pages        = {1640--1645},
  publisher    = {{IEEE}},
  year         = {2011},
  url          = {https://doi.org/10.1109/ITSC.2011.6082823},
  doi          = {10.1109/ITSC.2011.6082823},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/itsc/AB11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% CCNC 2008 conference paper (two pages, 412--413).
% url and doi name the same identifier and differ only in letter case.
@inproceedings{DBLP:conf/ccnc/ADG08,
  author       = {Prashanth L. A. and
                  Sajal Kumar Das and
                  K. Gopinath},
  title        = {{MAC} Design for Heterogeneous Application Support in {OFDM} Based
                  Wireless Systems},
  booktitle    = {5th {IEEE} Consumer Communications and Networking Conference, {CCNC}
                  2008, Las Vegas, NV, USA, January 10-12, 2008},
  pages        = {412--413},
  publisher    = {{IEEE}},
  year         = {2008},
  url          = {https://doi.org/10.1109/ccnc08.2007.98},
  doi          = {10.1109/CCNC08.2007.98},
  timestamp    = {Mon, 09 Aug 2021 14:54:01 +0200},
  biburl       = {https://dblp.org/rec/conf/ccnc/ADG08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% COMSWARE 2008 conference paper; url is the doi.org resolver form of the doi field.
% In booktitle the protecting brace group opens after the parenthesis-inclusive acronym start, i.e. it wraps "(COMSWARE";
% braces are balanced, so the field parses correctly.
@inproceedings{DBLP:conf/comsware/PrashanthG08,
  author       = {Prashanth L. A. and
                  K. Gopinath},
  editor       = {Sunghyun Choi and
                  Jim Kurose and
                  Krithi Ramamritham},
  title        = {{OFDM-MAC} algorithms and their impact on {TCP} performance in next
                  generation mobile networks},
  booktitle    = {Proceedings of the Third International Conference on COMmunication
                  System softWAre and MiddlewaRE {(COMSWARE} 2008), January 5-10, 2008,
                  Bangalore, India},
  pages        = {133--140},
  publisher    = {{IEEE}},
  year         = {2008},
  url          = {https://doi.org/10.1109/COMSWA.2008.4554395},
  doi          = {10.1109/COMSWA.2008.4554395},
  timestamp    = {Thu, 04 Nov 2021 14:40:53 +0100},
  biburl       = {https://dblp.org/rec/conf/comsware/PrashanthG08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
a service of Schloss Dagstuhl - Leibniz Center for Informatics