Stop the war!
Остановите войну!
for scientists:
default search action
BibTeX records: Hado van Hasselt
@article{DBLP:journals/corr/abs-2402-18762,
  author     = {Clare Lyle and Zeyu Zheng and Khimya Khetarpal and Hado van Hasselt and Razvan Pascanu and James Martens and Will Dabney},
  title      = {Disentangling the Causes of Plasticity Loss in Neural Networks},
  journal    = {CoRR},
  volume     = {abs/2402.18762},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2402.18762},
  doi        = {10.48550/ARXIV.2402.18762},
  eprinttype = {arXiv},
  eprint     = {2402.18762},
  timestamp  = {Tue, 26 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-18762.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/SchmittSH23,
  author    = {Simon Schmitt and John Shawe{-}Taylor and Hado van Hasselt},
  editor    = {Brian Williams and Yiling Chen and Jennifer Neville},
  title     = {Exploration via Epistemic Value Estimation},
  booktitle = {Thirty-Seventh {AAAI} Conference on Artificial Intelligence, {AAAI} 2023, Thirty-Fifth Conference on Innovative Applications of Artificial Intelligence, {IAAI} 2023, Thirteenth Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2023, Washington, DC, USA, February 7-14, 2023},
  pages     = {9742--9751},
  publisher = {{AAAI} Press},
  year      = {2023},
  url       = {https://doi.org/10.1609/aaai.v37i8.26164},
  doi       = {10.1609/AAAI.V37I8.26164},
  timestamp = {Sun, 12 Nov 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/aaai/SchmittSH23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Title: "Atari" is braced so sentence-casing styles keep its capital.
% Author: "V{\'{\i}}ctor Campos" is spelled as in the CoRR record of the same paper (abs-2209-07550).
@inproceedings{DBLP:conf/iclr/Kapturowski0JRH23,
  author    = {Steven Kapturowski and V{\'{\i}}ctor Campos and Ray Jiang and Nemanja Rakicevic and Hado van Hasselt and Charles Blundell and Adri{\`{a}} Puigdom{\`{e}}nech Badia},
  title     = {Human-level {Atari} 200x faster},
  booktitle = {The Eleventh International Conference on Learning Representations, {ICLR} 2023, Kigali, Rwanda, May 1-5, 2023},
  publisher = {OpenReview.net},
  year      = {2023},
  url       = {https://openreview.net/pdf?id=JtC6yOHRoJJ},
  timestamp = {Fri, 30 Jun 2023 14:38:38 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/Kapturowski0JRH23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Author: "Van Roy" is a two-word surname; the "Last, First" form stops BibTeX from reading "Van" as a given name.
% URL: stored raw (no backslash before the underscore) so verbatim URL handling yields a working link.
@inproceedings{DBLP:conf/nips/Abel0RPHS23,
  author    = {David Abel and Andr{\'{e}} Barreto and Van Roy, Benjamin and Doina Precup and Hado Philip van Hasselt and Satinder Singh},
  editor    = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
  title     = {A Definition of Continual Reinforcement Learning},
  booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
  year      = {2023},
  url       = {http://papers.nips.cc/paper_files/paper/2023/hash/9d8cf1247786d6dfeefeeb53b8b5f6d7-Abstract-Conference.html},
  timestamp = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/Abel0RPHS23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% URL: stored raw (no backslash before the underscore) so verbatim URL handling yields a working link.
@inproceedings{DBLP:conf/nips/FlennerhagZOH0S23,
  author    = {Sebastian Flennerhag and Tom Zahavy and Brendan O'Donoghue and Hado Philip van Hasselt and Andr{\'{a}}s Gy{\"{o}}rgy and Satinder Singh},
  editor    = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
  title     = {Optimistic Meta-Gradients},
  booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
  year      = {2023},
  url       = {http://papers.nips.cc/paper_files/paper/2023/hash/b46bc1449205888e1883f692aff1a252-Abstract-Conference.html},
  timestamp = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/FlennerhagZOH0S23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2301-03236,
  author     = {Sebastian Flennerhag and Tom Zahavy and Brendan O'Donoghue and Hado van Hasselt and Andr{\'{a}}s Gy{\"{o}}rgy and Satinder Singh},
  title      = {Optimistic Meta-Gradients},
  journal    = {CoRR},
  volume     = {abs/2301.03236},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2301.03236},
  doi        = {10.48550/ARXIV.2301.03236},
  eprinttype = {arXiv},
  eprint     = {2301.03236},
  timestamp  = {Tue, 10 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2301-03236.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Title: the acronym "MDPs" is braced so sentence-casing styles do not lowercase it.
@article{DBLP:journals/corr/abs-2302-04250,
  author     = {Chentian Jiang and Nan Rosemary Ke and Hado van Hasselt},
  title      = {Learning How to Infer Partial {MDPs} for In-Context Adaptation and Exploration},
  journal    = {CoRR},
  volume     = {abs/2302.04250},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2302.04250},
  doi        = {10.48550/ARXIV.2302.04250},
  eprinttype = {arXiv},
  eprint     = {2302.04250},
  timestamp  = {Fri, 10 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-04250.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2303-04012,
  author     = {Simon Schmitt and John Shawe{-}Taylor and Hado van Hasselt},
  title      = {Exploration via Epistemic Value Estimation},
  journal    = {CoRR},
  volume     = {abs/2303.04012},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2303.04012},
  doi        = {10.48550/ARXIV.2303.04012},
  eprinttype = {arXiv},
  eprint     = {2303.04012},
  timestamp  = {Wed, 15 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2303-04012.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Author: "Van Roy" is a two-word surname; the "Last, First" form stops BibTeX from reading "Van" as a given name.
@article{DBLP:journals/corr/abs-2307-11044,
  author     = {David Abel and Andr{\'{e}} Barreto and Hado van Hasselt and Van Roy, Benjamin and Doina Precup and Satinder Singh},
  title      = {On the Convergence of Bounded Agents},
  journal    = {CoRR},
  volume     = {abs/2307.11044},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2307.11044},
  doi        = {10.48550/ARXIV.2307.11044},
  eprinttype = {arXiv},
  eprint     = {2307.11044},
  timestamp  = {Wed, 26 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2307-11044.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Author: "Van Roy" is a two-word surname; the "Last, First" form stops BibTeX from reading "Van" as a given name.
@article{DBLP:journals/corr/abs-2307-11046,
  author     = {David Abel and Andr{\'{e}} Barreto and Van Roy, Benjamin and Doina Precup and Hado van Hasselt and Satinder Singh},
  title      = {A Definition of Continual Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2307.11046},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2307.11046},
  doi        = {10.48550/ARXIV.2307.11046},
  eprinttype = {arXiv},
  eprint     = {2307.11046},
  timestamp  = {Wed, 26 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2307-11046.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2312-01072,
  author     = {Eduardo Pignatelli and Johan Ferret and Matthieu Geist and Thomas Mesnard and Hado van Hasselt and Laura Toni},
  title      = {A Survey of Temporal Credit Assignment in Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2312.01072},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2312.01072},
  doi        = {10.48550/ARXIV.2312.01072},
  eprinttype = {arXiv},
  eprint     = {2312.01072},
  timestamp  = {Fri, 08 Dec 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2312-01072.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Booktitle: corrected "Twelveth" to "Twelfth" and added the missing comma before "Virtual Event".
@inproceedings{DBLP:conf/aaai/JiangZC0H22,
  author    = {Ray Jiang and Shangtong Zhang and Veronica Chelu and Adam White and Hado van Hasselt},
  title     = {Learning Expected Emphatic Traces for Deep {RL}},
  booktitle = {Thirty-Sixth {AAAI} Conference on Artificial Intelligence, {AAAI} 2022, Thirty-Fourth Conference on Innovative Applications of Artificial Intelligence, {IAAI} 2022, The Twelfth Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2022, Virtual Event, February 22 - March 1, 2022},
  pages     = {7015--7023},
  publisher = {{AAAI} Press},
  year      = {2022},
  url       = {https://doi.org/10.1609/aaai.v36i6.20660},
  doi       = {10.1609/AAAI.V36I6.20660},
  timestamp = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/JiangZC0H22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Booktitle: corrected "Twelveth" to "Twelfth" and added the missing comma before "Virtual Event".
@inproceedings{DBLP:conf/aaai/KirschFHFOC22,
  author    = {Louis Kirsch and Sebastian Flennerhag and Hado van Hasselt and Abram L. Friesen and Junhyuk Oh and Yutian Chen},
  title     = {Introducing Symmetries to Black Box Meta Reinforcement Learning},
  booktitle = {Thirty-Sixth {AAAI} Conference on Artificial Intelligence, {AAAI} 2022, Thirty-Fourth Conference on Innovative Applications of Artificial Intelligence, {IAAI} 2022, The Twelfth Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2022, Virtual Event, February 22 - March 1, 2022},
  pages     = {7202--7210},
  publisher = {{AAAI} Press},
  year      = {2022},
  url       = {https://doi.org/10.1609/aaai.v36i7.20681},
  doi       = {10.1609/AAAI.V36I7.20681},
  timestamp = {Sat, 16 Dec 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/aaai/KirschFHFOC22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Booktitle: corrected "Twelveth" to "Twelfth" and added the missing comma before "Virtual Event".
@inproceedings{DBLP:conf/aaai/SchmittSH22,
  author    = {Simon Schmitt and John Shawe{-}Taylor and Hado van Hasselt},
  title     = {Chaining Value Functions for Off-Policy Learning},
  booktitle = {Thirty-Sixth {AAAI} Conference on Artificial Intelligence, {AAAI} 2022, Thirty-Fourth Conference on Innovative Applications of Artificial Intelligence, {IAAI} 2022, The Twelfth Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2022, Virtual Event, February 22 - March 1, 2022},
  pages     = {8187--8195},
  publisher = {{AAAI} Press},
  year      = {2022},
  url       = {https://doi.org/10.1609/aaai.v36i8.20792},
  doi       = {10.1609/AAAI.V36I8.20792},
  timestamp = {Sat, 21 Oct 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/SchmittSH22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/iclr/FlennerhagSZHS022,
  author    = {Sebastian Flennerhag and Yannick Schroecker and Tom Zahavy and Hado van Hasselt and David Silver and Satinder Singh},
  title     = {Bootstrapped Meta-Learning},
  booktitle = {The Tenth International Conference on Learning Representations, {ICLR} 2022, Virtual Event, April 25-29, 2022},
  publisher = {OpenReview.net},
  year      = {2022},
  url       = {https://openreview.net/forum?id=b-ny3x071E5},
  timestamp = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/FlennerhagSZHS022.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/iclr/SilverGDHH22,
  author    = {David Silver and Anirudh Goyal and Ivo Danihelka and Matteo Hessel and Hado van Hasselt},
  title     = {Learning by Directional Gradient Descent},
  booktitle = {The Tenth International Conference on Learning Representations, {ICLR} 2022, Virtual Event, April 25-29, 2022},
  publisher = {OpenReview.net},
  year      = {2022},
  url       = {https://openreview.net/forum?id=5i7lJLuhTm},
  timestamp = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/SilverGDHH22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2201-06468,
  author     = {Simon Schmitt and John Shawe{-}Taylor and Hado van Hasselt},
  title      = {Chaining Value Functions for Off-Policy Learning},
  journal    = {CoRR},
  volume     = {abs/2201.06468},
  year       = {2022},
  url        = {https://arxiv.org/abs/2201.06468},
  eprinttype = {arXiv},
  eprint     = {2201.06468},
  timestamp  = {Fri, 21 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2201-06468.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2202-09699,
  author     = {Veronica Chelu and Diana Borsa and Doina Precup and Hado van Hasselt},
  title      = {Selective Credit Assignment},
  journal    = {CoRR},
  volume     = {abs/2202.09699},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.09699},
  eprinttype = {arXiv},
  eprint     = {2202.09699},
  timestamp  = {Tue, 01 Mar 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-09699.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Title: "Atari" is braced so sentence-casing styles keep its capital.
@article{DBLP:journals/corr/abs-2209-07550,
  author     = {Steven Kapturowski and V{\'{\i}}ctor Campos and Ray Jiang and Nemanja Rakicevic and Hado van Hasselt and Charles Blundell and Adri{\`{a}} Puigdom{\`{e}}nech Badia},
  title      = {Human-level {Atari} 200x faster},
  journal    = {CoRR},
  volume     = {abs/2209.07550},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2209.07550},
  doi        = {10.48550/ARXIV.2209.07550},
  eprinttype = {arXiv},
  eprint     = {2209.07550},
  timestamp  = {Wed, 02 Nov 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2209-07550.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aaai/HasseltMHSBB21,
  author    = {Hado van Hasselt and Sephora Madjiheurem and Matteo Hessel and David Silver and Andr{\'{e}} Barreto and Diana Borsa},
  title     = {Expected Eligibility Traces},
  booktitle = {Thirty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI} 2021, Thirty-Third Conference on Innovative Applications of Artificial Intelligence, {IAAI} 2021, The Eleventh Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2021, Virtual Event, February 2-9, 2021},
  pages     = {9997--10005},
  publisher = {{AAAI} Press},
  year      = {2021},
  url       = {https://doi.org/10.1609/aaai.v35i11.17200},
  doi       = {10.1609/AAAI.V35I11.17200},
  timestamp = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/HasseltMHSBB21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/atal/GarneloCLTOGHB21,
  author    = {Marta Garnelo and Wojciech Marian Czarnecki and Siqi Liu and Dhruva Tirumala and Junhyuk Oh and Gauthier Gidel and Hado van Hasselt and David Balduzzi},
  editor    = {Frank Dignum and Alessio Lomuscio and Ulle Endriss and Ann Now{\'{e}}},
  title     = {Pick Your Battles: Interaction Graphs as Population-Level Objectives for Strategic Diversity},
  booktitle = {{AAMAS} '21: 20th International Conference on Autonomous Agents and Multiagent Systems, Virtual Event, United Kingdom, May 3-7, 2021},
  pages     = {1501--1503},
  publisher = {{ACM}},
  year      = {2021},
  url       = {https://www.ifaamas.org/Proceedings/aamas2021/pdfs/p1501.pdf},
  doi       = {10.5555/3463952.3464139},
  timestamp = {Thu, 11 May 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/atal/GarneloCLTOGHB21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/HesselDVGSSWSH21,
  author    = {Matteo Hessel and Ivo Danihelka and Fabio Viola and Arthur Guez and Simon Schmitt and Laurent Sifre and Theophane Weber and David Silver and Hado van Hasselt},
  editor    = {Marina Meila and Tong Zhang},
  title     = {Muesli: Combining Improvements in Policy Optimization},
  booktitle = {Proceedings of the 38th International Conference on Machine Learning, {ICML} 2021, 18-24 July 2021, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {139},
  pages     = {4214--4226},
  publisher = {{PMLR}},
  year      = {2021},
  url       = {http://proceedings.mlr.press/v139/hessel21a.html},
  timestamp = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/HesselDVGSSWSH21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/JiangZXWHBH21,
  author    = {Ray Jiang and Tom Zahavy and Zhongwen Xu and Adam White and Matteo Hessel and Charles Blundell and Hado van Hasselt},
  editor    = {Marina Meila and Tong Zhang},
  title     = {Emphatic Algorithms for Deep Reinforcement Learning},
  booktitle = {Proceedings of the 38th International Conference on Machine Learning, {ICML} 2021, 18-24 July 2021, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {139},
  pages     = {5023--5033},
  publisher = {{PMLR}},
  year      = {2021},
  url       = {http://proceedings.mlr.press/v139/jiang21j.html},
  timestamp = {Mon, 25 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/JiangZXWHBH21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/FarquharBMFHHS21,
  author    = {Gregory Farquhar and Kate Baumli and Zita Marinho and Angelos Filos and Matteo Hessel and Hado Philip van Hasselt and David Silver},
  editor    = {Marc'Aurelio Ranzato and Alina Beygelzimer and Yann N. Dauphin and Percy Liang and Jennifer Wortman Vaughan},
  title     = {Self-Consistent Models and Values},
  booktitle = {Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6-14, 2021, virtual},
  pages     = {1111--1125},
  year      = {2021},
  url       = {https://proceedings.neurips.cc/paper/2021/hash/08f0efebb1c51aada9430a089a2050cc-Abstract.html},
  timestamp = {Tue, 03 May 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/FarquharBMFHHS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/VeeriahZHXOKHSS21,
  author    = {Vivek Veeriah and Tom Zahavy and Matteo Hessel and Zhongwen Xu and Junhyuk Oh and Iurii Kemaev and Hado van Hasselt and David Silver and Satinder Singh},
  editor    = {Marc'Aurelio Ranzato and Alina Beygelzimer and Yann N. Dauphin and Percy Liang and Jennifer Wortman Vaughan},
  title     = {Discovery of Options via Meta-Learned Subgoals},
  booktitle = {Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6-14, 2021, virtual},
  pages     = {29861--29873},
  year      = {2021},
  url       = {https://proceedings.neurips.cc/paper/2021/hash/fa246d0262c3925617b0c72bb20eeb1d-Abstract.html},
  timestamp = {Tue, 03 May 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/VeeriahZHXOKHSS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2102-06741,
  author     = {Vivek Veeriah and Tom Zahavy and Matteo Hessel and Zhongwen Xu and Junhyuk Oh and Iurii Kemaev and Hado van Hasselt and David Silver and Satinder Singh},
  title      = {Discovery of Options via Meta-Learned Subgoals},
  journal    = {CoRR},
  volume     = {abs/2102.06741},
  year       = {2021},
  url        = {https://arxiv.org/abs/2102.06741},
  eprinttype = {arXiv},
  eprint     = {2102.06741},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2102-06741.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2102-12425,
  author     = {David Raposo and Samuel Ritter and Adam Santoro and Greg Wayne and Theophane Weber and Matt M. Botvinick and Hado van Hasselt and H. Francis Song},
  title      = {Synthetic Returns for Long-Term Credit Assignment},
  journal    = {CoRR},
  volume     = {abs/2102.12425},
  year       = {2021},
  url        = {https://arxiv.org/abs/2102.12425},
  eprinttype = {arXiv},
  eprint     = {2102.12425},
  timestamp  = {Tue, 02 Mar 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2102-12425.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2104-06159,
  author     = {Matteo Hessel and Ivo Danihelka and Fabio Viola and Arthur Guez and Simon Schmitt and Laurent Sifre and Theophane Weber and David Silver and Hado van Hasselt},
  title      = {Muesli: Combining Improvements in Policy Optimization},
  journal    = {CoRR},
  volume     = {abs/2104.06159},
  year       = {2021},
  url        = {https://arxiv.org/abs/2104.06159},
  eprinttype = {arXiv},
  eprint     = {2104.06159},
  timestamp  = {Mon, 19 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2104-06159.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2104-06272,
  author     = {Matteo Hessel and Manuel Kroiss and Aidan Clark and Iurii Kemaev and John Quan and Thomas Keck and Fabio Viola and Hado van Hasselt},
  title      = {Podracer architectures for scalable Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2104.06272},
  year       = {2021},
  url        = {https://arxiv.org/abs/2104.06272},
  eprinttype = {arXiv},
  eprint     = {2104.06272},
  timestamp  = {Mon, 19 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2104-06272.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2106-11779,
  author     = {Ray Jiang and Tom Zahavy and Zhongwen Xu and Adam White and Matteo Hessel and Charles Blundell and Hado van Hasselt},
  title      = {Emphatic Algorithms for Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2106.11779},
  year       = {2021},
  url        = {https://arxiv.org/abs/2106.11779},
  eprinttype = {arXiv},
  eprint     = {2106.11779},
  timestamp  = {Mon, 25 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2106-11779.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2107-05405,
  author     = {Ray Jiang and Shangtong Zhang and Veronica Chelu and Adam White and Hado van Hasselt},
  title      = {Learning Expected Emphatic Traces for Deep {RL}},
  journal    = {CoRR},
  volume     = {abs/2107.05405},
  year       = {2021},
  url        = {https://arxiv.org/abs/2107.05405},
  eprinttype = {arXiv},
  eprint     = {2107.05405},
  timestamp  = {Mon, 25 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2107-05405.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2109-04504,
  author     = {Sebastian Flennerhag and Yannick Schroecker and Tom Zahavy and Hado van Hasselt and David Silver and Satinder Singh},
  title      = {Bootstrapped Meta-Learning},
  journal    = {CoRR},
  volume     = {abs/2109.04504},
  year       = {2021},
  url        = {https://arxiv.org/abs/2109.04504},
  eprinttype = {arXiv},
  eprint     = {2109.04504},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2109-04504.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2109-10781,
  author     = {Louis Kirsch and Sebastian Flennerhag and Hado van Hasselt and Abram L. Friesen and Junhyuk Oh and Yutian Chen},
  title      = {Introducing Symmetries to Black Box Meta Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2109.10781},
  year       = {2021},
  url        = {https://arxiv.org/abs/2109.10781},
  eprinttype = {arXiv},
  eprint     = {2109.10781},
  timestamp  = {Sat, 16 Dec 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2109-10781.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2110-04041,
  author     = {Marta Garnelo and Wojciech Marian Czarnecki and Siqi Liu and Dhruva Tirumala and Junhyuk Oh and Gauthier Gidel and Hado van Hasselt and David Balduzzi},
  title      = {Pick Your Battles: Interaction Graphs as Population-Level Objectives for Strategic Diversity},
  journal    = {CoRR},
  volume     = {abs/2110.04041},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.04041},
  eprinttype = {arXiv},
  eprint     = {2110.04041},
  timestamp  = {Thu, 08 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-04041.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2110-12840,
  author     = {Gregory Farquhar and Kate Baumli and Zita Marinho and Angelos Filos and Matteo Hessel and Hado van Hasselt and David Silver},
  title      = {Self-Consistent Models and Values},
  journal    = {CoRR},
  volume     = {abs/2110.12840},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.12840},
  eprinttype = {arXiv},
  eprint     = {2110.12840},
  timestamp  = {Thu, 28 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-12840.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/aistats/RowlandHHBSMD20,
  author    = {Mark Rowland and Anna Harutyunyan and Hado van Hasselt and Diana Borsa and Tom Schaul and R{\'{e}}mi Munos and Will Dabney},
  editor    = {Silvia Chiappa and Roberto Calandra},
  title     = {Conditional Importance Sampling for Off-Policy Learning},
  booktitle = {The 23rd International Conference on Artificial Intelligence and Statistics, {AISTATS} 2020, 26-28 August 2020, Online [Palermo, Sicily, Italy]},
  series    = {Proceedings of Machine Learning Research},
  volume    = {108},
  pages     = {45--55},
  publisher = {{PMLR}},
  year      = {2020},
  url       = {http://proceedings.mlr.press/v108/rowland20b.html},
  timestamp = {Mon, 29 Jun 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/aistats/RowlandHHBSMD20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Author: "Van Roy" is a two-word surname; the "Last, First" form stops BibTeX from reading "Van" as a given name.
@inproceedings{DBLP:conf/iclr/OsbandDHASSMLSS20,
  author    = {Ian Osband and Yotam Doron and Matteo Hessel and John Aslanides and Eren Sezener and Andre Saraiva and Katrina McKinney and Tor Lattimore and Csaba Szepesv{\'{a}}ri and Satinder Singh and Van Roy, Benjamin and Richard S. Sutton and David Silver and Hado van Hasselt},
  title     = {Behaviour Suite for Reinforcement Learning},
  booktitle = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher = {OpenReview.net},
  year      = {2020},
  url       = {https://openreview.net/forum?id=rygf-kSYwH},
  timestamp = {Mon, 15 May 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/OsbandDHASSMLSS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/icml/ZhengOHXKHSS20,
  author    = {Zeyu Zheng and Junhyuk Oh and Matteo Hessel and Zhongwen Xu and Manuel Kroiss and Hado van Hasselt and David Silver and Satinder Singh},
  title     = {What Can Learned Intrinsic Rewards Capture?},
  booktitle = {Proceedings of the 37th International Conference on Machine Learning, {ICML} 2020, 13-18 July 2020, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {119},
  pages     = {11436--11446},
  publisher = {{PMLR}},
  year      = {2020},
  url       = {http://proceedings.mlr.press/v119/zheng20b.html},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/ZhengOHXKHSS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/CheluPH20,
  author    = {Veronica Chelu and Doina Precup and Hado van Hasselt},
  editor    = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  title     = {Forethought and Hindsight in Credit Assignment},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year      = {2020},
  url       = {https://proceedings.neurips.cc/paper/2020/hash/18064d61b6f93dab8681a460779b8429-Abstract.html},
  timestamp = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/CheluPH20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/OhHCXHSS20,
  author    = {Junhyuk Oh and Matteo Hessel and Wojciech M. Czarnecki and Zhongwen Xu and Hado van Hasselt and Satinder Singh and David Silver},
  editor    = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  title     = {Discovering Reinforcement Learning Algorithms},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year      = {2020},
  url       = {https://proceedings.neurips.cc/paper/2020/hash/0b96d81f0494fde5428c7aea243c9157-Abstract.html},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/OhHCXHSS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/XuHHOSS20,
  author    = {Zhongwen Xu and Hado Philip van Hasselt and Matteo Hessel and Junhyuk Oh and Satinder Singh and David Silver},
  editor    = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  title     = {Meta-Gradient Reinforcement Learning with an Objective Discovered Online},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year      = {2020},
  url       = {https://proceedings.neurips.cc/paper/2020/hash/ae3d525daf92cee0003a7f2d92c34ea3-Abstract.html},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/XuHHOSS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@inproceedings{DBLP:conf/nips/ZahavyXVHOHSS20,
  author    = {Tom Zahavy and Zhongwen Xu and Vivek Veeriah and Matteo Hessel and Junhyuk Oh and Hado van Hasselt and David Silver and Satinder Singh},
  editor    = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  title     = {A Self-Tuning Actor-Critic Algorithm},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year      = {2020},
  url       = {https://proceedings.neurips.cc/paper/2020/hash/f02208a057804ee16ac72ff4d3cec53b-Abstract.html},
  timestamp = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/ZahavyXVHOHSS20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2002-12928,
  author     = {Tom Zahavy and Zhongwen Xu and Vivek Veeriah and Matteo Hessel and Junhyuk Oh and Hado van Hasselt and David Silver and Satinder Singh},
  title      = {Self-Tuning Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2002.12928},
  year       = {2020},
  url        = {https://arxiv.org/abs/2002.12928},
  eprinttype = {arXiv},
  eprint     = {2002.12928},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2002-12928.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2007-01839,
  author     = {Hado van Hasselt and Sephora Madjiheurem and Matteo Hessel and David Silver and Andr{\'{e}} Barreto and Diana Borsa},
  title      = {Expected Eligibility Traces},
  journal    = {CoRR},
  volume     = {abs/2007.01839},
  year       = {2020},
  url        = {https://arxiv.org/abs/2007.01839},
  eprinttype = {arXiv},
  eprint     = {2007.01839},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-01839.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2007-08433,
  author     = {Zhongwen Xu and Hado van Hasselt and Matteo Hessel and Junhyuk Oh and Satinder Singh and David Silver},
  title      = {Meta-Gradient Reinforcement Learning with an Objective Discovered Online},
  journal    = {CoRR},
  volume     = {abs/2007.08433},
  year       = {2020},
  url        = {https://arxiv.org/abs/2007.08433},
  eprinttype = {arXiv},
  eprint     = {2007.08433},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-08433.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2007-08794,
  author     = {Junhyuk Oh and Matteo Hessel and Wojciech M. Czarnecki and Zhongwen Xu and Hado van Hasselt and Satinder Singh and David Silver},
  title      = {Discovering Reinforcement Learning Algorithms},
  journal    = {CoRR},
  volume     = {abs/2007.08794},
  year       = {2020},
  url        = {https://arxiv.org/abs/2007.08794},
  eprinttype = {arXiv},
  eprint     = {2007.08794},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-08794.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
@article{DBLP:journals/corr/abs-2010-13685,
  author     = {Veronica Chelu and Doina Precup and Hado van Hasselt},
  title      = {Forethought and Hindsight in Credit Assignment},
  journal    = {CoRR},
  volume     = {abs/2010.13685},
  year       = {2020},
  url        = {https://arxiv.org/abs/2010.13685},
  eprinttype = {arXiv},
  eprint     = {2010.13685},
  timestamp  = {Mon, 02 Nov 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-13685.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% PopArt is braced in the title so sentence-casing styles keep its internal capital.
@inproceedings{DBLP:conf/aaai/HesselSE0SH19,
  author    = {Matteo Hessel and Hubert Soyer and Lasse Espeholt and Wojciech Czarnecki and Simon Schmitt and Hado van Hasselt},
  title     = {Multi-Task Deep Reinforcement Learning with {PopArt}},
  booktitle = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI} 2019, The Thirty-First Innovative Applications of Artificial Intelligence Conference, {IAAI} 2019, The Ninth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii, USA, January 27 - February 1, 2019},
  pages     = {3796--3803},
  publisher = {{AAAI} Press},
  year      = {2019},
  url       = {https://doi.org/10.1609/aaai.v33i01.33013796},
  doi       = {10.1609/AAAI.V33I01.33013796},
  timestamp = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/HesselSE0SH19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/BorsaBQMHMSS19,
  author    = {Diana Borsa and Andr{\'{e}} Barreto and John Quan and Daniel J. Mankowitz and Hado van Hasselt and R{\'{e}}mi Munos and David Silver and Tom Schaul},
  title     = {Universal Successor Features Approximators},
  booktitle = {7th International Conference on Learning Representations, {ICLR} 2019, New Orleans, LA, USA, May 6-9, 2019},
  publisher = {OpenReview.net},
  year      = {2019},
  url       = {https://openreview.net/forum?id=S1VWjiRcKX},
  timestamp = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/iclr/BorsaBQMHMSS19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/VeeriahHXRLOHSS19,
  author    = {Vivek Veeriah and Matteo Hessel and Zhongwen Xu and Janarthanan Rajendran and Richard L. Lewis and Junhyuk Oh and Hado van Hasselt and David Silver and Satinder Singh},
  editor    = {Hanna M. Wallach and Hugo Larochelle and Alina Beygelzimer and Florence d'Alch{\'{e}}{-}Buc and Emily B. Fox and Roman Garnett},
  title     = {Discovery of Useful Questions as Auxiliary Tasks},
  booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  pages     = {9306--9317},
  year      = {2019},
  url       = {https://proceedings.neurips.cc/paper/2019/hash/10ff0b5e85e5b85cc3095d431d8c08b4-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/VeeriahHXRLOHSS19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/HarutyunyanDMAP19,
  author    = {Anna Harutyunyan and Will Dabney and Thomas Mesnard and Mohammad Gheshlaghi Azar and Bilal Piot and Nicolas Heess and Hado van Hasselt and Gregory Wayne and Satinder Singh and Doina Precup and R{\'{e}}mi Munos},
  editor    = {Hanna M. Wallach and Hugo Larochelle and Alina Beygelzimer and Florence d'Alch{\'{e}}{-}Buc and Emily B. Fox and Roman Garnett},
  title     = {Hindsight Credit Assignment},
  booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  pages     = {12467--12476},
  year      = {2019},
  url       = {https://proceedings.neurips.cc/paper/2019/hash/195f15384c2a79cedf293e4a847ce85c-Abstract.html},
  timestamp = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/HarutyunyanDMAP19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/HasseltHA19,
  author    = {Hado van Hasselt and Matteo Hessel and John Aslanides},
  editor    = {Hanna M. Wallach and Hugo Larochelle and Alina Beygelzimer and Florence d'Alch{\'{e}}{-}Buc and Emily B. Fox and Roman Garnett},
  title     = {When to use parametric models in reinforcement learning?},
  booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada},
  pages     = {14322--14333},
  year      = {2019},
  url       = {https://proceedings.neurips.cc/paper/2019/hash/1b742ae215adf18b75449c6e272fd92d-Abstract.html},
  timestamp = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/HasseltHA19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1905-03030,
  author     = {Pedro A. Ortega and Jane X. Wang and Mark Rowland and Tim Genewein and Zeb Kurth{-}Nelson and Razvan Pascanu and Nicolas Heess and Joel Veness and Alexander Pritzel and Pablo Sprechmann and Siddhant M. Jayakumar and Tom McGrath and Kevin J. Miller and Mohammad Gheshlaghi Azar and Ian Osband and Neil C. Rabinowitz and Andr{\'{a}}s Gy{\"{o}}rgy and Silvia Chiappa and Simon Osindero and Yee Whye Teh and Hado van Hasselt and Nando de Freitas and Matthew M. Botvinick and Shane Legg},
  title      = {Meta-learning of Sequential Strategies},
  journal    = {CoRR},
  volume     = {abs/1905.03030},
  year       = {2019},
  url        = {http://arxiv.org/abs/1905.03030},
  eprinttype = {arXiv},
  eprint     = {1905.03030},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1905-03030.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1906-05243,
  author     = {Hado van Hasselt and Matteo Hessel and John Aslanides},
  title      = {When to use parametric models in reinforcement learning?},
  journal    = {CoRR},
  volume     = {abs/1906.05243},
  year       = {2019},
  url        = {http://arxiv.org/abs/1906.05243},
  eprinttype = {arXiv},
  eprint     = {1906.05243},
  timestamp  = {Fri, 14 Jun 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1906-05243.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1907-02908,
  author     = {Matteo Hessel and Hado van Hasselt and Joseph Modayil and David Silver},
  title      = {On Inductive Biases in Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1907.02908},
  year       = {2019},
  url        = {http://arxiv.org/abs/1907.02908},
  eprinttype = {arXiv},
  eprint     = {1907.02908},
  timestamp  = {Mon, 08 Jul 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1907-02908.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Bellman is a proper noun; the braces stop sentence-casing styles from lowercasing it.
@article{DBLP:journals/corr/abs-1907-03687,
  author     = {Hado van Hasselt and John Quan and Matteo Hessel and Zhongwen Xu and Diana Borsa and Andr{\'{e}} Barreto},
  title      = {General non-linear {Bellman} equations},
  journal    = {CoRR},
  volume     = {abs/1907.03687},
  year       = {2019},
  url        = {http://arxiv.org/abs/1907.03687},
  eprinttype = {arXiv},
  eprint     = {1907.03687},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1907-03687.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Van Roy is a two-word surname; the comma form keeps BibTeX from reading the capitalised Van as a given name.
@article{DBLP:journals/corr/abs-1908-03568,
  author     = {Ian Osband and Yotam Doron and Matteo Hessel and John Aslanides and Eren Sezener and Andre Saraiva and Katrina McKinney and Tor Lattimore and Csaba Szepesv{\'{a}}ri and Satinder Singh and Van Roy, Benjamin and Richard S. Sutton and David Silver and Hado van Hasselt},
  title      = {Behaviour Suite for Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1908.03568},
  year       = {2019},
  url        = {http://arxiv.org/abs/1908.03568},
  eprinttype = {arXiv},
  eprint     = {1908.03568},
  timestamp  = {Mon, 15 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1908-03568.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1909-04607,
  author     = {Vivek Veeriah and Matteo Hessel and Zhongwen Xu and Richard L. Lewis and Janarthanan Rajendran and Junhyuk Oh and Hado van Hasselt and David Silver and Satinder Singh},
  title      = {Discovery of Useful Questions as Auxiliary Tasks},
  journal    = {CoRR},
  volume     = {abs/1909.04607},
  year       = {2019},
  url        = {http://arxiv.org/abs/1909.04607},
  eprinttype = {arXiv},
  eprint     = {1909.04607},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1909-04607.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1910-07479,
  author     = {Mark Rowland and Anna Harutyunyan and Hado van Hasselt and Diana Borsa and Tom Schaul and R{\'{e}}mi Munos and Will Dabney},
  title      = {Conditional Importance Sampling for Off-Policy Learning},
  journal    = {CoRR},
  volume     = {abs/1910.07479},
  year       = {2019},
  url        = {http://arxiv.org/abs/1910.07479},
  eprinttype = {arXiv},
  eprint     = {1910.07479},
  timestamp  = {Tue, 22 Oct 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1910-07479.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1912-02503,
  author     = {Anna Harutyunyan and Will Dabney and Thomas Mesnard and Mohammad Gheshlaghi Azar and Bilal Piot and Nicolas Heess and Hado van Hasselt and Greg Wayne and Satinder Singh and Doina Precup and R{\'{e}}mi Munos},
  title      = {Hindsight Credit Assignment},
  journal    = {CoRR},
  volume     = {abs/1912.02503},
  year       = {2019},
  url        = {http://arxiv.org/abs/1912.02503},
  eprinttype = {arXiv},
  eprint     = {1912.02503},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1912-02503.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1912-05500,
  author     = {Zeyu Zheng and Junhyuk Oh and Matteo Hessel and Zhongwen Xu and Manuel Kroiss and Hado van Hasselt and David Silver and Satinder Singh},
  title      = {What Can Learned Intrinsic Rewards Capture?},
  journal    = {CoRR},
  volume     = {abs/1912.05500},
  year       = {2019},
  url        = {http://arxiv.org/abs/1912.05500},
  eprinttype = {arXiv},
  eprint     = {1912.05500},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1912-05500.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/HesselMHSODHPAS18,
  author    = {Matteo Hessel and Joseph Modayil and Hado van Hasselt and Tom Schaul and Georg Ostrovski and Will Dabney and Dan Horgan and Bilal Piot and Mohammad Gheshlaghi Azar and David Silver},
  editor    = {Sheila A. McIlraith and Kilian Q. Weinberger},
  title     = {Rainbow: Combining Improvements in Deep Reinforcement Learning},
  booktitle = {Proceedings of the Thirty-Second {AAAI} Conference on Artificial Intelligence, (AAAI-18), the 30th innovative Applications of Artificial Intelligence (IAAI-18), and the 8th {AAAI} Symposium on Educational Advances in Artificial Intelligence (EAAI-18), New Orleans, Louisiana, USA, February 2-7, 2018},
  pages     = {3215--3222},
  publisher = {{AAAI} Press},
  year      = {2018},
  url       = {https://doi.org/10.1609/aaai.v32i1.11796},
  doi       = {10.1609/AAAI.V32I1.11796},
  timestamp = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/HesselMHSODHPAS18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/HorganQBBHHS18,
  author    = {Dan Horgan and John Quan and David Budden and Gabriel Barth{-}Maron and Matteo Hessel and Hado van Hasselt and David Silver},
  title     = {Distributed Prioritized Experience Replay},
  booktitle = {6th International Conference on Learning Representations, {ICLR} 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings},
  publisher = {OpenReview.net},
  year      = {2018},
  url       = {https://openreview.net/forum?id=H1Dy---0Z},
  timestamp = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/HorganQBBHHS18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/BargiacchiVRNH18,
  author    = {Eugenio Bargiacchi and Timothy Verstraeten and Diederik M. Roijers and Ann Now{\'{e}} and Hado van Hasselt},
  editor    = {Jennifer G. Dy and Andreas Krause},
  title     = {Learning to Coordinate with Coordination Graphs in Repeated Single-Stage Multi-Agent Decision Problems},
  booktitle = {Proceedings of the 35th International Conference on Machine Learning, {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July 10-15, 2018},
  series    = {Proceedings of Machine Learning Research},
  volume    = {80},
  pages     = {491--499},
  publisher = {{PMLR}},
  year      = {2018},
  url       = {http://proceedings.mlr.press/v80/bargiacchi18a.html},
  timestamp = {Wed, 03 Apr 2019 18:17:30 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/BargiacchiVRNH18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/XuHS18,
  author    = {Zhongwen Xu and Hado van Hasselt and David Silver},
  editor    = {Samy Bengio and Hanna M. Wallach and Hugo Larochelle and Kristen Grauman and Nicol{\`{o}} Cesa{-}Bianchi and Roman Garnett},
  title     = {Meta-Gradient Reinforcement Learning},
  booktitle = {Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems 2018, NeurIPS 2018, December 3-8, 2018, Montr{\'{e}}al, Canada},
  pages     = {2402--2413},
  year      = {2018},
  url       = {https://proceedings.neurips.cc/paper/2018/hash/2715518c875999308842e3455eda2fe3-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/XuHS18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1802-08294,
  author     = {Daniel J. Mankowitz and Augustin Z{\'{\i}}dek and Andr{\'{e}} Barreto and Dan Horgan and Matteo Hessel and John Quan and Junhyuk Oh and Hado van Hasselt and David Silver and Tom Schaul},
  title      = {Unicorn: Continual Learning with a Universal, Off-policy Agent},
  journal    = {CoRR},
  volume     = {abs/1802.08294},
  year       = {2018},
  url        = {http://arxiv.org/abs/1802.08294},
  eprinttype = {arXiv},
  eprint     = {1802.08294},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1802-08294.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1803-00933,
  author     = {Dan Horgan and John Quan and David Budden and Gabriel Barth{-}Maron and Matteo Hessel and Hado van Hasselt and David Silver},
  title      = {Distributed Prioritized Experience Replay},
  journal    = {CoRR},
  volume     = {abs/1803.00933},
  year       = {2018},
  url        = {http://arxiv.org/abs/1803.00933},
  eprinttype = {arXiv},
  eprint     = {1803.00933},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1803-00933.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1805-09801,
  author     = {Zhongwen Xu and Hado van Hasselt and David Silver},
  title      = {Meta-Gradient Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1805.09801},
  year       = {2018},
  url        = {http://arxiv.org/abs/1805.09801},
  eprinttype = {arXiv},
  eprint     = {1805.09801},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1805-09801.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Atari is a proper noun; the braces stop sentence-casing styles from lowercasing it.
@article{DBLP:journals/corr/abs-1805-11593,
  author     = {Tobias Pohlen and Bilal Piot and Todd Hester and Mohammad Gheshlaghi Azar and Dan Horgan and David Budden and Gabriel Barth{-}Maron and Hado van Hasselt and John Quan and Mel Vecer{\'{\i}}k and Matteo Hessel and R{\'{e}}mi Munos and Olivier Pietquin},
  title      = {Observe and Look Further: Achieving Consistent Performance on {Atari}},
  journal    = {CoRR},
  volume     = {abs/1805.11593},
  year       = {2018},
  url        = {http://arxiv.org/abs/1805.11593},
  eprinttype = {arXiv},
  eprint     = {1805.11593},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1805-11593.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% PopArt is braced in the title so sentence-casing styles keep its internal capital.
@article{DBLP:journals/corr/abs-1809-04474,
  author     = {Matteo Hessel and Hubert Soyer and Lasse Espeholt and Wojciech Czarnecki and Simon Schmitt and Hado van Hasselt},
  title      = {Multi-task Deep Reinforcement Learning with {PopArt}},
  journal    = {CoRR},
  volume     = {abs/1809.04474},
  year       = {2018},
  url        = {http://arxiv.org/abs/1809.04474},
  eprinttype = {arXiv},
  eprint     = {1809.04474},
  timestamp  = {Fri, 05 Oct 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1809-04474.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Barbados is a proper noun; the braces stop sentence-casing styles from lowercasing it.
@article{DBLP:journals/corr/abs-1811-07004,
  author     = {Tom Schaul and Hado van Hasselt and Joseph Modayil and Martha White and Adam White and Pierre{-}Luc Bacon and Jean Harb and Shibl Mourad and Marc G. Bellemare and Doina Precup},
  title      = {The {Barbados} 2018 List of Open Issues in Continual Learning},
  journal    = {CoRR},
  volume     = {abs/1811.07004},
  year       = {2018},
  url        = {http://arxiv.org/abs/1811.07004},
  eprinttype = {arXiv},
  eprint     = {1811.07004},
  timestamp  = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1811-07004.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1812-02648,
  author     = {Hado van Hasselt and Yotam Doron and Florian Strub and Matteo Hessel and Nicolas Sonnerat and Joseph Modayil},
  title      = {Deep Reinforcement Learning and the Deadly Triad},
  journal    = {CoRR},
  volume     = {abs/1812.02648},
  year       = {2018},
  url        = {http://arxiv.org/abs/1812.02648},
  eprinttype = {arXiv},
  eprint     = {1812.02648},
  timestamp  = {Tue, 01 Jan 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1812-02648.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1812-07626,
  author     = {Diana Borsa and Andr{\'{e}} Barreto and John Quan and Daniel J. Mankowitz and R{\'{e}}mi Munos and Hado van Hasselt and David Silver and Tom Schaul},
  title      = {Universal Successor Features Approximators},
  journal    = {CoRR},
  volume     = {abs/1812.07626},
  year       = {2018},
  url        = {http://arxiv.org/abs/1812.07626},
  eprinttype = {arXiv},
  eprint     = {1812.07626},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1812-07626.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/SilverHHSGHDRRB17,
  author    = {David Silver and Hado van Hasselt and Matteo Hessel and Tom Schaul and Arthur Guez and Tim Harley and Gabriel Dulac{-}Arnold and David P. Reichert and Neil C. Rabinowitz and Andr{\'{e}} Barreto and Thomas Degris},
  editor    = {Doina Precup and Yee Whye Teh},
  title     = {The Predictron: End-To-End Learning and Planning},
  booktitle = {Proceedings of the 34th International Conference on Machine Learning, {ICML} 2017, Sydney, NSW, Australia, 6-11 August 2017},
  series    = {Proceedings of Machine Learning Research},
  volume    = {70},
  pages     = {3191--3199},
  publisher = {{PMLR}},
  year      = {2017},
  url       = {http://proceedings.mlr.press/v70/silver17a.html},
  timestamp = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icml/SilverHHSGHDRRB17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/XuMHBSS17,
  author    = {Zhongwen Xu and Joseph Modayil and Hado van Hasselt and Andr{\'{e}} Barreto and David Silver and Tom Schaul},
  editor    = {Isabelle Guyon and Ulrike von Luxburg and Samy Bengio and Hanna M. Wallach and Rob Fergus and S. V. N. Vishwanathan and Roman Garnett},
  title     = {Natural Value Approximators: Learning when to Trust Past Estimates},
  booktitle = {Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017, Long Beach, CA, {USA}},
  pages     = {2120--2128},
  year      = {2017},
  url       = {https://proceedings.neurips.cc/paper/2017/hash/fb60d411a5c5b72b2e7d3527cfc84fd0-Abstract.html},
  timestamp = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/XuMHBSS17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/BarretoDMHSSH17,
  author    = {Andr{\'{e}} Barreto and Will Dabney and R{\'{e}}mi Munos and Jonathan J. Hunt and Tom Schaul and David Silver and Hado van Hasselt},
  editor    = {Isabelle Guyon and Ulrike von Luxburg and Samy Bengio and Hanna M. Wallach and Rob Fergus and S. V. N. Vishwanathan and Roman Garnett},
  title     = {Successor Features for Transfer in Reinforcement Learning},
  booktitle = {Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, December 4-9, 2017, Long Beach, CA, {USA}},
  pages     = {4055--4065},
  year      = {2017},
  url       = {https://proceedings.neurips.cc/paper/2017/hash/350db081a661525235354dd3e19b8c05-Abstract.html},
  timestamp = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/BarretoDMHSSH17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% StarCraft is braced so sentence-casing styles keep its internal capital C.
@article{DBLP:journals/corr/abs-1708-04782,
  author     = {Oriol Vinyals and Timo Ewalds and Sergey Bartunov and Petko Georgiev and Alexander Sasha Vezhnevets and Michelle Yeo and Alireza Makhzani and Heinrich K{\"{u}}ttler and John P. Agapiou and Julian Schrittwieser and John Quan and Stephen Gaffney and Stig Petersen and Karen Simonyan and Tom Schaul and Hado van Hasselt and David Silver and Timothy P. Lillicrap and Kevin Calderone and Paul Keet and Anthony Brunasso and David Lawrence and Anders Ekermo and Jacob Repp and Rodney Tsing},
  title      = {{StarCraft} {II:} {A} New Challenge for Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1708.04782},
  year       = {2017},
  url        = {http://arxiv.org/abs/1708.04782},
  eprinttype = {arXiv},
  eprint     = {1708.04782},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1708-04782.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1710-02298,
  author     = {Matteo Hessel and Joseph Modayil and Hado van Hasselt and Tom Schaul and Georg Ostrovski and Will Dabney and Daniel Horgan and Bilal Piot and Mohammad Gheshlaghi Azar and David Silver},
  title      = {Rainbow: Combining Improvements in Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1710.02298},
  year       = {2017},
  url        = {http://arxiv.org/abs/1710.02298},
  eprinttype = {arXiv},
  eprint     = {1710.02298},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1710-02298.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Q-Learning is braced so sentence-casing styles do not lowercase the Q.
@inproceedings{DBLP:conf/aaai/HasseltGS16,
  author    = {Hado van Hasselt and Arthur Guez and David Silver},
  editor    = {Dale Schuurmans and Michael P. Wellman},
  title     = {Deep Reinforcement Learning with Double {Q-Learning}},
  booktitle = {Proceedings of the Thirtieth {AAAI} Conference on Artificial Intelligence, February 12-17, 2016, Phoenix, Arizona, {USA}},
  pages     = {2094--2100},
  publisher = {{AAAI} Press},
  year      = {2016},
  url       = {https://doi.org/10.1609/aaai.v30i1.10295},
  doi       = {10.1609/AAAI.V30I1.10295},
  timestamp = {Mon, 04 Sep 2023 15:08:28 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/HasseltGS16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2016 paper, volume 48 of the JMLR Workshop and Conference Proceedings.
@inproceedings{DBLP:conf/icml/WangSHHLF16,
  author    = {Ziyu Wang and Tom Schaul and Matteo Hessel and Hado van Hasselt and Marc Lanctot and Nando de Freitas},
  editor    = {Maria{-}Florina Balcan and Kilian Q. Weinberger},
  title     = {Dueling Network Architectures for Deep Reinforcement Learning},
  booktitle = {Proceedings of the 33rd International Conference on Machine Learning, {ICML} 2016, New York City, NY, USA, June 19-24, 2016},
  series    = {{JMLR} Workshop and Conference Proceedings},
  volume    = {48},
  pages     = {1995--2003},
  publisher = {JMLR.org},
  year      = {2016},
  url       = {http://proceedings.mlr.press/v48/wangf16.html},
  timestamp = {Wed, 29 May 2019 08:41:46 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/WangSHHLF16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/HasseltGHMS16,
  author    = {Hado van Hasselt and Arthur Guez and Matteo Hessel and Volodymyr Mnih and David Silver},
  editor    = {Daniel D. Lee and Masashi Sugiyama and Ulrike von Luxburg and Isabelle Guyon and Roman Garnett},
  title     = {Learning values across many orders of magnitude},
  booktitle = {Advances in Neural Information Processing Systems 29: Annual Conference on Neural Information Processing Systems 2016, December 5-10, 2016, Barcelona, Spain},
  pages     = {4287--4295},
  year      = {2016},
  url       = {https://proceedings.neurips.cc/paper/2016/hash/5227b6aaf294f5f027273aebf16015f2-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/HasseltGHMS16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HasseltGHS16,
  author     = {Hado van Hasselt and Arthur Guez and Matteo Hessel and David Silver},
  title      = {Learning functions across many orders of magnitudes},
  journal    = {CoRR},
  volume     = {abs/1602.07714},
  year       = {2016},
  url        = {http://arxiv.org/abs/1602.07714},
  eprinttype = {arXiv},
  eprint     = {1602.07714},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HasseltGHS16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/SilverHHSGHDRRB16,
  author     = {David Silver and Hado van Hasselt and Matteo Hessel and Tom Schaul and Arthur Guez and Tim Harley and Gabriel Dulac{-}Arnold and David P. Reichert and Neil C. Rabinowitz and Andr{\'{e}} Barreto and Thomas Degris},
  title      = {The Predictron: End-To-End Learning and Planning},
  journal    = {CoRR},
  volume     = {abs/1612.08810},
  year       = {2016},
  url        = {http://arxiv.org/abs/1612.08810},
  eprinttype = {arXiv},
  eprint     = {1612.08810},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/SilverHHSGHDRRB16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HasseltS15,
  author     = {Hado van Hasselt and Richard S. Sutton},
  title      = {Learning to Predict Independent of Span},
  journal    = {CoRR},
  volume     = {abs/1508.04582},
  year       = {2015},
  url        = {http://arxiv.org/abs/1508.04582},
  eprinttype = {arXiv},
  eprint     = {1508.04582},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HasseltS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Q-learning is braced so sentence-casing styles do not lowercase the Q.
@article{DBLP:journals/corr/HasseltGS15,
  author     = {Hado van Hasselt and Arthur Guez and David Silver},
  title      = {Deep Reinforcement Learning with Double {Q-learning}},
  journal    = {CoRR},
  volume     = {abs/1509.06461},
  year       = {2015},
  url        = {http://arxiv.org/abs/1509.06461},
  eprinttype = {arXiv},
  eprint     = {1509.06461},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/HasseltGS15.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The title formula is typeset as braced math, and Monte Carlo is braced as a proper name,
% so sentence-casing styles leave both untouched.
@inproceedings{DBLP:conf/icml/SuttonMPH14,
  author    = {Richard S. Sutton and Ashique Rupam Mahmood and Doina Precup and Hado van Hasselt},
  title     = {A new {$Q(\lambda)$} with interim forward view and {Monte Carlo} equivalence},
  booktitle = {Proceedings of the 31st International Conference on Machine Learning, {ICML} 2014, Beijing, China, 21-26 June 2014},
  series    = {{JMLR} Workshop and Conference Proceedings},
  volume    = {32},
  pages     = {568--576},
  publisher = {JMLR.org},
  year      = {2014},
  url       = {http://proceedings.mlr.press/v32/sutton14.html},
  timestamp = {Wed, 29 May 2019 08:41:45 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/SuttonMPH14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/MahmoodHS14,
  author    = {Ashique Rupam Mahmood and Hado van Hasselt and Richard S. Sutton},
  editor    = {Zoubin Ghahramani and Max Welling and Corinna Cortes and Neil D. Lawrence and Kilian Q. Weinberger},
  title     = {Weighted importance sampling for off-policy learning with linear function approximation},
  booktitle = {Advances in Neural Information Processing Systems 27: Annual Conference on Neural Information Processing Systems 2014, December 8-13 2014, Montreal, Quebec, Canada},
  pages     = {3014--3022},
  year      = {2014},
  url       = {https://proceedings.neurips.cc/paper/2014/hash/be53ee61104935234b174e62a07e53cf-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/MahmoodHS14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The algorithm name in the title is one braced group with the lambda in math mode.
% The URL query parameters are separated by a plain escaped ampersand.
@inproceedings{DBLP:conf/uai/HasseltMS14,
  author    = {Hado van Hasselt and Ashique Rupam Mahmood and Richard S. Sutton},
  editor    = {Nevin L. Zhang and Jin Tian},
  title     = {Off-policy {TD($\lambda$)} with a true online equivalence},
  booktitle = {Proceedings of the Thirtieth Conference on Uncertainty in Artificial Intelligence, {UAI} 2014, Quebec City, Quebec, Canada, July 23-27, 2014},
  pages     = {330--339},
  publisher = {{AUAI} Press},
  year      = {2014},
  url       = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=2468\&proceeding\_id=30},
  timestamp = {Wed, 03 Feb 2021 11:09:27 +0100},
  biburl    = {https://dblp.org/rec/conf/uai/HasseltMS14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% La Poutre is a two-word surname; the comma form keeps BibTeX from reading the capitalised La as a given name.
@inproceedings{DBLP:conf/cipls/HasseltP13,
  author    = {Hado van Hasselt and La Poutr{\'{e}}, Han},
  title     = {Stacking under uncertainty: We know how to predict, but how should we act?},
  booktitle = {{IEEE} Symposium on Computational Intelligence In Production And Logistics Systems, {CIPLS} 2013, Singapore, April 16-19, 2013},
  pages     = {25--32},
  publisher = {{IEEE}},
  year      = {2013},
  url       = {https://doi.org/10.1109/CIPLS.2013.6595196},
  doi       = {10.1109/CIPLS.2013.6595196},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl    = {https://dblp.org/rec/conf/cipls/HasseltP13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1302-7175,
  author     = {Hado van Hasselt},
  title      = {Estimating the Maximum Expected Value: An Analysis of (Nested) Cross Validation and the Maximum Sample Average},
  journal    = {CoRR},
  volume     = {abs/1302.7175},
  year       = {2013},
  url        = {http://arxiv.org/abs/1302.7175},
  eprinttype = {arXiv},
  eprint     = {1302.7175},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1302-7175.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@incollection{DBLP:books/sp/12/Hasselt12,
  author    = {Hado van Hasselt},
  editor    = {Marco A. Wiering and Martijn van Otterlo},
  title     = {Reinforcement Learning in Continuous State and Action Spaces},
  booktitle = {Reinforcement Learning},
  series    = {Adaptation, Learning, and Optimization},
  volume    = {12},
  pages     = {207--251},
  publisher = {Springer},
  year      = {2012},
  url       = {https://doi.org/10.1007/978-3-642-27645-3\_7},
  doi       = {10.1007/978-3-642-27645-3\_7},
  timestamp = {Tue, 29 Dec 2020 18:14:51 +0100},
  biburl    = {https://dblp.org/rec/books/sp/12/Hasselt12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% PhD thesis, Utrecht University, 2011.
@phdthesis{DBLP:phd/basesearch/vanHasselt11,
  author    = {Hado Philip van Hasselt},
  title     = {Insights in reinforcement learning: formal analysis and empirical evaluation of temporal-difference learning algorithms},
  school    = {Utrecht University, Netherlands},
  year      = {2011},
  url       = {https://dspace.library.uu.nl/handle/1874/192729},
  timestamp = {Wed, 24 Jul 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/phd/basesearch/vanHasselt11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jmlr/SeijenWHW11,
  author    = {Harm van Seijen and Shimon Whiteson and Hado van Hasselt and Marco A. Wiering},
  title     = {Exploiting Best-Match Equations for Efficient Reinforcement Learning},
  journal   = {J. Mach. Learn. Res.},
  volume    = {12},
  pages     = {2045--2094},
  year      = {2011},
  url       = {https://dl.acm.org/doi/10.5555/1953048.2021066},
  doi       = {10.5555/1953048.2021066},
  timestamp = {Thu, 02 Jun 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/jmlr/SeijenWHW11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/adprl/WieringHPS11,
  author    = {Marco A. Wiering and Hado van Hasselt and Auke{-}Dirk Pietersma and Lambert Schomaker},
  title     = {Reinforcement learning algorithms for solving classification problems},
  booktitle = {2011 {IEEE} Symposium on Adaptive Dynamic Programming And Reinforcement Learning, {ADPRL} 2011, Paris, France, April 12-14, 2011},
  pages     = {91--96},
  publisher = {{IEEE}},
  year      = {2011},
  url       = {https://doi.org/10.1109/ADPRL.2011.5967372},
  doi       = {10.1109/ADPRL.2011.5967372},
  timestamp = {Tue, 29 Dec 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/adprl/WieringHPS11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, NIPS 2010, pp. 2613--2621.
% "Q-learning" is brace-protected so sentence-casing styles do not render it as "q-learning".
@inproceedings{DBLP:conf/nips/Hasselt10,
  author    = {Hado van Hasselt},
  editor    = {John D. Lafferty and Christopher K. I. Williams and John Shawe{-}Taylor and Richard S. Zemel and Aron Culotta},
  title     = {Double {Q-learning}},
  booktitle = {Advances in Neural Information Processing Systems 23: 24th Annual Conference on Neural Information Processing Systems 2010. Proceedings of a meeting held 6-9 December 2010, Vancouver, British Columbia, Canada},
  pages     = {2613--2621},
  publisher = {Curran Associates, Inc.},
  year      = {2010},
  url       = {https://proceedings.neurips.cc/paper/2010/hash/091d584fced301b442654dd8c23b3fc9-Abstract.html},
  timestamp = {Mon, 16 May 2022 15:41:51 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/Hasselt10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, ADPRL 2009 (IEEE), pp. 101--108.
@inproceedings{DBLP:conf/adprl/WieringH09,
  author    = {Marco A. Wiering and Hado van Hasselt},
  title     = {The {QV} family compared to other reinforcement learning algorithms},
  booktitle = {{IEEE} Symposium on Adaptive Dynamic Programming and Reinforcement Learning, {ADPRL} 2009, Nashville, TN, USA, March 31 - April 1, 2009},
  pages     = {101--108},
  publisher = {{IEEE}},
  year      = {2009},
  url       = {https://doi.org/10.1109/ADPRL.2009.4927532},
  doi       = {10.1109/ADPRL.2009.4927532},
  timestamp = {Wed, 16 Oct 2019 14:14:48 +0200},
  biburl    = {https://dblp.org/rec/conf/adprl/WieringH09.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, ADPRL 2009 (IEEE), pp. 177--184.
% The algorithm name "Expected Sarsa" is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{DBLP:conf/adprl/SeijenHWW09,
  author    = {Harm van Seijen and Hado van Hasselt and Shimon Whiteson and Marco A. Wiering},
  title     = {A theoretical and empirical analysis of {Expected Sarsa}},
  booktitle = {{IEEE} Symposium on Adaptive Dynamic Programming and Reinforcement Learning, {ADPRL} 2009, Nashville, TN, USA, March 31 - April 1, 2009},
  pages     = {177--184},
  publisher = {{IEEE}},
  year      = {2009},
  url       = {https://doi.org/10.1109/ADPRL.2009.4927542},
  doi       = {10.1109/ADPRL.2009.4927542},
  timestamp = {Fri, 26 May 2017 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/adprl/SeijenHWW09.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, AGS 2009, Lecture Notes in Computer Science volume 5920, pp. 206--220.
@inproceedings{DBLP:conf/ags/WestraHDD09,
  author    = {Joost Westra and Hado van Hasselt and Frank Dignum and Virginia Dignum},
  editor    = {Frank Dignum and Jeffrey M. Bradshaw and Barry G. Silverman and Willem A. van Doesburg},
  title     = {Adaptive Serious Games Using Agent Organizations},
  booktitle = {Agents for Games and Simulations, Trends in Techniques, Concepts and Design {[AGS} 2009, The First International Workshop on Agents for Games and Simulations, May 11, 2009, Budapest, Hungary]},
  series    = {Lecture Notes in Computer Science},
  volume    = {5920},
  pages     = {206--220},
  publisher = {Springer},
  year      = {2009},
  url       = {https://doi.org/10.1007/978-3-642-11198-3\_14},
  doi       = {10.1007/978-3-642-11198-3\_14},
  timestamp = {Thu, 28 Nov 2019 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/ags/WestraHDD09.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, IJCNN 2009 (IEEE Computer Society), pp. 1149--1156.
@inproceedings{DBLP:conf/ijcnn/HasseltW09,
  author    = {Hado van Hasselt and Marco A. Wiering},
  title     = {Using continuous action spaces to solve discrete problems},
  booktitle = {International Joint Conference on Neural Networks, {IJCNN} 2009, Atlanta, Georgia, USA, 14-19 June 2009},
  pages     = {1149--1156},
  publisher = {{IEEE} Computer Society},
  year      = {2009},
  url       = {https://doi.org/10.1109/IJCNN.2009.5178745},
  doi       = {10.1109/IJCNN.2009.5178745},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/ijcnn/HasseltW09.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, IEEE Trans. Syst. Man Cybern. Part B, volume 38 number 4 (2008), pp. 930--936.
@article{DBLP:journals/tsmc/WieringH08,
  author    = {Marco A. Wiering and Hado van Hasselt},
  title     = {Ensemble Algorithms in Reinforcement Learning},
  journal   = {{IEEE} Trans. Syst. Man Cybern. Part {B}},
  volume    = {38},
  number    = {4},
  pages     = {930--936},
  year      = {2008},
  url       = {https://doi.org/10.1109/TSMCB.2008.920231},
  doi       = {10.1109/TSMCB.2008.920231},
  timestamp = {Tue, 29 Dec 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/tsmc/WieringH08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, CIG 2008 (IEEE), pp. 243--250.
% Booktitle acronym corrected from "CIG 2009" to "CIG 2008": the same booktitle names the 2008 symposium
% held 15-18 December 2008, and the year field and DOI prefix (CIG.2008) agree with 2008.
@inproceedings{DBLP:conf/cig/WestraHDD08,
  author    = {Joost Westra and Hado van Hasselt and Virginia Dignum and Frank Dignum},
  editor    = {Philip Hingston and Luigi Barone},
  title     = {On-line adapting games using agent organizations},
  booktitle = {Proceedings of the 2008 {IEEE} Symposium on Computational Intelligence and Games, {CIG} 2008, Perth, Australia, 15-18 December, 2008},
  pages     = {243--250},
  publisher = {{IEEE}},
  year      = {2008},
  url       = {https://doi.org/10.1109/CIG.2008.5035646},
  doi       = {10.1109/CIG.2008.5035646},
  timestamp = {Thu, 28 Nov 2019 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/cig/WestraHDD08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt in for them to become active. All settings here will be stored as cookies in your web browser. For more information, see our F.A.Q.