BibTeX records: Hado van Hasselt

download as .bib file

% arXiv preprint, CoRR abs/2402.18762.
@article{DBLP:journals/corr/abs-2402-18762,
  author     = {Lyle, Clare and
                Zheng, Zeyu and
                Khetarpal, Khimya and
                van Hasselt, Hado and
                Pascanu, Razvan and
                Martens, James and
                Dabney, Will},
  title      = {Disentangling the Causes of Plasticity Loss in Neural Networks},
  journal    = {CoRR},
  volume     = {abs/2402.18762},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2402.18762},
  doi        = {10.48550/ARXIV.2402.18762},
  eprinttype = {arXiv},
  eprint     = {2402.18762},
  timestamp  = {Tue, 26 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-18762.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, AAAI 2023.
@inproceedings{DBLP:conf/aaai/SchmittSH23,
  author     = {Schmitt, Simon and
                Shawe{-}Taylor, John and
                van Hasselt, Hado},
  editor     = {Williams, Brian and
                Chen, Yiling and
                Neville, Jennifer},
  title      = {Exploration via Epistemic Value Estimation},
  booktitle  = {Thirty-Seventh {AAAI} Conference on Artificial Intelligence, {AAAI}
                2023, Thirty-Fifth Conference on Innovative Applications of Artificial
                Intelligence, {IAAI} 2023, Thirteenth Symposium on Educational Advances
                in Artificial Intelligence, {EAAI} 2023, Washington, DC, USA, February
                7-14, 2023},
  pages      = {9742--9751},
  publisher  = {{AAAI} Press},
  year       = {2023},
  url        = {https://doi.org/10.1609/aaai.v37i8.26164},
  doi        = {10.1609/AAAI.V37I8.26164},
  timestamp  = {Sun, 12 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/aaai/SchmittSH23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, ICLR 2023.
% "Atari" is braced so that sentence-casing styles keep its capital letter.
% The second author's given name carries its accent, matching the spelling
% used for the same paper in the CoRR record abs-2209-07550.
@inproceedings{DBLP:conf/iclr/Kapturowski0JRH23,
  author     = {Kapturowski, Steven and
                Campos, V{\'{\i}}ctor and
                Jiang, Ray and
                Rakicevic, Nemanja and
                van Hasselt, Hado and
                Blundell, Charles and
                Badia, Adri{\`{a}} Puigdom{\`{e}}nech},
  title      = {Human-level {Atari} 200x faster},
  booktitle  = {The Eleventh International Conference on Learning Representations,
                {ICLR} 2023, Kigali, Rwanda, May 1-5, 2023},
  publisher  = {OpenReview.net},
  year       = {2023},
  url        = {https://openreview.net/pdf?id=JtC6yOHRoJJ},
  timestamp  = {Fri, 30 Jun 2023 14:38:38 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/Kapturowski0JRH23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, NeurIPS 2023.
% "Van Roy" is entered surname-first: written as "Benjamin Van Roy", BibTeX
% takes only "Roy" as the family name because "Van" is capitalised.
% The URL is stored raw, with no backslash before the underscore, so that
% verbatim url handling produces a working link.
@inproceedings{DBLP:conf/nips/Abel0RPHS23,
  author     = {Abel, David and
                Barreto, Andr{\'{e}} and
                Van Roy, Benjamin and
                Precup, Doina and
                van Hasselt, Hado Philip and
                Singh, Satinder},
  editor     = {Oh, Alice and
                Naumann, Tristan and
                Globerson, Amir and
                Saenko, Kate and
                Hardt, Moritz and
                Levine, Sergey},
  title      = {A Definition of Continual Reinforcement Learning},
  booktitle  = {Advances in Neural Information Processing Systems 36: Annual Conference
                on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans,
                LA, USA, December 10 - 16, 2023},
  year       = {2023},
  url        = {http://papers.nips.cc/paper_files/paper/2023/hash/9d8cf1247786d6dfeefeeb53b8b5f6d7-Abstract-Conference.html},
  timestamp  = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/Abel0RPHS23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, NeurIPS 2023.
% The URL is stored raw, with no backslash before the underscore, so that
% verbatim url handling produces a working link.
@inproceedings{DBLP:conf/nips/FlennerhagZOH0S23,
  author     = {Flennerhag, Sebastian and
                Zahavy, Tom and
                O'Donoghue, Brendan and
                van Hasselt, Hado Philip and
                Gy{\"{o}}rgy, Andr{\'{a}}s and
                Singh, Satinder},
  editor     = {Oh, Alice and
                Naumann, Tristan and
                Globerson, Amir and
                Saenko, Kate and
                Hardt, Moritz and
                Levine, Sergey},
  title      = {Optimistic Meta-Gradients},
  booktitle  = {Advances in Neural Information Processing Systems 36: Annual Conference
                on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans,
                LA, USA, December 10 - 16, 2023},
  year       = {2023},
  url        = {http://papers.nips.cc/paper_files/paper/2023/hash/b46bc1449205888e1883f692aff1a252-Abstract-Conference.html},
  timestamp  = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/FlennerhagZOH0S23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/2301.03236.
@article{DBLP:journals/corr/abs-2301-03236,
  author     = {Flennerhag, Sebastian and
                Zahavy, Tom and
                O'Donoghue, Brendan and
                van Hasselt, Hado and
                Gy{\"{o}}rgy, Andr{\'{a}}s and
                Singh, Satinder},
  title      = {Optimistic Meta-Gradients},
  journal    = {CoRR},
  volume     = {abs/2301.03236},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2301.03236},
  doi        = {10.48550/ARXIV.2301.03236},
  eprinttype = {arXiv},
  eprint     = {2301.03236},
  timestamp  = {Tue, 10 Jan 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2301-03236.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/2302.04250.
% The acronym "MDPs" is braced so that sentence-casing styles do not
% lowercase it.
@article{DBLP:journals/corr/abs-2302-04250,
  author     = {Jiang, Chentian and
                Ke, Nan Rosemary and
                van Hasselt, Hado},
  title      = {Learning How to Infer Partial {MDPs} for In-Context Adaptation and Exploration},
  journal    = {CoRR},
  volume     = {abs/2302.04250},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2302.04250},
  doi        = {10.48550/ARXIV.2302.04250},
  eprinttype = {arXiv},
  eprint     = {2302.04250},
  timestamp  = {Fri, 10 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2302-04250.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/2303.04012.
@article{DBLP:journals/corr/abs-2303-04012,
  author     = {Schmitt, Simon and
                Shawe{-}Taylor, John and
                van Hasselt, Hado},
  title      = {Exploration via Epistemic Value Estimation},
  journal    = {CoRR},
  volume     = {abs/2303.04012},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2303.04012},
  doi        = {10.48550/ARXIV.2303.04012},
  eprinttype = {arXiv},
  eprint     = {2303.04012},
  timestamp  = {Wed, 15 Mar 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2303-04012.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/2307.11044.
% "Van Roy" is entered surname-first: written as "Benjamin Van Roy", BibTeX
% takes only "Roy" as the family name because "Van" is capitalised.
@article{DBLP:journals/corr/abs-2307-11044,
  author     = {Abel, David and
                Barreto, Andr{\'{e}} and
                van Hasselt, Hado and
                Van Roy, Benjamin and
                Precup, Doina and
                Singh, Satinder},
  title      = {On the Convergence of Bounded Agents},
  journal    = {CoRR},
  volume     = {abs/2307.11044},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2307.11044},
  doi        = {10.48550/ARXIV.2307.11044},
  eprinttype = {arXiv},
  eprint     = {2307.11044},
  timestamp  = {Wed, 26 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2307-11044.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/2307.11046.
% "Van Roy" is entered surname-first: written as "Benjamin Van Roy", BibTeX
% takes only "Roy" as the family name because "Van" is capitalised.
@article{DBLP:journals/corr/abs-2307-11046,
  author     = {Abel, David and
                Barreto, Andr{\'{e}} and
                Van Roy, Benjamin and
                Precup, Doina and
                van Hasselt, Hado and
                Singh, Satinder},
  title      = {A Definition of Continual Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2307.11046},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2307.11046},
  doi        = {10.48550/ARXIV.2307.11046},
  eprinttype = {arXiv},
  eprint     = {2307.11046},
  timestamp  = {Wed, 26 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2307-11046.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/2312.01072.
@article{DBLP:journals/corr/abs-2312-01072,
  author     = {Pignatelli, Eduardo and
                Ferret, Johan and
                Geist, Matthieu and
                Mesnard, Thomas and
                van Hasselt, Hado and
                Toni, Laura},
  title      = {A Survey of Temporal Credit Assignment in Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2312.01072},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2312.01072},
  doi        = {10.48550/ARXIV.2312.01072},
  eprinttype = {arXiv},
  eprint     = {2312.01072},
  timestamp  = {Fri, 08 Dec 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2312-01072.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, AAAI 2022.
% The booktitle spells "Twelfth" correctly (the export had "Twelveth") and has
% a comma after "EAAI 2022", as the AAAI 2021 booktitle in this file does.
@inproceedings{DBLP:conf/aaai/JiangZC0H22,
  author     = {Jiang, Ray and
                Zhang, Shangtong and
                Chelu, Veronica and
                White, Adam and
                van Hasselt, Hado},
  title      = {Learning Expected Emphatic Traces for Deep {RL}},
  booktitle  = {Thirty-Sixth {AAAI} Conference on Artificial Intelligence, {AAAI}
                2022, Thirty-Fourth Conference on Innovative Applications of Artificial
                Intelligence, {IAAI} 2022, The Twelfth Symposium on Educational Advances
                in Artificial Intelligence, {EAAI} 2022, Virtual Event, February 22
                - March 1, 2022},
  pages      = {7015--7023},
  publisher  = {{AAAI} Press},
  year       = {2022},
  url        = {https://doi.org/10.1609/aaai.v36i6.20660},
  doi        = {10.1609/AAAI.V36I6.20660},
  timestamp  = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/JiangZC0H22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, AAAI 2022.
% The booktitle spells "Twelfth" correctly (the export had "Twelveth") and has
% a comma after "EAAI 2022", as the AAAI 2021 booktitle in this file does.
@inproceedings{DBLP:conf/aaai/KirschFHFOC22,
  author     = {Kirsch, Louis and
                Flennerhag, Sebastian and
                van Hasselt, Hado and
                Friesen, Abram L. and
                Oh, Junhyuk and
                Chen, Yutian},
  title      = {Introducing Symmetries to Black Box Meta Reinforcement Learning},
  booktitle  = {Thirty-Sixth {AAAI} Conference on Artificial Intelligence, {AAAI}
                2022, Thirty-Fourth Conference on Innovative Applications of Artificial
                Intelligence, {IAAI} 2022, The Twelfth Symposium on Educational Advances
                in Artificial Intelligence, {EAAI} 2022, Virtual Event, February 22
                - March 1, 2022},
  pages      = {7202--7210},
  publisher  = {{AAAI} Press},
  year       = {2022},
  url        = {https://doi.org/10.1609/aaai.v36i7.20681},
  doi        = {10.1609/AAAI.V36I7.20681},
  timestamp  = {Sat, 16 Dec 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/aaai/KirschFHFOC22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, AAAI 2022.
% The booktitle spells "Twelfth" correctly (the export had "Twelveth") and has
% a comma after "EAAI 2022", as the AAAI 2021 booktitle in this file does.
@inproceedings{DBLP:conf/aaai/SchmittSH22,
  author     = {Schmitt, Simon and
                Shawe{-}Taylor, John and
                van Hasselt, Hado},
  title      = {Chaining Value Functions for Off-Policy Learning},
  booktitle  = {Thirty-Sixth {AAAI} Conference on Artificial Intelligence, {AAAI}
                2022, Thirty-Fourth Conference on Innovative Applications of Artificial
                Intelligence, {IAAI} 2022, The Twelfth Symposium on Educational Advances
                in Artificial Intelligence, {EAAI} 2022, Virtual Event, February 22
                - March 1, 2022},
  pages      = {8187--8195},
  publisher  = {{AAAI} Press},
  year       = {2022},
  url        = {https://doi.org/10.1609/aaai.v36i8.20792},
  doi        = {10.1609/AAAI.V36I8.20792},
  timestamp  = {Sat, 21 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/SchmittSH22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, ICLR 2022.
@inproceedings{DBLP:conf/iclr/FlennerhagSZHS022,
  author     = {Flennerhag, Sebastian and
                Schroecker, Yannick and
                Zahavy, Tom and
                van Hasselt, Hado and
                Silver, David and
                Singh, Satinder},
  title      = {Bootstrapped Meta-Learning},
  booktitle  = {The Tenth International Conference on Learning Representations, {ICLR}
                2022, Virtual Event, April 25-29, 2022},
  publisher  = {OpenReview.net},
  year       = {2022},
  url        = {https://openreview.net/forum?id=b-ny3x071E5},
  timestamp  = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/FlennerhagSZHS022.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, ICLR 2022.
@inproceedings{DBLP:conf/iclr/SilverGDHH22,
  author     = {Silver, David and
                Goyal, Anirudh and
                Danihelka, Ivo and
                Hessel, Matteo and
                van Hasselt, Hado},
  title      = {Learning by Directional Gradient Descent},
  booktitle  = {The Tenth International Conference on Learning Representations, {ICLR}
                2022, Virtual Event, April 25-29, 2022},
  publisher  = {OpenReview.net},
  year       = {2022},
  url        = {https://openreview.net/forum?id=5i7lJLuhTm},
  timestamp  = {Sat, 20 Aug 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/SilverGDHH22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/2201.06468.
@article{DBLP:journals/corr/abs-2201-06468,
  author     = {Schmitt, Simon and
                Shawe{-}Taylor, John and
                van Hasselt, Hado},
  title      = {Chaining Value Functions for Off-Policy Learning},
  journal    = {CoRR},
  volume     = {abs/2201.06468},
  year       = {2022},
  url        = {https://arxiv.org/abs/2201.06468},
  eprinttype = {arXiv},
  eprint     = {2201.06468},
  timestamp  = {Fri, 21 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2201-06468.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/2202.09699.
@article{DBLP:journals/corr/abs-2202-09699,
  author     = {Chelu, Veronica and
                Borsa, Diana and
                Precup, Doina and
                van Hasselt, Hado},
  title      = {Selective Credit Assignment},
  journal    = {CoRR},
  volume     = {abs/2202.09699},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.09699},
  eprinttype = {arXiv},
  eprint     = {2202.09699},
  timestamp  = {Tue, 01 Mar 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-09699.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/2209.07550.
% "Atari" is braced so that sentence-casing styles keep its capital letter.
@article{DBLP:journals/corr/abs-2209-07550,
  author     = {Kapturowski, Steven and
                Campos, V{\'{\i}}ctor and
                Jiang, Ray and
                Rakicevic, Nemanja and
                van Hasselt, Hado and
                Blundell, Charles and
                Badia, Adri{\`{a}} Puigdom{\`{e}}nech},
  title      = {Human-level {Atari} 200x faster},
  journal    = {CoRR},
  volume     = {abs/2209.07550},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2209.07550},
  doi        = {10.48550/ARXIV.2209.07550},
  eprinttype = {arXiv},
  eprint     = {2209.07550},
  timestamp  = {Wed, 02 Nov 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2209-07550.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, AAAI 2021.
@inproceedings{DBLP:conf/aaai/HasseltMHSBB21,
  author     = {van Hasselt, Hado and
                Madjiheurem, Sephora and
                Hessel, Matteo and
                Silver, David and
                Barreto, Andr{\'{e}} and
                Borsa, Diana},
  title      = {Expected Eligibility Traces},
  booktitle  = {Thirty-Fifth {AAAI} Conference on Artificial Intelligence, {AAAI}
                2021, Thirty-Third Conference on Innovative Applications of Artificial
                Intelligence, {IAAI} 2021, The Eleventh Symposium on Educational Advances
                in Artificial Intelligence, {EAAI} 2021, Virtual Event, February 2-9,
                2021},
  pages      = {9997--10005},
  publisher  = {{AAAI} Press},
  year       = {2021},
  url        = {https://doi.org/10.1609/aaai.v35i11.17200},
  doi        = {10.1609/AAAI.V35I11.17200},
  timestamp  = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/HasseltMHSBB21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, AAMAS 2021.
@inproceedings{DBLP:conf/atal/GarneloCLTOGHB21,
  author     = {Garnelo, Marta and
                Czarnecki, Wojciech Marian and
                Liu, Siqi and
                Tirumala, Dhruva and
                Oh, Junhyuk and
                Gidel, Gauthier and
                van Hasselt, Hado and
                Balduzzi, David},
  editor     = {Dignum, Frank and
                Lomuscio, Alessio and
                Endriss, Ulle and
                Now{\'{e}}, Ann},
  title      = {Pick Your Battles: Interaction Graphs as Population-Level Objectives
                for Strategic Diversity},
  booktitle  = {{AAMAS} '21: 20th International Conference on Autonomous Agents and
                Multiagent Systems, Virtual Event, United Kingdom, May 3-7, 2021},
  pages      = {1501--1503},
  publisher  = {{ACM}},
  year       = {2021},
  url        = {https://www.ifaamas.org/Proceedings/aamas2021/pdfs/p1501.pdf},
  doi        = {10.5555/3463952.3464139},
  timestamp  = {Thu, 11 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/atal/GarneloCLTOGHB21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, ICML 2021 (PMLR volume 139).
@inproceedings{DBLP:conf/icml/HesselDVGSSWSH21,
  author     = {Hessel, Matteo and
                Danihelka, Ivo and
                Viola, Fabio and
                Guez, Arthur and
                Schmitt, Simon and
                Sifre, Laurent and
                Weber, Theophane and
                Silver, David and
                van Hasselt, Hado},
  editor     = {Meila, Marina and
                Zhang, Tong},
  title      = {Muesli: Combining Improvements in Policy Optimization},
  booktitle  = {Proceedings of the 38th International Conference on Machine Learning,
                {ICML} 2021, 18-24 July 2021, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {139},
  pages      = {4214--4226},
  publisher  = {{PMLR}},
  year       = {2021},
  url        = {http://proceedings.mlr.press/v139/hessel21a.html},
  timestamp  = {Wed, 25 Aug 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/HesselDVGSSWSH21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, ICML 2021 (PMLR volume 139).
@inproceedings{DBLP:conf/icml/JiangZXWHBH21,
  author     = {Jiang, Ray and
                Zahavy, Tom and
                Xu, Zhongwen and
                White, Adam and
                Hessel, Matteo and
                Blundell, Charles and
                van Hasselt, Hado},
  editor     = {Meila, Marina and
                Zhang, Tong},
  title      = {Emphatic Algorithms for Deep Reinforcement Learning},
  booktitle  = {Proceedings of the 38th International Conference on Machine Learning,
                {ICML} 2021, 18-24 July 2021, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {139},
  pages      = {5023--5033},
  publisher  = {{PMLR}},
  year       = {2021},
  url        = {http://proceedings.mlr.press/v139/jiang21j.html},
  timestamp  = {Mon, 25 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/JiangZXWHBH21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, NeurIPS 2021.
@inproceedings{DBLP:conf/nips/FarquharBMFHHS21,
  author     = {Farquhar, Gregory and
                Baumli, Kate and
                Marinho, Zita and
                Filos, Angelos and
                Hessel, Matteo and
                van Hasselt, Hado Philip and
                Silver, David},
  editor     = {Ranzato, Marc'Aurelio and
                Beygelzimer, Alina and
                Dauphin, Yann N. and
                Liang, Percy and
                Vaughan, Jennifer Wortman},
  title      = {Self-Consistent Models and Values},
  booktitle  = {Advances in Neural Information Processing Systems 34: Annual Conference
                on Neural Information Processing Systems 2021, NeurIPS 2021, December
                6-14, 2021, virtual},
  pages      = {1111--1125},
  year       = {2021},
  url        = {https://proceedings.neurips.cc/paper/2021/hash/08f0efebb1c51aada9430a089a2050cc-Abstract.html},
  timestamp  = {Tue, 03 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/FarquharBMFHHS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, NeurIPS 2021.
@inproceedings{DBLP:conf/nips/VeeriahZHXOKHSS21,
  author     = {Veeriah, Vivek and
                Zahavy, Tom and
                Hessel, Matteo and
                Xu, Zhongwen and
                Oh, Junhyuk and
                Kemaev, Iurii and
                van Hasselt, Hado and
                Silver, David and
                Singh, Satinder},
  editor     = {Ranzato, Marc'Aurelio and
                Beygelzimer, Alina and
                Dauphin, Yann N. and
                Liang, Percy and
                Vaughan, Jennifer Wortman},
  title      = {Discovery of Options via Meta-Learned Subgoals},
  booktitle  = {Advances in Neural Information Processing Systems 34: Annual Conference
                on Neural Information Processing Systems 2021, NeurIPS 2021, December
                6-14, 2021, virtual},
  pages      = {29861--29873},
  year       = {2021},
  url        = {https://proceedings.neurips.cc/paper/2021/hash/fa246d0262c3925617b0c72bb20eeb1d-Abstract.html},
  timestamp  = {Tue, 03 May 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/VeeriahZHXOKHSS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/2102.06741.
@article{DBLP:journals/corr/abs-2102-06741,
  author     = {Veeriah, Vivek and
                Zahavy, Tom and
                Hessel, Matteo and
                Xu, Zhongwen and
                Oh, Junhyuk and
                Kemaev, Iurii and
                van Hasselt, Hado and
                Silver, David and
                Singh, Satinder},
  title      = {Discovery of Options via Meta-Learned Subgoals},
  journal    = {CoRR},
  volume     = {abs/2102.06741},
  year       = {2021},
  url        = {https://arxiv.org/abs/2102.06741},
  eprinttype = {arXiv},
  eprint     = {2102.06741},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2102-06741.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/2102.12425.
@article{DBLP:journals/corr/abs-2102-12425,
  author     = {Raposo, David and
                Ritter, Samuel and
                Santoro, Adam and
                Wayne, Greg and
                Weber, Theophane and
                Botvinick, Matt M. and
                van Hasselt, Hado and
                Song, H. Francis},
  title      = {Synthetic Returns for Long-Term Credit Assignment},
  journal    = {CoRR},
  volume     = {abs/2102.12425},
  year       = {2021},
  url        = {https://arxiv.org/abs/2102.12425},
  eprinttype = {arXiv},
  eprint     = {2102.12425},
  timestamp  = {Tue, 02 Mar 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2102-12425.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/2104.06159.
@article{DBLP:journals/corr/abs-2104-06159,
  author     = {Hessel, Matteo and
                Danihelka, Ivo and
                Viola, Fabio and
                Guez, Arthur and
                Schmitt, Simon and
                Sifre, Laurent and
                Weber, Theophane and
                Silver, David and
                van Hasselt, Hado},
  title      = {Muesli: Combining Improvements in Policy Optimization},
  journal    = {CoRR},
  volume     = {abs/2104.06159},
  year       = {2021},
  url        = {https://arxiv.org/abs/2104.06159},
  eprinttype = {arXiv},
  eprint     = {2104.06159},
  timestamp  = {Mon, 19 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2104-06159.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/2104.06272.
@article{DBLP:journals/corr/abs-2104-06272,
  author     = {Hessel, Matteo and
                Kroiss, Manuel and
                Clark, Aidan and
                Kemaev, Iurii and
                Quan, John and
                Keck, Thomas and
                Viola, Fabio and
                van Hasselt, Hado},
  title      = {Podracer architectures for scalable Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2104.06272},
  year       = {2021},
  url        = {https://arxiv.org/abs/2104.06272},
  eprinttype = {arXiv},
  eprint     = {2104.06272},
  timestamp  = {Mon, 19 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2104-06272.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/2106.11779.
@article{DBLP:journals/corr/abs-2106-11779,
  author     = {Jiang, Ray and
                Zahavy, Tom and
                Xu, Zhongwen and
                White, Adam and
                Hessel, Matteo and
                Blundell, Charles and
                van Hasselt, Hado},
  title      = {Emphatic Algorithms for Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2106.11779},
  year       = {2021},
  url        = {https://arxiv.org/abs/2106.11779},
  eprinttype = {arXiv},
  eprint     = {2106.11779},
  timestamp  = {Mon, 25 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2106-11779.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/2107.05405.
@article{DBLP:journals/corr/abs-2107-05405,
  author     = {Jiang, Ray and
                Zhang, Shangtong and
                Chelu, Veronica and
                White, Adam and
                van Hasselt, Hado},
  title      = {Learning Expected Emphatic Traces for Deep {RL}},
  journal    = {CoRR},
  volume     = {abs/2107.05405},
  year       = {2021},
  url        = {https://arxiv.org/abs/2107.05405},
  eprinttype = {arXiv},
  eprint     = {2107.05405},
  timestamp  = {Mon, 25 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2107-05405.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/2109.04504.
@article{DBLP:journals/corr/abs-2109-04504,
  author     = {Flennerhag, Sebastian and
                Schroecker, Yannick and
                Zahavy, Tom and
                van Hasselt, Hado and
                Silver, David and
                Singh, Satinder},
  title      = {Bootstrapped Meta-Learning},
  journal    = {CoRR},
  volume     = {abs/2109.04504},
  year       = {2021},
  url        = {https://arxiv.org/abs/2109.04504},
  eprinttype = {arXiv},
  eprint     = {2109.04504},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2109-04504.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/2109.10781.
@article{DBLP:journals/corr/abs-2109-10781,
  author     = {Kirsch, Louis and
                Flennerhag, Sebastian and
                van Hasselt, Hado and
                Friesen, Abram L. and
                Oh, Junhyuk and
                Chen, Yutian},
  title      = {Introducing Symmetries to Black Box Meta Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2109.10781},
  year       = {2021},
  url        = {https://arxiv.org/abs/2109.10781},
  eprinttype = {arXiv},
  eprint     = {2109.10781},
  timestamp  = {Sat, 16 Dec 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2109-10781.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/2110.04041.
@article{DBLP:journals/corr/abs-2110-04041,
  author     = {Garnelo, Marta and
                Czarnecki, Wojciech Marian and
                Liu, Siqi and
                Tirumala, Dhruva and
                Oh, Junhyuk and
                Gidel, Gauthier and
                van Hasselt, Hado and
                Balduzzi, David},
  title      = {Pick Your Battles: Interaction Graphs as Population-Level Objectives
                for Strategic Diversity},
  journal    = {CoRR},
  volume     = {abs/2110.04041},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.04041},
  eprinttype = {arXiv},
  eprint     = {2110.04041},
  timestamp  = {Thu, 08 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-04041.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint, CoRR abs/2110.12840.
@article{DBLP:journals/corr/abs-2110-12840,
  author     = {Farquhar, Gregory and
                Baumli, Kate and
                Marinho, Zita and
                Filos, Angelos and
                Hessel, Matteo and
                van Hasselt, Hado and
                Silver, David},
  title      = {Self-Consistent Models and Values},
  journal    = {CoRR},
  volume     = {abs/2110.12840},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.12840},
  eprinttype = {arXiv},
  eprint     = {2110.12840},
  timestamp  = {Thu, 28 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-12840.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, AISTATS 2020 (PMLR volume 108).
@inproceedings{DBLP:conf/aistats/RowlandHHBSMD20,
  author     = {Rowland, Mark and
                Harutyunyan, Anna and
                van Hasselt, Hado and
                Borsa, Diana and
                Schaul, Tom and
                Munos, R{\'{e}}mi and
                Dabney, Will},
  editor     = {Chiappa, Silvia and
                Calandra, Roberto},
  title      = {Conditional Importance Sampling for Off-Policy Learning},
  booktitle  = {The 23rd International Conference on Artificial Intelligence and Statistics,
                {AISTATS} 2020, 26-28 August 2020, Online [Palermo, Sicily, Italy]},
  series     = {Proceedings of Machine Learning Research},
  volume     = {108},
  pages      = {45--55},
  publisher  = {{PMLR}},
  year       = {2020},
  url        = {http://proceedings.mlr.press/v108/rowland20b.html},
  timestamp  = {Mon, 29 Jun 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/aistats/RowlandHHBSMD20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Names use the "Last, First" form: "Van Roy" is a two-word surname, and the
% "First Last" form makes BibTeX parse "Van" as a given name (rendering "B. V. Roy").
@inproceedings{DBLP:conf/iclr/OsbandDHASSMLSS20,
  author     = {Osband, Ian and
                Doron, Yotam and
                Hessel, Matteo and
                Aslanides, John and
                Sezener, Eren and
                Saraiva, Andre and
                McKinney, Katrina and
                Lattimore, Tor and
                Szepesv{\'{a}}ri, Csaba and
                Singh, Satinder and
                Van Roy, Benjamin and
                Sutton, Richard S. and
                Silver, David and
                van Hasselt, Hado},
  title      = {Behaviour Suite for Reinforcement Learning},
  booktitle  = {8th International Conference on Learning Representations, {ICLR} 2020,
                Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher  = {OpenReview.net},
  year       = {2020},
  url        = {https://openreview.net/forum?id=rygf-kSYwH},
  timestamp  = {Mon, 15 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/OsbandDHASSMLSS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/ZhengOHXKHSS20,
  author     = {Zheng, Zeyu and
                Oh, Junhyuk and
                Hessel, Matteo and
                Xu, Zhongwen and
                Kroiss, Manuel and
                van Hasselt, Hado and
                Silver, David and
                Singh, Satinder},
  title      = {What Can Learned Intrinsic Rewards Capture?},
  booktitle  = {Proceedings of the 37th International Conference on Machine Learning,
                {ICML} 2020, 13-18 July 2020, Virtual Event},
  series     = {Proceedings of Machine Learning Research},
  volume     = {119},
  pages      = {11436--11446},
  publisher  = {{PMLR}},
  year       = {2020},
  url        = {http://proceedings.mlr.press/v119/zheng20b.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/ZhengOHXKHSS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/CheluPH20,
  author     = {Chelu, Veronica and
                Precup, Doina and
                van Hasselt, Hado},
  editor     = {Larochelle, Hugo and
                Ranzato, Marc'Aurelio and
                Hadsell, Raia and
                Balcan, Maria{-}Florina and
                Lin, Hsuan{-}Tien},
  title      = {Forethought and Hindsight in Credit Assignment},
  booktitle  = {Advances in Neural Information Processing Systems 33: Annual Conference
                on Neural Information Processing Systems 2020, NeurIPS 2020, December
                6-12, 2020, virtual},
  year       = {2020},
  url        = {https://proceedings.neurips.cc/paper/2020/hash/18064d61b6f93dab8681a460779b8429-Abstract.html},
  timestamp  = {Tue, 19 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/CheluPH20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/OhHCXHSS20,
  author     = {Oh, Junhyuk and
                Hessel, Matteo and
                Czarnecki, Wojciech M. and
                Xu, Zhongwen and
                van Hasselt, Hado and
                Singh, Satinder and
                Silver, David},
  editor     = {Larochelle, Hugo and
                Ranzato, Marc'Aurelio and
                Hadsell, Raia and
                Balcan, Maria{-}Florina and
                Lin, Hsuan{-}Tien},
  title      = {Discovering Reinforcement Learning Algorithms},
  booktitle  = {Advances in Neural Information Processing Systems 33: Annual Conference
                on Neural Information Processing Systems 2020, NeurIPS 2020, December
                6-12, 2020, virtual},
  year       = {2020},
  url        = {https://proceedings.neurips.cc/paper/2020/hash/0b96d81f0494fde5428c7aea243c9157-Abstract.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/OhHCXHSS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/XuHHOSS20,
  author     = {Xu, Zhongwen and
                van Hasselt, Hado Philip and
                Hessel, Matteo and
                Oh, Junhyuk and
                Singh, Satinder and
                Silver, David},
  editor     = {Larochelle, Hugo and
                Ranzato, Marc'Aurelio and
                Hadsell, Raia and
                Balcan, Maria{-}Florina and
                Lin, Hsuan{-}Tien},
  title      = {Meta-Gradient Reinforcement Learning with an Objective Discovered
                Online},
  booktitle  = {Advances in Neural Information Processing Systems 33: Annual Conference
                on Neural Information Processing Systems 2020, NeurIPS 2020, December
                6-12, 2020, virtual},
  year       = {2020},
  url        = {https://proceedings.neurips.cc/paper/2020/hash/ae3d525daf92cee0003a7f2d92c34ea3-Abstract.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/XuHHOSS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/ZahavyXVHOHSS20,
  author     = {Zahavy, Tom and
                Xu, Zhongwen and
                Veeriah, Vivek and
                Hessel, Matteo and
                Oh, Junhyuk and
                van Hasselt, Hado and
                Silver, David and
                Singh, Satinder},
  editor     = {Larochelle, Hugo and
                Ranzato, Marc'Aurelio and
                Hadsell, Raia and
                Balcan, Maria{-}Florina and
                Lin, Hsuan{-}Tien},
  title      = {A Self-Tuning Actor-Critic Algorithm},
  booktitle  = {Advances in Neural Information Processing Systems 33: Annual Conference
                on Neural Information Processing Systems 2020, NeurIPS 2020, December
                6-12, 2020, virtual},
  year       = {2020},
  url        = {https://proceedings.neurips.cc/paper/2020/hash/f02208a057804ee16ac72ff4d3cec53b-Abstract.html},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/ZahavyXVHOHSS20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2002-12928,
  author     = {Zahavy, Tom and
                Xu, Zhongwen and
                Veeriah, Vivek and
                Hessel, Matteo and
                Oh, Junhyuk and
                van Hasselt, Hado and
                Silver, David and
                Singh, Satinder},
  title      = {Self-Tuning Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2002.12928},
  year       = {2020},
  url        = {https://arxiv.org/abs/2002.12928},
  eprinttype = {arXiv},
  eprint     = {2002.12928},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2002-12928.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2007-01839,
  author     = {van Hasselt, Hado and
                Madjiheurem, Sephora and
                Hessel, Matteo and
                Silver, David and
                Barreto, Andr{\'{e}} and
                Borsa, Diana},
  title      = {Expected Eligibility Traces},
  journal    = {CoRR},
  volume     = {abs/2007.01839},
  year       = {2020},
  url        = {https://arxiv.org/abs/2007.01839},
  eprinttype = {arXiv},
  eprint     = {2007.01839},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-01839.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2007-08433,
  author     = {Xu, Zhongwen and
                van Hasselt, Hado and
                Hessel, Matteo and
                Oh, Junhyuk and
                Singh, Satinder and
                Silver, David},
  title      = {Meta-Gradient Reinforcement Learning with an Objective Discovered
                Online},
  journal    = {CoRR},
  volume     = {abs/2007.08433},
  year       = {2020},
  url        = {https://arxiv.org/abs/2007.08433},
  eprinttype = {arXiv},
  eprint     = {2007.08433},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-08433.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2007-08794,
  author     = {Oh, Junhyuk and
                Hessel, Matteo and
                Czarnecki, Wojciech M. and
                Xu, Zhongwen and
                van Hasselt, Hado and
                Singh, Satinder and
                Silver, David},
  title      = {Discovering Reinforcement Learning Algorithms},
  journal    = {CoRR},
  volume     = {abs/2007.08794},
  year       = {2020},
  url        = {https://arxiv.org/abs/2007.08794},
  eprinttype = {arXiv},
  eprint     = {2007.08794},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-08794.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2010-13685,
  author     = {Chelu, Veronica and
                Precup, Doina and
                van Hasselt, Hado},
  title      = {Forethought and Hindsight in Credit Assignment},
  journal    = {CoRR},
  volume     = {abs/2010.13685},
  year       = {2020},
  url        = {https://arxiv.org/abs/2010.13685},
  eprinttype = {arXiv},
  eprint     = {2010.13685},
  timestamp  = {Mon, 02 Nov 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-13685.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% "PopArt" is brace-protected in the title so sentence-casing styles keep its capitalisation.
@inproceedings{DBLP:conf/aaai/HesselSE0SH19,
  author     = {Hessel, Matteo and
                Soyer, Hubert and
                Espeholt, Lasse and
                Czarnecki, Wojciech and
                Schmitt, Simon and
                van Hasselt, Hado},
  title      = {Multi-Task Deep Reinforcement Learning with {PopArt}},
  booktitle  = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI}
                2019, The Thirty-First Innovative Applications of Artificial Intelligence
                Conference, {IAAI} 2019, The Ninth {AAAI} Symposium on Educational
                Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii,
                USA, January 27 - February 1, 2019},
  pages      = {3796--3803},
  publisher  = {{AAAI} Press},
  year       = {2019},
  url        = {https://doi.org/10.1609/aaai.v33i01.33013796},
  doi        = {10.1609/AAAI.V33I01.33013796},
  timestamp  = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/HesselSE0SH19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/BorsaBQMHMSS19,
  author     = {Borsa, Diana and
                Barreto, Andr{\'{e}} and
                Quan, John and
                Mankowitz, Daniel J. and
                van Hasselt, Hado and
                Munos, R{\'{e}}mi and
                Silver, David and
                Schaul, Tom},
  title      = {Universal Successor Features Approximators},
  booktitle  = {7th International Conference on Learning Representations, {ICLR} 2019,
                New Orleans, LA, USA, May 6-9, 2019},
  publisher  = {OpenReview.net},
  year       = {2019},
  url        = {https://openreview.net/forum?id=S1VWjiRcKX},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/iclr/BorsaBQMHMSS19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/VeeriahHXRLOHSS19,
  author     = {Veeriah, Vivek and
                Hessel, Matteo and
                Xu, Zhongwen and
                Rajendran, Janarthanan and
                Lewis, Richard L. and
                Oh, Junhyuk and
                van Hasselt, Hado and
                Silver, David and
                Singh, Satinder},
  editor     = {Wallach, Hanna M. and
                Larochelle, Hugo and
                Beygelzimer, Alina and
                d'Alch{\'{e}}{-}Buc, Florence and
                Fox, Emily B. and
                Garnett, Roman},
  title      = {Discovery of Useful Questions as Auxiliary Tasks},
  booktitle  = {Advances in Neural Information Processing Systems 32: Annual Conference
                on Neural Information Processing Systems 2019, NeurIPS 2019, December
                8-14, 2019, Vancouver, BC, Canada},
  pages      = {9306--9317},
  year       = {2019},
  url        = {https://proceedings.neurips.cc/paper/2019/hash/10ff0b5e85e5b85cc3095d431d8c08b4-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/VeeriahHXRLOHSS19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/HarutyunyanDMAP19,
  author     = {Harutyunyan, Anna and
                Dabney, Will and
                Mesnard, Thomas and
                Azar, Mohammad Gheshlaghi and
                Piot, Bilal and
                Heess, Nicolas and
                van Hasselt, Hado and
                Wayne, Gregory and
                Singh, Satinder and
                Precup, Doina and
                Munos, R{\'{e}}mi},
  editor     = {Wallach, Hanna M. and
                Larochelle, Hugo and
                Beygelzimer, Alina and
                d'Alch{\'{e}}{-}Buc, Florence and
                Fox, Emily B. and
                Garnett, Roman},
  title      = {Hindsight Credit Assignment},
  booktitle  = {Advances in Neural Information Processing Systems 32: Annual Conference
                on Neural Information Processing Systems 2019, NeurIPS 2019, December
                8-14, 2019, Vancouver, BC, Canada},
  pages      = {12467--12476},
  year       = {2019},
  url        = {https://proceedings.neurips.cc/paper/2019/hash/195f15384c2a79cedf293e4a847ce85c-Abstract.html},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/HarutyunyanDMAP19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/HasseltHA19,
  author     = {van Hasselt, Hado and
                Hessel, Matteo and
                Aslanides, John},
  editor     = {Wallach, Hanna M. and
                Larochelle, Hugo and
                Beygelzimer, Alina and
                d'Alch{\'{e}}{-}Buc, Florence and
                Fox, Emily B. and
                Garnett, Roman},
  title      = {When to use parametric models in reinforcement learning?},
  booktitle  = {Advances in Neural Information Processing Systems 32: Annual Conference
                on Neural Information Processing Systems 2019, NeurIPS 2019, December
                8-14, 2019, Vancouver, BC, Canada},
  pages      = {14322--14333},
  year       = {2019},
  url        = {https://proceedings.neurips.cc/paper/2019/hash/1b742ae215adf18b75449c6e272fd92d-Abstract.html},
  timestamp  = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/nips/HasseltHA19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1905-03030,
  author     = {Ortega, Pedro A. and
                Wang, Jane X. and
                Rowland, Mark and
                Genewein, Tim and
                Kurth{-}Nelson, Zeb and
                Pascanu, Razvan and
                Heess, Nicolas and
                Veness, Joel and
                Pritzel, Alexander and
                Sprechmann, Pablo and
                Jayakumar, Siddhant M. and
                McGrath, Tom and
                Miller, Kevin J. and
                Azar, Mohammad Gheshlaghi and
                Osband, Ian and
                Rabinowitz, Neil C. and
                Gy{\"{o}}rgy, Andr{\'{a}}s and
                Chiappa, Silvia and
                Osindero, Simon and
                Teh, Yee Whye and
                van Hasselt, Hado and
                de Freitas, Nando and
                Botvinick, Matthew M. and
                Legg, Shane},
  title      = {Meta-learning of Sequential Strategies},
  journal    = {CoRR},
  volume     = {abs/1905.03030},
  year       = {2019},
  url        = {http://arxiv.org/abs/1905.03030},
  eprinttype = {arXiv},
  eprint     = {1905.03030},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1905-03030.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1906-05243,
  author     = {van Hasselt, Hado and
                Hessel, Matteo and
                Aslanides, John},
  title      = {When to use parametric models in reinforcement learning?},
  journal    = {CoRR},
  volume     = {abs/1906.05243},
  year       = {2019},
  url        = {http://arxiv.org/abs/1906.05243},
  eprinttype = {arXiv},
  eprint     = {1906.05243},
  timestamp  = {Fri, 14 Jun 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1906-05243.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1907-02908,
  author     = {Hessel, Matteo and
                van Hasselt, Hado and
                Modayil, Joseph and
                Silver, David},
  title      = {On Inductive Biases in Deep Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1907.02908},
  year       = {2019},
  url        = {http://arxiv.org/abs/1907.02908},
  eprinttype = {arXiv},
  eprint     = {1907.02908},
  timestamp  = {Mon, 08 Jul 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1907-02908.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The proper noun "Bellman" is brace-protected so sentence-casing styles do not lowercase it.
@article{DBLP:journals/corr/abs-1907-03687,
  author     = {van Hasselt, Hado and
                Quan, John and
                Hessel, Matteo and
                Xu, Zhongwen and
                Borsa, Diana and
                Barreto, Andr{\'{e}}},
  title      = {General non-linear {Bellman} equations},
  journal    = {CoRR},
  volume     = {abs/1907.03687},
  year       = {2019},
  url        = {http://arxiv.org/abs/1907.03687},
  eprinttype = {arXiv},
  eprint     = {1907.03687},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1907-03687.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Names use the "Last, First" form: "Van Roy" is a two-word surname, and the
% "First Last" form makes BibTeX parse "Van" as a given name (rendering "B. V. Roy").
@article{DBLP:journals/corr/abs-1908-03568,
  author     = {Osband, Ian and
                Doron, Yotam and
                Hessel, Matteo and
                Aslanides, John and
                Sezener, Eren and
                Saraiva, Andre and
                McKinney, Katrina and
                Lattimore, Tor and
                Szepesv{\'{a}}ri, Csaba and
                Singh, Satinder and
                Van Roy, Benjamin and
                Sutton, Richard S. and
                Silver, David and
                van Hasselt, Hado},
  title      = {Behaviour Suite for Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1908.03568},
  year       = {2019},
  url        = {http://arxiv.org/abs/1908.03568},
  eprinttype = {arXiv},
  eprint     = {1908.03568},
  timestamp  = {Mon, 15 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1908-03568.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1909-04607,
  author     = {Veeriah, Vivek and
                Hessel, Matteo and
                Xu, Zhongwen and
                Lewis, Richard L. and
                Rajendran, Janarthanan and
                Oh, Junhyuk and
                van Hasselt, Hado and
                Silver, David and
                Singh, Satinder},
  title      = {Discovery of Useful Questions as Auxiliary Tasks},
  journal    = {CoRR},
  volume     = {abs/1909.04607},
  year       = {2019},
  url        = {http://arxiv.org/abs/1909.04607},
  eprinttype = {arXiv},
  eprint     = {1909.04607},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1909-04607.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1910-07479,
  author     = {Rowland, Mark and
                Harutyunyan, Anna and
                van Hasselt, Hado and
                Borsa, Diana and
                Schaul, Tom and
                Munos, R{\'{e}}mi and
                Dabney, Will},
  title      = {Conditional Importance Sampling for Off-Policy Learning},
  journal    = {CoRR},
  volume     = {abs/1910.07479},
  year       = {2019},
  url        = {http://arxiv.org/abs/1910.07479},
  eprinttype = {arXiv},
  eprint     = {1910.07479},
  timestamp  = {Tue, 22 Oct 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1910-07479.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1912-02503,
  author     = {Harutyunyan, Anna and
                Dabney, Will and
                Mesnard, Thomas and
                Azar, Mohammad Gheshlaghi and
                Piot, Bilal and
                Heess, Nicolas and
                van Hasselt, Hado and
                Wayne, Greg and
                Singh, Satinder and
                Precup, Doina and
                Munos, R{\'{e}}mi},
  title      = {Hindsight Credit Assignment},
  journal    = {CoRR},
  volume     = {abs/1912.02503},
  year       = {2019},
  url        = {http://arxiv.org/abs/1912.02503},
  eprinttype = {arXiv},
  eprint     = {1912.02503},
  timestamp  = {Wed, 20 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1912-02503.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1912-05500,
  author     = {Zheng, Zeyu and
                Oh, Junhyuk and
                Hessel, Matteo and
                Xu, Zhongwen and
                Kroiss, Manuel and
                van Hasselt, Hado and
                Silver, David and
                Singh, Satinder},
  title      = {What Can Learned Intrinsic Rewards Capture?},
  journal    = {CoRR},
  volume     = {abs/1912.05500},
  year       = {2019},
  url        = {http://arxiv.org/abs/1912.05500},
  eprinttype = {arXiv},
  eprint     = {1912.05500},
  timestamp  = {Tue, 19 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1912-05500.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/HesselMHSODHPAS18,
  author     = {Hessel, Matteo and
                Modayil, Joseph and
                van Hasselt, Hado and
                Schaul, Tom and
                Ostrovski, Georg and
                Dabney, Will and
                Horgan, Dan and
                Piot, Bilal and
                Azar, Mohammad Gheshlaghi and
                Silver, David},
  editor     = {McIlraith, Sheila A. and
                Weinberger, Kilian Q.},
  title      = {Rainbow: Combining Improvements in Deep Reinforcement Learning},
  booktitle  = {Proceedings of the Thirty-Second {AAAI} Conference on Artificial Intelligence,
                (AAAI-18), the 30th innovative Applications of Artificial Intelligence
                (IAAI-18), and the 8th {AAAI} Symposium on Educational Advances in
                Artificial Intelligence (EAAI-18), New Orleans, Louisiana, USA, February
                2-7, 2018},
  pages      = {3215--3222},
  publisher  = {{AAAI} Press},
  year       = {2018},
  url        = {https://doi.org/10.1609/aaai.v32i1.11796},
  doi        = {10.1609/AAAI.V32I1.11796},
  timestamp  = {Mon, 04 Sep 2023 12:29:24 +0200},
  biburl     = {https://dblp.org/rec/conf/aaai/HesselMHSODHPAS18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/HorganQBBHHS18,
  author     = {Horgan, Dan and
                Quan, John and
                Budden, David and
                Barth{-}Maron, Gabriel and
                Hessel, Matteo and
                van Hasselt, Hado and
                Silver, David},
  title      = {Distributed Prioritized Experience Replay},
  booktitle  = {6th International Conference on Learning Representations, {ICLR} 2018,
                Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings},
  publisher  = {OpenReview.net},
  year       = {2018},
  url        = {https://openreview.net/forum?id=H1Dy---0Z},
  timestamp  = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iclr/HorganQBBHHS18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icml/BargiacchiVRNH18,
  author     = {Bargiacchi, Eugenio and
                Verstraeten, Timothy and
                Roijers, Diederik M. and
                Now{\'{e}}, Ann and
                van Hasselt, Hado},
  editor     = {Dy, Jennifer G. and
                Krause, Andreas},
  title      = {Learning to Coordinate with Coordination Graphs in Repeated Single-Stage
                Multi-Agent Decision Problems},
  booktitle  = {Proceedings of the 35th International Conference on Machine Learning,
                {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July
                10-15, 2018},
  series     = {Proceedings of Machine Learning Research},
  volume     = {80},
  pages      = {491--499},
  publisher  = {{PMLR}},
  year       = {2018},
  url        = {http://proceedings.mlr.press/v80/bargiacchi18a.html},
  timestamp  = {Wed, 03 Apr 2019 18:17:30 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/BargiacchiVRNH18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/XuHS18,
  author     = {Xu, Zhongwen and
                van Hasselt, Hado and
                Silver, David},
  editor     = {Bengio, Samy and
                Wallach, Hanna M. and
                Larochelle, Hugo and
                Grauman, Kristen and
                Cesa{-}Bianchi, Nicol{\`{o}} and
                Garnett, Roman},
  title      = {Meta-Gradient Reinforcement Learning},
  booktitle  = {Advances in Neural Information Processing Systems 31: Annual Conference
                on Neural Information Processing Systems 2018, NeurIPS 2018, December
                3-8, 2018, Montr{\'{e}}al, Canada},
  pages      = {2402--2413},
  year       = {2018},
  url        = {https://proceedings.neurips.cc/paper/2018/hash/2715518c875999308842e3455eda2fe3-Abstract.html},
  timestamp  = {Mon, 16 May 2022 15:41:51 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/XuHS18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1802-08294,
  author     = {Mankowitz, Daniel J. and
                Z{\'{\i}}dek, Augustin and
                Barreto, Andr{\'{e}} and
                Horgan, Dan and
                Hessel, Matteo and
                Quan, John and
                Oh, Junhyuk and
                van Hasselt, Hado and
                Silver, David and
                Schaul, Tom},
  title      = {Unicorn: Continual Learning with a Universal, Off-policy Agent},
  journal    = {CoRR},
  volume     = {abs/1802.08294},
  year       = {2018},
  url        = {http://arxiv.org/abs/1802.08294},
  eprinttype = {arXiv},
  eprint     = {1802.08294},
  timestamp  = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1802-08294.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1803-00933,
  author     = {Horgan, Dan and
                Quan, John and
                Budden, David and
                Barth{-}Maron, Gabriel and
                Hessel, Matteo and
                van Hasselt, Hado and
                Silver, David},
  title      = {Distributed Prioritized Experience Replay},
  journal    = {CoRR},
  volume     = {abs/1803.00933},
  year       = {2018},
  url        = {http://arxiv.org/abs/1803.00933},
  eprinttype = {arXiv},
  eprint     = {1803.00933},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1803-00933.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1805-09801,
  author     = {Xu, Zhongwen and
                van Hasselt, Hado and
                Silver, David},
  title      = {Meta-Gradient Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/1805.09801},
  year       = {2018},
  url        = {http://arxiv.org/abs/1805.09801},
  eprinttype = {arXiv},
  eprint     = {1805.09801},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1805-09801.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The proper noun "Atari" is brace-protected so sentence-casing styles do not lowercase it.
@article{DBLP:journals/corr/abs-1805-11593,
  author     = {Pohlen, Tobias and
                Piot, Bilal and
                Hester, Todd and
                Azar, Mohammad Gheshlaghi and
                Horgan, Dan and
                Budden, David and
                Barth{-}Maron, Gabriel and
                van Hasselt, Hado and
                Quan, John and
                Vecer{\'{\i}}k, Mel and
                Hessel, Matteo and
                Munos, R{\'{e}}mi and
                Pietquin, Olivier},
  title      = {Observe and Look Further: Achieving Consistent Performance on {Atari}},
  journal    = {CoRR},
  volume     = {abs/1805.11593},
  year       = {2018},
  url        = {http://arxiv.org/abs/1805.11593},
  eprinttype = {arXiv},
  eprint     = {1805.11593},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1805-11593.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% "PopArt" is brace-protected in the title so sentence-casing styles keep its capitalisation.
@article{DBLP:journals/corr/abs-1809-04474,
  author     = {Hessel, Matteo and
                Soyer, Hubert and
                Espeholt, Lasse and
                Czarnecki, Wojciech and
                Schmitt, Simon and
                van Hasselt, Hado},
  title      = {Multi-task Deep Reinforcement Learning with {PopArt}},
  journal    = {CoRR},
  volume     = {abs/1809.04474},
  year       = {2018},
  url        = {http://arxiv.org/abs/1809.04474},
  eprinttype = {arXiv},
  eprint     = {1809.04474},
  timestamp  = {Fri, 05 Oct 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1809-04474.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1811.07004), 2018.
% "Barbados" is braced in the title so sentence-casing styles keep the capital.
@article{DBLP:journals/corr/abs-1811-07004,
  author       = {Schaul, Tom and
                  van Hasselt, Hado and
                  Modayil, Joseph and
                  White, Martha and
                  White, Adam and
                  Bacon, Pierre{-}Luc and
                  Harb, Jean and
                  Mourad, Shibl and
                  Bellemare, Marc G. and
                  Precup, Doina},
  title        = {The {Barbados} 2018 List of Open Issues in Continual Learning},
  journal      = {CoRR},
  volume       = {abs/1811.07004},
  year         = {2018},
  url          = {http://arxiv.org/abs/1811.07004},
  eprinttype   = {arXiv},
  eprint       = {1811.07004},
  timestamp    = {Thu, 21 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1811-07004.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1812.02648), 2018.
@article{DBLP:journals/corr/abs-1812-02648,
  author       = {van Hasselt, Hado and
                  Doron, Yotam and
                  Strub, Florian and
                  Hessel, Matteo and
                  Sonnerat, Nicolas and
                  Modayil, Joseph},
  title        = {Deep Reinforcement Learning and the Deadly Triad},
  journal      = {CoRR},
  volume       = {abs/1812.02648},
  year         = {2018},
  url          = {http://arxiv.org/abs/1812.02648},
  eprinttype   = {arXiv},
  eprint       = {1812.02648},
  timestamp    = {Tue, 01 Jan 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1812-02648.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1812.07626), 2018.
@article{DBLP:journals/corr/abs-1812-07626,
  author       = {Borsa, Diana and
                  Barreto, Andr{\'{e}} and
                  Quan, John and
                  Mankowitz, Daniel J. and
                  Munos, R{\'{e}}mi and
                  van Hasselt, Hado and
                  Silver, David and
                  Schaul, Tom},
  title        = {Universal Successor Features Approximators},
  journal      = {CoRR},
  volume       = {abs/1812.07626},
  year         = {2018},
  url          = {http://arxiv.org/abs/1812.07626},
  eprinttype   = {arXiv},
  eprint       = {1812.07626},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1812-07626.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2017 conference paper (Proceedings of Machine Learning Research, vol. 70).
@inproceedings{DBLP:conf/icml/SilverHHSGHDRRB17,
  author       = {Silver, David and
                  van Hasselt, Hado and
                  Hessel, Matteo and
                  Schaul, Tom and
                  Guez, Arthur and
                  Harley, Tim and
                  Dulac{-}Arnold, Gabriel and
                  Reichert, David P. and
                  Rabinowitz, Neil C. and
                  Barreto, Andr{\'{e}} and
                  Degris, Thomas},
  editor       = {Precup, Doina and
                  Teh, Yee Whye},
  title        = {The Predictron: End-To-End Learning and Planning},
  booktitle    = {Proceedings of the 34th International Conference on Machine Learning,
                  {ICML} 2017, Sydney, NSW, Australia, 6-11 August 2017},
  series       = {Proceedings of Machine Learning Research},
  volume       = {70},
  pages        = {3191--3199},
  publisher    = {{PMLR}},
  year         = {2017},
  url          = {http://proceedings.mlr.press/v70/silver17a.html},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/SilverHHSGHDRRB17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS (NIPS) 2017 conference paper.
@inproceedings{DBLP:conf/nips/XuMHBSS17,
  author       = {Xu, Zhongwen and
                  Modayil, Joseph and
                  van Hasselt, Hado and
                  Barreto, Andr{\'{e}} and
                  Silver, David and
                  Schaul, Tom},
  editor       = {Guyon, Isabelle and
                  von Luxburg, Ulrike and
                  Bengio, Samy and
                  Wallach, Hanna M. and
                  Fergus, Rob and
                  Vishwanathan, S. V. N. and
                  Garnett, Roman},
  title        = {Natural Value Approximators: Learning when to Trust Past Estimates},
  booktitle    = {Advances in Neural Information Processing Systems 30: Annual Conference
                  on Neural Information Processing Systems 2017, December 4-9, 2017,
                  Long Beach, CA, {USA}},
  pages        = {2120--2128},
  year         = {2017},
  url          = {https://proceedings.neurips.cc/paper/2017/hash/fb60d411a5c5b72b2e7d3527cfc84fd0-Abstract.html},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/XuMHBSS17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS (NIPS) 2017 conference paper.
@inproceedings{DBLP:conf/nips/BarretoDMHSSH17,
  author       = {Barreto, Andr{\'{e}} and
                  Dabney, Will and
                  Munos, R{\'{e}}mi and
                  Hunt, Jonathan J. and
                  Schaul, Tom and
                  Silver, David and
                  van Hasselt, Hado},
  editor       = {Guyon, Isabelle and
                  von Luxburg, Ulrike and
                  Bengio, Samy and
                  Wallach, Hanna M. and
                  Fergus, Rob and
                  Vishwanathan, S. V. N. and
                  Garnett, Roman},
  title        = {Successor Features for Transfer in Reinforcement Learning},
  booktitle    = {Advances in Neural Information Processing Systems 30: Annual Conference
                  on Neural Information Processing Systems 2017, December 4-9, 2017,
                  Long Beach, CA, {USA}},
  pages        = {4055--4065},
  year         = {2017},
  url          = {https://proceedings.neurips.cc/paper/2017/hash/350db081a661525235354dd3e19b8c05-Abstract.html},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/BarretoDMHSSH17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1708.04782), 2017.
% The game name "StarCraft II" is braced as a unit so sentence-casing styles
% keep its inner capital; the colon sits outside the braces so styles can see
% the subtitle boundary.
@article{DBLP:journals/corr/abs-1708-04782,
  author       = {Vinyals, Oriol and
                  Ewalds, Timo and
                  Bartunov, Sergey and
                  Georgiev, Petko and
                  Vezhnevets, Alexander Sasha and
                  Yeo, Michelle and
                  Makhzani, Alireza and
                  K{\"{u}}ttler, Heinrich and
                  Agapiou, John P. and
                  Schrittwieser, Julian and
                  Quan, John and
                  Gaffney, Stephen and
                  Petersen, Stig and
                  Simonyan, Karen and
                  Schaul, Tom and
                  van Hasselt, Hado and
                  Silver, David and
                  Lillicrap, Timothy P. and
                  Calderone, Kevin and
                  Keet, Paul and
                  Brunasso, Anthony and
                  Lawrence, David and
                  Ekermo, Anders and
                  Repp, Jacob and
                  Tsing, Rodney},
  title        = {{StarCraft II}: A New Challenge for Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1708.04782},
  year         = {2017},
  url          = {http://arxiv.org/abs/1708.04782},
  eprinttype   = {arXiv},
  eprint       = {1708.04782},
  timestamp    = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1708-04782.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1710.02298), 2017.
@article{DBLP:journals/corr/abs-1710-02298,
  author       = {Hessel, Matteo and
                  Modayil, Joseph and
                  van Hasselt, Hado and
                  Schaul, Tom and
                  Ostrovski, Georg and
                  Dabney, Will and
                  Horgan, Daniel and
                  Piot, Bilal and
                  Azar, Mohammad Gheshlaghi and
                  Silver, David},
  title        = {Rainbow: Combining Improvements in Deep Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/1710.02298},
  year         = {2017},
  url          = {http://arxiv.org/abs/1710.02298},
  eprinttype   = {arXiv},
  eprint       = {1710.02298},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1710-02298.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2016 conference paper.
% The "Q" of "Q-Learning" is braced so sentence-casing styles do not print "q-learning".
@inproceedings{DBLP:conf/aaai/HasseltGS16,
  author       = {van Hasselt, Hado and
                  Guez, Arthur and
                  Silver, David},
  editor       = {Schuurmans, Dale and
                  Wellman, Michael P.},
  title        = {Deep Reinforcement Learning with Double {Q}-Learning},
  booktitle    = {Proceedings of the Thirtieth {AAAI} Conference on Artificial Intelligence,
                  February 12-17, 2016, Phoenix, Arizona, {USA}},
  pages        = {2094--2100},
  publisher    = {{AAAI} Press},
  year         = {2016},
  url          = {https://doi.org/10.1609/aaai.v30i1.10295},
  doi          = {10.1609/AAAI.V30I1.10295},
  timestamp    = {Mon, 04 Sep 2023 15:08:28 +0200},
  biburl       = {https://dblp.org/rec/conf/aaai/HasseltGS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2016 conference paper (JMLR Workshop and Conference Proceedings, vol. 48).
@inproceedings{DBLP:conf/icml/WangSHHLF16,
  author       = {Wang, Ziyu and
                  Schaul, Tom and
                  Hessel, Matteo and
                  van Hasselt, Hado and
                  Lanctot, Marc and
                  de Freitas, Nando},
  editor       = {Balcan, Maria{-}Florina and
                  Weinberger, Kilian Q.},
  title        = {Dueling Network Architectures for Deep Reinforcement Learning},
  booktitle    = {Proceedings of the 33rd International Conference on Machine Learning,
                  {ICML} 2016, New York City, NY, USA, June 19-24, 2016},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {48},
  pages        = {1995--2003},
  publisher    = {JMLR.org},
  year         = {2016},
  url          = {http://proceedings.mlr.press/v48/wangf16.html},
  timestamp    = {Wed, 29 May 2019 08:41:46 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/WangSHHLF16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS (NIPS) 2016 conference paper.
@inproceedings{DBLP:conf/nips/HasseltGHMS16,
  author       = {van Hasselt, Hado and
                  Guez, Arthur and
                  Hessel, Matteo and
                  Mnih, Volodymyr and
                  Silver, David},
  editor       = {Lee, Daniel D. and
                  Sugiyama, Masashi and
                  von Luxburg, Ulrike and
                  Guyon, Isabelle and
                  Garnett, Roman},
  title        = {Learning values across many orders of magnitude},
  booktitle    = {Advances in Neural Information Processing Systems 29: Annual Conference
                  on Neural Information Processing Systems 2016, December 5-10, 2016,
                  Barcelona, Spain},
  pages        = {4287--4295},
  year         = {2016},
  url          = {https://proceedings.neurips.cc/paper/2016/hash/5227b6aaf294f5f027273aebf16015f2-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/HasseltGHMS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1602.07714), 2016.
@article{DBLP:journals/corr/HasseltGHS16,
  author       = {van Hasselt, Hado and
                  Guez, Arthur and
                  Hessel, Matteo and
                  Silver, David},
  title        = {Learning functions across many orders of magnitudes},
  journal      = {CoRR},
  volume       = {abs/1602.07714},
  year         = {2016},
  url          = {http://arxiv.org/abs/1602.07714},
  eprinttype   = {arXiv},
  eprint       = {1602.07714},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/HasseltGHS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1612.08810), 2016.
@article{DBLP:journals/corr/SilverHHSGHDRRB16,
  author       = {Silver, David and
                  van Hasselt, Hado and
                  Hessel, Matteo and
                  Schaul, Tom and
                  Guez, Arthur and
                  Harley, Tim and
                  Dulac{-}Arnold, Gabriel and
                  Reichert, David P. and
                  Rabinowitz, Neil C. and
                  Barreto, Andr{\'{e}} and
                  Degris, Thomas},
  title        = {The Predictron: End-To-End Learning and Planning},
  journal      = {CoRR},
  volume       = {abs/1612.08810},
  year         = {2016},
  url          = {http://arxiv.org/abs/1612.08810},
  eprinttype   = {arXiv},
  eprint       = {1612.08810},
  timestamp    = {Mon, 10 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/SilverHHSGHDRRB16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1508.04582), 2015.
@article{DBLP:journals/corr/HasseltS15,
  author       = {van Hasselt, Hado and
                  Sutton, Richard S.},
  title        = {Learning to Predict Independent of Span},
  journal      = {CoRR},
  volume       = {abs/1508.04582},
  year         = {2015},
  url          = {http://arxiv.org/abs/1508.04582},
  eprinttype   = {arXiv},
  eprint       = {1508.04582},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/HasseltS15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1509.06461), 2015.
% The "Q" of "Q-learning" is braced so sentence-casing styles do not print "q-learning".
@article{DBLP:journals/corr/HasseltGS15,
  author       = {van Hasselt, Hado and
                  Guez, Arthur and
                  Silver, David},
  title        = {Deep Reinforcement Learning with Double {Q}-learning},
  journal      = {CoRR},
  volume       = {abs/1509.06461},
  year         = {2015},
  url          = {http://arxiv.org/abs/1509.06461},
  eprinttype   = {arXiv},
  eprint       = {1509.06461},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/HasseltGS15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2014 conference paper (JMLR Workshop and Conference Proceedings, vol. 32).
% The algorithm name is typeset with a real lambda and braced, and "Monte Carlo"
% is braced, so sentence-casing styles leave both untouched.
@inproceedings{DBLP:conf/icml/SuttonMPH14,
  author       = {Sutton, Richard S. and
                  Mahmood, Ashique Rupam and
                  Precup, Doina and
                  van Hasselt, Hado},
  title        = {A new {Q($\lambda$)} with interim forward view and {Monte Carlo} equivalence},
  booktitle    = {Proceedings of the 31st International Conference on Machine Learning,
                  {ICML} 2014, Beijing, China, 21-26 June 2014},
  series       = {{JMLR} Workshop and Conference Proceedings},
  volume       = {32},
  pages        = {568--576},
  publisher    = {JMLR.org},
  year         = {2014},
  url          = {http://proceedings.mlr.press/v32/sutton14.html},
  timestamp    = {Wed, 29 May 2019 08:41:45 +0200},
  biburl       = {https://dblp.org/rec/conf/icml/SuttonMPH14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS (NIPS) 2014 conference paper.
@inproceedings{DBLP:conf/nips/MahmoodHS14,
  author       = {Mahmood, Ashique Rupam and
                  van Hasselt, Hado and
                  Sutton, Richard S.},
  editor       = {Ghahramani, Zoubin and
                  Welling, Max and
                  Cortes, Corinna and
                  Lawrence, Neil D. and
                  Weinberger, Kilian Q.},
  title        = {Weighted importance sampling for off-policy learning with linear function
                  approximation},
  booktitle    = {Advances in Neural Information Processing Systems 27: Annual Conference
                  on Neural Information Processing Systems 2014, December 8-13 2014,
                  Montreal, Quebec, Canada},
  pages        = {3014--3022},
  year         = {2014},
  url          = {https://proceedings.neurips.cc/paper/2014/hash/be53ee61104935234b174e62a07e53cf-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/MahmoodHS14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% UAI 2014 conference paper.
% The algorithm name TD(lambda) is typeset in math and braced so styles keep it intact.
% The URL query string uses plain ampersands (escaped per this file's convention)
% instead of leftover HTML character entities.
@inproceedings{DBLP:conf/uai/HasseltMS14,
  author       = {van Hasselt, Hado and
                  Mahmood, Ashique Rupam and
                  Sutton, Richard S.},
  editor       = {Zhang, Nevin L. and
                  Tian, Jin},
  title        = {Off-policy {TD($\lambda$)} with a true online equivalence},
  booktitle    = {Proceedings of the Thirtieth Conference on Uncertainty in Artificial
                  Intelligence, {UAI} 2014, Quebec City, Quebec, Canada, July 23-27,
                  2014},
  pages        = {330--339},
  publisher    = {{AUAI} Press},
  year         = {2014},
  url          = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1\&smnu=2\&article\_id=2468\&proceeding\_id=30},
  timestamp    = {Wed, 03 Feb 2021 11:09:27 +0100},
  biburl       = {https://dblp.org/rec/conf/uai/HasseltMS14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IEEE CIPLS 2013 symposium paper.
% The second author is written in "Last, First" form because the surname is the
% two-word "La Poutre" (with acute accent); in "First Last" order BibTeX would
% treat "La" as part of the given name.
@inproceedings{DBLP:conf/cipls/HasseltP13,
  author       = {van Hasselt, Hado and
                  La Poutr{\'{e}}, Han},
  title        = {Stacking under uncertainty: We know how to predict, but how should
                  we act?},
  booktitle    = {{IEEE} Symposium on Computational Intelligence In Production And Logistics
                  Systems, {CIPLS} 2013, Singapore, April 16-19, 2013},
  pages        = {25--32},
  publisher    = {{IEEE}},
  year         = {2013},
  url          = {https://doi.org/10.1109/CIPLS.2013.6595196},
  doi          = {10.1109/CIPLS.2013.6595196},
  timestamp    = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl       = {https://dblp.org/rec/conf/cipls/HasseltP13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1302.7175), 2013.
@article{DBLP:journals/corr/abs-1302-7175,
  author       = {van Hasselt, Hado},
  title        = {Estimating the Maximum Expected Value: An Analysis of (Nested) Cross
                  Validation and the Maximum Sample Average},
  journal      = {CoRR},
  volume       = {abs/1302.7175},
  year         = {2013},
  url          = {http://arxiv.org/abs/1302.7175},
  eprinttype   = {arXiv},
  eprint       = {1302.7175},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1302-7175.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Chapter in the edited Springer volume "Reinforcement Learning" (2012).
@incollection{DBLP:books/sp/12/Hasselt12,
  author       = {van Hasselt, Hado},
  editor       = {Wiering, Marco A. and
                  van Otterlo, Martijn},
  title        = {Reinforcement Learning in Continuous State and Action Spaces},
  booktitle    = {Reinforcement Learning},
  series       = {Adaptation, Learning, and Optimization},
  volume       = {12},
  pages        = {207--251},
  publisher    = {Springer},
  year         = {2012},
  url          = {https://doi.org/10.1007/978-3-642-27645-3\_7},
  doi          = {10.1007/978-3-642-27645-3\_7},
  timestamp    = {Tue, 29 Dec 2020 18:14:51 +0100},
  biburl       = {https://dblp.org/rec/books/sp/12/Hasselt12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% PhD thesis, Utrecht University, 2011.
@phdthesis{DBLP:phd/basesearch/vanHasselt11,
  author       = {van Hasselt, Hado Philip},
  title        = {Insights in reinforcement learning: formal analysis and empirical
                  evaluation of temporal-difference learning algorithms},
  school       = {Utrecht University, Netherlands},
  year         = {2011},
  url          = {https://dspace.library.uu.nl/handle/1874/192729},
  timestamp    = {Wed, 24 Jul 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/phd/basesearch/vanHasselt11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, J. Mach. Learn. Res. vol. 12 (2011).
@article{DBLP:journals/jmlr/SeijenWHW11,
  author       = {van Seijen, Harm and
                  Whiteson, Shimon and
                  van Hasselt, Hado and
                  Wiering, Marco A.},
  title        = {Exploiting Best-Match Equations for Efficient Reinforcement Learning},
  journal      = {J. Mach. Learn. Res.},
  volume       = {12},
  pages        = {2045--2094},
  year         = {2011},
  url          = {https://dl.acm.org/doi/10.5555/1953048.2021066},
  doi          = {10.5555/1953048.2021066},
  timestamp    = {Thu, 02 Jun 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jmlr/SeijenWHW11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IEEE ADPRL 2011 symposium paper.
@inproceedings{DBLP:conf/adprl/WieringHPS11,
  author       = {Wiering, Marco A. and
                  van Hasselt, Hado and
                  Pietersma, Auke{-}Dirk and
                  Schomaker, Lambert},
  title        = {Reinforcement learning algorithms for solving classification problems},
  booktitle    = {2011 {IEEE} Symposium on Adaptive Dynamic Programming And Reinforcement
                  Learning, {ADPRL} 2011, Paris, France, April 12-14, 2011},
  pages        = {91--96},
  publisher    = {{IEEE}},
  year         = {2011},
  url          = {https://doi.org/10.1109/ADPRL.2011.5967372},
  doi          = {10.1109/ADPRL.2011.5967372},
  timestamp    = {Tue, 29 Dec 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/adprl/WieringHPS11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS (NIPS) 2010 conference paper.
% The "Q" of "Q-learning" is braced so sentence-casing styles do not print "q-learning".
@inproceedings{DBLP:conf/nips/Hasselt10,
  author       = {van Hasselt, Hado},
  editor       = {Lafferty, John D. and
                  Williams, Christopher K. I. and
                  Shawe{-}Taylor, John and
                  Zemel, Richard S. and
                  Culotta, Aron},
  title        = {Double {Q}-learning},
  booktitle    = {Advances in Neural Information Processing Systems 23: 24th Annual
                  Conference on Neural Information Processing Systems 2010. Proceedings
                  of a meeting held 6-9 December 2010, Vancouver, British Columbia,
                  Canada},
  pages        = {2613--2621},
  publisher    = {Curran Associates, Inc.},
  year         = {2010},
  url          = {https://proceedings.neurips.cc/paper/2010/hash/091d584fced301b442654dd8c23b3fc9-Abstract.html},
  timestamp    = {Mon, 16 May 2022 15:41:51 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/Hasselt10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IEEE ADPRL 2009 symposium paper.
@inproceedings{DBLP:conf/adprl/WieringH09,
  author       = {Wiering, Marco A. and
                  van Hasselt, Hado},
  title        = {The {QV} family compared to other reinforcement learning algorithms},
  booktitle    = {{IEEE} Symposium on Adaptive Dynamic Programming and Reinforcement
                  Learning, {ADPRL} 2009, Nashville, TN, USA, March 31 - April 1, 2009},
  pages        = {101--108},
  publisher    = {{IEEE}},
  year         = {2009},
  url          = {https://doi.org/10.1109/ADPRL.2009.4927532},
  doi          = {10.1109/ADPRL.2009.4927532},
  timestamp    = {Wed, 16 Oct 2019 14:14:48 +0200},
  biburl       = {https://dblp.org/rec/conf/adprl/WieringH09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IEEE ADPRL 2009 symposium paper.
% The algorithm name "Expected Sarsa" is braced so sentence-casing styles keep its capitals.
@inproceedings{DBLP:conf/adprl/SeijenHWW09,
  author       = {van Seijen, Harm and
                  van Hasselt, Hado and
                  Whiteson, Shimon and
                  Wiering, Marco A.},
  title        = {A theoretical and empirical analysis of {Expected Sarsa}},
  booktitle    = {{IEEE} Symposium on Adaptive Dynamic Programming and Reinforcement
                  Learning, {ADPRL} 2009, Nashville, TN, USA, March 31 - April 1, 2009},
  pages        = {177--184},
  publisher    = {{IEEE}},
  year         = {2009},
  url          = {https://doi.org/10.1109/ADPRL.2009.4927542},
  doi          = {10.1109/ADPRL.2009.4927542},
  timestamp    = {Fri, 26 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/adprl/SeijenHWW09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AGS 2009 workshop paper (Lecture Notes in Computer Science, vol. 5920).
@inproceedings{DBLP:conf/ags/WestraHDD09,
  author       = {Westra, Joost and
                  van Hasselt, Hado and
                  Dignum, Frank and
                  Dignum, Virginia},
  editor       = {Dignum, Frank and
                  Bradshaw, Jeffrey M. and
                  Silverman, Barry G. and
                  van Doesburg, Willem A.},
  title        = {Adaptive Serious Games Using Agent Organizations},
  booktitle    = {Agents for Games and Simulations, Trends in Techniques, Concepts and
                  Design {[AGS} 2009, The First International Workshop on Agents for
                  Games and Simulations, May 11, 2009, Budapest, Hungary]},
  series       = {Lecture Notes in Computer Science},
  volume       = {5920},
  pages        = {206--220},
  publisher    = {Springer},
  year         = {2009},
  url          = {https://doi.org/10.1007/978-3-642-11198-3\_14},
  doi          = {10.1007/978-3-642-11198-3\_14},
  timestamp    = {Thu, 28 Nov 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ags/WestraHDD09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IJCNN 2009 conference paper.
@inproceedings{DBLP:conf/ijcnn/HasseltW09,
  author       = {van Hasselt, Hado and
                  Wiering, Marco A.},
  title        = {Using continuous action spaces to solve discrete problems},
  booktitle    = {International Joint Conference on Neural Networks, {IJCNN} 2009, Atlanta,
                  Georgia, USA, 14-19 June 2009},
  pages        = {1149--1156},
  publisher    = {{IEEE} Computer Society},
  year         = {2009},
  url          = {https://doi.org/10.1109/IJCNN.2009.5178745},
  doi          = {10.1109/IJCNN.2009.5178745},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ijcnn/HasseltW09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, IEEE Trans. Syst. Man Cybern. Part B, vol. 38 no. 4 (2008).
@article{DBLP:journals/tsmc/WieringH08,
  author       = {Wiering, Marco A. and
                  van Hasselt, Hado},
  title        = {Ensemble Algorithms in Reinforcement Learning},
  journal      = {{IEEE} Trans. Syst. Man Cybern. Part {B}},
  volume       = {38},
  number       = {4},
  pages        = {930--936},
  year         = {2008},
  url          = {https://doi.org/10.1109/TSMCB.2008.920231},
  doi          = {10.1109/TSMCB.2008.920231},
  timestamp    = {Tue, 29 Dec 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/tsmc/WieringH08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IEEE CIG 2008 symposium paper.
% The booktitle acronym reads CIG 2008, consistent with this record's year,
% conference dates and DOI.
@inproceedings{DBLP:conf/cig/WestraHDD08,
  author       = {Westra, Joost and
                  van Hasselt, Hado and
                  Dignum, Virginia and
                  Dignum, Frank},
  editor       = {Hingston, Philip and
                  Barone, Luigi},
  title        = {On-line adapting games using agent organizations},
  booktitle    = {Proceedings of the 2008 {IEEE} Symposium on Computational Intelligence
                  and Games, {CIG} 2008, Perth, Australia, 15-18 December, 2008},
  pages        = {243--250},
  publisher    = {{IEEE}},
  year         = {2008},
  url          = {https://doi.org/10.1109/CIG.2008.5035646},
  doi          = {10.1109/CIG.2008.5035646},
  timestamp    = {Thu, 28 Nov 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/cig/WestraHDD08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
a service of Schloss Dagstuhl - Leibniz Center for Informatics