Search dblp for Publications

export results for "toc:db/journals/ml/ml22.bht:"

 download as .bib file

% Bradtke and Barto, 1996: Mach. Learn. 22(1-3), pp. 33--57.
@article{DBLP:journals/ml/BradtkeB96,
  author    = {Bradtke, Steven J. and Barto, Andrew G.},
  title     = {Linear Least-Squares Algorithms for Temporal Difference Learning},
  journal   = {Mach. Learn.},
  volume    = {22},
  number    = {1-3},
  pages     = {33--57},
  year      = {1996},
  doi       = {10.1023/A:1018056104778},
  url       = {https://doi.org/10.1023/A:1018056104778},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ml/BradtkeB96.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Dietterich, 1996: editorial, Mach. Learn. 22(1-3), pp. 5--6.
@article{DBLP:journals/ml/Dietterich96,
  author    = {Dietterich, Thomas G.},
  title     = {Editorial},
  journal   = {Mach. Learn.},
  volume    = {22},
  number    = {1-3},
  pages     = {5--6},
  year      = {1996},
  doi       = {10.1023/A:1018067001143},
  url       = {https://doi.org/10.1023/A:1018067001143},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ml/Dietterich96.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Heger, 1996: Mach. Learn. 22(1-3), pp. 197--225.
@article{DBLP:journals/ml/Heger96,
  author    = {Heger, Matthias},
  title     = {The Loss from Imperfect Value Functions in Expectation-Based and Minimax-Based Tasks},
  journal   = {Mach. Learn.},
  volume    = {22},
  number    = {1-3},
  pages     = {197--225},
  year      = {1996},
  doi       = {10.1023/A:1018016523433},
  url       = {https://doi.org/10.1023/A:1018016523433},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ml/Heger96.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Kaelbling, 1996: introduction, Mach. Learn. 22(1-3), pp. 7--9.
@article{DBLP:journals/ml/Kaelbling96,
  author    = {Kaelbling, Leslie Pack},
  title     = {Introduction},
  journal   = {Mach. Learn.},
  volume    = {22},
  number    = {1-3},
  pages     = {7--9},
  year      = {1996},
  doi       = {10.1023/A:1018091703869},
  url       = {https://doi.org/10.1023/A:1018091703869},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ml/Kaelbling96.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Koenig and Simmons, 1996: Mach. Learn. 22(1-3), pp. 227--250.
@article{DBLP:journals/ml/KoenigS96,
  author    = {Koenig, Sven and Simmons, Reid G.},
  title     = {The Effect of Representation and Knowledge on Goal-Directed Exploration with Reinforcement-Learning Algorithms},
  journal   = {Mach. Learn.},
  volume    = {22},
  number    = {1-3},
  pages     = {227--250},
  year      = {1996},
  doi       = {10.1023/A:1018068507504},
  url       = {https://doi.org/10.1023/A:1018068507504},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ml/KoenigS96.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Maclin and Shavlik, 1996: Mach. Learn. 22(1-3), pp. 251--281.
@article{DBLP:journals/ml/MaclinS96,
  author    = {Maclin, Richard and Shavlik, Jude W.},
  title     = {Creating Advice-Taking Reinforcement Learners},
  journal   = {Mach. Learn.},
  volume    = {22},
  number    = {1-3},
  pages     = {251--281},
  year      = {1996},
  doi       = {10.1023/A:1018020625251},
  url       = {https://doi.org/10.1023/A:1018020625251},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ml/MaclinS96.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Mahadevan, 1996: Mach. Learn. 22(1-3), pp. 159--195.
@article{DBLP:journals/ml/Mahadevan96,
  author    = {Mahadevan, Sridhar},
  title     = {Average Reward Reinforcement Learning: Foundations, Algorithms, and Empirical Results},
  journal   = {Mach. Learn.},
  volume    = {22},
  number    = {1-3},
  pages     = {159--195},
  year      = {1996},
  doi       = {10.1023/A:1018064306595},
  url       = {https://doi.org/10.1023/A:1018064306595},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ml/Mahadevan96.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Moriarty and Miikkulainen, 1996: Mach. Learn. 22(1-3), pp. 11--32.
@article{DBLP:journals/ml/MoriartyM96,
  author    = {Moriarty, David E. and Miikkulainen, Risto},
  title     = {Efficient Reinforcement Learning through Symbiotic Evolution},
  journal   = {Mach. Learn.},
  volume    = {22},
  number    = {1-3},
  pages     = {11--32},
  year      = {1996},
  doi       = {10.1023/A:1018004120707},
  url       = {https://doi.org/10.1023/A:1018004120707},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ml/MoriartyM96.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Peng and Williams, 1996: Mach. Learn. 22(1-3), pp. 283--290.
% The "Q" in the title is brace-protected: styles that sentence-case titles
% would otherwise lowercase it and print "q-learning". Only the Q is braced
% (it is a complete hyphen-delimited token), so "Learning" can still be
% downcased by the style.
@article{DBLP:journals/ml/PengW96,
  author       = {Jing Peng and
                  Ronald J. Williams},
  title        = {Incremental Multi-Step {Q}-Learning},
  journal      = {Mach. Learn.},
  volume       = {22},
  number       = {1-3},
  pages        = {283--290},
  year         = {1996},
  url          = {https://doi.org/10.1023/A:1018076709321},
  doi          = {10.1023/A:1018076709321},
  timestamp    = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ml/PengW96.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Schapire and Warmuth, 1996: Mach. Learn. 22(1-3), pp. 95--121.
@article{DBLP:journals/ml/SchapireW96,
  author    = {Schapire, Robert E. and Warmuth, Manfred K.},
  title     = {On the Worst-Case Analysis of Temporal-Difference Learning Algorithms},
  journal   = {Mach. Learn.},
  volume    = {22},
  number    = {1-3},
  pages     = {95--121},
  year      = {1996},
  doi       = {10.1023/A:1018060205686},
  url       = {https://doi.org/10.1023/A:1018060205686},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ml/SchapireW96.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Singh and Sutton, 1996: Mach. Learn. 22(1-3), pp. 123--158.
@article{DBLP:journals/ml/SinghS96,
  author    = {Singh, Satinder P. and Sutton, Richard S.},
  title     = {Reinforcement Learning with Replacing Eligibility Traces},
  journal   = {Mach. Learn.},
  volume    = {22},
  number    = {1-3},
  pages     = {123--158},
  year      = {1996},
  doi       = {10.1023/A:1018012322525},
  url       = {https://doi.org/10.1023/A:1018012322525},
  timestamp = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ml/SinghS96.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% Tsitsiklis and Van Roy, 1996: Mach. Learn. 22(1-3), pp. 59--94.
% Authors are given in "Last, First" form because the second surname is the
% compound "Van Roy". In "First Last" form the capitalised "Van" is not
% recognised as a particle, so BibTeX would parse First="Benjamin Van",
% Last="Roy" and sort/abbreviate the entry under "Roy".
@article{DBLP:journals/ml/TsitsiklisR96,
  author       = {Tsitsiklis, John N. and
                  Van Roy, Benjamin},
  title        = {Feature-Based Methods for Large Scale Dynamic Programming},
  journal      = {Mach. Learn.},
  volume       = {22},
  number       = {1-3},
  pages        = {59--94},
  year         = {1996},
  url          = {https://doi.org/10.1023/A:1018008221616},
  doi          = {10.1023/A:1018008221616},
  timestamp    = {Mon, 02 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ml/TsitsiklisR96.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
a service of Schloss Dagstuhl - Leibniz Center for Informatics