@comment{Conference paper published in a Lecture Notes in Computer Science
  proceedings volume. The citation key below appears to be a generated export
  identifier; it is kept unchanged so that existing citations still resolve.}
@inproceedings{8da0e9ec9015416398ff9957f0717f9e,
  author    = {Sarjant, S. and Pfahringer, B. and Driessens, K. and Smith, T.},
  title     = {A Direct Policy-Search Algorithm for Relational Reinforcement Learning},
  booktitle = {Inductive Logic Programming},
  series    = {Lecture Notes in Computer Science},
  volume    = {8812},
  pages     = {76--92},
  publisher = {Springer},
  address   = {Berlin, Heidelberg},
  year      = {2014},
  isbn      = {978-3-662-44922-6},
  doi       = {10.1007/978-3-662-44923-3_6},
  language  = {English},
  abstract  = {In the field of relational reinforcement learning --- a representational generalisation of reinforcement learning --- the first-order representation of environments results in a potentially infinite number of possible states, requiring learning agents to use some form of abstraction to learn effectively. Instead of forming an abstraction over the state-action space, an alternative technique is to create behaviour directly through policy-search. The algorithm named CERRLA presented in this paper uses the cross-entropy method to learn behaviour directly in the form of decision-lists of relation rules for solving problems in a range of different environments, without the need for expert guidance in the learning process. The behaviour produced by the algorithm is easy to comprehend and is biased towards compactness. The results obtained show that CERRLA is competitive in both the standard testing environment and in Ms. Pac-MAN and CARCASSONNE, two large and complex game environments.},
}