@COMMENT This file was generated by bib2html.pl version 0.94
@COMMENT written by Patrick Riley
@COMMENT This file came from Sanjit Seshia's publication pages at http://www.eecs.berkeley.edu/~sseshia
@inproceedings{vazquez-cav20,
  author    = {Vazquez-Chanlatte, Marcell and
               Seshia, Sanjit A.},
  title     = {Maximum Causal Entropy Specification Inference from Demonstrations},
  booktitle = {32nd International Conference on Computer Aided Verification ({CAV})},
  month     = jul,
  year      = {2020},
  abstract  = {In many settings, such as robotics, demonstrations provide a natural
               way to specify tasks. However, most methods for learning from
               demonstrations either do not provide guarantees that the learned
               artifacts can be safely composed or do not explicitly capture
               temporal properties. Motivated by this deficit, recent works have
               proposed learning Boolean \emph{task specifications}, a class of
               Boolean non-Markovian rewards which admit well-defined composition
               and explicitly handle historical dependencies. This work continues
               this line of research by adapting maximum \emph{causal} entropy
               inverse reinforcement learning to estimate the posteriori
               probability of a specification given a multi-set of
               demonstrations. The key algorithmic insight is to leverage the
               extensive literature and tooling on reduced ordered binary decision
               diagrams to efficiently encode a time unrolled Markov Decision
               Process. This enables transforming a na{\"\i}ve algorithm with running
               time exponential in the episode length, into a polynomial time
               algorithm.}
}