@COMMENT This file was generated by bib2html.pl <https://sourceforge.net/projects/bib2html/> version 0.94
@COMMENT written by Patrick Riley <http://sourceforge.net/users/patstg/>
@COMMENT This file came from Sanjit Seshia's publication pages at http://www.eecs.berkeley.edu/~sseshia
@COMMENT Conference paper: Vazquez-Chanlatte and Seshia, CAV 2020.
@inproceedings{vazquez-cav20,
  author    = {Vazquez{-}Chanlatte, Marcell and
               Seshia, Sanjit A.},
  title     = {Maximum Causal Entropy Specification Inference from Demonstrations},
  booktitle = {32nd International Conference on Computer Aided Verification (CAV)},
  month     = jul,
  year      = {2020},
  abstract  = {In many settings, such as robotics, demonstrations provide a natural
               way to specify tasks. However, most methods for learning from
               demonstrations either do not provide guarantees that the learned
               artifacts can be safely composed or do not explicitly capture
               temporal properties. Motivated by this deficit, recent works have
               proposed learning Boolean \emph{task specifications}, a class of
               Boolean non-Markovian rewards which admit well-defined composition
               and explicitly handle historical dependencies. This work continues
               this line of research by adapting maximum \emph{causal} entropy
               inverse reinforcement learning to estimate the posteriori
               probability of a specification given a multi-set of
               demonstrations. The key algorithmic insight is to leverage the
               extensive literature and tooling on reduced ordered binary decision
               diagrams to efficiently encode a time unrolled Markov Decision
               Process. This enables transforming a na{\"i}ve algorithm with running
               time exponential in the episode length, into a polynomial time
               algorithm.},
}