# These packages are required by python-docx: it depends on lxml,
# which needs the following development packages to be installed.
python-dev
libxml2-dev
libxslt1-dev

# parse Word documents
antiword

# parse PDFs
poppler-utils
