#!/bin/bash -f

# command line bash shell script to convert the .pptx format to text

# Print the command-line help text to stdout and terminate the script.
#
# Takes no arguments (any that are passed are ignored).
# Always exits with status 1.
pptx2txt_usage ()
{
  # one argument per output line; the " " entries are the spacer lines
  printf '%s\n' \
    " " \
    "USAGE: pptx2txt [options] PPTX_FILENAME" \
    " " \
    "OPTIONS:" \
    " " \
    "  -v, --verbose     verbose output including filename and slide number heading" \
    " " \
    "  -h, --help        display this help message" \
    " "
  exit 1
}

# Report an error and hand over to the usage printer.
#
# Arguments: $1 - error message text
# Outputs:   a spacer line and "ERROR: <message>" on stdout, then whatever
#            pptx2txt_usage prints
error_exit ()
{
  local message="$1"

  printf '%s\n' " " "ERROR: ${message}"
  pptx2txt_usage
}

# Parse the command line.
#
# Arguments: the script's positional parameters ("$@")
# Sets the globals:
#   verbose        1 when -v/--verbose was seen, otherwise 0
#   help           1 when -h/--help was seen, otherwise 0
#   pptx_filename  the first non-option argument, which must name an existing path
#
# Parsing stops at the first non-option argument; anything after it is ignored.
# Problems are reported through error_exit, help through pptx2txt_usage.
parse_args ()
{
  # initialize flags and arguments; pptx_filename is reset explicitly so a
  # same-named variable inherited from the environment cannot be mistaken
  # for a filename given on the command line
  verbose=0
  help=0
  pptx_filename=""

  # loop over arguments (arithmetic test: inside [[ ]] the > operator compares strings)
  while (( $# > 0 ))
  do
    case "$1" in
      -v|--verbose)
        verbose=1
        ;;
      -h|--help)
        help=1
        ;;
      *)
        if [ -e "$1" ]
        then
          pptx_filename="$1"
        elif [[ "$1" == -* ]]
        then
          error_exit "unknown option '$1'"
        else
          # a missing file is not an option problem; say so
          error_exit "PPTX_FILENAME '$1' does not exist"
        fi
        break
        ;;
    esac
    shift
  done

  # display help if requested
  if [ "$help" -eq 1 ]
  then
    pptx2txt_usage
  fi

  # detect empty PPTX_FILENAME
  if [ -z "$pptx_filename" ]
  then
    error_exit "No PPTX_FILENAME name detected."
  fi
}

parse_args "$@"

# find number of slides in pptx file: take the second field of the last line
# that `unzip -l` prints (expected to be its "N files" summary line).
# The member pattern is quoted so that unzip, not the shell, expands it,
# regardless of whether shell globbing is enabled.
nslides=$(unzip -l "$pptx_filename" 'ppt/slides/slide*.xml' | awk '{print $2}' | tail -1)

# stop when no numeric slide count came back (unreadable archive, or no entry
# matched); otherwise the loop below would run over a bogus range
if ! [[ "$nslides" =~ ^[0-9]+$ ]]
then
  echo "ERROR: could not determine the slide count of '$pptx_filename'" >&2
  exit 1
fi

# echo PPTX_FILENAME and TOTAL SLIDE COUNT if verbose
if [ "$verbose" -eq 1 ]
then
  echo "PPTX_FILENAME = $pptx_filename"
  echo "TOTAL SLIDE COUNT = $nslides"
fi

# loop over slide index
# NOTE(review): assumes slides are stored as slide1.xml .. slideN.xml with no
# gaps in the numbering -- confirm for decks that had slides deleted
for (( idx = 1; idx <= nslides; idx++ ))
do

  if [ "$verbose" -eq 1 ]
  then
    echo " "
    echo "SLIDE $idx OF $nslides"
    echo " "
  fi

  # print the contents of every <a:t>...</a:t> element, one per line
  unzip -qc "$pptx_filename" "ppt/slides/slide${idx}.xml" | perl -e 'while(<>) {  if (@list = ($_ =~ m/\<a:t\>(.+?)\<\/a:t\>/g)) { print "$_\n" for @list } }'

done
