1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192 |
- #!/bin/bash -f
- # command line bash shell script to convert the .pptx format to text
# Print the command-line usage summary and terminate the script.
#
# Arguments: none.
# Outputs:   the usage text on stdout.
# Exits:     always with status 1; this function never returns to its caller.
pptx2txt_usage ()
{
    # printf with one argument per line keeps every line of the help text
    # literal, including the single-space spacer lines.
    printf '%s\n' \
        " " \
        "USAGE: pptx2txt [options] PPTX_FILENAME" \
        " " \
        "OPTIONS:" \
        " " \
        " -v, --verbose verbose output including filename and slide number heading" \
        " " \
        " -h, --help display this help message" \
        " "
    exit 1
}
# Report a fatal error, show the usage text and terminate the script.
#
# Arguments: $1 - human-readable description of the error.
# Outputs:   the error message followed by the usage text, both on stderr so
#            that diagnostics never mix with the extracted slide text that
#            the script writes to stdout.
# Exits:     never returns; the script ends with status 1.
error_exit ()
{
    {
        echo " "
        printf 'ERROR: %s\n' "$1"
        # pptx2txt_usage terminates the script itself (status 1).
        pptx2txt_usage
    } >&2
    # Defensive: guarantee termination even if the usage helper ever returns.
    exit 1
}
# initialize flags and arguments
verbose=0
help=0
# Start from an empty value so a variable of the same name inherited from the
# environment cannot be mistaken for a file given on the command line.
pptx_filename=""

# Loop over arguments: options come first, then the PPTX file name. Parsing
# stops at the first non-option argument, so anything after it is ignored.
while (( $# > 0 ))
do
    case "$1" in
        -v|--verbose)
            verbose=1
            ;;
        -h|--help)
            help=1
            ;;
        *)
            # Anything that is not a known option must be an existing file.
            if [ -e "$1" ]
            then
                pptx_filename="$1"
            else
                error_exit "unknown option '$1'"
            fi
            break
            ;;
    esac
    shift
done

# display help if requested
if [ "$help" -eq 1 ]
then
    pptx2txt_usage
fi

# detect empty PPTX_FILENAME
if [ -z "$pptx_filename" ]
then
    error_exit "No PPTX_FILENAME name detected."
fi

# Find the number of slides in the pptx file: the last line of `unzip -l` is
# the totals line, whose second field is the number of matching members.
# The member pattern is quoted so that unzip, not the shell, expands it.
nslides=$(unzip -l "$pptx_filename" 'ppt/slides/slide*.xml' | awk '{print $2}' | tail -1)

# When the file is not a readable archive or holds no slides, the field above
# is not a number; stop here instead of looping over garbage.
if ! [[ "$nslides" =~ ^[0-9]+$ ]]
then
    error_exit "could not find any slides in '$pptx_filename'"
fi

# echo PPTX_FILENAME and TOTAL SLIDE COUNT if verbose
if [ "$verbose" -eq 1 ]
then
    echo "PPTX_FILENAME = $pptx_filename"
    echo "TOTAL SLIDE COUNT = $nslides"
fi

# Loop over slide index.
# NOTE(review): assumes slide members are named slide1.xml .. slideN.xml
# without gaps - confirm for decks that had slides deleted.
for (( idx = 1; idx <= nslides; idx++ ))
do
    if [ "$verbose" -eq 1 ]
    then
        echo " "
        echo "SLIDE $idx OF $nslides"
        echo " "
    fi
    # Print the content of every <a:t> text run, one per line. The five XML
    # predefined entities are decoded in a single pass so that, for example,
    # "&amp;lt;" becomes "&lt;" and is not decoded a second time.
    unzip -qc "$pptx_filename" "ppt/slides/slide$idx.xml" | perl -e 'my %ent = (lt => "<", gt => ">", quot => "\"", apos => "\x27", amp => "&"); while (<>) { my @list = ($_ =~ m/\<a:t\>(.+?)\<\/a:t\>/g); for my $t (@list) { $t =~ s/&(lt|gt|quot|apos|amp);/$ent{$1}/g; print "$t\n" } }'
done
|