pptx2text 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. #!/bin/bash -f
  2. # command line bash shell script to convert the .pptx format to text
  # Print the command-line help for pptx2txt and terminate the script.
  #
  # Arguments: none (any that are passed are ignored)
  # Outputs:   the usage summary and option list on stdout
  # Exits:     always, with status 1 -- this function never returns
  pptx2txt_usage() {
    # One argument per output line; the single-space lines are the spacer
    # lines of the help layout.
    printf '%s\n' \
      ' ' \
      'USAGE: pptx2txt [options] PPTX_FILENAME' \
      ' ' \
      'OPTIONS:' \
      ' ' \
      ' -v, --verbose verbose output including filename and slide number heading' \
      ' ' \
      ' -h, --help display this help message' \
      ' '
    exit 1
  }
  # Report a fatal problem, then hand over to the usage screen.
  #
  # Arguments: $1 - description of what went wrong
  # Outputs:   a spacer line and "ERROR: <description>" on stdout, followed by
  #            whatever pptx2txt_usage prints
  # Returns:   control is passed to pptx2txt_usage as the last step
  error_exit() {
    printf ' \nERROR: %s\n' "$1"
    pptx2txt_usage
  }
  # ---------------------------------------------------------------------------
  # Main script: parse the command line, count the slides stored in the archive
  # and print the contents of the <a:t> elements of each slide, in slide order.
  # Relies on error_exit and pptx2txt_usage, and on the external tools unzip,
  # awk, tail and perl.
  # ---------------------------------------------------------------------------

  # initialize flags and arguments; pptx_filename is cleared explicitly so that
  # a value inherited from the environment is never mistaken for an argument
  verbose=0
  help=0
  pptx_filename=""

  # loop over arguments: options come first, the first other word is the file
  while (( $# > 0 )); do
    key="$1"
    case "$key" in
      -v|--verbose)
        verbose=1
        ;;
      -h|--help)
        help=1
        ;;
      *)
        if [[ -e "$key" ]]; then
          pptx_filename="$key"
        elif [[ "$key" == -* ]]; then
          error_exit "unknown option '$key'"
        else
          # a word that is not an option is meant to be the input file
          error_exit "file '$key' not found"
        fi
        # everything after the file name is ignored
        break
        ;;
    esac
    shift
  done

  # display help if requested
  if (( help == 1 )); then
    pptx2txt_usage
  fi

  # detect empty PPTX_FILENAME
  if [[ -z "$pptx_filename" ]]; then
    error_exit "No PPTX_FILENAME name detected."
  fi

  # find number of slides in pptx file: the second field of the last line of
  # the "unzip -l" listing is the count of matching entries. The pattern is
  # quoted so that unzip, not the shell, matches it -- this keeps the script
  # working even when it is not started with the shebang's -f (noglob) flag.
  nslides=$(unzip -l "$pptx_filename" 'ppt/slides/slide*.xml' \
    | awk '{print $2}' \
    | tail -1)

  # a file that is not a zip archive, or has no slide entries, does not yield
  # a number here; stop instead of looping over garbage
  if ! [[ "$nslides" =~ ^[0-9]+$ ]]; then
    error_exit "could not determine the slide count of '$pptx_filename'"
  fi
  # force base 10 so a count with leading zeros is not read as octal
  nslides=$(( 10#$nslides ))

  # echo PPTX_FILENAME and TOTAL SLIDE COUNT if verbose
  if (( verbose == 1 )); then
    printf '%s\n' "PPTX_FILENAME = $pptx_filename"
    printf '%s\n' "TOTAL SLIDE COUNT = $nslides"
  fi

  # loop over slide index (entries are expected to be named slide1.xml ..
  # slide<nslides>.xml)
  for (( idx = 1; idx <= nslides; idx++ )); do
    if (( verbose == 1 )); then
      printf ' \nSLIDE %s OF %s\n \n' "$idx" "$nslides"
    fi
    # stream the slide XML to stdout and print every <a:t>...</a:t> body on
    # its own line
    unzip -qc "$pptx_filename" "ppt/slides/slide${idx}.xml" \
      | perl -e 'while(<>) { if (@list = ($_ =~ m/\<a:t\>(.+?)\<\/a:t\>/g)) { print "$_\n" for @list } }'
  done