# Batch-convert every PDF found directly inside $PDFDIR to plain text.
# For each "<name>.pdf" a sibling "<name>.txt" is written containing the
# rendered text of all pages, in page order. Files that cannot be read,
# parsed or written are reported on STDERR and skipped so that one bad
# document does not abort the whole run.
use IO::Handle;
use strict;
use warnings;
use CAM::PDF;
use CAM::PDF::PageText;

my $PDFDIR = "./SomeSubDirectory";

# The document currently being converted. File-scoped (not loop-scoped)
# because text_from_page() reads it.
my $pdf;

# NOTE(review): %ddl is not used anywhere in the visible code; retained in
# case code outside this view relies on it -- confirm and remove if dead.
my %ddl;

# 'or' (not '||') so that the failure check applies to opendir itself
# rather than to the always-true directory name.
opendir(my $dir_handle, $PDFDIR)
    or die "Error in opening PDF directory $PDFDIR: $!\n";

FILE:
while (defined(my $entry = readdir $dir_handle)) {

    # Skip non-PDF files
    next FILE if $entry !~ /\.pdf\z/;

    my $filename = $PDFDIR . '/' . $entry;
    if (!-f $filename) {
        warn "Could not load $filename: not a regular file\n";
        next FILE;
    }

    # Name output file same as the PDF. Anchored so only the trailing
    # extension is rewritten, never a ".pdf" occurring earlier in the path.
    (my $output = $filename) =~ s/\.pdf\z/.txt/;

    # Load the PDF before creating the output file, so a document that
    # fails to parse does not leave an empty .txt behind.
    $pdf = CAM::PDF->new($filename);
    if (!$pdf) {
        my $reason = defined $CAM::PDF::errstr
                   ? $CAM::PDF::errstr
                   : 'unknown error';
        warn "Could not load $filename: $reason\n";
        next FILE;
    }

    print "Creating $output...\n";
    my $txt_handle;
    if (!open($txt_handle, '>', $output)) {
        warn "Could not open $output for writing: $!\n";
        next FILE;
    }

    # Total number of pages within the PDF
    my $pages = $pdf->numPages;

    # Get the text for each page (page numbers start at 1)
    for my $page_num (1 .. $pages) {
        print {$txt_handle} text_from_page($page_num);
    }

    # Buffered write errors only surface at close time, so check it.
    close($txt_handle)
        or warn "Error while closing $output: $!\n";
}

closedir $dir_handle;

# text_from_page($pg_num)
#
# Returns the rendered text of page $pg_num of the document currently held
# in the file-scoped $pdf. Returns an empty string when no content tree is
# available for that page, so callers can print the result unconditionally.
sub text_from_page {
    my ($pg_num) = @_;

    my $tree = $pdf->getPageContentTree($pg_num);
    return '' if !$tree;

    return CAM::PDF::PageText->render($tree);
}