A veces las editoriales suben PDFs en alta resolución, y es necesario reducir el tamaño para poder usarlos. Acá les dejo un script que me ayudó a automatizar esta tarea.
Bueno, este es mi código:
#!/bin/bash
# hmhbaja: for every directory listed in /etc/hmhbaja/paths, mirror it into
# a sibling "<dir>_low" directory and run /root/pdf2low.pl on each PDF that
# showed up in the mirror since the previous run.
#
# State kept on disk:
#   /tmp/hmhbaja.lock    present while an instance is running
#   <dir>_low/.marca     timestamp marker of the last completed pass
#
# The paths file is split on whitespace (as before), so directory names
# containing blanks are not supported.

lock=/tmp/hmhbaja.lock
paths_file=/etc/hmhbaja/paths
reducer=/root/pdf2low.pl

# Make sure no other instance is running. With noclobber the redirection
# fails if the lock already exists, so test-and-create is a single step.
if ( set -o noclobber; : > "$lock" ) 2>/dev/null; then
    # Release the lock however the script ends (error, Ctrl-C, normal exit).
    trap 'rm -f "$lock"' EXIT

    for i in $(cat "$paths_file"); do
        path1="$i/"
        path2="${i}_low/"
        marker="${path2}.marca"

        # Copy only files that are not in the mirror yet; files that were
        # already reduced are never overwritten by the originals.
        rsync -r --ignore-existing "$path1" "$path2"

        if [ ! -d "$path2" ]; then
            echo "hmhbaja: $path2 does not exist, skipping" >&2
            continue
        fi

        if [ -f "$marker" ]; then
            # Only PDFs whose status changed after the last pass.
            find "$path2" -iname '*.pdf' -cnewer "$marker" \
                 -execdir "$reducer" {} \;
        else
            # First pass: there is no marker to compare against, so every
            # PDF is processed. (-cnewer needs a reference file; calling it
            # without one makes find consume "-execdir" as that file.)
            find "$path2" -iname '*.pdf' -execdir "$reducer" {} \;
        fi

        touch "$marker"
    done
fi
Tiene un archivo de configuración con los paths:
/var/ftp/virtual1/hmhreadingse/Grado2/TE_Reprints/07-Digitals /var/ftp/virtual1/hmhreadingse/Grado3/TE_Reprints/07-Digitals
y el archivo que hace la magia, o sea, el que reduce el tamaño del PDF. Este script es propiedad de Wolfgang Dobler; para que funcione, obviamente necesita pdftk.
#!/bin/sh
#  -*-Perl-*-
# ====================================================================== #
# Polyglot launcher: the file starts out as a /bin/sh script that picks a
# perl binary and re-executes itself with `perl -x', which skips everything
# above the perl shebang line further down.
# Run the right perl version:
if [ -x /usr/local/bin/perl ]; then
  perl=/usr/local/bin/perl
elif [ -x /usr/bin/perl ]; then
  perl=/usr/bin/perl
else
  perl=`which perl| sed 's/.*aliased to *//'`
fi

exec "$perl" -x -S "$0" "$@"     # -x: start from the following line
# ====================================================================== #
#! /Good_Path/perl -w
# line 20
# Name:   compress-newsletter
# Author: wd (Wolfgang.Dobler@ucalgary.ca)
# Date:   03-Oct-2005
# Description:
#   Use ghostscript's pdfwrite device (à la ps2pdf) to reduce the
#   Newsletter's PDF file size, and add meta information like author,
#   date, etc.
#   The preferred route is currently:
#     [scribus>=1.2.3]
#           |
#        file.pdf
#           |
#     [pdftops>=3.00]
#           |
#        file.ps
#           |
#     [pstopdf14 (gs-gnu-8.16 or higher)]
#           |
#           V
#       final.pdf
# Usage:
#   compress-newsletter [-i col:gray:mono] Newsletter_big.pdf
# Options:
#   -i col:gray:mono
#   --imgres=col:gray:mono  Set resolution for downsampling color,
#                           grayscale and black-and-white images
#                           (default is 144:300:300)
#   -q, --quiet             Do not print the progress messages
#   --debug                 Be verbose and keep temporary files around

# NOTE: this variant REPLACES the input file with the compressed result.
# The original is only overwritten after pdftops, gs and pdfopt have all
# succeeded and the result is non-empty; on any failure the script dies and
# leaves the input untouched.
# The header paragraph above must stay free of blank lines: usage() prints
# the first paragraph of this file that contains `Description:'/`Usage:'.

use strict;
use warnings;
use File::Temp qw/ :mktemp /;
use Getopt::Long;
use POSIX qw(strftime);

# Allow for `-Plp' as equivalent to `-P lp' etc:
Getopt::Long::Configure("bundling");

my (%opts);                     # Options hash for GetOptions

## Process command line
GetOptions(\%opts,
           qw( -h   --help
               -i=s --imgres=s
                    --debug
               -q   --quiet
               -v   --version ))
    or die usage();

my $debug = ($opts{'debug'} ? 1 : 0 ); # undocumented debug option
if ($debug) {
    printopts(\%opts);
    print "\@ARGV = `@ARGV'\n";
}

if ($opts{'h'} || $opts{'help'})    { die usage();   }
if ($opts{'v'} || $opts{'version'}) { die version(); }

my $quiet  = ($opts{'q'} || $opts{'quiet'}  || ''           );
my $imgres = ($opts{'i'} || $opts{'imgres'} || '144:300:300');

my ($gs,      @gsargs     ) = ('gs'     );
my ($pdftops, @pdftopsargs) = ('pdftops');
my ($pdfopt,  @pdfoptargs ) = ('pdfopt' );

my $infile = shift or die usage();
-f $infile or die "No such file: $infile\n";

# Derive the working names from the input name. The suffix is matched
# case-insensitively and only at the end of the name, so `X.PDF' is handled
# too; if there is no recognised suffix, `_new' is simply appended. Either
# way $outfile can never be the same path as $infile.
(my $root = $infile) =~ s/\.(?:pdf|ps)\z//i;
my $outfile = $infile;
$outfile =~ s/(\.(?:pdf|ps))\z/_new$1/i or $outfile .= '_new';
my $outfile_written = 0;        # set once pdfopt may have created $outfile

my $tmpfile = mktemp("${root}.tmp_XXXXXX");

## 0. Extract all sorts of information

# Extract Scribus version, creation date, bookmarks from original PDF.
# pdftk is run without a shell (list-form pipe open), so file names with
# blanks or shell metacharacters are passed through unharmed. A failing
# pdftk is not fatal: the defaults below are used instead.
print "Running pdftk ...\n" unless $quiet;
print STDERR "pdftk $infile dump_data output\n" if ($debug);
my $meta = '';
if (open(my $pdftk, '-|', 'pdftk', $infile, 'dump_data', 'output', '-')) {
    local $/;                   # slurp
    $meta = <$pdftk>;
    $meta = '' unless defined($meta);
    close($pdftk);
} else {
    warn "Cannot run pdftk ($!); falling back to default meta data\n";
}

my ($creator) = ( $meta =~ m{InfoKey: Creator\s+InfoValue:\s*(.+)$}m );
$creator = 'Scribus 1.2.3' unless defined($creator);
my $datestring = extract_CreationDate($meta);
my @bookmarks  = extract_bookmarks($meta);

# Extract desired image resolutions
my ($colres,$grayres,$monores)
    = ($imgres =~ /([0-9]+):([0-9]+):([0-9]+)/);
die "Image resolution must be of form `col:gray:mono'\n"
    unless defined($monores);

## 1. Run pdftops
push @pdftopsargs, "-level3";
my $psfile = mktemp("${root}.ps_XXXXXX");
push @pdftopsargs, $infile, $psfile;
run_step('pdftops', $pdftops, @pdftopsargs);

## 2. Run gs

# a) Prepare options
push @gsargs, qw{-q -dNOPAUSE -dBATCH};
push @gsargs, '-sDEVICE=pdfwrite';
push @gsargs, '-dCompatibilityLevel=1.3';
# One of /printer, /screen, /prepress, /ebook, /default; see Ps2pdf.htm:
push @gsargs, '-dPDFSETTINGS=/screen';
push @gsargs, '-dEmbedAllFonts=true';
push @gsargs, '-dSubsetFonts=true';
push @gsargs, '-dColorImageDownsampleType=/Bicubic';
push @gsargs, "-dColorImageResolution=$colres";
push @gsargs, '-dGrayImageDownsampleType=/Bicubic';
push @gsargs, "-dGrayImageResolution=$grayres";
push @gsargs, '-dMonoImageDownsampleType=/Bicubic';
push @gsargs, "-dMonoImageResolution=$monores";
push @gsargs, "-sOutputFile=$tmpfile";
push @gsargs, "-c .setpdfwrite";

# b) Write meta information to temporary file
#my $metafile = mktemp("metainfo.tmp_XXXXXX");
my $metafile = "${root}.meta";
open(my $meta_fh, '>', $metafile)
    or die "Cannot write $metafile: $!\n";
# NOTE(review): the `%' right after `[' starts a PostScript comment, so
# /CreationDate appears to be deliberately disabled here -- confirm.
print {$meta_fh} <<"DEAD_PARROT";
% Document information
[% /CreationDate (D:$datestring)
  /ModDate (D:$datestring)
  /Creator ($creator)
  /Title ([Insert your document title here])
  /Subject ([Insert the Subject here])
  /Keywords ([Insert key words here])
  /Author ([Insert author' nsme here])
  /DOCINFO pdfmark

% Initial view on opening the document
[/View [/Fit]                   % Fit page in window
  /Page 1
%  /PageMode /UseOutlines        % /UseNone /UserOutlines /UseThumbs /FullScreen
  /DOCVIEW pdfmark

DEAD_PARROT
# Close before gs reads the file so that everything is flushed to disk.
close($meta_fh) or die "Cannot write $metafile: $!\n";

## Bookmarks. [Commented out for acroread 7.0 has problems] Currently at
## the mercy of the original bookmarks (and Scribus 1.2.2 does not allow
## to edit the bookmark names) and the encoding that pdftk understands
## (most quotation marks get mapped to `?').
## Ideally, one would write out the meta information file with
##   `compress-newsletter -m CC.pdf' and use it then with
##   `compress-newsletter CC.pdf'.
## % Bookmarks: @bookmarks

push @gsargs, '-f', $psfile, $metafile;
run_step('gs', $gs, @gsargs);

## 3. Run pdfopt
$outfile_written = 1;
run_step('pdfopt', $pdfopt, @pdfoptargs, $tmpfile, $outfile);

## 4. Replace the original with the compressed file
-s $outfile
    or die "pdfopt produced no output; $infile left untouched\n";
# rename() within one directory replaces $infile in a single step, so there
# is never a moment at which neither the old nor the new file exists.
rename($outfile, $infile)
    or die "Cannot replace $infile with $outfile: $!\n";

END {
    # Clean up even in case of an error. $outfile only still exists here
    # if something went wrong after pdfopt started writing it.
    unless ($debug) {
        my @leftovers = ($psfile, $tmpfile, $metafile);
        push @leftovers, $outfile if ($outfile_written);
        foreach my $file (@leftovers) {
            unlink $file if (defined($file) && -f $file);
        }
    }
}

# ---------------------------------------------------------------------- #
sub run_step {
# Run one external tool of the pipeline without going through a shell.
#   $label -- name used in the progress and error messages
#   @cmd   -- program followed by its arguments
# Dies if the program cannot be started or exits with a non-zero status,
# so that later steps never operate on incomplete files.
    my ($label, @cmd) = @_;

    print "Running $label ...\n" unless $quiet;
    print STDERR "@cmd\n" if ($debug);
    system(@cmd) == 0
        or die "$label failed (status " . ($? >> 8)
             . "); $infile left untouched\n";
    return;
}
# ---------------------------------------------------------------------- #
sub extract_CreationDate {
# Build the date part of a PDF date string (without the leading `D:',
# which the caller adds) from the pdftk dump in $meta.
# Returns `YYYYMMDDhhmmss' followed by a time zone such as -06'00'.
    my $meta = shift;

    my ($cdate)
        = ( $meta =~ m{InfoKey: CreationDate\s+InfoValue:\s*(.+)$}m );

    # Time string: need to splice in "'" after hours and minutes of time zone
    # definition. To me this looks like the technical documentation was taken
    # too literally and now applications (and Acroread 7) insist on these
    # stupid markers.
    if (defined($cdate) && $cdate =~ /([0-9]{14})(.*)/) {
        # Managed to extract CreationDate from $meta. Only the 14 digits
        # are taken, so a `D:' prefix in the pdftk value is not duplicated.
        my ($stamp, $rest) = ($1, $2);
        return "${stamp}Z" if ($rest =~ /^Z/);
        if ($rest =~ /^([-+][0-9]{2})'?([0-9]{2})/) {
            # Keep the time zone recorded in the document
            return sprintf("%s%s'%s'", $stamp, $1, $2);
        }
        # No zone recorded: fall back to the script's historical default
        return $stamp . "-06'00'";
    }

    # Creation date unknown -- use current date
    my @now = localtime();
    my $tz  = strftime("%z", @now);
    $tz =~ s/([0-9][0-9])([0-9][0-9])/$1'$2'/;
    return strftime("%Y%m%d%H%M%S", @now) . $tz;
}
# ---------------------------------------------------------------------- #
sub extract_bookmarks {
# Turn the Bookmark* records of the pdftk dump in $meta into a list of
# pdfmark lines, one per bookmark. The bookmark level is parsed but not
# used. Returns the (possibly empty) list.
    my $meta = shift;

    my @bm;
    while ($meta =~ /^BookmarkTitle: \s* (.*) \n
                      BookmarkLevel: \s* (.*) \n
                      BookmarkPageNumber: \s* (.*)
                    /xmg) {
        my ($title,$level,$page) = ($1,$2,$3);
        push @bm, "[/Title ($title) /Page $page /OUT pdfmark\n";
    }
    return @bm;
}
# ---------------------------------------------------------------------- #
sub printopts {
# Print command line options
    my $optsref = shift;

    foreach my $opt (keys(%$optsref)) {
        print STDERR "\$opts{$opt} = `$optsref->{$opt}'\n";
    }
    return;
}
# ---------------------------------------------------------------------- #
sub usage {
# Extract description and usage information from this file's header.
# Returns the header text with the comment signs removed, or "\n" if no
# matching paragraph is found.
    my $thisfile = __FILE__;
    local $/ = '';              # Read paragraphs
    open(my $fh, '<', $thisfile) or die "Cannot open $thisfile\n";

    my $text = '';
    while (my $para = <$fh>) {
        # Paragraph _must_ contain `Description:' or `Usage:'
        next unless $para =~ /^\s*\#\s*(Description|Usage):/m;
        # Drop `Author:', etc. (anything before `Description:' or `Usage:')
        $para =~ s/.*?\n(\s*\#\s*(Description|Usage):\s*\n.*)/$1/s;
        # Don't print comment sign:
        $para =~ s/^\s*# ?//mg;
        $text = $para;
        last;                   # ignore body
    }
    close($fh);

    return ($text ne '' ? $text : "\n");
}
# ---------------------------------------------------------------------- #
sub version {
# Return CVS data and version info.
    my $doll='\$';              # Need this to trick CVS
    my $cmdname = (split('/', $0))[-1];
    my $rev = '$Revision: 1.8 $';
    my $date = '$Date: 2006/02/02 09:38:52 $';
    $rev =~ s/${doll}Revision:\s*(\S+).*/$1/;
    $date =~ s/${doll}Date:\s*(\S+).*/$1/;

    return "$cmdname version $rev ($date)\n";
}
# ---------------------------------------------------------------------- #

# End of file compress-newsletter
Espero les sirva 😉
Tags: bash script, pdf reduce size, pdftk, reducir tamaño pdf