You are on page 1of 10

#!/usr/bin/perl -w
########################################################################################
#
# Name: vprr_file_remover.pl
#
# Revision: 0.01 2009/02/12
# Initial Revision.
#
########################################################################################
use strict;
use warnings;
use Getopt::Long qw(GetOptions);
use Pod::Usage qw(pod2usage);
use File::Basename qw(basename);
use File::Copy qw(move);
use DBI;
use IO::Handle;
use Data::Dumper;

#
# Turn on autoflush
#
BEGIN { $| = 1 }

#
# Harden the environment: pin PATH to the system directories and drop the
# variables a shell could use to change how commands are found or started.
#
$ENV{PATH} = '/bin:/usr/bin';
delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};

#
# Reduce $0 to the bare script name, then keep only its leading run of
# word characters, dots and dashes.
#
$0 = basename($0);
{
    no locale;
    ($0) = $0 =~ m/^([\w\.\-]+)/;
}

#
# Script-wide state: parsed command line options, run configuration, and the
# list of error messages reported when initialization fails.
#
use vars qw/ %opt %config @error_msgs /;

my %opt;
my %config;
my @error_msgs;
our $VERSION = 0.01;
####################################################################################################
#
# The usage_error() subroutine is used to print this script usage error.
#
####################################################################################################
sub usage_error {
    # Report a usage problem on STDERR (when a message is supplied) and
    # terminate the script with exit status 1. Never returns.
    my ($message) = @_;

    if (defined $message) {
        print STDERR "USAGE ERROR: $message\n";
    }

    exit 1;
}
####################################################################################################
#
# The init() subroutine is used to set up database connection and capture any of the options
# from the command line.
#
####################################################################################################
#
# Leave through usage_error() unless $dir is an existing directory that can be opened.
#
sub _require_directory {
    my $dir = shift;

    usage_error("Directory $dir does not exist.") if !-d $dir;
    opendir(my $dh, $dir) or usage_error("Directory $dir is not accessable.");
    closedir($dh);
    return;
}

#
# Parse the command line, fill in the run configuration and connect to MySQL.
#
# Parameters:
#   $config - hash ref that receives count_run, ifile, vprr_dir, outfile,
#             target_dir (only when not a count run), log_filename and dbh.
#   $errors - optional array ref; it is handed to do_log() with every
#             initialization error so the caller can report what went wrong.
#
# Returns 1 on success and 0 when the target directory cannot be prepared or the
# database connection fails. Invalid command line input ends the script through
# usage_error(); --version and --help print their output and exit 0.
#
sub init {
    my $config = shift;
    my $errors = shift;

    #
    # Name the log file first so that do_log() can also record failures raised
    # further down in this subroutine.
    #
    $config->{'log_filename'} = basename($0, '.pl') . '.log';

    #
    # Parse the command line for options
    #
    GetOptions(
        'verbose|n'      => \$opt{'n'},
        'count_run|c'    => \$opt{'c'},
        'ifile|f=s'      => \$opt{'f'},
        'vprr_dir|d=s'   => \$opt{'d'},
        'target_dir|t=s' => \$opt{'t'},
        'version|v'      => \$opt{'v'},
        'help|h'         => \$opt{'h'},
    ) or usage_error("Failed to process command line options.");

    #
    # Print VERSION number for this script and exit if 'version' option is detected.
    #
    do { print STDOUT "$0 Version $VERSION\n"; exit(0); } if $opt{'v'};

    #
    # Print perldoc for this script and exit if 'help' option is detected.
    #
    pod2usage({-exitval => 0, -verbose => 2}) if $opt{'h'};

    #
    # Check --count_run option.
    #
    $config->{'count_run'} = defined($opt{'c'}) ? 1 : 0;

    #
    # The input file is mandatory and must exist. All whitespace is removed from
    # the name before it is used.
    #
    usage_error("The -ifile option is required.") if !defined($opt{'f'});
    $config->{'ifile'} = $opt{'f'};
    $config->{'ifile'} =~ s/\s//g;
    usage_error("Input file $config->{'ifile'} does not exist.") if !-e $config->{'ifile'};

    #
    # Default VPRR directory to /ftp/edgar/vprr if --vprr_dir option is not provided.
    #
    $config->{'vprr_dir'} = $opt{'d'} ? $opt{'d'} : '/ftp/edgar/vprr';
    _require_directory($config->{'vprr_dir'});

    #
    # Set up the output filename
    #
    my ($mday, $mon, $year) = (localtime(time()))[3, 4, 5];
    $year += 1900;
    $mon++;
    $config->{'outfile'} = basename($0, '.pl') . '.'
                         . sprintf("%04d-%02d-%02d", $year, $mon, $mday)
                         . '.out';

    #
    # A count run moves nothing, so it needs no target directory.
    #
    if (!$config->{'count_run'}) {
        if (defined($opt{'t'})) {
            $config->{'target_dir'} = $opt{'t'};
        }
        else {
            #
            # Default target directory to /ftp/edgar/vprr/vprr_removed/YYYYMMDD
            # if --target_dir option is not provided, creating it when missing.
            #
            $config->{'target_dir'} = '/ftp/edgar/vprr/vprr_removed/'
                                    . sprintf("%04d%02d%02d", $year, $mon, $mday);
            if (!-d $config->{'target_dir'}) {
                # Create the directory private from the start; chmod afterwards
                # makes the mode exact regardless of the umask.
                if (!mkdir($config->{'target_dir'}, 0700)) {
                    do_log("ERROR: Failed to create target directory $config->{'target_dir'} $!", $errors);
                    return 0;
                }
                if (chmod(0700, $config->{'target_dir'}) != 1) {
                    do_log("ERROR: Failed to chmod on target directory $config->{'target_dir'} $!", $errors);
                    return 0;
                }
            }
        }
        _require_directory($config->{'target_dir'});
    }

    #
    # Connect to MySQL
    #
    my $dbiconnect = "DBI:mysql:edgar;mysql_read_default_file=/home/mysql/.my.cnf.nobody";
    $config->{'dbh'} = DBI->connect($dbiconnect, "", "", {RaiseError => 0, AutoCommit => 1});
    if (!defined($config->{'dbh'})) {
        my $reason = defined($DBI::errstr) ? $DBI::errstr : 'unknown error';
        do_log("ERROR: Failed to connect to database ($reason)", $errors);
        return 0;
    }

    return 1;
}
####################################################################################################
#
# The do_log() subroutine is used to generate debug message.
#
####################################################################################################
#
# Write one timestamped message.
#
# Parameters:
#   $msg    - text of the message.
#   $errors - optional array ref; when given, $msg is appended to it so the
#             caller can report the collected messages later.
#
# The formatted line goes to STDOUT when the verbose option (n) is set and is
# appended to $config{'log_filename'} once that name is configured. Failure to
# open the log file is deliberately silent: logging must never stop the run.
#
sub do_log {
    my $msg    = shift;
    my $errors = shift;

    my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time);
    my $dts = sprintf("%04d-%02d-%02d %02d:%02d:%02d",
                      $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
    my $line = "$dts:$0 $VERSION $$ $msg\n";

    print STDOUT $line if defined($opt{'n'});

    # Collect the message only when the caller supplied an array to collect into.
    push(@{$errors}, $msg) if ref($errors) eq 'ARRAY';

    return if !defined($config{'log_filename'});
    open(my $log_fh, '>>', $config{'log_filename'}) or return;
    print {$log_fh} $line;
    close($log_fh);
    return;
}
####################################################################################################
#
# The find_link() subroutine is used to search the database for the accession number of a specific
# film number. It will build a directory path based on the the accession number it found.
#
####################################################################################################
sub find_link {
    # Look up the accession number stored for a film number and build the path
    # "<vprr_dir>/<SubDir>/<accession_number>" from it, where SubDir is the two
    # characters of the accession number starting at position 12.
    #
    # Parameters:
    #   $film_number - film number to search for.
    #   $config      - hash ref providing 'dbh' (database handle) and 'vprr_dir'.
    #
    # Returns the path string, or undef when no row matches or when the
    # statement cannot be prepared or executed (the failure is logged). If
    # several rows match, the last row fetched wins.
    my $film_number = shift;
    my $config      = shift;

    # The film number is bound as a placeholder value, never pasted into the SQL.
    my $sql = 'SELECT SUBSTRING(accession_number, 12, 2) AS SubDir, accession_number '
            . 'FROM filing_values '
            . 'WHERE film_number = ?';

    my $sth = $config->{'dbh'}->prepare($sql);
    if (!defined($sth)) {
        do_log("ERROR: Can't prepare statement ($DBI::errstr)");
        return;
    }

    if (!defined($sth->execute($film_number))) {
        do_log("ERROR: Can't execute statement $sql ($DBI::errstr)");
        $sth->finish;
        return;
    }

    # Stays undef when nothing matches, so callers can tell "no accession
    # number" apart from a path that was built.
    my $link;
    while (my @row = $sth->fetchrow_array) {
        $link = "$config->{'vprr_dir'}/$row[0]/$row[1]";
    }
    $sth->finish;

    return $link;
}
####################################################################################################
#
# Main program.
#
####################################################################################################
#
# Move the PDF of one film number into the target directory. A failure is
# logged and recorded in the output file.
#
# Parameters: film number, full path of the PDF, target directory, output file handle.
# Returns 1 when the file was moved, 0 otherwise.
#
sub move_pdf_to_target {
    my ($film, $pdf_file, $target_dir, $out_fh) = @_;

    return 1 if move($pdf_file, "$target_dir/$film.pdf");

    do_log("ERROR: Failed to move file $pdf_file to $target_dir. $!");
    print {$out_fh} "$film\tFailed to move file\n";
    return 0;
}

#
# Read the film numbers from the input file, one per line. For each valid
# number whose PDF exists under <vprr_dir>/0000/<first four digits>/, move the
# PDF to the target directory and remove the matching symbolic link. A count
# run only reports how many film numbers were read and how many have no PDF.
#
do_log("Program starts processing..........");
#
# Initialize this script.
#
if (init(\%config, \@error_msgs)) {
    do_log('Configuration = ' . Dumper(\%config));

    my ($out_fh, $in_fh);
    #
    # Open the output file
    #
    if (!open($out_fh, '>', $config{'outfile'})) {
        do_log("Failed to open output file ($config{'outfile'}). $!");
    }
    #
    # Open the input file
    #
    elsif (!open($in_fh, '<', $config{'ifile'})) {
        do_log("Failed to open input file ($config{'ifile'}). $!");
        close($out_fh);    # do not leave the output file open
    }
    else {
        my @film_numbers = <$in_fh>;
        close($in_fh);
        my $total_count         = @film_numbers;
        my $doesnot_exist_count = 0;

        FILM:
        foreach my $f (@film_numbers) {
            chomp $f;
            $f =~ s/^\s+//;
            $f =~ s/\s+$//;
            next FILM if length($f) == 0;
            #
            # Invalid film number: anything that is not exactly eight digits
            #
            if ($f !~ /^(\d{4})\d{4}$/) {
                do_log("WARNING: Invalid film number $f");
                print {$out_fh} "$f\tInvalid film number\n";
                next FILM;
            }
            my $pdf_file = "$config{'vprr_dir'}/0000/$1/$f.pdf";
            #
            # See if PDF file exists on the system disk
            #
            if (!-e $pdf_file) {
                $doesnot_exist_count++;
                do_log("WARNING: No PDF file ($pdf_file) found for film number $f");
                print {$out_fh} "$f\tNo PDF file found\n";
                next FILM;
            }
            #
            # Doing a count run: nothing is moved or removed
            #
            next FILM if $config{'count_run'};

            my $link    = find_link($f, \%config);
            my $no_path = !defined($link) || $link eq '';
            #
            # No link path could be built, or the symbolic link is not on disk:
            # only the PDF has to be moved.
            # NOTE(review): find_link() also yields undef on a database error,
            # which is treated here like "no accession number".
            #
            if ($no_path || !-l $link) {
                do_log($no_path
                    ? "WARNING: No accession number found for film number $f"
                    : "WARNING: No symbolic link ($link) found for film number $f");
                if (move_pdf_to_target($f, $pdf_file, $config{'target_dir'}, $out_fh)) {
                    print {$out_fh} "$f\tSuccessfully removed with no link $pdf_file\n";
                }
                next FILM;
            }
            #
            # Move the PDF file out to the target directory. When the move fails
            # the symbolic link is kept, because it still points at a live file.
            #
            next FILM if !move_pdf_to_target($f, $pdf_file, $config{'target_dir'}, $out_fh);
            #
            # Remove the symbolic link
            #
            if (unlink($link) == 1) {
                do_log("INFO: Successfully removed film number $f");
                print {$out_fh} "$f\tSuccessfully removed $link $pdf_file\n";
            }
            #
            # Failed to remove the symbolic link
            #
            else {
                do_log("ERROR: Failed to remove symbolic link $link. $!");
                print {$out_fh} "$f\tFailed to remove symbolic link\n";
            }
        }
        if ($config{'count_run'}) {
            print {$out_fh} "Total film numbers=$total_count\n";
            print {$out_fh} "Does Not Exist Count=$doesnot_exist_count\n";
        }
        close($out_fh);
    }
    #
    # Disconnect from MySQL
    #
    $config{'dbh'}->disconnect() if defined($config{'dbh'});
}
#
# Failed to initialize: log the collected error messages
#
else {
    my $error_msgs = join("\n", @error_msgs);
    do_log("Failed to initialize ($error_msgs)");
}

do_log("Program is exiting........");
__END__
=head1 NAME

vprr_file_remover.pl - VPRR PDF Files Remover.

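=head1 SYNOPSIS

vprr_file_remover.pl {options}

=head1 DESCRIPTION

This script reads a list of film numbers from an input file. For every film number whose PDF file
exists under the VPRR directory, it moves the PDF file to the target directory and removes the
symbolic link built from the accession number found in the database. The result for each film
number is written to an output file. With the count_run option nothing is moved; the script only
reports the total number of film numbers and the number that have no PDF file.
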
=head1 OPTIONS

  -h, --help       : Display this help and exit
  -v, --version    : Display the current version of this script
  -f, --ifile      : Input file name
  -t, --target_dir : Specify the directory where to move the PDF files to
  -d, --vprr_dir   : Specify the directory of the VPRR root directory
  -c, --count_run  : Get the count for no PDF files
  -n, --verbose    : Run in verbose mode

You might also like