How to remove new lines within double quotes

#!/usr/bin/perl
use warnings;
use strict;
use Path::Tiny;
use Text::CSV;
use Time::Piece;
use File::Path qw( make_path );
use diagnostics;
use Try::Tiny;
#use File::NCopy;
use File::Copy::Recursive qw(fcopy rcopy dircopy fmove rmove dirmove);
use Time::HiRes qw( time );
# Wall-clock start of the run (Time::HiRes resolution); the total elapsed
# time is reported from this value once all work is done.
my $start = time();

# Processing date: today, formatted as YYYYMMDD.
my $date = localtime->strftime('%Y%m%d');

# Feed date: the optional fourth command-line argument when one was given,
# otherwise the processing date.
my $feed_date = exists $ARGV[3] ? $ARGV[3] : $date;
# The script needs three positional arguments (source root, destination root,
# log root); a fourth, the feed date, is optional.  Fail fast with a usage
# message instead of failing later while building paths from missing values.
if ( @ARGV < 3 || grep { !defined || !length } @ARGV[ 0 .. 2 ] ) {
    die "Usage: $0 <source_dir> <dest_dir> <log_dir> [feed_date]\n";
}

# build source directory path ==>
#   <source root>/<current date>/data/<feed date>     data files
#   <source root>/<current date>/header/<feed date>   header files
my $source_feed_dir      = $ARGV[0];
my $source_feed_dir_path = path($source_feed_dir);

# process i.e. current date
my $source_feed_date_dir  = $source_feed_dir_path->child($date);
my $source_feed_date_dir2 = $source_feed_date_dir->child("data");

# feed date: current date, or the date supplied on the command line
my $source_feed_date_dir3 = $source_feed_date_dir2->child($feed_date);

# build source directories for header
my $source_feed_header_dir1 = $source_feed_date_dir->child("header");
my $source_feed_header_dir2 = $source_feed_header_dir1->child($feed_date);
# Destination layout, rooted at the second command-line argument:
#   <dest root>/<current date>/data/<feed date>/<file(s) with original name>
#   <dest root>/<current date>/header/<feed date>/<file(s) with original name>
my $dest_feed_dir        = $ARGV[1];
my $dest_feed_dir_path   = path($dest_feed_dir);
my $dest_feed_date_dir_1 = $dest_feed_dir_path->child($date);

# Data branch: where the cleaned data files are written.
my $dest_feed_date_dir_2 = $dest_feed_date_dir_1->child('data');
my $dest_feed_date_dir   = $dest_feed_date_dir_1->child( 'data', $feed_date );

# Header branch: where the header directory is copied to.
my $dest_feed_header_dir_1 = $dest_feed_date_dir_1->child('header');
my $dest_feed_header_dir   = $dest_feed_date_dir_1->child( 'header', $feed_date );
# Logging layout, rooted at the third command-line argument:
#   <log root>/<current date>/Cleaning_Log_<YYYY_MM_DD_HH_MM_SS>.log
my $log_feed_dir      = $ARGV[2];
my $log_feed_dir_path = path($log_feed_dir);
my $log_feed_date_dir = $log_feed_dir_path->child($date);

# One log file per run; its name carries the timestamp taken here.
my $log_stamp          = localtime->strftime("%Y_%m_%d_%H_%M_%S");
my $log_feed_date_file = $log_feed_date_dir->child( "Cleaning_Log_" . $log_stamp . ".log" );

# Record the start of the run in the audit log.
my $current_time = localtime->strftime("%Y-%m-%d %H:%M:%S");
logger( 'LOG-1', 'STARTING CLEANING APPLICATION @ ' . $current_time );
# check if source directory exists; nothing can be cleaned without it
if ( !-e $source_feed_date_dir3 ) {
    logger( 'FAILURE', 'SOURCE DIRECTORY DOES NOT EXISTS: ' . " " . $source_feed_date_dir3 );
    exit 1;
}

# Read the names of the entries in the source data directory.
#
# opendir() reports failure through its return value and never throws, so
# the result is tested directly (wrapping an unchecked opendir in try/catch
# cannot detect a failure).  A lexical directory handle replaces the
# bareword DIR so the handle is not a package global.
logger( 'LOG-2', 'OPENING SOURCE DIRECTORY :' . " " . $source_feed_date_dir3 );
opendir( my $source_dh, $source_feed_date_dir3 ) or do {
    my $reason = $!;    # capture before any further system call resets it
    logger( 'FAILURE',
        'FAILED TO OPEN SOURCE DIRECTORY : ' . " " . $source_feed_date_dir3 . " : " . $reason );
    exit 1;
};

# Every entry except the "." and ".." pseudo-entries is treated as an input
# file by the processing loop that iterates over @file.
my @file = grep { $_ ne '.' && $_ ne '..' } readdir $source_dh;
closedir $source_dh;
# Clean every input file: parse it as ';'-separated CSV, strip unwanted
# characters (including line breaks embedded in quoted fields) from each
# field, and write the result, fully quoted, to the destination directory
# under the same file name.  Any failure is logged and aborts the run.
foreach my $file (@file) {
    my $source_feed_date_file = $source_feed_date_dir3->child($file);
    my $dest_feed_date_file   = $dest_feed_date_dir->child($file);
    my $s1                    = time();

    # Create the destination directory on first use.
    try {
        if ( !-d $dest_feed_date_dir ) {
            make_path $dest_feed_date_dir;
        }
    }
    catch {
        logger( 'FAILURE', 'FAILED TO CREATE DESTINATION DIRECTORY : ' . " " . $dest_feed_date_dir );
        exit 1;
    };

    # Reader: binary-safe so quoted fields may contain embedded line breaks.
    my $csv_in = 'Text::CSV'->new(
        {
            binary    => 1,
            sep_char  => ";",
            auto_diag => 1,
        }
    ) or die "CANNOT USE CSV: " . 'Text::CSV'->error_diag;

    # Writer: one record per line, every field quoted.
    # NOTE(review): the original had eol => " ", which joins all records
    # onto a single line; "\n" is assumed to be the intended terminator
    # (the escape looks lost in transcription) - confirm.
    my $csv_out = 'Text::CSV'->new(
        {
            binary       => 1,
            eol          => "\n",
            sep_char     => ";",
            always_quote => 1,
            auto_diag    => 1,
        }
    ) or die "CANNOT USE CSV: " . 'Text::CSV'->error_diag;

    logger( 'LOG-3', 'PROCESSING FILE :' . " " . $source_feed_date_file );

    try {
        # open() returns false on failure instead of throwing, so each call
        # is checked; otherwise the catch block below could never run.

        # Inbound file reader with no encoding specified ==>
        open( my $CSV_FILE, '<', $source_feed_date_file )
            or die "CANNOT OPEN $source_feed_date_file FOR READING: $!\n";

        # Outbound file writer with UTF8 encoding ==>
        open( my $fh, '>:encoding(UTF-8)', $dest_feed_date_file )
            or die "CANNOT OPEN $dest_feed_date_file FOR WRITING: $!\n";

        my $rx = 0;
        while ( my $row = $csv_in->getline($CSV_FILE) ) {

            # Delete everything outside printable ASCII (space .. tilde),
            # which covers CR, LF and TAB inside quoted fields.
            # NOTE(review): the original pattern, s/ | | |[^ -]//g, removes
            # every character except '-'; it is assumed to be a garbled
            # form of s/\n|\r|\t|[^ -~]//g - confirm the intended range.
            tr/\x20-\x7E//cd for @$row;

            $csv_out->print( $fh, $row )
                or die "CANNOT WRITE TO $dest_feed_date_file: " . $csv_out->error_diag . "\n";

            # Progress marker every 1000 records.
            if ( $rx % 1000 == 0 ) {
                print "$rx ";
            }
            $rx += 1;
        }

        # getline() returns false both at end of file and on a parse error;
        # only the former is a normal end of input.
        $csv_in->eof
            or die "CSV PARSE ERROR IN $source_feed_date_file: " . $csv_in->error_diag . "\n";

        close($CSV_FILE)
            or die "CANNOT CLOSE $source_feed_date_file: $!\n";

        # Buffered write errors only surface when the handle is closed.
        close($fh)
            or die "CANNOT CLOSE $dest_feed_date_file: $!\n";

        print "Total Number Of Records processed:";
        print $rx ;
        my $e1 = time();
        printf( " Time elapsed for %s : %.2f ", $file, $e1 - $s1 );
    }
    catch {
        my $e = shift;
        print $e;
        logger( 'LOG-4', 'PROCESSING FAILED FOR FILE :' . " " . $source_feed_date_file );
        exit 1;
    };

    logger( 'LOG-4', 'PROCESSING ENDED FOR FILE :' . " " . $source_feed_date_file );
}
logger( 'LOG-5', 'CLEANED DATA FILE SAVED AT :' . " " . $dest_feed_date_dir );

# Copy the header directory next to the cleaned data.
#
# dircopy() signals failure by returning false (with $! set), so its result
# is converted into an exception here; without the check the catch block
# would never run and a failed copy would be logged as a success.  The
# error text is taken from the exception rather than from $!, which may
# have been overwritten by the time the catch block executes.
try {
    dircopy( $source_feed_header_dir2, $dest_feed_header_dir )
        or die "$!\n";
}
catch {
    my $reason = $_;
    chomp $reason;
    logger( "LOG6",
        "COULD NOT PERFORM RCOPY OF $source_feed_header_dir2 TO $dest_feed_header_dir: $reason" );
    exit 1;
};

logger( 'LOG-6', 'HEADER FILE SAVED AT :' . " " . $dest_feed_header_dir );
logger( 'COMPLETE', 'DATA CLEANING COMPLETE @ ' . localtime->strftime("%Y-%m-%d %H:%M:%S") );

# Report the total run time on standard output ("Toal" typo corrected).
my $end = time();
printf( " Total Time elapsed : %.2f ", $end - $start );
# sub-routine to log audit messages ==>
#
# logger($level, $msg)
#   $level - short tag written in front of the message (e.g. 'LOG-1',
#            'FAILURE', 'COMPLETE')
#   $msg   - message text; one trailing newline, if present, is removed
#
# Appends a record to $log_feed_date_file, framed above and below by a rule
# of 80 asterisks, creating $log_feed_date_dir first when it does not exist.
# Dies when the log directory cannot be created.  Failure to open or close
# the log file is reported with a warning and does not stop the caller, so
# logging stays best-effort as before but is no longer silent.
# Returns nothing.
sub logger {
    my ( $level, $msg ) = @_;

    if ( !-d $log_feed_date_dir ) {
        make_path $log_feed_date_dir;

        # Re-test the directory instead of make_path's return value, which
        # is 0 when something else created the directory in the meantime.
        -d $log_feed_date_dir
            or die "FAILED TO CREATE LOGGING DIRECTORY : $log_feed_date_dir";
    }

    open( my $out, '>>', $log_feed_date_file ) or do {
        warn "CANNOT OPEN LOG FILE $log_feed_date_file: $!\n";
        return;
    };

    chomp $msg;

    # NOTE(review): the original separated the three parts with single
    # spaces, which puts the whole log on one line; newlines are assumed to
    # be the intended separators (escapes look lost in transcription).
    my $rule = "*" x 80;
    print {$out} "\n" . $rule . "\n";
    print {$out} "$level - $msg\n";
    print {$out} $rule . "\n";

    close($out)
        or warn "CANNOT CLOSE LOG FILE $log_feed_date_file: $!\n";

    return;
}
0 Comments
Be first to comment on this post.