River IQ

How to remove new lines within double quotes

  Ashish Kumar      other February 14, 2020
Image

#!/usr/bin/perl

use warnings;

use strict;

use Path::Tiny; 

use Text::CSV;

use Time::Piece;

use File::Path qw( make_path );

use diagnostics;

use Try::Tiny;

#use File::NCopy;

use File::Copy::Recursive qw(fcopy rcopy dircopy fmove rmove dirmove);

use Time::HiRes qw( time );


# High-resolution timestamp taken at start-up; used for the total elapsed
# time printed at the end of the run.
my $start = time();

# Processing date (today) formatted as YYYYMMDD.
my $date = localtime->strftime('%Y%m%d');

# Feed date: the fourth command-line argument when supplied, otherwise the
# processing date.
my $feed_date = exists $ARGV[3] ? $ARGV[3] : $date;


# Positional command-line arguments:
#   $ARGV[0]  root directory of the source feed
#   $ARGV[1]  root directory of the cleansed (destination) feed
#   $ARGV[2]  root directory for log files
#   $ARGV[3]  optional feed date (handled where $feed_date is set)
#
# Stop early with a usage message when a required argument is missing, so
# the failure names the problem instead of surfacing from inside path().
if (@ARGV < 3) {
    die "Usage: $0 <source_dir> <dest_dir> <log_dir> [feed_date]\n";
}

# Source data directory:
#   <source root>/<processing date>/data/<feed date>
my $source_feed_dir       = $ARGV[0];
my $source_feed_dir_path  = path($source_feed_dir);
my $source_feed_date_dir  = $source_feed_dir_path->child($date);
my $source_feed_date_dir2 = $source_feed_date_dir->child("data");
my $source_feed_date_dir3 = $source_feed_date_dir2->child($feed_date);

# Source header directory:
#   <source root>/<processing date>/header/<feed date>
my $source_feed_header_dir1 = $source_feed_date_dir->child("header");
my $source_feed_header_dir2 = $source_feed_header_dir1->child($feed_date);

# Destination data directory (files keep their original names):
#   <destination root>/<processing date>/data/<feed date>
my $dest_feed_dir        = $ARGV[1];
my $dest_feed_dir_path   = path($dest_feed_dir);
my $dest_feed_date_dir_1 = $dest_feed_dir_path->child($date);
my $dest_feed_date_dir_2 = $dest_feed_date_dir_1->child("data");
my $dest_feed_date_dir   = $dest_feed_date_dir_2->child($feed_date);

# Destination header directory:
#   <destination root>/<processing date>/header/<feed date>
my $dest_feed_header_dir_1 = $dest_feed_date_dir_1->child("header");
my $dest_feed_header_dir   = $dest_feed_header_dir_1->child($feed_date);

# Log file:
#   <log root>/<processing date>/Cleaning_Log_<YYYY_MM_DD_HH_MM_SS>.log
my $log_feed_dir       = $ARGV[2];
my $log_feed_dir_path  = path($log_feed_dir);
my $log_feed_date_dir  = $log_feed_dir_path->child($date);
my $log_feed_date_file = $log_feed_date_dir->child(
    "Cleaning_Log_" . localtime->strftime("%Y_%m_%d_%H_%M_%S") . ".log"
);



# Write the opening entry of the audit log, stamped with the current time.
my $current_time = localtime->strftime("%Y-%m-%d %H:%M:%S");
logger('LOG-1', 'STARTING CLEANING APPLICATION @ ' . $current_time);

# Nothing to do when the source data path for this feed date is absent:
# log the failure and leave with a non-zero exit status.
unless (-e $source_feed_date_dir3) {
    logger('FAILURE', 'SOURCE DIRECTORY DOES NOT EXISTS: ' . " " . $source_feed_date_dir3);
    exit 1;
}


# Collect the names of all entries in the source data directory into @file.
#
# opendir reports failure through its return value instead of dying, so the
# result is checked and converted into an exception here. Without that the
# catch block could never run and readdir would be called on a handle that
# was never opened.
my @file;

try {
    logger('LOG-2','OPENING SOURCE DIRECTORY :'." ".$source_feed_date_dir3);

    opendir(my $dir_handle, $source_feed_date_dir3)
        or die "opendir $source_feed_date_dir3: $!\n";

    # Drop the self and parent entries; everything else is processed.
    @file = grep { $_ ne '.' && $_ ne '..' } readdir $dir_handle;

    closedir $dir_handle;
} catch {
    # Surface the underlying reason on STDERR; the audit log keeps its
    # original message.
    warn $_;
    logger('FAILURE','FAILED TO OPEN SOURCE DIRECTORY : '." ".$source_feed_date_dir3);
    exit 1;
};


# Clean every entry found in the source data directory and write the result,
# under the same name, into the destination data directory.
#
# For each file:
#   * rows are read with Text::CSV using ';' as separator and binary => 1,
#     so a line break inside a double-quoted field is part of the field
#     rather than the end of the row;
#   * line breaks and non-ASCII characters are removed from every field;
#   * the row is written back with every field quoted, one row per line,
#     through a UTF-8 output layer.
# Any failure is logged and ends the run with exit status 1.
foreach my $file (@file) {
    my $source_feed_date_file = $source_feed_date_dir3->child($file);
    my $dest_feed_date_file   = $dest_feed_date_dir->child($file);

    # Per-file start time for the elapsed-time report below.
    my $s1 = time();

    try {
        if ( !-d $dest_feed_date_dir ) {
            # Pass a plain string so File::Path never has to interpret the
            # Path::Tiny object.
            make_path("$dest_feed_date_dir");
        }
    } catch {
        logger('FAILURE','FAILED TO CREATE DESTINATION DIRECTORY : '." ".$dest_feed_date_dir);
        exit 1;
    };

    my $csv_in = Text::CSV->new({
        binary    => 1,
        sep_char  => ";",
        auto_diag => 1,
    }) or die "CANNOT USE CSV: " . Text::CSV->error_diag;

    my $csv_out = Text::CSV->new({
        binary       => 1,
        # One output row per line. The published listing shows a single
        # space here, which would join every row onto one line.
        eol          => "\n",
        sep_char     => ";",
        always_quote => 1,
        auto_diag    => 1,
    }) or die "CANNOT USE CSV: " . Text::CSV->error_diag;

    logger('LOG-3','PROCESSING FILE :'." ".$source_feed_date_file);

    try {
        # Inbound file reader with no encoding layer (raw bytes).
        open(my $CSV_FILE, '<', $source_feed_date_file)
            or die "cannot open $source_feed_date_file for reading: $!\n";

        # Outbound file writer with UTF-8 encoding.
        open(my $fh, '>:encoding(UTF-8)', $dest_feed_date_file)
            or die "cannot open $dest_feed_date_file for writing: $!\n";

        my $rx = 0;

        while (my $row = $csv_in->getline($CSV_FILE)) {
            # NOTE(review): the pattern in the published listing lost its
            # escape sequences and, as printed, deletes every character
            # except '-'. Restored here as "remove CR, LF and any non-ASCII
            # character" -- confirm against the intended cleaning rules.
            for my $field (@$row) {
                next unless defined $field;
                $field =~ s/[\r\n]|[^\x00-\x7F]//g;
            }

            $csv_out->print($fh, $row)
                or die "cannot write to $dest_feed_date_file: $!\n";

            # Progress marker every 1000 rows.
            if ( $rx % 1000 == 0 ) {
                print "$rx ";
            }

            $rx += 1;
        }

        # getline also returns undef on a parse error; only a true eof means
        # the whole file was consumed. Treat anything else as a failure so a
        # truncated output file is never reported as cleaned.
        unless ($csv_in->eof) {
            die "CSV parse error in $source_feed_date_file: "
                . $csv_in->error_diag . "\n";
        }

        close $CSV_FILE;

        # Buffered write errors only show up at close time.
        close $fh
            or die "cannot close $dest_feed_date_file: $!\n";

        print "Total Number Of Records processed:";
        print $rx;

        my $e1 = time();
        printf(" Time elapsed for %s : %.2f ", $file, $e1 - $s1);
    } catch {
        my $e = shift;
        print $e;
        logger('LOG-4','PROCESSING FAILED FOR FILE :'." ".$source_feed_date_file);
        exit 1;
    };

    logger('LOG-4','PROCESSING ENDED FOR FILE :'." ".$source_feed_date_file);
}


logger('LOG-5','CLEANED DATA FILE SAVED AT :'." ".$dest_feed_date_dir);

# Copy the header directory for this feed date to the destination.
#
# dircopy signals failure by returning a false value (with $! set), so the
# result is checked and turned into an exception; otherwise the catch block
# would never run and a failed copy would still be logged as saved.
try {
    dircopy($source_feed_header_dir2, $dest_feed_header_dir)
        or die "$!\n";
} catch {
    # $_ carries the reason captured at the point of failure; $! may have
    # been overwritten by the time this handler runs.
    chomp(my $reason = $_);
    logger("LOG6","COULD NOT PERFORM RCOPY OF $source_feed_header_dir2 TO $dest_feed_header_dir: $reason");
    exit 1;
};

logger('LOG-6','HEADER FILE SAVED AT :'." ".$dest_feed_header_dir);

logger('COMPLETE','DATA CLEANING COMPLETE @ '.localtime->strftime(("%Y-%m-%d %H:%M:%S")));

# Report the wall-clock duration of the whole run.
my $end = time();

printf(" Toal Time elapsed : %.2f ", $end - $start);



# sub-routine to log audit messages ==>

# Append one audit entry to the log file of the current run.
#
# Parameters:
#   $level - short tag for the entry (e.g. 'LOG-1', 'FAILURE', 'COMPLETE')
#   $msg   - message text; one trailing newline, if present, is removed
#
# The log directory is created on first use and the script dies if it still
# does not exist afterwards. A log file that cannot be opened or closed does
# not stop the run: a warning goes to STDERR instead of the entry being
# dropped without notice.
sub logger {
    my ($level, $msg) = @_;

    if ( !-d $log_feed_date_dir ) {
        make_path("$log_feed_date_dir");

        # Test the outcome rather than make_path's return value, so a
        # directory created by someone else in the meantime is not an error.
        die "FAILED TO CREATE LOGGING DIRECTORY : $log_feed_date_dir"
            unless -d $log_feed_date_dir;
    }

    my $out;
    unless (open $out, '>>', $log_feed_date_file) {
        warn "cannot open log file $log_feed_date_file: $!\n";
        return;
    }

    chomp $msg;

    # Each entry is framed by two rules of 80 asterisks, one item per line.
    # The published listing shows spaces where the line breaks belong, which
    # would put the whole log on a single line.
    my $rule = "*" x 80;
    print {$out} "\n" . $rule . "\n";
    print {$out} "$level - $msg\n";
    print {$out} $rule . "\n";

    close $out
        or warn "cannot close log file $log_feed_date_file: $!\n";

    return;
}


0 Comments

Be first to comment on this post.