WebSVN – eFlore/Applications.cel – Path Comparison – / – /branches/v1.8-debroussailleuse/jrest/tests/bin/xls2csv-separate-sheets.pl Rev 1979 and /branches/v1.8-debroussailleuse/jrest/tests/bin/xls2csv-separate-sheets.pl Rev 1980

Ignore whitespace Rev 1979 → Rev 1980

 /branches/v1.8-debroussailleuse/jrest/tests/bin/xls2csv-separate-sheets.pl
 New file
 ,0 → 1,159
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Modern::Perl;
+use autodie qw{ :all };
+use utf8;
+use File::Spec;
+use Smart::Comments '####';
+use Getopt::Euclid;
+use Text::CSV;
+use File::Basename;
+use Spreadsheet::ParseExcel;
+use Text::Trim;
+my $input_filename = $ARGV{'--infile'};
+warn "Warning: The input file does not look like an Excel spreadsheet file."
+    unless looks_like_xls($input_filename);
+my $workbook = Spreadsheet::ParseExcel::Workbook->Parse($input_filename);
+my ($inbase,$inpath,$insuffix) = fileparse($input_filename, qr{\.[^.]*});
+my $output_base = File::Spec->catfile($inpath, $inbase);
+my $csv = Text::CSV->new();
+for my $worksheet ( $workbook->worksheets() ) { #### Processing worksheets (% done)...
+    my ( $row_min, $row_max ) = $worksheet->row_range();
+    my ( $col_min, $col_max ) = $worksheet->col_range();
+    my @sheet_data;
+    for my $row ( $row_min .. $row_max ) {
+        for my $col ( $col_min .. $col_max ) {
+            my $cell = $worksheet->get_cell( $row, $col ) or undef;
+            next unless $cell;
+            $sheet_data[$row][$col] = trim($cell->value());
+        }
+    }
+    ### @sheet_data;
+    my $ws_name = $worksheet->{Name};
+    $ws_name =~ s{\s}{_}xsmg;   # no spaces
+    my $csvname = "${output_base}-${ws_name}.csv";
+    ### $csvname
+    my $output = IO::File->new($csvname, '>') or die $!;
+    ### $output
+    foreach my $line (@sheet_data) {
+        ### $line
+        $csv->print($output, $line) or die $csv->error_diag();
+        print $output "\n";
+    }
+    close $output;
+}
+sub extract_sheet_contents {
+    my $sheet = $_[0];
+    my ($nrow, $ncol) = ($sheet->{maxrow}, $sheet->{maxcol});
+    return extract_rect_from_listref($sheet->{cell}, 1, $nrow, 1, $ncol);
+}
+sub extract_slice_of_listref {
+    my ($listref, @slice) = @_;
+    return [ map { $listref->[$_] } @slice ];
+}
+sub extract_rect_from_listref {
+    my ($listref, $row_start, $row_end, $col_start, $col_end) = @_;
+    return [ map {
+        extract_slice_of_listref($_, $col_start..$col_end)
+    } @{extract_slice_of_listref($listref, $row_start..$row_end)} ];
+}
+sub looks_like_xls {
+    state $xls_regex = qr{\.xls$};
+    return 1 if $_[0] =~ m{$xls_regex}i;
+    return;
+}
+__END__
+=head1 NAME
+spreadsheet2csv-separate-sheets.pl - Split a spreadsheet into one csv file for each worksheet
+=head1 VERSION
+Version 1.0
+=head1 USAGE
+    progname [options]
+=head1 REQUIRED ARGUMENTS
+=over
+=item --infile [=] <file> | -i <file>
+The input spreadsheet file.
+=for Euclid:
+    file.type: readable
+    file.default: '-'
+=back
+=head1 OPTIONS
+=over
+=item --version
+=item --usage
+=item --help
+=item --man
+Print the usual program information
+=back
+=head1 DESCRIPTION
+This program will read a spreadsheet file and output one csv file for
+each worksseht in the input file. The name of each output file will be
+determined by the name of the input file and the name of the
+worksheet. For example, a worksheet "Sheet1" in a file called
+"reports.xls" will be output to "reports-Sheet1.csv".
+=head1 NOTES
+Empty rows and columns at the beginning of a worksheet will be
+omitted. So if a worksheet has columns C through F filled, then the
+output for that sheet will have exactly 4 columns, not 6.
+=head1 AUTHOR
+Ryan C. Thompson
+=head1 BUGS
+If you encounter a problem with this program, please email
+rct+perlbug@thompsonclan.org. Bug reports and other feedback are
+welcome.
+=head1 COPYRIGHT
+Copyright (c) 2010, Ryan C. Thompson
 Property changes:
 Added: svn:executable
 +*
 \ No newline at end of property

Subversion Repositories eFlore/Applications.cel

Compare Revisions

Ignore whitespace Rev 1979 → Rev 1980