Subversion Repositories eFlore/Applications.cel

Compare Revisions

No changes between revisions

Ignore whitespace Rev 1979 → Rev 1980

/branches/v1.8-debroussailleuse/jrest/tests/bin/xls2csv-separate-sheets.pl
New file
0,0 → 1,159
#!/usr/bin/env perl
 
use strict;
use warnings;
use Modern::Perl;
 
use autodie qw{ :all };
use utf8;
use File::Spec;
 
use Smart::Comments '####';
use Getopt::Euclid;
 
use Text::CSV;
use File::Basename;
use Spreadsheet::ParseExcel;
use Text::Trim;
 
my $input_filename = $ARGV{'--infile'};
warn "Warning: The input file does not look like an Excel spreadsheet file."
unless looks_like_xls($input_filename);
my $workbook = Spreadsheet::ParseExcel::Workbook->Parse($input_filename);
 
my ($inbase,$inpath,$insuffix) = fileparse($input_filename, qr{\.[^.]*});
my $output_base = File::Spec->catfile($inpath, $inbase);
my $csv = Text::CSV->new();
 
for my $worksheet ( $workbook->worksheets() ) { #### Processing worksheets (% done)...
my ( $row_min, $row_max ) = $worksheet->row_range();
my ( $col_min, $col_max ) = $worksheet->col_range();
 
my @sheet_data;
 
for my $row ( $row_min .. $row_max ) {
for my $col ( $col_min .. $col_max ) {
my $cell = $worksheet->get_cell( $row, $col ) or undef;
next unless $cell;
$sheet_data[$row][$col] = trim($cell->value());
}
}
### @sheet_data;
 
my $ws_name = $worksheet->{Name};
$ws_name =~ s{\s}{_}xsmg; # no spaces
my $csvname = "${output_base}-${ws_name}.csv";
### $csvname
 
my $output = IO::File->new($csvname, '>') or die $!;
### $output
 
foreach my $line (@sheet_data) {
### $line
$csv->print($output, $line) or die $csv->error_diag();
print $output "\n";
}
 
close $output;
}
 
sub extract_sheet_contents {
my $sheet = $_[0];
my ($nrow, $ncol) = ($sheet->{maxrow}, $sheet->{maxcol});
return extract_rect_from_listref($sheet->{cell}, 1, $nrow, 1, $ncol);
}
 
sub extract_slice_of_listref {
my ($listref, @slice) = @_;
return [ map { $listref->[$_] } @slice ];
}
 
sub extract_rect_from_listref {
my ($listref, $row_start, $row_end, $col_start, $col_end) = @_;
return [ map {
extract_slice_of_listref($_, $col_start..$col_end)
} @{extract_slice_of_listref($listref, $row_start..$row_end)} ];
}
 
sub looks_like_xls {
state $xls_regex = qr{\.xls$};
return 1 if $_[0] =~ m{$xls_regex}i;
return;
}
 
 
__END__
 
=head1 NAME
 
spreadsheet2csv-separate-sheets.pl - Split a spreadsheet into one csv file for each worksheet
 
 
=head1 VERSION
 
Version 1.0
 
 
=head1 USAGE
 
progname [options]
 
 
=head1 REQUIRED ARGUMENTS
 
=over
 
=item --infile [=] <file> | -i <file>
 
The input spreadsheet file.
 
=for Euclid:
file.type: readable
file.default: '-'
 
=back
 
 
=head1 OPTIONS
 
=over
 
=item --version
 
=item --usage
 
=item --help
 
=item --man
 
Print the usual program information
 
=back
 
=head1 DESCRIPTION
 
This program will read a spreadsheet file and output one csv file for
each worksseht in the input file. The name of each output file will be
determined by the name of the input file and the name of the
worksheet. For example, a worksheet "Sheet1" in a file called
"reports.xls" will be output to "reports-Sheet1.csv".
 
=head1 NOTES
 
Empty rows and columns at the beginning of a worksheet will be
omitted. So if a worksheet has columns C through F filled, then the
output for that sheet will have exactly 4 columns, not 6.
 
=head1 AUTHOR
 
Ryan C. Thompson
 
=head1 BUGS
 
If you encounter a problem with this program, please email
rct+perlbug@thompsonclan.org. Bug reports and other feedback are
welcome.
 
=head1 COPYRIGHT
 
Copyright (c) 2010, Ryan C. Thompson
Property changes:
Added: svn:executable
+*
\ No newline at end of property