| 1803 | raphael | 1 | #!/usr/bin/env perl
 | 
        
           |  |  | 2 |   | 
        
           |  |  | 3 | use strict;
 | 
        
           |  |  | 4 | use warnings;
 | 
        
           |  |  | 5 | use Modern::Perl;
 | 
        
           |  |  | 6 |   | 
        
           |  |  | 7 | use autodie qw{ :all };
 | 
        
           |  |  | 8 | use utf8;
 | 
        
           |  |  | 9 | use File::Spec;
 | 
        
           |  |  | 10 |   | 
        
           |  |  | 11 | use Smart::Comments '####';
 | 
        
           |  |  | 12 | use Getopt::Euclid;
 | 
        
           |  |  | 13 |   | 
        
           |  |  | 14 | use Text::CSV;
 | 
        
           |  |  | 15 | use File::Basename;
 | 
        
           |  |  | 16 | use Spreadsheet::ParseExcel;
 | 
        
           |  |  | 17 | use Text::Trim;
 | 
        
           |  |  | 18 |   | 
        
           |  |  | 19 | my $input_filename = $ARGV{'--infile'};
 | 
        
           |  |  | 20 | warn "Warning: The input file does not look like an Excel spreadsheet file."
 | 
        
           |  |  | 21 |     unless looks_like_xls($input_filename);
 | 
        
           |  |  | 22 | my $workbook = Spreadsheet::ParseExcel::Workbook->Parse($input_filename);
 | 
        
           |  |  | 23 |   | 
        
           |  |  | 24 | my ($inbase,$inpath,$insuffix) = fileparse($input_filename, qr{\.[^.]*});
 | 
        
           |  |  | 25 | my $output_base = File::Spec->catfile($inpath, $inbase);
 | 
        
           |  |  | 26 | my $csv = Text::CSV->new();
 | 
        
           |  |  | 27 |   | 
        
           |  |  | 28 | for my $worksheet ( $workbook->worksheets() ) { #### Processing worksheets (% done)...
 | 
        
           |  |  | 29 |     my ( $row_min, $row_max ) = $worksheet->row_range();
 | 
        
           |  |  | 30 |     my ( $col_min, $col_max ) = $worksheet->col_range();
 | 
        
           |  |  | 31 |   | 
        
           |  |  | 32 |     my @sheet_data;
 | 
        
           |  |  | 33 |   | 
        
           |  |  | 34 |     for my $row ( $row_min .. $row_max ) {
 | 
        
           |  |  | 35 |         for my $col ( $col_min .. $col_max ) {
 | 
        
           |  |  | 36 |             my $cell = $worksheet->get_cell( $row, $col ) or undef;
 | 
        
           |  |  | 37 |             next unless $cell;
 | 
        
           |  |  | 38 |             $sheet_data[$row][$col] = trim($cell->value());
 | 
        
           |  |  | 39 |         }
 | 
        
           |  |  | 40 |     }
 | 
        
           |  |  | 41 |     ### @sheet_data;
 | 
        
           |  |  | 42 |   | 
        
           |  |  | 43 |     my $ws_name = $worksheet->{Name};
 | 
        
           |  |  | 44 |     $ws_name =~ s{\s}{_}xsmg;   # no spaces
 | 
        
           |  |  | 45 |     my $csvname = "${output_base}-${ws_name}.csv";
 | 
        
           |  |  | 46 |     ### $csvname
 | 
        
           |  |  | 47 |   | 
        
           |  |  | 48 |     my $output = IO::File->new($csvname, '>') or die $!;
 | 
        
           |  |  | 49 |     ### $output
 | 
        
           |  |  | 50 |   | 
        
           |  |  | 51 |     foreach my $line (@sheet_data) {
 | 
        
           |  |  | 52 |         ### $line
 | 
        
           |  |  | 53 |         $csv->print($output, $line) or die $csv->error_diag();
 | 
        
           |  |  | 54 |         print $output "\n";
 | 
        
           |  |  | 55 |     }
 | 
        
           |  |  | 56 |   | 
        
           |  |  | 57 |     close $output;
 | 
        
           |  |  | 58 | }
 | 
        
           |  |  | 59 |   | 
        
           |  |  | 60 | sub extract_sheet_contents {
 | 
        
           |  |  | 61 |     my $sheet = $_[0];
 | 
        
           |  |  | 62 |     my ($nrow, $ncol) = ($sheet->{maxrow}, $sheet->{maxcol});
 | 
        
           |  |  | 63 |     return extract_rect_from_listref($sheet->{cell}, 1, $nrow, 1, $ncol);
 | 
        
           |  |  | 64 | }
 | 
        
           |  |  | 65 |   | 
        
           |  |  | 66 | sub extract_slice_of_listref {
 | 
        
           |  |  | 67 |     my ($listref, @slice) = @_;
 | 
        
           |  |  | 68 |     return [ map { $listref->[$_] } @slice ];
 | 
        
           |  |  | 69 | }
 | 
        
           |  |  | 70 |   | 
        
           |  |  | 71 | sub extract_rect_from_listref {
 | 
        
           |  |  | 72 |     my ($listref, $row_start, $row_end, $col_start, $col_end) = @_;
 | 
        
           |  |  | 73 |     return [ map {
 | 
        
           |  |  | 74 |         extract_slice_of_listref($_, $col_start..$col_end)
 | 
        
           |  |  | 75 |     } @{extract_slice_of_listref($listref, $row_start..$row_end)} ];
 | 
        
           |  |  | 76 | }
 | 
        
           |  |  | 77 |   | 
        
           |  |  | 78 | sub looks_like_xls {
 | 
        
           |  |  | 79 |     state $xls_regex = qr{\.xls$};
 | 
        
           |  |  | 80 |     return 1 if $_[0] =~ m{$xls_regex}i;
 | 
        
           |  |  | 81 |     return;
 | 
        
           |  |  | 82 | }
 | 
        
           |  |  | 83 |   | 
        
           |  |  | 84 |   | 
        
           |  |  | 85 | __END__
 | 
        
           |  |  | 86 |   | 
        
           |  |  | 87 | =head1 NAME
 | 
        
           |  |  | 88 |   | 
        
           |  |  | 89 | spreadsheet2csv-separate-sheets.pl - Split a spreadsheet into one csv file for each worksheet
 | 
        
           |  |  | 90 |   | 
        
           |  |  | 91 |   | 
        
           |  |  | 92 | =head1 VERSION
 | 
        
           |  |  | 93 |   | 
        
           |  |  | 94 | Version 1.0
 | 
        
           |  |  | 95 |   | 
        
           |  |  | 96 |   | 
        
           |  |  | 97 | =head1 USAGE
 | 
        
           |  |  | 98 |   | 
        
           |  |  | 99 |     progname [options]
 | 
        
           |  |  | 100 |   | 
        
           |  |  | 101 |   | 
        
           |  |  | 102 | =head1 REQUIRED ARGUMENTS
 | 
        
           |  |  | 103 |   | 
        
           |  |  | 104 | =over
 | 
        
           |  |  | 105 |   | 
        
           |  |  | 106 | =item --infile [=] <file> | -i <file>
 | 
        
           |  |  | 107 |   | 
        
           |  |  | 108 | The input spreadsheet file.
 | 
        
           |  |  | 109 |   | 
        
           |  |  | 110 | =for Euclid:
 | 
        
           |  |  | 111 |     file.type: readable
 | 
        
           |  |  | 112 |     file.default: '-'
 | 
        
           |  |  | 113 |   | 
        
           |  |  | 114 | =back
 | 
        
           |  |  | 115 |   | 
        
           |  |  | 116 |   | 
        
           |  |  | 117 | =head1 OPTIONS
 | 
        
           |  |  | 118 |   | 
        
           |  |  | 119 | =over
 | 
        
           |  |  | 120 |   | 
        
           |  |  | 121 | =item --version
 | 
        
           |  |  | 122 |   | 
        
           |  |  | 123 | =item --usage
 | 
        
           |  |  | 124 |   | 
        
           |  |  | 125 | =item --help
 | 
        
           |  |  | 126 |   | 
        
           |  |  | 127 | =item --man
 | 
        
           |  |  | 128 |   | 
        
           |  |  | 129 | Print the usual program information
 | 
        
           |  |  | 130 |   | 
        
           |  |  | 131 | =back
 | 
        
           |  |  | 132 |   | 
        
           |  |  | 133 | =head1 DESCRIPTION
 | 
        
           |  |  | 134 |   | 
        
           |  |  | 135 | This program will read a spreadsheet file and output one csv file for
 | 
        
           |  |  | 136 | each worksseht in the input file. The name of each output file will be
 | 
        
           |  |  | 137 | determined by the name of the input file and the name of the
 | 
        
           |  |  | 138 | worksheet. For example, a worksheet "Sheet1" in a file called
 | 
        
           |  |  | 139 | "reports.xls" will be output to "reports-Sheet1.csv".
 | 
        
           |  |  | 140 |   | 
        
           |  |  | 141 | =head1 NOTES
 | 
        
           |  |  | 142 |   | 
        
           |  |  | 143 | Empty rows and columns at the beginning of a worksheet will be
 | 
        
           |  |  | 144 | omitted. So if a worksheet has columns C through F filled, then the
 | 
        
           |  |  | 145 | output for that sheet will have exactly 4 columns, not 6.
 | 
        
           |  |  | 146 |   | 
        
           |  |  | 147 | =head1 AUTHOR
 | 
        
           |  |  | 148 |   | 
        
           |  |  | 149 | Ryan C. Thompson
 | 
        
           |  |  | 150 |   | 
        
           |  |  | 151 | =head1 BUGS
 | 
        
           |  |  | 152 |   | 
        
           |  |  | 153 | If you encounter a problem with this program, please email
 | 
        
           |  |  | 154 | rct+perlbug@thompsonclan.org. Bug reports and other feedback are
 | 
        
           |  |  | 155 | welcome.
 | 
        
           |  |  | 156 |   | 
        
           |  |  | 157 | =head1 COPYRIGHT
 | 
        
           |  |  | 158 |   | 
        
           |  |  | 159 | Copyright (c) 2010, Ryan C. Thompson
 |