1803 |
raphael |
1 |
#!/usr/bin/env perl
|
|
|
2 |
|
|
|
3 |
use strict;
|
|
|
4 |
use warnings;
|
|
|
5 |
use Modern::Perl;
|
|
|
6 |
|
|
|
7 |
use autodie qw{ :all };
|
|
|
8 |
use utf8;
|
|
|
9 |
use File::Spec;
|
|
|
10 |
|
|
|
11 |
use Smart::Comments '####';
|
|
|
12 |
use Getopt::Euclid;
|
|
|
13 |
|
|
|
14 |
use Text::CSV;
|
|
|
15 |
use File::Basename;
|
|
|
16 |
use Spreadsheet::ParseExcel;
|
|
|
17 |
use Text::Trim;
|
|
|
18 |
|
|
|
19 |
my $input_filename = $ARGV{'--infile'};
|
|
|
20 |
warn "Warning: The input file does not look like an Excel spreadsheet file."
|
|
|
21 |
unless looks_like_xls($input_filename);
|
|
|
22 |
my $workbook = Spreadsheet::ParseExcel::Workbook->Parse($input_filename);
|
|
|
23 |
|
|
|
24 |
my ($inbase,$inpath,$insuffix) = fileparse($input_filename, qr{\.[^.]*});
|
|
|
25 |
my $output_base = File::Spec->catfile($inpath, $inbase);
|
|
|
26 |
my $csv = Text::CSV->new();
|
|
|
27 |
|
|
|
28 |
for my $worksheet ( $workbook->worksheets() ) { #### Processing worksheets (% done)...
|
|
|
29 |
my ( $row_min, $row_max ) = $worksheet->row_range();
|
|
|
30 |
my ( $col_min, $col_max ) = $worksheet->col_range();
|
|
|
31 |
|
|
|
32 |
my @sheet_data;
|
|
|
33 |
|
|
|
34 |
for my $row ( $row_min .. $row_max ) {
|
|
|
35 |
for my $col ( $col_min .. $col_max ) {
|
|
|
36 |
my $cell = $worksheet->get_cell( $row, $col ) or undef;
|
|
|
37 |
next unless $cell;
|
|
|
38 |
$sheet_data[$row][$col] = trim($cell->value());
|
|
|
39 |
}
|
|
|
40 |
}
|
|
|
41 |
### @sheet_data;
|
|
|
42 |
|
|
|
43 |
my $ws_name = $worksheet->{Name};
|
|
|
44 |
$ws_name =~ s{\s}{_}xsmg; # no spaces
|
|
|
45 |
my $csvname = "${output_base}-${ws_name}.csv";
|
|
|
46 |
### $csvname
|
|
|
47 |
|
|
|
48 |
my $output = IO::File->new($csvname, '>') or die $!;
|
|
|
49 |
### $output
|
|
|
50 |
|
|
|
51 |
foreach my $line (@sheet_data) {
|
|
|
52 |
### $line
|
|
|
53 |
$csv->print($output, $line) or die $csv->error_diag();
|
|
|
54 |
print $output "\n";
|
|
|
55 |
}
|
|
|
56 |
|
|
|
57 |
close $output;
|
|
|
58 |
}
|
|
|
59 |
|
|
|
60 |
sub extract_sheet_contents {
|
|
|
61 |
my $sheet = $_[0];
|
|
|
62 |
my ($nrow, $ncol) = ($sheet->{maxrow}, $sheet->{maxcol});
|
|
|
63 |
return extract_rect_from_listref($sheet->{cell}, 1, $nrow, 1, $ncol);
|
|
|
64 |
}
|
|
|
65 |
|
|
|
66 |
sub extract_slice_of_listref {
|
|
|
67 |
my ($listref, @slice) = @_;
|
|
|
68 |
return [ map { $listref->[$_] } @slice ];
|
|
|
69 |
}
|
|
|
70 |
|
|
|
71 |
sub extract_rect_from_listref {
|
|
|
72 |
my ($listref, $row_start, $row_end, $col_start, $col_end) = @_;
|
|
|
73 |
return [ map {
|
|
|
74 |
extract_slice_of_listref($_, $col_start..$col_end)
|
|
|
75 |
} @{extract_slice_of_listref($listref, $row_start..$row_end)} ];
|
|
|
76 |
}
|
|
|
77 |
|
|
|
78 |
sub looks_like_xls {
|
|
|
79 |
state $xls_regex = qr{\.xls$};
|
|
|
80 |
return 1 if $_[0] =~ m{$xls_regex}i;
|
|
|
81 |
return;
|
|
|
82 |
}
|
|
|
83 |
|
|
|
84 |
|
|
|
85 |
__END__
|
|
|
86 |
|
|
|
87 |
=head1 NAME
|
|
|
88 |
|
|
|
89 |
spreadsheet2csv-separate-sheets.pl - Split a spreadsheet into one csv file for each worksheet
|
|
|
90 |
|
|
|
91 |
|
|
|
92 |
=head1 VERSION
|
|
|
93 |
|
|
|
94 |
Version 1.0
|
|
|
95 |
|
|
|
96 |
|
|
|
97 |
=head1 USAGE
|
|
|
98 |
|
|
|
99 |
progname [options]
|
|
|
100 |
|
|
|
101 |
|
|
|
102 |
=head1 REQUIRED ARGUMENTS
|
|
|
103 |
|
|
|
104 |
=over
|
|
|
105 |
|
|
|
106 |
=item --infile [=] <file> | -i <file>
|
|
|
107 |
|
|
|
108 |
The input spreadsheet file.
|
|
|
109 |
|
|
|
110 |
=for Euclid:
|
|
|
111 |
file.type: readable
|
|
|
112 |
file.default: '-'
|
|
|
113 |
|
|
|
114 |
=back
|
|
|
115 |
|
|
|
116 |
|
|
|
117 |
=head1 OPTIONS
|
|
|
118 |
|
|
|
119 |
=over
|
|
|
120 |
|
|
|
121 |
=item --version
|
|
|
122 |
|
|
|
123 |
=item --usage
|
|
|
124 |
|
|
|
125 |
=item --help
|
|
|
126 |
|
|
|
127 |
=item --man
|
|
|
128 |
|
|
|
129 |
Print the usual program information
|
|
|
130 |
|
|
|
131 |
=back
|
|
|
132 |
|
|
|
133 |
=head1 DESCRIPTION
|
|
|
134 |
|
|
|
135 |
This program will read a spreadsheet file and output one csv file for
|
|
|
136 |
each worksseht in the input file. The name of each output file will be
|
|
|
137 |
determined by the name of the input file and the name of the
|
|
|
138 |
worksheet. For example, a worksheet "Sheet1" in a file called
|
|
|
139 |
"reports.xls" will be output to "reports-Sheet1.csv".
|
|
|
140 |
|
|
|
141 |
=head1 NOTES
|
|
|
142 |
|
|
|
143 |
Empty rows and columns at the beginning of a worksheet will be
|
|
|
144 |
omitted. So if a worksheet has columns C through F filled, then the
|
|
|
145 |
output for that sheet will have exactly 4 columns, not 6.
|
|
|
146 |
|
|
|
147 |
=head1 AUTHOR
|
|
|
148 |
|
|
|
149 |
Ryan C. Thompson
|
|
|
150 |
|
|
|
151 |
=head1 BUGS
|
|
|
152 |
|
|
|
153 |
If you encounter a problem with this program, please email
|
|
|
154 |
rct+perlbug@thompsonclan.org. Bug reports and other feedback are
|
|
|
155 |
welcome.
|
|
|
156 |
|
|
|
157 |
=head1 COPYRIGHT
|
|
|
158 |
|
|
|
159 |
Copyright (c) 2010, Ryan C. Thompson
|