1803 |
raphael |
1 |
#!/usr/bin/env perl
|
|
|
2 |
|
|
|
3 |
# csv2xls: Convert csv to xls
|
|
|
4 |
# (m)'11 [06 Oct 2011] Copyright H.M.Brand 2007-2013
|
|
|
5 |
|
|
|
6 |
use strict;
|
|
|
7 |
use warnings;
|
|
|
8 |
|
|
|
9 |
our $VERSION = "1.71";
|
|
|
10 |
|
|
|
11 |
sub usage
|
|
|
12 |
{
|
|
|
13 |
my $err = shift and select STDERR;
|
|
|
14 |
print <<EOU;
|
|
|
15 |
usage: csv2xls [-s <sep>] [-q <quot>] [-w <width>] [-d <dtfmt>]
|
|
|
16 |
[-o <xls>] [file.csv]
|
|
|
17 |
-s <sep> use <sep> as seperator char. Auto-detect, default = ','
|
|
|
18 |
The string "tab" is allowed.
|
|
|
19 |
-e <esc> use <sep> as seperator char. Auto-detect, default = ','
|
|
|
20 |
The string "undef" is allowed.
|
|
|
21 |
-q <quot> use <quot> as quotation char. Default = '"'
|
|
|
22 |
The string "undef" will disable quotation.
|
|
|
23 |
-w <width> use <width> as default minimum column width (4)
|
|
|
24 |
-o <xls> write output to file named <xls>, defaults
|
|
|
25 |
to input file name with .csv replaced with .xls
|
|
|
26 |
if from standard input, defaults to csv2xls.xls
|
|
|
27 |
-F allow formula's. Otherwise fields starting with
|
|
|
28 |
an equal sign are forced to string
|
|
|
29 |
-f force usage of <xls> if already exists (unlink before use)
|
|
|
30 |
-d <dtfmt> use <dtfmt> as date formats. Default = 'dd-mm-yyyy'
|
|
|
31 |
-D cols only convert dates in columns <cols>. Default is everywhere.
|
|
|
32 |
-u CSV is UTF8
|
|
|
33 |
-v [<lvl>] verbosity (default = 1)
|
|
|
34 |
EOU
|
|
|
35 |
exit $err;
|
|
|
36 |
} # usage
|
|
|
37 |
|
|
|
38 |
use Getopt::Long qw(:config bundling nopermute passthrough);
|
|
|
39 |
my $sep; # Set after reading first line in a flurry attempt to auto-detect
|
|
|
40 |
my $quo = '"';
|
|
|
41 |
my $esc = '"';
|
|
|
42 |
my $wdt = 4; # Default minimal column width
|
|
|
43 |
my $xls; # Excel out file name
|
|
|
44 |
my $frc = 0; # Force use of file
|
|
|
45 |
my $utf = 0; # Data is encoded in Unicode
|
|
|
46 |
my $frm = 0; # Allow formula's
|
|
|
47 |
my $dtf = "dd-mm-yyyy"; # Date format
|
|
|
48 |
my $opt_v = 1;
|
|
|
49 |
my $dtc;
|
|
|
50 |
|
|
|
51 |
GetOptions (
|
|
|
52 |
"help|?" => sub { usage (0); },
|
|
|
53 |
|
|
|
54 |
"c|s=s" => \$sep,
|
|
|
55 |
"q=s" => \$quo,
|
|
|
56 |
"e=s" => \$esc,
|
|
|
57 |
"w=i" => \$wdt,
|
|
|
58 |
"o|x=s" => \$xls,
|
|
|
59 |
"d=s" => \$dtf,
|
|
|
60 |
"D=s" => \$dtc,
|
|
|
61 |
"f" => \$frc,
|
|
|
62 |
"F" => \$frm,
|
|
|
63 |
"u" => \$utf,
|
|
|
64 |
"v:1" => \$opt_v,
|
|
|
65 |
) or usage (1);
|
|
|
66 |
|
|
|
67 |
my $title = @ARGV && -f $ARGV[0] ? $ARGV[0] : "csv2xls";
|
|
|
68 |
($xls ||= $title) =~ s/(?:\.csv)?$/.xls/i;
|
|
|
69 |
|
|
|
70 |
-s $xls && $frc and unlink $xls;
|
|
|
71 |
if (-s $xls) {
|
|
|
72 |
print STDERR "File '$xls' already exists. Overwrite? [y/N] > N\b";
|
|
|
73 |
scalar <STDIN> =~ m/^[yj](es|a)?$/i or exit;
|
|
|
74 |
}
|
|
|
75 |
|
|
|
76 |
# Don't split ourselves when modules do it _much_ better, and follow the standards
|
|
|
77 |
use Text::CSV_XS;
|
|
|
78 |
use Date::Calc qw( Delta_Days Days_in_Month );
|
|
|
79 |
use Spreadsheet::WriteExcel;
|
|
|
80 |
use Encode qw( from_to );
|
|
|
81 |
|
|
|
82 |
my $csv;
|
|
|
83 |
|
|
|
84 |
my $wbk = Spreadsheet::WriteExcel->new ($xls);
|
|
|
85 |
my $wks = $wbk->add_worksheet ();
|
|
|
86 |
$dtf =~ s/j/y/g;
|
|
|
87 |
my %fmt = (
|
|
|
88 |
date => $wbk->add_format (
|
|
|
89 |
num_format => $dtf,
|
|
|
90 |
align => "center",
|
|
|
91 |
),
|
|
|
92 |
|
|
|
93 |
rest => $wbk->add_format (
|
|
|
94 |
align => "left",
|
|
|
95 |
),
|
|
|
96 |
);
|
|
|
97 |
|
|
|
98 |
my ($h, $w, @w) = (0, 1); # data height, -width, and default column widths
|
|
|
99 |
my $row;
|
|
|
100 |
my $firstline;
|
|
|
101 |
unless ($sep) { # No sep char passed, try to auto-detect;
|
|
|
102 |
while (<>) {
|
|
|
103 |
m/\S/ or next; # Skip empty leading blank lines
|
|
|
104 |
$sep = # start auto-detect with quoted strings
|
|
|
105 |
m/["\d];["\d;]/ ? ";" :
|
|
|
106 |
m/["\d],["\d,]/ ? "," :
|
|
|
107 |
m/["\d]\t["\d,]/ ? "\t" :
|
|
|
108 |
# If neither, then for unquoted strings
|
|
|
109 |
m/\w;[\w;]/ ? ";" :
|
|
|
110 |
m/\w,[\w,]/ ? "," :
|
|
|
111 |
m/\w\t[\w,]/ ? "\t" :
|
|
|
112 |
";" ;
|
|
|
113 |
# Yeah I know it should be a ',' (hence Csv), but the majority
|
|
|
114 |
# of the csv files to be shown comes from fucking Micky$hit,
|
|
|
115 |
# that uses semiColon ';' instead.
|
|
|
116 |
$firstline = $_;
|
|
|
117 |
last;
|
|
|
118 |
}
|
|
|
119 |
}
|
|
|
120 |
$csv = Text::CSV_XS-> new ({
|
|
|
121 |
sep_char => $sep eq "tab" ? "\t" : $sep,
|
|
|
122 |
quote_char => $quo eq "undef" ? undef : $quo,
|
|
|
123 |
escape_char => $esc eq "undef" ? undef : $esc,
|
|
|
124 |
binary => 1,
|
|
|
125 |
keep_meta_info => 1,
|
|
|
126 |
auto_diag => 1,
|
|
|
127 |
});
|
|
|
128 |
if ($firstline) {
|
|
|
129 |
$csv->parse ($firstline) or die $csv->error_diag ();
|
|
|
130 |
$row = [ $csv->fields ];
|
|
|
131 |
}
|
|
|
132 |
if ($opt_v > 3) {
|
|
|
133 |
foreach my $k (qw( sep_char quote_char escape_char )) {
|
|
|
134 |
my $c = $csv->$k () || "undef";
|
|
|
135 |
$c =~ s/\t/\\t/g;
|
|
|
136 |
$c =~ s/\r/\\r/g;
|
|
|
137 |
$c =~ s/\n/\\n/g;
|
|
|
138 |
$c =~ s/\0/\\0/g;
|
|
|
139 |
$c =~ s/([\x00-\x1f\x7f-\xff])/sprintf"\\x{%02x}",ord$1/ge;
|
|
|
140 |
printf STDERR "%-11s = %s\n", $k, $c;
|
|
|
141 |
}
|
|
|
142 |
}
|
|
|
143 |
|
|
|
144 |
if (my $rows = $dtc) {
|
|
|
145 |
$rows =~ s/-$/-999/; # 3,6-
|
|
|
146 |
$rows =~ s/-/../g;
|
|
|
147 |
eval "\$dtc = { map { \$_ => 1 } $rows }";
|
|
|
148 |
}
|
|
|
149 |
|
|
|
150 |
while ($row && @$row or $row = $csv->getline (*ARGV)) {
|
|
|
151 |
my @row = @$row;
|
|
|
152 |
@row > $w and push @w, ($wdt) x (($w = @row) - @w);
|
|
|
153 |
foreach my $c (0 .. $#row) {
|
|
|
154 |
my $val = $row[$c] || "";
|
|
|
155 |
my $l = length $val;
|
|
|
156 |
$l > $w[$c] and $w[$c] = $l;
|
|
|
157 |
|
|
|
158 |
if ($utf and $csv->is_binary ($c)) {
|
|
|
159 |
from_to ($val, "utf-8", "ucs2");
|
|
|
160 |
$wks->write_unicode ($h, $c, $val);
|
|
|
161 |
next;
|
|
|
162 |
}
|
|
|
163 |
|
|
|
164 |
if ($csv->is_quoted ($c)) {
|
|
|
165 |
if ($utf) {
|
|
|
166 |
from_to ($val, "utf-8", "ucs2");
|
|
|
167 |
$wks->write_unicode ($h, $c, $val);
|
|
|
168 |
}
|
|
|
169 |
else {
|
|
|
170 |
$wks->write_string ($h, $c, $val);
|
|
|
171 |
}
|
|
|
172 |
next;
|
|
|
173 |
}
|
|
|
174 |
|
|
|
175 |
if (!$dtc or $dtc->{$c + 1}) {
|
|
|
176 |
my @d = (0, 0, 0); # Y, M, D
|
|
|
177 |
$val =~ m/^(\d{4})(\d{2})(\d{2})$/ and @d = ($1, $2, $3);
|
|
|
178 |
$val =~ m/^(\d{2})-(\d{2})-(\d{4})$/ and @d = ($3, $2, $1);
|
|
|
179 |
if ( $d[2] >= 1 && $d[2] <= 31 &&
|
|
|
180 |
$d[1] >= 1 && $d[1] <= 12 &&
|
|
|
181 |
$d[0] >= 1900 && $d[0] <= 2199) {
|
|
|
182 |
my $dm = Days_in_Month (@d[0,1]);
|
|
|
183 |
$d[2] < 1 and $d[2] = 1;
|
|
|
184 |
$d[2] > $dm and $d[2] = $dm;
|
|
|
185 |
my $dt = 2 + Delta_Days (1900, 1, 1, @d);
|
|
|
186 |
$wks->write ($h, $c, $dt, $fmt{date});
|
|
|
187 |
next;
|
|
|
188 |
}
|
|
|
189 |
}
|
|
|
190 |
|
|
|
191 |
if (!$frm && $val =~ m/^=/) {
|
|
|
192 |
$wks->write_string ($h, $c, $val);
|
|
|
193 |
}
|
|
|
194 |
else {
|
|
|
195 |
$wks->write ($h, $c, $val);
|
|
|
196 |
}
|
|
|
197 |
}
|
|
|
198 |
++$h % 100 or $opt_v && printf STDERR "%6d x %6d\r", $w, $h;
|
|
|
199 |
} continue { $row = undef }
|
|
|
200 |
$opt_v && printf STDERR "%6d x %6d\n", $w, $h;
|
|
|
201 |
|
|
|
202 |
$wks->set_column ($_, $_, $w[$_]) for 0 .. $#w;
|
|
|
203 |
$wbk->close ();
|