Subversion Repositories eFlore/Applications.cel

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
1803 raphael 1
#!/usr/bin/env perl
2
 
3
# csv2xls: Convert csv to xls
4
#	   (m)'11 [06 Oct 2011] Copyright H.M.Brand 2007-2013
5
 
6
use strict;
7
use warnings;
8
 
9
our $VERSION = "1.71";
10
 
11
sub usage
12
{
13
    my $err = shift and select STDERR;
14
    print <<EOU;
15
usage: csv2xls [-s <sep>] [-q <quot>] [-w <width>] [-d <dtfmt>]
16
               [-o <xls>] [file.csv]
17
       -s <sep>   use <sep>   as seperator char. Auto-detect, default = ','
18
                  The string "tab" is allowed.
19
       -e <esc>   use <sep>   as seperator char. Auto-detect, default = ','
20
                  The string "undef" is allowed.
21
       -q <quot>  use <quot>  as quotation char. Default = '"'
22
                  The string "undef" will disable quotation.
23
       -w <width> use <width> as default minimum column width (4)
24
       -o <xls>   write output to file named <xls>, defaults
25
                  to input file name with .csv replaced with .xls
26
                  if from standard input, defaults to csv2xls.xls
27
       -F         allow formula's. Otherwise fields starting with
28
                  an equal sign are forced to string
29
       -f         force usage of <xls> if already exists (unlink before use)
30
       -d <dtfmt> use <dtfmt> as date formats.   Default = 'dd-mm-yyyy'
31
       -D cols    only convert dates in columns <cols>. Default is everywhere.
32
       -u         CSV is UTF8
33
       -v [<lvl>] verbosity (default = 1)
34
EOU
35
    exit $err;
36
    } # usage
37
 
38
use Getopt::Long qw(:config bundling nopermute passthrough);
39
my $sep;	# Set after reading first line in a flurry attempt to auto-detect
40
my $quo = '"';
41
my $esc = '"';
42
my $wdt = 4;	# Default minimal column width
43
my $xls;	# Excel out file name
44
my $frc = 0;	# Force use of file
45
my $utf = 0;	# Data is encoded in Unicode
46
my $frm = 0;	# Allow formula's
47
my $dtf = "dd-mm-yyyy";	# Date format
48
my $opt_v = 1;
49
my $dtc;
50
 
51
GetOptions (
52
    "help|?"	=> sub { usage (0); },
53
 
54
    "c|s=s"	=> \$sep,
55
    "q=s"	=> \$quo,
56
    "e=s"	=> \$esc,
57
    "w=i"	=> \$wdt,
58
    "o|x=s"	=> \$xls,
59
    "d=s"	=> \$dtf,
60
    "D=s"	=> \$dtc,
61
    "f"		=> \$frc,
62
    "F"		=> \$frm,
63
    "u"		=> \$utf,
64
    "v:1"	=> \$opt_v,
65
    ) or usage (1);
66
 
67
my $title = @ARGV && -f $ARGV[0] ? $ARGV[0] : "csv2xls";
68
($xls ||= $title) =~ s/(?:\.csv)?$/.xls/i;
69
 
70
-s $xls && $frc and unlink $xls;
71
if (-s $xls) {
72
    print STDERR "File '$xls' already exists. Overwrite? [y/N] > N\b";
73
    scalar <STDIN> =~ m/^[yj](es|a)?$/i or exit;
74
    }
75
 
76
# Don't split ourselves when modules do it _much_ better, and follow the standards
77
use Text::CSV_XS;
78
use Date::Calc qw( Delta_Days Days_in_Month );
79
use Spreadsheet::WriteExcel;
80
use Encode qw( from_to );
81
 
82
my $csv;
83
 
84
my $wbk = Spreadsheet::WriteExcel->new ($xls);
85
my $wks = $wbk->add_worksheet ();
86
   $dtf =~ s/j/y/g;
87
my %fmt = (
88
    date	=> $wbk->add_format (
89
	num_format	=> $dtf,
90
	align		=> "center",
91
	),
92
 
93
    rest	=> $wbk->add_format (
94
	align		=> "left",
95
	),
96
    );
97
 
98
my ($h, $w, @w) = (0, 1); # data height, -width, and default column widths
99
my $row;
100
my $firstline;
101
unless ($sep) { # No sep char passed, try to auto-detect;
102
    while (<>) {
103
	m/\S/ or next;	# Skip empty leading blank lines
104
	$sep = # start auto-detect with quoted strings
105
	       m/["\d];["\d;]/  ? ";"  :
106
	       m/["\d],["\d,]/  ? ","  :
107
	       m/["\d]\t["\d,]/ ? "\t" :
108
	       # If neither, then for unquoted strings
109
	       m/\w;[\w;]/      ? ";"  :
110
	       m/\w,[\w,]/      ? ","  :
111
	       m/\w\t[\w,]/     ? "\t" :
112
				  ";"  ;
113
	    # Yeah I know it should be a ',' (hence Csv), but the majority
114
	    # of the csv files to be shown comes from fucking Micky$hit,
115
	    # that uses semiColon ';' instead.
116
	$firstline = $_;
117
	last;
118
	}
119
    }
120
$csv = Text::CSV_XS-> new ({
121
    sep_char       => $sep eq "tab"   ? "\t"  : $sep,
122
    quote_char     => $quo eq "undef" ? undef : $quo,
123
    escape_char    => $esc eq "undef" ? undef : $esc,
124
    binary         => 1,
125
    keep_meta_info => 1,
126
    auto_diag      => 1,
127
    });
128
if ($firstline) {
129
    $csv->parse ($firstline) or die $csv->error_diag ();
130
    $row = [ $csv->fields ];
131
    }
132
if ($opt_v > 3) {
133
    foreach my $k (qw( sep_char quote_char escape_char )) {
134
	my $c = $csv->$k () || "undef";
135
	$c =~ s/\t/\\t/g;
136
	$c =~ s/\r/\\r/g;
137
	$c =~ s/\n/\\n/g;
138
	$c =~ s/\0/\\0/g;
139
	$c =~ s/([\x00-\x1f\x7f-\xff])/sprintf"\\x{%02x}",ord$1/ge;
140
	printf STDERR "%-11s = %s\n", $k, $c;
141
	}
142
    }
143
 
144
if (my $rows = $dtc) {
145
    $rows =~ s/-$/-999/;			# 3,6-
146
    $rows =~ s/-/../g;
147
    eval "\$dtc = { map { \$_ => 1 } $rows }";
148
    }
149
 
150
while ($row && @$row or $row = $csv->getline (*ARGV)) {
151
    my @row = @$row;
152
    @row > $w and push @w, ($wdt) x (($w = @row) - @w);
153
    foreach my $c (0 .. $#row) {
154
	my $val = $row[$c] || "";
155
	my $l = length $val;
156
	$l > $w[$c] and $w[$c] = $l;
157
 
158
	if ($utf and $csv->is_binary ($c)) {
159
	    from_to ($val, "utf-8", "ucs2");
160
	    $wks->write_unicode ($h, $c, $val);
161
	    next;
162
	    }
163
 
164
	if ($csv->is_quoted ($c)) {
165
	    if ($utf) {
166
		from_to ($val, "utf-8", "ucs2");
167
		$wks->write_unicode ($h, $c, $val);
168
		}
169
	    else {
170
		$wks->write_string  ($h, $c, $val);
171
		}
172
	    next;
173
	    }
174
 
175
	if (!$dtc or $dtc->{$c + 1}) {
176
	    my @d = (0, 0, 0);	# Y, M, D
177
	    $val =~ m/^(\d{4})(\d{2})(\d{2})$/   and @d = ($1, $2, $3);
178
	    $val =~ m/^(\d{2})-(\d{2})-(\d{4})$/ and @d = ($3, $2, $1);
179
	    if ( $d[2] >=    1 && $d[2] <=   31 &&
180
		 $d[1] >=    1 && $d[1] <=   12 &&
181
		 $d[0] >= 1900 && $d[0] <= 2199) {
182
		my $dm = Days_in_Month (@d[0,1]);
183
		$d[2] <   1 and $d[2] = 1;
184
		$d[2] > $dm and $d[2] = $dm;
185
		my $dt = 2 + Delta_Days (1900, 1, 1, @d);
186
		$wks->write ($h, $c, $dt, $fmt{date});
187
		next;
188
		}
189
	    }
190
 
191
	if (!$frm && $val =~ m/^=/) {
192
	    $wks->write_string  ($h, $c, $val);
193
	    }
194
	else {
195
	    $wks->write ($h, $c, $val);
196
	    }
197
	}
198
    ++$h % 100 or $opt_v && printf STDERR "%6d x %6d\r", $w, $h;
199
    } continue { $row = undef }
200
$opt_v && printf STDERR "%6d x %6d\n", $w, $h;
201
 
202
$wks->set_column ($_, $_, $w[$_]) for 0 .. $#w;
203
$wbk->close ();