343 |
jpm |
1 |
#!/usr/bin/perl
|
|
|
2 |
|
|
|
3 |
# $Date: 2005/10/19 10:14:00 $
|
|
|
4 |
|
|
|
5 |
# Excel to SQL statements translator.
|
|
|
6 |
# Needs the Spreadsheet::ParseExcel module from CPAN
|
|
|
7 |
#
|
|
|
8 |
# Example usage:
|
|
|
9 |
#
|
|
|
10 |
# xls2sql.pl help
|
|
|
11 |
# xls2sql.pl file=6960_TS_Bressanone_Brixen.xls \
|
|
|
12 |
# coldefs="year INTEGER,MEAN1 DOUBLE PRECISION,MEAN2 DOUBLE PRECISION,MEAN3 DOUBLE PRECISION"
|
|
|
13 |
#
|
|
|
14 |
# Released under GPLv2.0
|
|
|
15 |
# by Daniel Calvelo Aros (dca@users.sf.net)
|
|
|
16 |
#
|
|
|
17 |
# few modifications by MN (why not reading 'Learning Perl' O'Reilly book)
|
|
|
18 |
|
|
|
19 |
#use strict;
|
|
|
20 |
use English;
|
|
|
21 |
use Spreadsheet::ParseExcel;
|
|
|
22 |
# gparser-like options:
|
|
|
23 |
# option label => [ref to var to hold opt value, "description", default value or undef for no default]
|
|
|
24 |
%gopts = ( file => [\$file, "Name of input excel spreadsheet file", undef],
|
|
|
25 |
skip => [\$skip, "Number of rows to skip at the beginning of sheet", 0],
|
|
|
26 |
table => [\$tablename,"Name of output table", "mytable"],
|
|
|
27 |
rows => [\$nrows, "Number of rows to extract", undef],
|
|
|
28 |
sheet => [\$sheetn, "Sheet number to convert from the workbook, counting from 1", 1],
|
|
|
29 |
coldefs=> [\$coldefs, "Column definitions as in SQL", "auto"],
|
|
|
30 |
nodata => [\$nodata, "No data character(s) used in the Excel table", ""],
|
|
|
31 |
help => [\$help, "Help, of course", undef],
|
|
|
32 |
debug => [\$DEBUG, "Debugging flag, for developers only", 0]);
|
|
|
33 |
#function defined below:
|
|
|
34 |
&parse_opts();
|
|
|
35 |
|
|
|
36 |
#-- Open and look for obvious errors
|
|
|
37 |
my $wkbk =
|
|
|
38 |
Spreadsheet::ParseExcel::Workbook->Parse($file);
|
|
|
39 |
if( !defined $wkbk->{Worksheet}) {die "Error:couldn't parse file $file\n"}
|
|
|
40 |
my($iR, $iC, $sheet, $ncols, $roffset, $rsize);
|
|
|
41 |
$sheet = @{$wkbk->{Worksheet}}[--$sheetn]; #-- Numbering starts at 1 for the user
|
|
|
42 |
$ncols = $sheet->{MaxCol} or die "Error:the specified sheet $sheetn does not contain data\n";
|
|
|
43 |
$ncols -= $sheet->{MinCol} if defined $sheet->{MinCol} ;
|
|
|
44 |
$roffset = $sheet->{MinRow}-1;
|
|
|
45 |
$rsize = $sheet->{MaxRow} - $sheet->{MinRow};
|
|
|
46 |
die "Error:the specified worksheet seems to contain only one line\n" if $rsize == 0;
|
|
|
47 |
$roffset += $skip;
|
|
|
48 |
$lastrow = ( defined $nrows
|
|
|
49 |
? $nrows + $roffset -1
|
|
|
50 |
: $sheet->{MaxRow} );
|
|
|
51 |
die "Invalid skip option: the sheet only has $rsize rows" if $roffset >= $rsize - 1;
|
|
|
52 |
my (@types, @sqltypes, @firstrow, @titlerow);
|
|
|
53 |
|
|
|
54 |
if($coldefs ne "auto"){
|
|
|
55 |
#-- We have user-defined column definitions
|
|
|
56 |
#-- Check them
|
|
|
57 |
$coldefs =~ s/^\s*//;
|
|
|
58 |
$coldefs =~ s/\s*$//;
|
|
|
59 |
@defs = split ",", $coldefs;
|
|
|
60 |
foreach $i (0..$#defs){
|
|
|
61 |
($colname, $typedef) = split /\s+/,$defs[$i],2;
|
|
|
62 |
die "Column specification $i: can't parse SQL type definition '$typedef' (should be INTEGER, DOUBLE PRECISION, CHAR).\n" if $typedef !~ /INTEGER|DOUBLE PRECISION|CHAR/i;
|
|
|
63 |
die "Column name '$colname' for column $i contains spurious characters (no spaces permitted in list).\n" if $colname !~ /[a-zA-Z][a-zA-Z_0-9]*/;
|
|
|
64 |
push @sqltypes, $typedef;
|
|
|
65 |
push @titles, $colname;
|
|
|
66 |
}
|
|
|
67 |
}else{
|
|
|
68 |
#-- Inspect file for types:
|
|
|
69 |
#-- First estimate initial types from the first row of data
|
|
|
70 |
@firstrow = @{$sheet->{Cells}[$roffset+1]};
|
|
|
71 |
@types = map { $_->{Type}} @firstrow;
|
|
|
72 |
%cvt = (Text=>'CHAR',Numeric=>'INTEGER',Date=>'DOUBLE');
|
|
|
73 |
@sqltypes = map { $cvt{$_} } @types;
|
|
|
74 |
@lens = map { 0 } @types;
|
|
|
75 |
print STDERR "\nTypes:", join ";", @types if $DEBUG;
|
|
|
76 |
print STDERR "\nInitial sqltypes:", join ";", @sqltypes if $DEBUG;
|
|
|
77 |
#-- Then adjust widths and numeric type from the data
|
|
|
78 |
for(my $iR = $roffset ; $iR <= $lastrow ; $iR++) {
|
|
|
79 |
for(my $iC = $sheet->{MinCol} ;$iC <= $sheet->{MaxCol} ; $iC++) {
|
|
|
80 |
$cell = $sheet->{Cells}[$iR][$iC];
|
|
|
81 |
next if !defined $cell;
|
|
|
82 |
$cellvalue = $cell->Value;
|
|
|
83 |
if($types[$iC] eq 'Text'){
|
|
|
84 |
$thislength = length( $cellvalue );
|
|
|
85 |
$lens[$iC] = $thislength if $thislength > $lens[$iC];
|
|
|
86 |
}else{
|
|
|
87 |
if( $cellvalue =~ /[\.,]/ ){
|
|
|
88 |
$sqltypes[$iC] = 'DOUBLE PRECISION';
|
|
|
89 |
}
|
|
|
90 |
if( $cellvalue =~ /[a-df-z]/ ){
|
|
|
91 |
$sqltypes[$iC] = 'CHAR'; $lens[$iC] = length( $cellvalue);
|
|
|
92 |
}
|
|
|
93 |
}
|
|
|
94 |
}
|
|
|
95 |
}
|
|
|
96 |
foreach $i (0..$#sqltypes){
|
|
|
97 |
if( $sqltypes[$i] eq 'CHAR' ){
|
|
|
98 |
$sqltypes[$i] .= "($lens[$i])";
|
|
|
99 |
}
|
|
|
100 |
}
|
|
|
101 |
print STDERR "\nAdjusted sqltypes:", join ";", @sqltypes if $DEBUG;
|
|
|
102 |
|
|
|
103 |
#-- Generate field names from the title row
|
|
|
104 |
@titlerow = @{$sheet->{Cells}[$roffset]};
|
|
|
105 |
print STDERR "\nTitlerow:", join ";", map { defined $_ ? $_->Value : "" } @titlerow if $DEBUG;
|
|
|
106 |
$varname = "V000";
|
|
|
107 |
@titles = map {
|
|
|
108 |
/^[^a-zA-Z]/ ? $varname++ : $_
|
|
|
109 |
} map {
|
|
|
110 |
if( defined $_ && length > 0 ) {$_=$_->Value;y/a-z/A-Z/;s/[^a-zA-Z_0-9]/_/g}
|
|
|
111 |
else { $_=$varname++ }
|
|
|
112 |
$_;
|
|
|
113 |
} @titlerow;
|
|
|
114 |
map { $istitle{$_}++ } @titles;
|
|
|
115 |
foreach $i (reverse 0..$#titles){
|
|
|
116 |
if( $istitle{$titles[$i]} > 1){
|
|
|
117 |
$titles[$i] .= --$istitle{$titles[$i]};
|
|
|
118 |
}
|
|
|
119 |
}
|
|
|
120 |
while( $#titles < $ncols ){ #Missing titles, according to the size of the sheet
|
|
|
121 |
push @titles, $varname++;
|
|
|
122 |
push @sqltypes, "CHAR(32)";
|
|
|
123 |
}
|
|
|
124 |
print STDERR "\nTitles:" ,join ";", @titles if $DEBUG;
|
|
|
125 |
print STDERR "\n" if $DEBUG;
|
|
|
126 |
}
|
|
|
127 |
|
|
|
128 |
#-- Write out
|
|
|
129 |
print "CREATE TABLE $tablename (";
|
|
|
130 |
print join ",", map {"$titles[$_] $sqltypes[$_]"} (0..$#titles);
|
|
|
131 |
print ");\n";
|
|
|
132 |
if($coldefs eq "auto"){
|
|
|
133 |
$lastcol = $sheet->{MaxCol};
|
|
|
134 |
}else{
|
|
|
135 |
$lastcol = $#sqltypes + $sheet->{MinCol};
|
|
|
136 |
foreach $i (reverse 0..$#sqltypes){
|
|
|
137 |
$sqltypes[$i + $sheet->{MinCol}] = $sqltypes[$i];
|
|
|
138 |
}
|
|
|
139 |
}
|
|
|
140 |
for(my $iR = $roffset+1 ; $iR <= $lastrow ; $iR++) {
|
|
|
141 |
print "INSERT INTO $tablename VALUES(";
|
|
|
142 |
print join ",", map {
|
|
|
143 |
my $c = $sheet->{Cells}[$iR][$_];
|
|
|
144 |
# defined $c ? '"'.&cast($c->Value,$sqltypes[$_]).'"' : NULL
|
|
|
145 |
defined $c ? ''.&cast($c->Value,$sqltypes[$_]).'' : NULL
|
|
|
146 |
} ($sheet->{MinCol}..$lastcol);
|
|
|
147 |
print ");\n"
|
|
|
148 |
}
|
|
|
149 |
|
|
|
150 |
sub cast($$){
|
|
|
151 |
my ($value, $sqltype) = @_;
|
|
|
152 |
if( length($value)>0 ){
|
|
|
153 |
if ($value eq $nodata){
|
|
|
154 |
$value = "NULL"; # no data coded with char
|
|
|
155 |
}else{
|
|
|
156 |
if( $sqltype =~ /CHAR\s*\((\d+)\)/i ){
|
|
|
157 |
$value =~ s/[\n\r]/ /gm;
|
|
|
158 |
$value =~ s/"/\\"/g;
|
|
|
159 |
$value =~ s/'/\\'/g;
|
|
|
160 |
# $value = substr( $value, 0, $1 );
|
|
|
161 |
$value = '\''.substr( $value, 0, $1 ).'\'';
|
|
|
162 |
}elsif( $sqltype =~ /DOUBLE PRECISION/i ){
|
|
|
163 |
$value += 0;
|
|
|
164 |
}elsif( $sqltype =~ /INTEGER/i ){
|
|
|
165 |
$value = int $value;
|
|
|
166 |
}else{
|
|
|
167 |
die "Unknown SQL type '$sqltype'; can't typecast '$value' to that type.\n";
|
|
|
168 |
}
|
|
|
169 |
}
|
|
|
170 |
}else{
|
|
|
171 |
$value = "NULL"; # no data
|
|
|
172 |
}
|
|
|
173 |
}
|
|
|
174 |
|
|
|
175 |
sub parse_opts(){
|
|
|
176 |
for $o (sort keys %gopts){
|
|
|
177 |
if( defined $gopts{$o}[2] ){
|
|
|
178 |
${$gopts{$o}[0]} = $gopts{$o}[2];
|
|
|
179 |
}
|
|
|
180 |
for $arg (@ARGV){
|
|
|
181 |
$arg =~ /^\Q$o\E(?:\s*=\s*(.+)$)?/;
|
|
|
182 |
if( length($1)>0 ){
|
|
|
183 |
${$gopts{$o}[0]} = $1;
|
|
|
184 |
}elsif( $& ){
|
|
|
185 |
${$gopts{$o}[0]} = 1;
|
|
|
186 |
}
|
|
|
187 |
}
|
|
|
188 |
}
|
|
|
189 |
if($help){
|
|
|
190 |
select STDERR;
|
|
|
191 |
print "\n$PROGRAM_NAME : extract sheets from an excel workbook and\n";
|
|
|
192 |
print "produce SQL statements that create the database\n";
|
|
|
193 |
print "\nArguments (use grass style, i.e. arg=value):\n";
|
|
|
194 |
foreach (keys %gopts){ $longest = $longest < length() ? length() : $longest }
|
|
|
195 |
foreach $arg (grep {!/help/} keys %gopts){
|
|
|
196 |
print " $arg".(" "x($longest+2-length $arg));
|
|
|
197 |
print $gopts{$arg}[1];
|
|
|
198 |
print " (default: ".$gopts{$arg}[2].")" if defined $gopts{$arg}[2];
|
|
|
199 |
print "\n";
|
|
|
200 |
}
|
|
|
201 |
select STDOUT;
|
|
|
202 |
die "\n";
|
|
|
203 |
}
|
|
|
204 |
}
|
|
|
205 |
|