Subversion Repositories eFlore/Projets.eflore-projets

Rev

Rev 343 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
343 jpm 1
#!/usr/bin/perl
2
 
3
# $Date: 2005/10/19 10:14:00 $
4
 
5
# Excel to SQL statements translator.
6
# Needs the Spreadsheet::ParseExcel module from CPAN
7
#
8
# Example usage:
9
#
10
# xls2sql.pl help
11
# xls2sql.pl file=6960_TS_Bressanone_Brixen.xls \
12
#   coldefs="year INTEGER,MEAN1 DOUBLE PRECISION,MEAN2 DOUBLE PRECISION,MEAN3 DOUBLE PRECISION"
13
#
14
# Released under GPLv2.0
15
# by Daniel Calvelo Aros (dca@users.sf.net)
16
#
17
# few modifications by MN (why not reading 'Learning Perl' O'Reilly book)
18
 
19
#use strict;
20
use English;
21
use Spreadsheet::ParseExcel;
22
# gparser-like options:
23
#          option label => [ref to var to hold opt value, "description", default value or undef for no default]
24
%gopts = ( file   => [\$file,     "Name of input excel spreadsheet file", undef],
25
	   skip   => [\$skip,     "Number of rows to skip at the beginning of sheet", 0],
26
	   table  => [\$tablename,"Name of output table", "mytable"],
27
	   rows   => [\$nrows,    "Number of rows to extract", undef],
28
	   sheet  => [\$sheetn,   "Sheet number to convert from the workbook, counting from 1", 1],
29
	   coldefs=> [\$coldefs,  "Column definitions as in SQL", "auto"],
30
	   nodata => [\$nodata,   "No data character(s) used in the Excel table", ""],
31
	   help   => [\$help,     "Help, of course", undef],
32
	   debug  => [\$DEBUG,    "Debugging flag, for developers only", 0]);
33
#function defined below:
34
&parse_opts();
35
 
36
#-- Open and look for obvious errors
37
my $wkbk =
38
  Spreadsheet::ParseExcel::Workbook->Parse($file);
39
if( !defined $wkbk->{Worksheet}) {die "Error:couldn't parse file $file\n"}
40
my($iR, $iC, $sheet, $ncols, $roffset, $rsize);
41
$sheet = @{$wkbk->{Worksheet}}[--$sheetn]; #-- Numbering starts at 1 for the user
42
$ncols = $sheet->{MaxCol} or die "Error:the specified sheet $sheetn does not contain data\n";
43
$ncols -= $sheet->{MinCol} if defined $sheet->{MinCol} ;
44
$roffset = $sheet->{MinRow}-1;
45
$rsize = $sheet->{MaxRow} - $sheet->{MinRow};
46
die "Error:the specified worksheet seems to contain only one line\n" if $rsize == 0;
47
$roffset += $skip;
48
$lastrow = ( defined $nrows
49
	     ? $nrows + $roffset -1
50
	     : $sheet->{MaxRow} );
51
die "Invalid skip option: the sheet only has $rsize rows" if $roffset >= $rsize - 1;
52
my (@types, @sqltypes, @firstrow, @titlerow);
53
 
54
if($coldefs ne "auto"){
55
  #-- We have user-defined column definitions
56
  #-- Check them
57
  $coldefs =~ s/^\s*//;
58
  $coldefs =~ s/\s*$//;
59
  @defs = split ",", $coldefs;
60
  foreach $i (0..$#defs){
61
    ($colname, $typedef) = split /\s+/,$defs[$i],2;
62
    die "Column specification $i: can't parse SQL type definition '$typedef' (should be INTEGER, DOUBLE PRECISION, CHAR).\n" if $typedef !~ /INTEGER|DOUBLE PRECISION|CHAR/i;
63
    die "Column name '$colname' for column $i contains spurious characters (no spaces permitted in list).\n" if $colname !~ /[a-zA-Z][a-zA-Z_0-9]*/;
64
    push @sqltypes, $typedef;
65
    push @titles, $colname;
66
  }
67
}else{
68
  #-- Inspect file for types:
69
  #-- First estimate initial types from the first row of data
70
  @firstrow = @{$sheet->{Cells}[$roffset+1]};
71
  @types = map { $_->{Type}} @firstrow;
72
  %cvt = (Text=>'CHAR',Numeric=>'INTEGER',Date=>'DOUBLE');
73
  @sqltypes = map { $cvt{$_} } @types;
74
  @lens = map { 0 } @types;
75
  print STDERR "\nTypes:", join ";", @types if $DEBUG;
76
  print STDERR "\nInitial sqltypes:", join ";", @sqltypes if $DEBUG;
77
  #-- Then adjust widths and numeric type from the data
78
  for(my $iR = $roffset ; $iR <= $lastrow ; $iR++) {
79
    for(my $iC = $sheet->{MinCol} ;$iC <= $sheet->{MaxCol} ; $iC++) {
80
      $cell = $sheet->{Cells}[$iR][$iC];
81
      next if !defined $cell;
82
      $cellvalue = $cell->Value;
83
      if($types[$iC] eq 'Text'){
84
	$thislength = length( $cellvalue );
85
	$lens[$iC] = $thislength if $thislength > $lens[$iC];
86
      }else{
87
	if( $cellvalue =~ /[\.,]/ ){
88
	  $sqltypes[$iC] = 'DOUBLE PRECISION';
89
	}
90
	if( $cellvalue =~ /[a-df-z]/ ){
91
	  $sqltypes[$iC] = 'CHAR'; $lens[$iC] = length( $cellvalue);
92
	}
93
      }
94
    }
95
  }
96
  foreach $i (0..$#sqltypes){
97
    if( $sqltypes[$i] eq 'CHAR' ){
98
      $sqltypes[$i] .= "($lens[$i])";
99
    }
100
  }
101
  print STDERR "\nAdjusted sqltypes:", join ";", @sqltypes if $DEBUG;
102
 
103
  #-- Generate field names from the title row
104
  @titlerow = @{$sheet->{Cells}[$roffset]};
105
  print STDERR "\nTitlerow:", join ";", map { defined $_ ? $_->Value : "" } @titlerow if $DEBUG;
106
  $varname = "V000";
107
  @titles = map {
108
    /^[^a-zA-Z]/ ? $varname++ : $_
109
  } map {
110
    if( defined $_ && length > 0 ) {$_=$_->Value;y/a-z/A-Z/;s/[^a-zA-Z_0-9]/_/g}
111
    else { $_=$varname++ }
112
    $_;
113
  } @titlerow;
114
  map { $istitle{$_}++ } @titles;
115
  foreach $i (reverse 0..$#titles){
116
    if( $istitle{$titles[$i]} > 1){
117
      $titles[$i] .= --$istitle{$titles[$i]};
118
    }
119
  }
120
  while( $#titles < $ncols ){ #Missing titles, according to the size of the sheet
121
    push @titles, $varname++;
122
    push @sqltypes, "CHAR(32)";
123
  }
124
  print STDERR "\nTitles:" ,join ";", @titles if $DEBUG;
125
  print STDERR "\n" if $DEBUG;
126
}
127
 
128
#-- Write out
129
print "CREATE TABLE $tablename (";
130
print join ",", map {"$titles[$_] $sqltypes[$_]"} (0..$#titles);
131
print ");\n";
132
if($coldefs eq "auto"){
133
  $lastcol = $sheet->{MaxCol};
134
}else{
135
  $lastcol = $#sqltypes + $sheet->{MinCol};
136
  foreach $i (reverse 0..$#sqltypes){
137
    $sqltypes[$i + $sheet->{MinCol}] = $sqltypes[$i];
138
  }
139
}
140
for(my $iR = $roffset+1 ; $iR <= $lastrow ; $iR++) {
141
  print "INSERT INTO $tablename VALUES(";
142
  print join ",", map {
143
    my $c = $sheet->{Cells}[$iR][$_];
144
#    defined $c ? '"'.&cast($c->Value,$sqltypes[$_]).'"' : NULL
145
    defined $c ? ''.&cast($c->Value,$sqltypes[$_]).'' : NULL
146
  } ($sheet->{MinCol}..$lastcol);
147
  print ");\n"
148
}
149
 
150
sub cast($$){
151
  my ($value, $sqltype) = @_;
152
  if( length($value)>0 ){
153
    if ($value eq $nodata){
154
      $value = "NULL";  # no data coded with char
155
    }else{
156
      if( $sqltype =~ /CHAR\s*\((\d+)\)/i ){
157
        $value =~ s/[\n\r]/ /gm;
158
        $value =~ s/"/\\"/g;
159
        $value =~ s/'/\\'/g;
160
#        $value = substr( $value, 0, $1 );
161
        $value = '\''.substr( $value, 0, $1 ).'\'';
162
      }elsif( $sqltype =~ /DOUBLE PRECISION/i ){
163
        $value += 0;
164
      }elsif( $sqltype =~ /INTEGER/i ){
165
        $value = int $value;
166
      }else{
167
        die "Unknown SQL type '$sqltype'; can't typecast '$value' to that type.\n";
168
      }
169
    }
170
  }else{
171
      $value = "NULL";  # no data
172
  }
173
}
174
 
175
sub parse_opts(){
176
  for $o (sort keys %gopts){
177
    if( defined $gopts{$o}[2] ){
178
      ${$gopts{$o}[0]} = $gopts{$o}[2];
179
    }
180
    for $arg (@ARGV){
181
      $arg =~ /^\Q$o\E(?:\s*=\s*(.+)$)?/;
182
      if( length($1)>0 ){
183
	${$gopts{$o}[0]} = $1;
184
      }elsif( $& ){
185
	${$gopts{$o}[0]} = 1;
186
      }
187
    }
188
  }
189
  if($help){
190
    select STDERR;
191
    print "\n$PROGRAM_NAME : extract sheets from an excel workbook and\n";
192
    print "produce SQL statements that create the database\n";
193
    print "\nArguments (use grass style, i.e. arg=value):\n";
194
    foreach (keys %gopts){ $longest = $longest < length() ? length() : $longest }
195
    foreach $arg (grep {!/help/} keys %gopts){
196
      print "  $arg".(" "x($longest+2-length $arg));
197
      print $gopts{$arg}[1];
198
      print " (default: ".$gopts{$arg}[2].")" if defined $gopts{$arg}[2];
199
      print "\n";
200
    }
201
    select STDOUT;
202
    die "\n";
203
  }
204
}
205