New file |
0,0 → 1,205 |
#!/usr/bin/perl |
|
# $Date: 2005/10/19 10:14:00 $ |
|
# Excel to SQL statements translator. |
# Needs the Spreadsheet::ParseExcel module from CPAN |
# |
# Example usage: |
# |
# xls2sql.pl help |
# xls2sql.pl file=6960_TS_Bressanone_Brixen.xls \ |
# coldefs="year INTEGER,MEAN1 DOUBLE PRECISION,MEAN2 DOUBLE PRECISION,MEAN3 DOUBLE PRECISION" |
# |
# Released under GPLv2.0 |
# by Daniel Calvelo Aros (dca@users.sf.net) |
# |
# few modifications by MN (why not reading 'Learning Perl' O'Reilly book) |
|
#use strict; |
use English; |
use Spreadsheet::ParseExcel; |
# gparser-like options: |
# option label => [ref to var to hold opt value, "description", default value or undef for no default] |
%gopts = ( file => [\$file, "Name of input excel spreadsheet file", undef], |
skip => [\$skip, "Number of rows to skip at the beginning of sheet", 0], |
table => [\$tablename,"Name of output table", "mytable"], |
rows => [\$nrows, "Number of rows to extract", undef], |
sheet => [\$sheetn, "Sheet number to convert from the workbook, counting from 1", 1], |
coldefs=> [\$coldefs, "Column definitions as in SQL", "auto"], |
nodata => [\$nodata, "No data character(s) used in the Excel table", ""], |
help => [\$help, "Help, of course", undef], |
debug => [\$DEBUG, "Debugging flag, for developers only", 0]); |
#function defined below: |
&parse_opts(); |
|
#-- Open and look for obvious errors |
my $wkbk = |
Spreadsheet::ParseExcel::Workbook->Parse($file); |
if( !defined $wkbk->{Worksheet}) {die "Error:couldn't parse file $file\n"} |
my($iR, $iC, $sheet, $ncols, $roffset, $rsize); |
$sheet = @{$wkbk->{Worksheet}}[--$sheetn]; #-- Numbering starts at 1 for the user |
$ncols = $sheet->{MaxCol} or die "Error:the specified sheet $sheetn does not contain data\n"; |
$ncols -= $sheet->{MinCol} if defined $sheet->{MinCol} ; |
$roffset = $sheet->{MinRow}-1; |
$rsize = $sheet->{MaxRow} - $sheet->{MinRow}; |
die "Error:the specified worksheet seems to contain only one line\n" if $rsize == 0; |
$roffset += $skip; |
$lastrow = ( defined $nrows |
? $nrows + $roffset -1 |
: $sheet->{MaxRow} ); |
die "Invalid skip option: the sheet only has $rsize rows" if $roffset >= $rsize - 1; |
my (@types, @sqltypes, @firstrow, @titlerow); |
|
if($coldefs ne "auto"){ |
#-- We have user-defined column definitions |
#-- Check them |
$coldefs =~ s/^\s*//; |
$coldefs =~ s/\s*$//; |
@defs = split ",", $coldefs; |
foreach $i (0..$#defs){ |
($colname, $typedef) = split /\s+/,$defs[$i],2; |
die "Column specification $i: can't parse SQL type definition '$typedef' (should be INTEGER, DOUBLE PRECISION, CHAR).\n" if $typedef !~ /INTEGER|DOUBLE PRECISION|CHAR/i; |
die "Column name '$colname' for column $i contains spurious characters (no spaces permitted in list).\n" if $colname !~ /[a-zA-Z][a-zA-Z_0-9]*/; |
push @sqltypes, $typedef; |
push @titles, $colname; |
} |
}else{ |
#-- Inspect file for types: |
#-- First estimate initial types from the first row of data |
@firstrow = @{$sheet->{Cells}[$roffset+1]}; |
@types = map { $_->{Type}} @firstrow; |
%cvt = (Text=>'CHAR',Numeric=>'INTEGER',Date=>'DOUBLE'); |
@sqltypes = map { $cvt{$_} } @types; |
@lens = map { 0 } @types; |
print STDERR "\nTypes:", join ";", @types if $DEBUG; |
print STDERR "\nInitial sqltypes:", join ";", @sqltypes if $DEBUG; |
#-- Then adjust widths and numeric type from the data |
for(my $iR = $roffset ; $iR <= $lastrow ; $iR++) { |
for(my $iC = $sheet->{MinCol} ;$iC <= $sheet->{MaxCol} ; $iC++) { |
$cell = $sheet->{Cells}[$iR][$iC]; |
next if !defined $cell; |
$cellvalue = $cell->Value; |
if($types[$iC] eq 'Text'){ |
$thislength = length( $cellvalue ); |
$lens[$iC] = $thislength if $thislength > $lens[$iC]; |
}else{ |
if( $cellvalue =~ /[\.,]/ ){ |
$sqltypes[$iC] = 'DOUBLE PRECISION'; |
} |
if( $cellvalue =~ /[a-df-z]/ ){ |
$sqltypes[$iC] = 'CHAR'; $lens[$iC] = length( $cellvalue); |
} |
} |
} |
} |
foreach $i (0..$#sqltypes){ |
if( $sqltypes[$i] eq 'CHAR' ){ |
$sqltypes[$i] .= "($lens[$i])"; |
} |
} |
print STDERR "\nAdjusted sqltypes:", join ";", @sqltypes if $DEBUG; |
|
#-- Generate field names from the title row |
@titlerow = @{$sheet->{Cells}[$roffset]}; |
print STDERR "\nTitlerow:", join ";", map { defined $_ ? $_->Value : "" } @titlerow if $DEBUG; |
$varname = "V000"; |
@titles = map { |
/^[^a-zA-Z]/ ? $varname++ : $_ |
} map { |
if( defined $_ && length > 0 ) {$_=$_->Value;y/a-z/A-Z/;s/[^a-zA-Z_0-9]/_/g} |
else { $_=$varname++ } |
$_; |
} @titlerow; |
map { $istitle{$_}++ } @titles; |
foreach $i (reverse 0..$#titles){ |
if( $istitle{$titles[$i]} > 1){ |
$titles[$i] .= --$istitle{$titles[$i]}; |
} |
} |
while( $#titles < $ncols ){ #Missing titles, according to the size of the sheet |
push @titles, $varname++; |
push @sqltypes, "CHAR(32)"; |
} |
print STDERR "\nTitles:" ,join ";", @titles if $DEBUG; |
print STDERR "\n" if $DEBUG; |
} |
|
#-- Write out |
print "CREATE TABLE $tablename ("; |
print join ",", map {"$titles[$_] $sqltypes[$_]"} (0..$#titles); |
print ");\n"; |
if($coldefs eq "auto"){ |
$lastcol = $sheet->{MaxCol}; |
}else{ |
$lastcol = $#sqltypes + $sheet->{MinCol}; |
foreach $i (reverse 0..$#sqltypes){ |
$sqltypes[$i + $sheet->{MinCol}] = $sqltypes[$i]; |
} |
} |
for(my $iR = $roffset+1 ; $iR <= $lastrow ; $iR++) { |
print "INSERT INTO $tablename VALUES("; |
print join ",", map { |
my $c = $sheet->{Cells}[$iR][$_]; |
# defined $c ? '"'.&cast($c->Value,$sqltypes[$_]).'"' : NULL |
defined $c ? ''.&cast($c->Value,$sqltypes[$_]).'' : NULL |
} ($sheet->{MinCol}..$lastcol); |
print ");\n" |
} |
|
sub cast($$){ |
my ($value, $sqltype) = @_; |
if( length($value)>0 ){ |
if ($value eq $nodata){ |
$value = "NULL"; # no data coded with char |
}else{ |
if( $sqltype =~ /CHAR\s*\((\d+)\)/i ){ |
$value =~ s/[\n\r]/ /gm; |
$value =~ s/"/\\"/g; |
$value =~ s/'/\\'/g; |
# $value = substr( $value, 0, $1 ); |
$value = '\''.substr( $value, 0, $1 ).'\''; |
}elsif( $sqltype =~ /DOUBLE PRECISION/i ){ |
$value += 0; |
}elsif( $sqltype =~ /INTEGER/i ){ |
$value = int $value; |
}else{ |
die "Unknown SQL type '$sqltype'; can't typecast '$value' to that type.\n"; |
} |
} |
}else{ |
$value = "NULL"; # no data |
} |
} |
|
sub parse_opts(){ |
for $o (sort keys %gopts){ |
if( defined $gopts{$o}[2] ){ |
${$gopts{$o}[0]} = $gopts{$o}[2]; |
} |
for $arg (@ARGV){ |
$arg =~ /^\Q$o\E(?:\s*=\s*(.+)$)?/; |
if( length($1)>0 ){ |
${$gopts{$o}[0]} = $1; |
}elsif( $& ){ |
${$gopts{$o}[0]} = 1; |
} |
} |
} |
if($help){ |
select STDERR; |
print "\n$PROGRAM_NAME : extract sheets from an excel workbook and\n"; |
print "produce SQL statements that create the database\n"; |
print "\nArguments (use grass style, i.e. arg=value):\n"; |
foreach (keys %gopts){ $longest = $longest < length() ? length() : $longest } |
foreach $arg (grep {!/help/} keys %gopts){ |
print " $arg".(" "x($longest+2-length $arg)); |
print $gopts{$arg}[1]; |
print " (default: ".$gopts{$arg}[2].")" if defined $gopts{$arg}[2]; |
print "\n"; |
} |
select STDOUT; |
die "\n"; |
} |
} |
|