Blame | Last modification | View Log | RSS feed
#!/usr/bin/perl# $Date: 2005/10/19 10:14:00 $# Excel to SQL statements translator.# Needs the Spreadsheet::ParseExcel module from CPAN## Example usage:## xls2sql.pl help# xls2sql.pl file=6960_TS_Bressanone_Brixen.xls \# coldefs="year INTEGER,MEAN1 DOUBLE PRECISION,MEAN2 DOUBLE PRECISION,MEAN3 DOUBLE PRECISION"## Released under GPLv2.0# by Daniel Calvelo Aros (dca@users.sf.net)## few modifications by MN (why not reading 'Learning Perl' O'Reilly book)#use strict;use English;use Spreadsheet::ParseExcel;# gparser-like options:# option label => [ref to var to hold opt value, "description", default value or undef for no default]%gopts = ( file => [\$file, "Name of input excel spreadsheet file", undef],skip => [\$skip, "Number of rows to skip at the beginning of sheet", 0],table => [\$tablename,"Name of output table", "mytable"],rows => [\$nrows, "Number of rows to extract", undef],sheet => [\$sheetn, "Sheet number to convert from the workbook, counting from 1", 1],coldefs=> [\$coldefs, "Column definitions as in SQL", "auto"],nodata => [\$nodata, "No data character(s) used in the Excel table", ""],help => [\$help, "Help, of course", undef],debug => [\$DEBUG, "Debugging flag, for developers only", 0]);#function defined below:&parse_opts();#-- Open and look for obvious errorsmy $wkbk =Spreadsheet::ParseExcel::Workbook->Parse($file);if( !defined $wkbk->{Worksheet}) {die "Error:couldn't parse file $file\n"}my($iR, $iC, $sheet, $ncols, $roffset, $rsize);$sheet = @{$wkbk->{Worksheet}}[--$sheetn]; #-- Numbering starts at 1 for the user$ncols = $sheet->{MaxCol} or die "Error:the specified sheet $sheetn does not contain data\n";$ncols -= $sheet->{MinCol} if defined $sheet->{MinCol} ;$roffset = $sheet->{MinRow}-1;$rsize = $sheet->{MaxRow} - $sheet->{MinRow};die "Error:the specified worksheet seems to contain only one line\n" if $rsize == 0;$roffset += $skip;$lastrow = ( defined $nrows? $nrows + $roffset -1: $sheet->{MaxRow} );die "Invalid skip option: the sheet only has $rsize rows" if $roffset >= $rsize - 1;my (@types, @sqltypes, @firstrow, @titlerow);if($coldefs ne "auto"){#-- We have user-defined column definitions#-- Check them$coldefs =~ s/^\s*//;$coldefs =~ s/\s*$//;@defs = split ",", $coldefs;foreach $i (0..$#defs){($colname, $typedef) = split /\s+/,$defs[$i],2;die "Column specification $i: can't parse SQL type definition '$typedef' (should be INTEGER, DOUBLE PRECISION, CHAR).\n" if $typedef !~ /INTEGER|DOUBLE PRECISION|CHAR/i;die "Column name '$colname' for column $i contains spurious characters (no spaces permitted in list).\n" if $colname !~ /[a-zA-Z][a-zA-Z_0-9]*/;push @sqltypes, $typedef;push @titles, $colname;}}else{#-- Inspect file for types:#-- First estimate initial types from the first row of data@firstrow = @{$sheet->{Cells}[$roffset+1]};@types = map { $_->{Type}} @firstrow;%cvt = (Text=>'CHAR',Numeric=>'INTEGER',Date=>'DOUBLE');@sqltypes = map { $cvt{$_} } @types;@lens = map { 0 } @types;print STDERR "\nTypes:", join ";", @types if $DEBUG;print STDERR "\nInitial sqltypes:", join ";", @sqltypes if $DEBUG;#-- Then adjust widths and numeric type from the datafor(my $iR = $roffset ; $iR <= $lastrow ; $iR++) {for(my $iC = $sheet->{MinCol} ;$iC <= $sheet->{MaxCol} ; $iC++) {$cell = $sheet->{Cells}[$iR][$iC];next if !defined $cell;$cellvalue = $cell->Value;if($types[$iC] eq 'Text'){$thislength = length( $cellvalue );$lens[$iC] = $thislength if $thislength > $lens[$iC];}else{if( $cellvalue =~ /[\.,]/ ){$sqltypes[$iC] = 'DOUBLE PRECISION';}if( $cellvalue =~ /[a-df-z]/ ){$sqltypes[$iC] = 'CHAR'; $lens[$iC] = length( $cellvalue);}}}}foreach $i (0..$#sqltypes){if( $sqltypes[$i] eq 'CHAR' ){$sqltypes[$i] .= "($lens[$i])";}}print STDERR "\nAdjusted sqltypes:", join ";", @sqltypes if $DEBUG;#-- Generate field names from the title row@titlerow = @{$sheet->{Cells}[$roffset]};print STDERR "\nTitlerow:", join ";", map { defined $_ ? $_->Value : "" } @titlerow if $DEBUG;$varname = "V000";@titles = map {/^[^a-zA-Z]/ ? $varname++ : $_} map {if( defined $_ && length > 0 ) {$_=$_->Value;y/a-z/A-Z/;s/[^a-zA-Z_0-9]/_/g}else { $_=$varname++ }$_;} @titlerow;map { $istitle{$_}++ } @titles;foreach $i (reverse 0..$#titles){if( $istitle{$titles[$i]} > 1){$titles[$i] .= --$istitle{$titles[$i]};}}while( $#titles < $ncols ){ #Missing titles, according to the size of the sheetpush @titles, $varname++;push @sqltypes, "CHAR(32)";}print STDERR "\nTitles:" ,join ";", @titles if $DEBUG;print STDERR "\n" if $DEBUG;}#-- Write outprint "CREATE TABLE $tablename (";print join ",", map {"$titles[$_] $sqltypes[$_]"} (0..$#titles);print ");\n";if($coldefs eq "auto"){$lastcol = $sheet->{MaxCol};}else{$lastcol = $#sqltypes + $sheet->{MinCol};foreach $i (reverse 0..$#sqltypes){$sqltypes[$i + $sheet->{MinCol}] = $sqltypes[$i];}}for(my $iR = $roffset+1 ; $iR <= $lastrow ; $iR++) {print "INSERT INTO $tablename VALUES(";print join ",", map {my $c = $sheet->{Cells}[$iR][$_];# defined $c ? '"'.&cast($c->Value,$sqltypes[$_]).'"' : NULLdefined $c ? ''.&cast($c->Value,$sqltypes[$_]).'' : NULL} ($sheet->{MinCol}..$lastcol);print ");\n"}sub cast($$){my ($value, $sqltype) = @_;if( length($value)>0 ){if ($value eq $nodata){$value = "NULL"; # no data coded with char}else{if( $sqltype =~ /CHAR\s*\((\d+)\)/i ){$value =~ s/[\n\r]/ /gm;$value =~ s/"/\\"/g;$value =~ s/'/\\'/g;# $value = substr( $value, 0, $1 );$value = '\''.substr( $value, 0, $1 ).'\'';}elsif( $sqltype =~ /DOUBLE PRECISION/i ){$value += 0;}elsif( $sqltype =~ /INTEGER/i ){$value = int $value;}else{die "Unknown SQL type '$sqltype'; can't typecast '$value' to that type.\n";}}}else{$value = "NULL"; # no data}}sub parse_opts(){for $o (sort keys %gopts){if( defined $gopts{$o}[2] ){${$gopts{$o}[0]} = $gopts{$o}[2];}for $arg (@ARGV){$arg =~ /^\Q$o\E(?:\s*=\s*(.+)$)?/;if( length($1)>0 ){${$gopts{$o}[0]} = $1;}elsif( $& ){${$gopts{$o}[0]} = 1;}}}if($help){select STDERR;print "\n$PROGRAM_NAME : extract sheets from an excel workbook and\n";print "produce SQL statements that create the database\n";print "\nArguments (use grass style, i.e. arg=value):\n";foreach (keys %gopts){ $longest = $longest < length() ? length() : $longest }foreach $arg (grep {!/help/} keys %gopts){print " $arg".(" "x($longest+2-length $arg));print $gopts{$arg}[1];print " (default: ".$gopts{$arg}[2].")" if defined $gopts{$arg}[2];print "\n";}select STDOUT;die "\n";}}