# DBD::MVS_FTPSQL - DBD driver to query IBM DB2 mainframe databases through an FTP server.
#
# Copyright (c) 2007 Clemente Biondo <clemente.biondo@gmail.com>
#
# You may distribute under the terms of either the GNU General Public
# License or the Artistic License, as specified in the Perl README file.

use warnings;
use strict;
#require 5.004;
require DBI;
use Net::FTP;
use IO::File;  
use Carp qw(croak);

package DBD::MVS_FTPSQL;
our $VERSION = '0.38.13';

our $drh = undef; # Driver handle. Every thread has one (see CLONE method)

# Driver handle constructor
sub driver {
  return $drh if $drh; # If already created, return it
  my ($class, $attr) = @_;
  $class .= "::dr";

  return DBI::_new_drh($class, {
    'Name'        => 'MVS_FTPSQL',
    'Version'     => $VERSION,
    'Attribution' => 'DBD::MVS_FTPSQL by Clemente Biondo '.
                     '<clemente.biondo@gmail.com>'
  });
} 

#Ensure that two different ithreads don't' share the same driver object
sub CLONE {undef $drh;}

#End of DBD::MVS_FTPSQL
package DBD::MVS_FTPSQL::dr;

$DBD::MVS_FTPSQL::dr::imp_data_size = 0;

# Database handle constructor. 
# Some database specific verifications, default settings and the like can 
# go here.
sub connect {
  my ($drh, $dr_dsn, $username, $password, $attr) = @_;
  my $driver_prefix = "mvs_ftpsql_";

  #The dr_dsn string is in "ODBC" format name1=value1;...;nameN=valueN
  foreach my $var ( split /;/, $dr_dsn ) {
      my ($attr_name, $attr_value) = split '=', $var, 2;
      return $drh->set_err(1, "Can't parse DSN part '$var'")
          unless defined $attr_value;

      # add driver prefix to attribute name if it doesn't have it already
      $attr_name = $driver_prefix.$attr_name
          unless $attr_name =~ /^$driver_prefix/o;

      # Store attribute into %$attr, replacing any existing value.
      # The DBI will STORE() these into $dbh after we've connected
      $attr->{$attr_name} = $attr_value;
  }  


  return $drh->set_err(1, "Error in the dns string: you must specify the ".
                          "mainframe hostname.") 
           unless defined ($attr->{mvs_ftpsql_hostname});

  # Get the attributes we'll use to connect.
  # We use delete here because these no need to STORE them
  my $host    = delete $attr->{mvs_ftpsql_hostname};
  my $port    = delete $attr->{mvs_ftpsql_port}     || 21;
  my $timeout = delete $attr->{mvs_ftpsql_timeout}  || 120;
  my $remote_directory = delete $attr->{mvs_ftpsql_remote_directory}  || '';

  #Additional default attributes
  $attr->{mvs_ftpsql_remote_prefix} = 'FSQL' 
    unless $attr->{mvs_ftpsql_remote_prefix};

  $attr->{mvs_ftpsql_ssid} = ''  
    unless $attr->{mvs_ftpsql_ssid};
  my $debug = 0;
  my $conn =  Net::FTP->new(            $host 
                            ,Port    => $port 
                            ,Debug   => $debug  
                            ,Timeout => $timeout 
                            ,Passive => 1 )
 or return $drh->set_err(1,"Cannot establish an ftp connection to host ".
              "$host at port $port. Error received: $!");

  return $drh->set_err(1,"Login failed. Error received: ". $conn->message) 
    unless ($conn->login($username,$password));

  unless ($remote_directory eq '') {
  	$remote_directory =~ s/^([^\/])/\/\/$1/;
  	return $drh->set_err(1,"Remote directory not accepted. Error received: ". 
  	  $conn->message) unless ($conn->cwd($remote_directory));
  }

  my ($outer, $dbh) = DBI::_new_dbh($drh, { Name => $dr_dsn });
  #$dbh->STORE('Active', 1 );

  $dbh->{mvs_ftpsql_connection} = $conn;
  return $outer;
}

sub data_sources {return undef;}

sub disconnect_all {}

#End of DBD::MVS_FTPSQL::dr

package DBD::MVS_FTPSQL::db;

$DBD::MVS_FTPSQL::db::imp_data_size = 0;             

#Todo:
# primary_key
# foreign_key_info

# The get_info function was automatically generated by
# DBI::DBD::Metadata::write_getinfo_pm v1.05.
sub get_info {
  my($dbh, $info_type) = @_;
  require DBD::MVS_FTPSQL::GetInfo;
  my $v = $DBD::MVS_FTPSQL::GetInfo::info{int($info_type)};
  $v = $v->($dbh) if ref $v eq 'CODE';
  return $v;
}

# The type_info_all function was automatically generated by
# DBI::DBD::Metadata::write_typeinfo_pm v1.05.
sub type_info_all {
  my ($dbh) = @_;
  require DBD::MVS_FTPSQL::TypeInfo;
  return [ @$DBD::MVS_FTPSQL::TypeInfo::type_info_all ];
}

#Note: blanks must become undef            
sub column_info {
  my $dbh     = shift;
  my $catalog = shift; #not applicable so not used at all
  my $schema  = shift; 
  my $table   = shift;
  my $column  = shift;
  my @where = ();

  foreach ( [\$schema,'TBCREATOR'],  [\$table,'TBNAME'],  [\$column,'NAME']) {
    if (defined(${$_->[0]}) && ${$_->[0]} ne '') {
      my $op = index(${$_->[0]},'%') < 0  ? '=' : 'LIKE';
      push(@where,$_->[1]." $op '".${$_->[0]}."'");
    }
  }

  my $where = (($#where >= 0) ?  'WHERE ' : '') . join (' AND ',@where);
  my $sth = $dbh->prepare(<<EOSQL) || Carp::croak ("Prepare operation failed:$!");
  select
     ''                      as TABLE_CAT          
    ,TBCREATOR               as TABLE_SCHEM        
    ,TBNAME                  as TABLE_NAME         
    ,NAME                    as COLUMN_NAME        
    ,''                      as DATA_TYPE          
    ,COLTYPE                 as TYPE_NAME          
    ,LENGTH                  as COLUMN_SIZE        
    ,''                      as BUFFER_LENGTH      
    ,LENGTH - SCALE          as DECIMAL_DIGITS     
    ,''                      as NUM_PREC_RADIX     
    ,case NULLS when 'N' then 
     '0' else '1' end        as NULLABLE           
    ,REMARKS                 as REMARKS            
    ,DEFAULTVALUE            as COLUMN_DEF         
    ,''                      as SQL_DATA_TYPE      
    ,''                      as SQL_DATETIME_SUB   
    ,''                      as CHAR_OCTET_LENGTH  
    ,COLNO                   as ORDINAL_POSITION   
    ,case NULLS when 'N' then 
     'NO' else 'YES' end     as IS_NULLABLE           
  from sysibm.syscolumns 
  $where
  order by TBCREATOR,TBNAME,NAME,COLNO
  with ur
EOSQL
  $sth->execute() || Carp::croak ("Execute operation failed:$!");
  return $sth;  
}

#Note: blanks must become undef            
sub table_info {
  my $dbh     = shift;
  my $catalog = shift; #not applicable so not used at all
  my $schema  = shift; 
  my $table   = shift;
  my $type    = shift;

  my %type2flag = (
    'ALIAS'                    => 'A'
   ,'GLOBAL TEMPORARY'         => 'G'
   ,'SYSTEM TABLE'             => 'T'
   ,'TABLE'                    => 'T'
   ,'VIEW'                     => 'V'
   ,'AUXILIARY TABLE'          => 'X'
   ,'MATERIALIZED QUERY TABLE' => 'M'
  );

  my $flag_table = $type2flag{$type};
  $flag_table    = '' unless(defined($type2flag{$type}));

  my @where = ();

  foreach ( [\$schema,'CREATOR'],  [\$table,'NAME'],  [\$flag_table,'TYPE']) {
    if (defined(${$_->[0]}) && ${$_->[0]} ne '') {
      my $op = index(${$_->[0]},'%') < 0  ? '=' : 'LIKE';
      push(@where,$_->[1]." $op '".${$_->[0]}."'");
    }
  }

  my $where = (($#where >= 0) ?  'WHERE ' : '') . join (' AND ',@where);

  #There is no need of escaping because only the first sql instruction can be
  #executed and this driver alllows only selects.             
  # create a "blank" statement handle
  my $sth = $dbh->prepare(<<EOSQL) || Carp::croak ("Prepare operation failed:$!");
SELECT 
  ''   AS TABLE_CAT 
 ,NAME as TABLE_NAME
 ,CREATOR as TABLE_SCHEM 
 ,case when type = 'A'                         then 'ALIAS'
       when type = 'G'                         then 'GLOBAL TEMPORARY'
       when type = 'T' and name like 'SYS'     then 'SYSTEM TABLE'
       when type = 'T' and name not like 'SYS' then 'TABLE'
       when type = 'V'                         then 'VIEW'
       when type = 'X'                         then 'AUXILIARY TABLE'
       when type = 'M'                         then 'MATERIALIZED QUERY TABLE'
       else 'UNKNOWN' END AS TABLE_TYPE
 ,REMARKS
FROM SYSIBM.SYSTABLES 
$where
WITH UR
EOSQL
  $sth->execute() || Carp::croak ("Execute operation failed:$!");
  return $sth;  
}

sub ping {
  my $dbh = shift;
   if ($dbh->FETCH('Active')) {
     my $warnmsg = "";
     {
       local $SIG{__WARN__} = sub {$warnmsg=shift;};
     	$dbh->{mvs_ftpsql_connection}->quot('noop');
     }
     $dbh->disconnect() unless $warnmsg eq "";
     #Todo: warnmsg needs to be returned to the user?
  }
  return $dbh->FETCH('Active');
}

sub prepare {
    my ($dbh, $statement, @attribs) = @_;
    return $drh->set_err(1, 'Statement preparation failed: '.
      'There is no active database connection.') 
      unless $dbh->FETCH('Active');
    return $drh->set_err(1, 'Statement preparation failed: '.
                 'The sql statement is empty.') unless length($statement);

    # workaround for a peculiarity of the ftp server: if CR/LF is present 
    # the preceding character will be removed (the string will be chopped)
    $statement =~ s/\r|\n/ /g;
    # create a 'blank' sth
    my ($outer, $sth) = DBI::_new_sth($dbh, { 
    	 Statement     => $statement
    	});

    # Todo: improve the placeholder management
    $sth->STORE('NUM_OF_PARAMS', ($statement =~ tr/?//));
    $sth->{mvs_ftpsql_params} = [];

    return $outer;
}

sub commit {
  my ($dbh) = @_;
  if ($dbh->FETCH('Warn')) {
      warn("Commit ineffective while AutoCommit is on");
  }
  0;
}

sub rollback {
  my ($dbh) = @_;
  if ($dbh->FETCH('Warn')) {
      warn("Rollback ineffective while AutoCommit is on");
  }
  0;
}

sub STORE {
  my ($dbh, $attr, $val) = @_;
  if ($attr eq 'AutoCommit') {
    if (!$val) { die "Can't disable AutoCommit"; }
    return 1;
  }
  if ($attr eq 'ChopBlanks') {
    if (!$val) { die "Can't set ChopBlanks to false"; }
    return 1;
  }
  if ($attr eq 'Active') {
    die "Can't change the read-only connection status attribute 'Active'";
    return 1;
  }
  if ($attr =~ m/^mvs_ftpsql_/) {
    $dbh->{$attr} = $val; 
    return 1;
  }
  $dbh->SUPER::STORE($attr, $val);
}

sub FETCH {
    my ($dbh, $attr) = @_;
    if ($attr eq 'AutoCommit') { return 1; }
    if ($attr eq 'ChopBlanks') { return 1; }
    if ($attr eq 'Active') { 
         return    defined($dbh->{mvs_ftpsql_connection}) 
                && defined($dbh->{mvs_ftpsql_connection}->connected()); 
    }
    if ($attr =~ m/^mvs_ftpsql_/) {
        return $dbh->{$attr}; 
    }

#  defined($conn->connected());

    $dbh->SUPER::FETCH($attr);
}

sub disconnect () {
  my $dbh = shift;	
  $dbh->{mvs_ftpsql_connection}->quit() if $dbh->FETCH('Active');
  #$dbh->STORE('Active',0);
  return 1;
}

sub DESTROY ($) {
  my $dbh = shift;
  #Take care of DBI handle 0x....... cleared whilst still active error.
  $dbh->disconnect();
}

#End of DBD::MVS_FTPSQL::db

package DBD::MVS_FTPSQL::st;

$DBD::MVS_FTPSQL::st::imp_data_size = 0;

#Attributes Implemented
#NUM_OF_FIELDS (integer, read-only)
#NAME (array-ref, read-only)
#NAME_lc (array-ref, read-only)
#NAME_uc (array-ref, read-only)
#NAME_hash (hash-ref, read-only)
#NAME_lc_hash (hash-ref, read-only)
#NAME_uc_hash (hash-ref, read-only)
#Statement (string, read-only)
#Database (dbh, read-only)
#Attributes not Implemented (todo)
#TYPE (array-ref, read-only)
#PRECISION (array-ref, read-only)
#SCALE (array-ref, read-only)
#NULLABLE (array-ref, read-only)
#CursorName (string, read-only)
#ParamValues (hash ref, read-only)
#ParamArrays (hash ref, read-only)
#ParamTypes (hash ref, read-only)
#RowsInCache (integer, read-only)

sub STORE {
  my ($sth, $attr, $val) = @_;
    if ($attr =~ m/^mvs_ftpsql_/) {
        $sth->{$attr} = $val; 
        return 1;             
    }
    $sth->SUPER::STORE($attr, $val);
}

sub FETCH {
    my ($sth, $attr) = @_;
    if ($attr =~ m/^mvs_ftpsql_/) {
        return $sth->{$attr};
    }
    $sth->SUPER::FETCH($attr);
}

#Taken (like other pieces of code) from DBI guide
sub bind_param {
  my ($sth, $pNum, $val, $attr) = @_;
  my $type = (ref $attr) ? $attr->{TYPE} : $attr;
  if ($type) {
      my $dbh = $sth->{Database};
      #mhm seems a bug in the manual?
      #$val = $dbh->quote($sth, $type);
      $val = $dbh->quote($val, $type);
  }
  my $params = $sth->{mvs_ftpsql_params};
  $params->[$pNum-1] = $val;
  1;
}

sub execute {
  my ($sth, @bind_values) = @_;

  # start of by finishing any previous execution if still active
  $sth->finish if $sth->FETCH('Active');
  my $params = (@bind_values) ?
      \@bind_values : $sth->{mvs_ftpsql_params};

  my $numParam = $sth->FETCH('NUM_OF_PARAMS');
  return $sth->set_err(1, "Wrong number of parameters")
      if @$params != $numParam;

  my $statement = $sth->{'Statement'};

  #Todo: the bind mechanism needs to be improved
  for (my $i = 0;  $i < $numParam;  $i++) {
      $statement =~ s/\?/$params->[$i]/;
  }

  #very dirty error handling technique, but eval {} if(@$) seems to clutter
  #(maybe my mistake) with $drh->set_err (todo: dig into the problem)
  my ($error_code,$error_message,$error_state) = (1,"",0);
  my $dbh = $sth->{Database};
  my $fh = mvs_ftpsql_execute(
     $dbh->{'mvs_ftpsql_connection'}
    ,$dbh->{'mvs_ftpsql_ssid'}
    ,$dbh->{'mvs_ftpsql_remote_prefix'}
    ,$statement
    ,\$error_message
    ,\$error_state
    ,\$error_code
  ) or return $sth->set_err($error_code, $error_message,$error_state);

  # Notice that this driver processes only SELECT statement (a protocol 
  # limitation imposed by design), so $fh is ever a file handle to the
  # output of a query.
  #print while(<$fh>);exit;
  my $header = <$fh>;

  #\x00 was placed as a workaround for a strange behaviour with some tables
  $header =~ s/\x00| |\r|\n//g;
  my @header = split(/\t/,$header);
  #print $header[0];exit;
  unless (exists($sth->{'NAME'})) {
    $sth->STORE('NUM_OF_FIELDS' => $#header +1);
    $sth->{'NAME'}              = \@header;
  }

  $sth->{'mvs_ftpsql_data'} = $fh;

  #Row counting
  my $rowcount = 0;
  my $pos = $fh->getpos();
  $rowcount++ while(<$fh>);
  $fh->setpos($pos);
  $sth->{'mvs_ftpsql_rows'} = $rowcount;

  $sth->{Active} = 1;
  return ($rowcount ? $rowcount : '0E0');
}

sub fetchrow_arrayref {
  my ($sth) = @_;
  my $fh = $sth->{mvs_ftpsql_data};
  unless ($fh) {
      $sth->STORE(Active => 0);
      return undef;
  }
  my $tmp = <$fh>;
  unless ($tmp) {
      $sth->STORE(Active => 0);
      return undef;
  }

  #Text fields are right padded, numbers are left padded.
  #The field is at least long as his label.
  #This is the reason we can't disable ChopBlanks
  $tmp =~ s/\r|\n//g;
  $tmp =~ s/ +\t/\t/g; 
  $tmp =~ s/ +$//g;

  my @fields = split(/\t/,$tmp,-1);
  if (($sth->FETCH('NUM_OF_FIELDS')) < ($#fields+1) ) {
    $fh->close();
    $sth->SUPER::finish();
    #Todo: give more info in the pod and propose as solution
    #TRANSLATE (A, ' ', x'05') (lo horizontal tab (HT) \x09 in EBCDIC diventa \x05)
    Carp::croak (
      "Fetch failed: Horizontal tab found. One or more character columns in the resultset ".
      "contain tabs characters ('\\x09').\nAlthough not an error, due to ".
      "limitations imposed by the ftp/sql feature this driver can't ".
      "manage those values.\nSee the documentation to learn how to work ".
      "around this issue."
    );
  }
  return $sth->_set_fbav(\@fields);
}

*fetch = \&fetchrow_arrayref; # required alias for fetchrow_arrayref

sub rows { shift->{mvs_ftpsql_rows}; } 

sub DESTROY {
  my $sth = shift;
  $sth->finish if $sth->FETCH('Active');
}

sub finish {
  my $sth = shift;
  $sth->{mvs_ftpsql_data}->close();
  $sth->SUPER::finish();
}

sub mvs_ftpsql_execute {
  my $ftp_conn                   = shift;
  my $db2subsys                  = shift; 
  my $remote_sql_filename_prefix = shift;
  my $sql                        = shift;
  my $error_message              = shift;
  my $error_state                = shift;
  my $error_code                 = shift;
  my $qlen = length($sql);

  #datasets allocated with RETPD > 0 can't be deleted
  $ftp_conn->quot("site FILE=SEQ LR=$qlen BLOCKSI=$qlen REC=F RET=0");

  #Query upload
  my $fh = IO::File->new_tmpfile() 
    or Carp::croak("Cannot create temporary storage for the sql statement:$!");
  $fh->seek(0,0) || Carp::croak ("Seek operation failed:$!");
  print $fh $sql;
  $fh->flush() || Carp::croak ("Flush operation failed:$!");
  $fh->seek(0,0) || Carp::croak ("Seek operation failed:$!");
  $ftp_conn->put_unique($fh,$remote_sql_filename_prefix.'0001');

  #Workaround:the current implementation of Net::FTP::put_unique do not 
  #returns the filename. The error lie in the regexp at line 72 of 
  #Net/FTP/dataconn.pm
  my $filename = $1
    if $ftp_conn->message() =~ 
      /($remote_sql_filename_prefix\d{4}) \(unique name\)/ 
        or Carp::croak ("Cannot determine the remote sql filename.");
  $ftp_conn->quot ('SITE NOTRAIL FILE=SQL DB2='.$db2subsys.' SPR LR=32000 REC=F '.
                   'SQLC=N BLOCKSI=32000');
  $fh->truncate(0) || Carp::croak ("Truncate operation failed:$!");
  #Error handling
  #"551 Transfer aborted: SQL PREPARE/DESCRIBE failure" -> sql syntax error
  #"551 Transfer aborted: SQL not available.  Attempt to open plan EZAFTPMQ"
  #"554 Transfer aborted: unsupported SQL statement" -> only selects
  #"551 Transfer aborted: attempt to connect to DB2  failed" -> subsystem error
  #MVS was unable to locate a DB2 subsystem with the specified name

  my $warnmsg = "";
  my $transfer_msg="";
  {
    local $SIG{__WARN__} = sub {$warnmsg=shift;};
    $ftp_conn->get ($filename,$fh);
    $transfer_msg = $ftp_conn->message();
  }
  $ftp_conn->quot ('SITE FILETYPE=SEQ');
  $ftp_conn->delete($filename);
  if ($transfer_msg =~ /Transfer aborted: SQL PREPARE\/DESCRIBE failure/) {
    $fh->flush()   || Carp::croak ("Flush operation failed:$!");
    $fh->seek(0,0) ||Carp::croak ("Seek operation failed:$!");

    $$error_message = "The SQL statement is invalid:\n". do {local $/; <$fh>} ."\n";

    #Workaround for a problem with filehandles and set_err
    #Forces a copy of the content of the file.
    #Without the following line the content of the error message is not reported.
    $$error_message = sprintf ('%s',$$error_message);
    $$error_state = $1 if ($$error_message =~ /SQLSTATE\s+=\s+(\d+)/);
    $$error_code  = $1 if ($$error_message =~ /SQLCODE\s+=\s+([\-0-9]+)/);

    #print "$$error_state";
    #exit;
   
  } elsif($transfer_msg =~ /Transfer aborted: (.*)/) {
  	$$error_message = $1;
  	$$error_code = '-30080';
  	$$error_state = '08001'
  } elsif ($warnmsg ne "") {
  	$$error_message = $warnmsg; 
  	$$error_code = '-30080';
  	$$error_state = '08001'
  } else {
    $fh->seek(0,0) || Carp::croak ("Seek operation failed:$!");
    return $fh;
  }
  $fh->flush()   || Carp::croak ("Flush operation failed:$!");
  $fh->close();
  undef ($fh);
  return undef;
}

#End of DBD::MVS_FTPSQL::st

1;

__END__

=head1 NAME

DBD::MVS_FTPSQL - DBI driver to query IBM DB2 mainframe databases through an IBM FTP server.

=head1 VERSION 

This documentation refers to DBD::MVS_FTPSQL version 0.38.13.

=head1 SYNOPSIS

  require DBI;
  
  #DSN String that identifies the DB2 subsystem
  my $DSN = "hostname=...;ssid=..." ;

  #A mainframe account able to perform SELECTs  
  my ($username, $password) = ('...','...');
  
  my $dbh = DBI->connect("dbi:MVS_FTPSQL:$DSN", $username, $password) 
    or die "Cannot connect: " . $DBI::errstr;

It connects to the DB2 subsystem C<$ssid> of the mainframe whose hostname is C<$hostname>.
Refer to the L<DSN string|/"DSN string"> section of this document for details on matching
the configuration of the mainframe you want to connect to (in particular the attributes
L<hostname|/"hostname">, L<port|/"port"> and L<ssid|/"ssid">). 
It could be a good idea to take also a look at the L<EXAMPLES|/"EXAMPLES"> section below.

=head1 DESCRIPTION

This pure Perl module lets you submit SQL queries (that's it, only SELECT statements) to a DB2 subsystem 
installed on a mainframe,  provided that: 

=over 4

=item *

The IBM FTP Communications Server (CS) on the mainframe side was installed 
with the (optional) SQL query function enabled ( see the section
L<Installing the SQL query function on the Communications Server|/"Installing the SQL query function on the Communications Server"> 
for additional information on this subject).

=item *

You supply, as mentioned in the L<SYNOPSIS|/"SYNOPSIS"> section above, enough information, 
inside the DSN string of the L<DBI::connect()|DBI/connect> statement, to identify and locate the 
L<ssid|/"ssid"> (the four character DB2 subsystem identifier) you want to connect to
and the L<hostname|/"hostname"> and L<port|/"port"> of the IBM FTP CS that will taxi your queries.

=item * 

You can submit queries via QMF (Query Management Facility) or, in other words,
your account has C<SELECT> privileges on the subsystem that contains the tables
you want to query.

=item *

Your account on the mainframe has write permissions to either a 
I<"swap"> directory (that you can specify via the DSN facultative attribute 
L<remote_directory|/"remote_directory">) or your home directory (if you omit it). This directory
will be utilized as a temporary storage area for the files containing the 
sql statements you submit.
No harm will be done to other files located there.
For more details read the L<DSN string|/"DSN string"> section of this document.

=back

This document focuses primarily on specific issues regarding this particular DBI 
driver and it assumes that you are familiar with the DBI architecture. If not the case, please read the L<DBI documentation|DBI> first to acquire a general 
knowledge of its classes and methods. 

=head2 DSN string

The following instruction: 

  my $dbh = DBI->connect("dbi:MVS_FTPSQL:$DSN", $username, $password)
            or die $DBI::errstr;  

establishes a connection to the DB2 subsystem identified by the DSN string 
C<$DSN>, using as login credentials the C<$username> and C<$password> supplied. 
Notice that in the OS/390 or z/OS environment you don't connect to a 
database, instead you have to connect to the DB2 subsystem which gives 
access to all the databases it contains (a mainframe database has very 
little to do with its pc counterpart as it is basically a logical grouping 
of tables, other objects and so, in many ways, it's more similar to a pc 
schema).   

The DSN string consists of a list of I<argument=value> pairs separated by semicolons,
like the example below: 

  # Identifies the DB2 subsystem DDB2 accessible through an IBM FTP CS 
  # running at foo.com:9999
  my $DSN = 'hostname=foo.com;port=9999;ssid=DDB2'; 

The following is a list of allowed arguments and their meaning, arranged in order of relevance.

=over 4

=item C<hostname>

The mainframe hostname or ip address. This argument is mandatory.

=item C<port>

Denotes the port on which the IBM FTP CS installed on the mainframe is listening and defaults to 21. 
Do not confuse this (ftp) port with the DRDA port; this is not a DRDA driver.

=item C<ssid>

A 4 character string representing a DB2 subsystem identifier.
You can omit this attribute and rely on the default specified
in the IBM FTP CS configuration dataset C<FTP.DATA>
(the IBM book Communications Server: IP Configuration Reference 
covers this topic in detail ).

If, however, while connecting to the DB2 subsystem <ssid>, you receive the 
error message:

  DBD::MVS_FTPSQL::st execute failed: attempt to connect to <ssid> failed

this means that the default value doesn't match an existing DB2 subsystem and you need 
to explicitly specify it. Notice that this is the same error message that you
receive if the ssid you specify doesn't exist.
If you don't know the ssid of the DB2 subsystem(s) installed on the 
mainframe you are trying to connect to ask your system administrator 
or read the L<"How to find out the DB2 subsystem IDs"|/"How to find out the DB2 subsystem IDs"> section of this document.

=item C<remote_prefix>

The first 4 characters of the temporary dataset name that will be 
used to store the query on the mainframe. The default prefix is 'FSQL', 
so the dataset will be named with the first unassigned string of the 
sequence FSQL0001, FSQL0002, FSQL0003 and so on (this prevents overwriting 
existing files).
Please note that you don't need to interact with those files directly as they 
only serve to upload the query to the mainframe before the  execution 
and they will be deleted suddenly after.
The files will be allocated for the minimum retention possible period
(0 days) so if something goes wrong they will be deleted anyway the 
next day.

=item C<remote_directory>

The "directory" (more precisely the first n-1 name segments of a dataset 
name, composed of n name segments, where the n-th name segment can be 
inappropriately called the "filename") on 
the mainframe where the temporary datasets are stored
during the query execution phase. 
The directory must be writable for the user. The default is the user 
home directory. If specified, this attribute must obey
MVS data set naming convention: 

=over 4

=item *

it may consist of one or more name segments separated by a period 

=item *

every name segment can be 1 to 8 alphanumeric (A-Z plus 0-9) or national (-,#, @, $) characters long  but

=item * 

a restriction to the previous rule is that the first character of every name segment can't be a number or a sign (-)

=back

If the dataset name is preceded by two slashes, then the path will be interpreted as absolute, 
otherwise the path will be evaluated as relative to the user home directory.
Valid values for this attribute are:   

  //FOO.BAR 
  //FIRST.SECOND.THIRD  
  DB2TEMP 
  ONE.TWO.THREE                

where the latter two are relative paths. If the user's home directory being connected with the mainframe is JSMITH and the preceding 
attribute L<remote_prefix|/remote_prefix> is set to
FDB2, then the temporary files will be created in the form:   

  //FOO.BAR.FDB2XXXX 
  //FIRST.SECOND.THIRD.FDB2XXXX 
  //JSMITH.DB2TEMP.FDB2XXXX 
  //JSMITH.ONE.TWO.THREE.FDB2XXXX  

where XXXX is the smallest zero-padded 4-digit integer that will produce an unique filename.
See how the total length of one of those dataset names must not exceed 44 characters.

=item C<timeout>

The "client" timeout. Specifies the maximum time in seconds the client 
will wait for a response from the server before signaling failure 
to the caller. The default is 120 seconds and should be appropriate for most situations.

Don't confuse the "client" timeout DSN attribute with the "server" 
timeout that is instead the timeout after which an idle connection to the 
database (the IBM FTP server, in our case) is closed. To hold a connection open 
and get rid of the server timeout you should periodically issue ping commands
(internally redefined as noop).

=back

=head2 How to find out the DB2 subsystem IDs

I've asked this question to the comp.databases.ibm-db2 group and
Jeroen van den Broek kindly answered me:

=over 4

Every DB2 subsystem has at least 3 address spaces associated with it,
the names of which all start with the subsystem-id (SSID):

  <SSID>MSTR = system services address space
  <SSID>DBM1 = database services address space
  <SSID>DIST = DDF (distributed data facility) address space

(next to these, you might have others, like the Stored Procedures
address space and the IRLM (Integrated Resource Lock Manager) address
space, but naming for these is not fixed)
You should be able to identify your SSID's via SDSF's "Status
Display" (option ST on SDSF's Primary Option Menu).
Use the following subcommands to show the various types of fixed
address spaces:

  SELECT *MSTR
  SELECT *DBM1
  SELECT *DIST

from which you can deduct your SSID's. 

=back

Note that SDSF (System Display and Search Facility) is an IBM product which interfaces with the MVS spool that,
among other things, allows the user to list all the jobs on the spool,
not only the ones whose name starts with his user-id.


=head2 Locking and concurrency considerations

This section discusses issues related to how the intrinsic constraints of 
the "SQL through FTP" feature influence the concurrent access of data.
An explanation of the concepts behind locking, concurrency and the way DB2 implements 
it is far beyond the scope of this document and although a brief introduction of 
isolation levels is provided, that knowledge is taken for granted.
Please consult your DB2 documentation for a more thorough overview.
It is also worth checking out a couple of interesting articles of Roger E. Sanders published on Db2 
Magazine.

Ok, after having said that...       

IBM DB2 for OS/390 or later supports four levels of isolation. These, ordered from the more to the less 
restrictive, are: 

=over 4

=item Repeatable Read (RR)  

Share locks are acquired on all the rows referenced (not only
those ones that will be returned) and they are released only when the transaction is 
committed or rolled back. Other concurrent transactions
can't acquire exclusive locks on those rows (and hence will have to wait before
modify the data) until the transaction owning the locks terminates. This prevents any 
interference between transactions themselves (the same query issued multiple times within the 
same transaction will ever return the same data) but also decreases concurrency, 
causing a slow down in performance.

=item Read Stability (RS) 

Share locks are acquired only for those rows that are part of a result set.
This prevents dirty reads (the reading of uncommitted data) and nonrepeatable 
reads while phantoms phenomena (described below) can occur.
If a query is issued more than once in the same transaction, it may get additional 
(precisely phantom) rows, as another concurrent transaction can insert rows that match 
the search criteria of the query.

=item Cursor Stability (CS)  

This is the default isolation level. It locks only the row (the page) that is currently being returned. As the cursor leaves the row, the lock is released and acquired for the next one, until all the data is returned.
While this maximizes concurrency and prevents dirty reads it does not 
ensure that the data retrieved will not be changed by other transactions, so
if the transaction reads the same row of data more than once odds are it
gets different results each time (nonrepetable read phenomena).

=item Uncommitted Read (UR)

With this isolation levels the transaction (almost) doesn't acquire locks and 
doesn't check if the data that is retrieving is locked.
This, at the price of risking reading non committed data, 
leads to two main advantages:

=over 2

=item *

Better performance if compared with other isolation levels.

=item *

Ensures that a deadlock condition can not occur.

=back

=back

Notice that with this driver you can override the default isolation level 
only at query level. You can do so by ending the statement 
with a "with" clause whose syntax is:

  (fullselect)  WITH [RR|RS|CS|UR]

as illustrated by the following example:

  SELECT * FROM SYSIBM.SYSDUMMY1 WITH UR

When using IBM FTP CS as a medium to submit queries, there are two main limitations 
that affect your control over the way the data is locked and 
isolated between concurrent processes. These limitations are:

=over 2

=item *

The inability to control transaction boundaries. That is, you can only
issue select statements; every other statement - and this includes also
commit and rollback - are not permitted. To put it briefly, you can't disable 
autocommit. 


=item *

When a statement is L<execute()|DBI/execute>d, all the data requested it's transferred from 
the mainframe to the pc altogether. Subsequently, when you L<fetch()|DBI/fetchrow_arrayref> the rows,
you will interact with a local temporary copy of the data.

=back

The first condition implies it's not possible in any way to protect your application
against nonrepetable read and phantom phenomena between two different executions 
of the same query.
The second one makes de-facto the choice of RR or RS as isolation levels useless,
because while you are (locally) fetching the data, the transaction is already terminated (
the real fetching of data happens contextually to the statement execution).

Using RR as isolation level, can make a difference when your 
query (maybe with the auxilium of a subquery) accesses the same 
table more than once, like in the following example:

  select max(salary) as sal_ko from staff where 
    salary = ( select max(salary) as sal_ok from staff)

If this query it's not executed with an isolation level RR,
B<it may return a null value> instead of the maximum
salary. Let's clarify why this can take place. During execution, the table  staff is processed two 
times, first time to determine the maximum salary sal_ok and later, to check which 
salary corresponds to sal_ok.
If, between the two phases, a transaction that modifies the maximum salary is committed 
(like an update that increases the salary of that staff member) then sal_ko will not match any value.

Since RR or RS don't work as they should, this leave us with two options. Specifying CS (or omitting it as it's the default) and retrieve only 
data committed after the execution of the statement, or choosing UR and retrieve also non committed
data. Remember that, in any case, you won't see any changes, committed or not, happening while you're fetching
 data because, as stated before, you're working on a local copy of the resultset, that was internally fetched
during the execution of the query.

Although these limitations may seem harsh, it is important to realize that in the majority of the cases,
CS or also UR are the best choice, because maximize concurrency and hence performance. 
This is particularly true when retrieving data from a mainframe, because there are a lot of other processes
that accesses data, potentially more critical than your application (CICS applications for example).

=head1 EXAMPLES

=head2 Example 1: retrieving a single row of data 

  use warnings;
  use strict;
  use DBI;

  # It connects to the IBM FTP CS server running at IP 123.456.789.123, port 4021
  #
  # All the queries will be routed to the DB2 subsystem DB2P
  #
  # jsmith/123456 must be a valid mainframe account able to query the tables of DB2P
  #
  my ($hostname,$port,$ssid) = ('123.456.789.123',4021,'DB2P');
  my ($username,$password)   = ('jsmith','123456');
  my $dbh = DBI->connect("dbi:MVS_FTPSQL:hostname=$hostname;port=$port;ssid=$ssid", $username, $password,
                        { RaiseError => 1 }) ||  confess  $DBI::errstr;
  #Notice that RaiseError is set to 1 so we don't need to test for the return code of each method call

  #Prepares the query
  my $sth = $dbh->prepare(<<EOSQL);
  SELECT 
     max (SALARY) as MOST_PAYED
    ,min (SALARY) as LESS_PAYED
  FROM Q.STAFF
  WHERE
    JOB = 'CLERK'
  OPTIMIZE FOR 1 ROWS
  WITH UR
  EOSQL

  #Executes it
  $sth->execute();

  #Retrieves the data  
  my ($clerk_max, $clerk_min) = $sth->fetchrow_array();

  $sth->finish();
  $dbh->disconnect();

=head2 Example 2: read (and structure) all the data at once

  use warnings;
  use strict;
  use DBI;
  use Data::Dumper qw(Dumper);

  # It connects to the IBM FTP CS server running at bigiron.localdomain
  # with the default port 21
  #
  # All the queries will be submitted to the default DB2 subsystem
  #
  # jsmith/123456 must be a valid mainframe account able to query the default DB2 subsystem
  my ($hostname,$username,$password) = ('bigiron.localdomain','jsmith','123456');
  my $dbh = DBI->connect("dbi:MVS_FTPSQL:hostname=$hostname", $username, $password,
                        { RaiseError => 1 }) or die $dbh->errstr;

  #Prepare, execute and retrieve all the databases in the selected 
  #subsystem, all in a single call and returns a reference to an hash of hash,
  #where the index key of the first hash is the database name.
  #To return the data in the form of an array of array or an array of hashes see
  #the method selectall_arrayref
  my $db_list = $dbh->selectall_hashref( "SELECT * FROM SYSIBM.SYSDATABASE",'NAME');

  print Dumper($db_list);

  $dbh->disconnect();


=head2 Example 3: looping through the data

  use warnings;
  use strict;
  use DBI;

  # Same ssid location as in example 2, but this time the temporary datasets
  # will be in the form: //TEMPDS.FTPSQL.QRYXXXXXXXX
  my ($hostname,$username,$password) = ('bigiron.localdomain','jsmith','123456');
  my ($remote_directory,$remote_prefix) = ('//TEMPDS.FTPSQL','QRY');

  my $dbh = DBI->connect(
    "dbi:MVS_FTPSQL:hostname=$hostname;remote_directory=$remote_directory;remote_prefix=$remote_prefix"
   ,$username, $password,
   { RaiseError => 1}) ||  confess  $DBI::errstr;

  my $sth = $dbh->prepare(<<EOSQL);
    SELECT 
       PARTNAME AS PART 
      ,PRODUCT AS PROD 
      ,PRODPRICE AS PRICE 
    FROM 
       Q.PARTS AS PT
      ,Q.PRODUCTS AS PR
    WHERE
          PT.PRODNO = PR.PRODNUM
      AND PR.PRODPRICE <= 
          (SELECT AVG(PRODPRICE) * 2 FROM Q.PRODUCTS)
    order by price, product
    fetch first 4 rows only
    WITH CS
  EOSQL

  $sth->execute();

  #Prints The column headers
  print join("\t",@{$sth->{'NAME'}})."\n";

  #Prints the data
  while (my @row  = $sth->fetchrow_array()) {
    print join("\t",@row)."\n";
  }

  $sth->finish();
  $dbh->disconnect();

There is not much more to say about using this driver to establish a connection. 
In the following examples, we will assume that a database handle  B<$dbh>
to an active connection with a mainframe is available and we will focus on other aspects.

=head2 Example 4: binding parameters (input)

  my $sth = $dbh->prepare(<<EOSQL);
    SELECT 
       PARTNAME AS PART 
      ,PRODUCT AS PROD 
      ,PRODPRICE AS PRICE 
    FROM 
       Q.PARTS AS PT
      ,Q.PRODUCTS AS PR
    WHERE
          PT.PRODNO = PR.PRODNUM
      AND PARTNAME  = ?
    WITH CS
  EOSQL

  foreach my $partname (qw (WIRE BEARINGS COPPER)) {                                                          
    $sth->bind_param(1, $partname, SQL_VARCHAR);                                                                 
    $sth->execute();
    print join("\t",$sth->fetchrow_array())."\n";                                                                   
  } 

=head2 Example 5: binding columns (output)

  my $sth = $dbh->prepare(
    'SELECT PRODNAME, PRODPRICE FROM Q.PRODUCTS order by 1,2'
  );
  $sth->execute();
  
  my ($name,$price,$total)=('',0,0);
  $sth->bind_columns(\$name,\$price);
  
  my $delimiter = '-'x21 ."\n";
  
  print $delimiter;
  printf ("%-10s %10s\n" ,"Name","Price");
  print $delimiter;
  
  while ($sth->fetch()) {
    $total +=$price;
    printf ("%-12s %8.2f\n", $name,$price);
  }
  
  print $delimiter;
  printf ("%21.2f\n" ,$total);

The expected output is:

  ---------------------
  Name            Price
  ---------------------
  GENERATOR       45.75
  SCREWDRIVER      3.70
  SHAFT            8.65
  SWITCH           2.60
  RELAY            7.55
  SOCKET           1.40
  MOTOR           35.80
  CAM              1.15
  GEAR             9.65
  BUSHING          5.90
  SAW             18.90
  HAMMER           9.35
  CHISEL           7.75
  WRENCHSET       25.90
  ---------------------
                 184.05

=head2 Example 6: table_info()

  # Returns all the tables of the sample schema.
  # See the DBI manual for details on this method.
  #
  # Notice that DB2 does not have the concept of a catalog so
  # $catalog should ever be set to undef
  my ($catalog, $schema, $table, $type) = (undef ,'Q','','');
  my $sth = $dbh->table_info( $catalog, $schema, $table, $type );

  #Fetch all the rows in the form of an array of hashes where
  #the keys of the hashes are the column names
  my $refAOH = $sth->fetchall_arrayref({});
  $sth->finish();
  print Dumper($refAOH);

=head2 Example 7: column_info()

  # Returns all the columns of tables of the sample schema that
  # starts with 'N'
  my ($catalog, $schema, $table, $column ) = (undef ,'Q','','N%');
  my $sth = $dbh->column_info($catalog, $schema, $table, $column);
  my $refAOA = $sth->fetchall_arrayref([2,3,5,8]);
  $sth->finish();
  
  #Returns a list of fully qualified columns and their type and length in bytes
  printf ("%20s %20s %10s %7s\n".'-'x50,'TABLE','COLUMN','TYPE','LENGTH');
  print '-'x60 . "\n";
  map { printf ("%20s %20s %10s %7d\n",@{$_}) => $_ } @{$refAOA};

the output should be something like:

                 TABLE               COLUMN       TYPE  LENGTH
  ------------------------------------------------------------
             INTERVIEW            STARTTIME       TIME       3
           OBJECT_DATA                  SEQ   SMALLINT       2
                 PARTS               SUPPNO       CHAR       5
              PROFILES                SPACE       CHAR      50
              PROFILES             SYNONYMS    VARCHAR      31
               PROJECT               STARTD       DATE       4
                 SALES           SALESREPNO   SMALLINT       2
                 STAFF               SALARY    DECIMAL       5
              SUPPLIER                STATE       CHAR       2
              SUPPLIER               STREET    VARCHAR      15
  
=head1 INSTALLATION & PREREQUISITES

This driver relies on the following other Perl modules:

  Carp
  DBI
  IO::File
  Net::FTP

The automatic installation procedure via the CPAN module is the most recommended:

  perl -MCPAN -e "install Bundle::DBD::MVS_FTPSQL"

If you have never invoked CPAN, it will run through a series of
configuration questions such as which CPAN mirror site to use.
It's important that the network setup questions are answered correctly 
because network configuration errors may prevent access to CPAN repository and 
thus the download of the modules.

To install this module manually, run the following commands:

    perl Makefile.PL
    make
    make test
    make install

In order for this module to be of any use you need to have access to a mainframe running
an ftp server configured for executing SQL query via FTP (see the section below to find out how
this feature can be installed).

=head2 Installing the SQL query function on the Communications Server 

To install the optional SQL query function and access the DB2 subsystems from FTP
you need to bind the DBRM (Database Request Module) called EZAFTPMQ, located in the SEZADBRM library, 
to the plan used by FTP and grant execution privileges for that plan to PUBLIC.
A sample JCL, that needs to be customized to perform the bind, is EZAFTPAB and 
can be found in the library SEZAINST.
It is also advisable that your system administrator creates, if not already present,
the FTP.DATA configuration data set and:

=over 4

=item *

adds a DB2 statement that provides the ssid to be used by default.

=item *

specifies the DB2PLAN of the FTP server.

=back

The IBM books entitled "<Your OS Version> Communications Server: IP Configuration Guide"
describes such operations in detail.

=head1 CAVEATS

=head2 Rounding error when fetching numbers with more than 16 digits.

Due to a bug on the "sql through ftp" feature, when a numeric value is
returned, only its first 16 digits are significant. This is shown by the 
following query:

  select 19999999999999999 from sysibm.sysdummy1

that returns the incorrect value 20000000000000000. A workaround for this bug
is to cast to char every field returned by the query that is declared
DECIMAL(17,..) or more, The query in the example above will become as follows:

  select char(19999999999999999) from sysibm.sysdummy1

Note that this affects only returned data, so any column of a subquery
or any literal constant passed with the query are immune to this bug.
The following queries do not require any fix.

  select empno from jobhistory where id=12345678901234567890

  SELECT  NUM - 990000000000000000 FROM (                                   
    SELECT 999999999999999999 AS NUM FROM SYSIBM.SYSDUMMY1        
  ) AS X  

=head2 "Fetch failed: Horizontal tab found. ..."  error message

The "sql through ftp" feature returns data in the form of lines of text in which every
field is delimited by tab characters (\x09 in ASCII that corresponds to \x05 in EBCDIC). 
If one of the field returned contains a tab character this driver fails to distinguish
between values and delimiters and will return the error given in the title of this paragraph.
This bug can be reproduced by the following query:

  SELECT 'X'|| CHAR(X'05') ||'X' AS TXT FROM SYSIBM.SYSDUMMY1

A workaround for this bug is , in case you receive the mentioned error, that you
replace the problematic character with another string, like for example a blank character as
in the query below:

 select TRANSLATE (TXT, ' ', x'05') from (
   SELECT 'X'|| CHAR(X'05') ||'X' AS TXT FROM SYSIBM.SYSDUMMY1
 ) as x

As for the preceding bug, this affects only data that is returned, so any column of a subquery
or any literal constant passed with the query is immune.

=head2 Nullable columns are erroneously fetched as a sequence of '-'

  select NULLIF(1,1) FROM SYSIBM.SYSDUMMY1

This bug is a direct consequence of the way the "sql through ftp" feature returns nulls.
A workaround for this consists in applying the function COALESCE() (whose alias is VALUE())
to all the returned columns that can contain null values,as in the following example.

  select coalesce (A,0) from (
    select NULLIF(1,1) AS A FROM SYSIBM.SYSDUMMY1
  ) as X

Notice that also this bug as the preceding concerns only fetched columns.

=head1 TO DO

-) Implement other database handle methods like primary_key, foreign_key_info, etc

-) Write more tests

=head1 AUTHOR

Clemente Biondo, <clemente.biondo@gmail.com>

=head1 ACKNOWLEDGEMENTS

I wish to thank Sonia Ingrassia for the very careful review of this document.
I'd also like to thank to Jeroen van den Broek
for his answer to the question: L<How to find out the DB2 subsystem IDs>.

And I wish to thank you for reading this far. I hope this work will be useful to you to some degree.
Any comment will be really appreciated!.

=head1 SEE ALSO

L<DBI>

Books:

=over 4

=item *

IBM Communications Server IP: Configuration Guide 

=item *
  
IBM Communications Server: IP Configuration Reference.

=back

=head1 COPYRIGHT & LICENSE

Copyright 2007 Clemente Biondo <clemente.biondo@gmail.com>, all rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut

__END__

Todo:
-) Mailare Jeroen van den Broek <nltaal@baasbovenbaas.demon.nl> .
-) aggiungere alla sezione How to find out the DB2 subsystem IDs che si possono ottenere i ssid anche
   dal pannello del qmf
-) Preparare Bundle::DBD::MVS_FTPSQL 

-) podchecker MVS_FTPSQL.pm && pod2html --infile=MVS_FTPSQL.pm --outfile=MVS_FTPSQL.html 
