[comp.sources.misc] v13i026: recursive ftp'er in Perl

mikef@hpsadle.hp.com (Mike Ferrara) (06/06/90)

Posting-number: Volume 13, Issue 26
Submitted-by: mikef@hpsadle.hp.com (Mike Ferrara)
Archive-name: rftp/part01

Here's a recursive ftp'er for grabbing, putting, and listing whole
file trees via ftp. It uses perl to drive ftp, so in order to use it
you need perl. There are two versions, rftp and rftp.nosockets. rftp.nosockets
is much smaller, slower and has many fewer features. It was my proof of
concept code, but it works without perl sockets. That's why I put it
in the shar. For more info, see the code, or the man page. You may also
need to fool with the #! line at the beginning, and make sure you've 
done a "makelib" (from perl distribution) on /usr/include/sys/sockets.h.
This has been tested on perl 3 patchlevel 18.

  Mike Ferrara M/S 2LRR
  HP Signal Analysis Div R&D
  1212 Valley House Drive
  Rohnert Park, CA 94928
  (707) 794-4479
  mikef%hpsadle@hp-sde.sde.hp.com
  mikef@hpsadle.hp.com
--------------------------cut here-----------------------------

# This is a shell archive.  Remove anything before this line,
# then unpack it by saving it in a file and typing "sh file".
#
# Wrapped by Mike Ferrara <mikef@hpsadle> on Tue Jun  5 10:41:15 1990
#
# This archive contains:
#	rftp	
#

LANG=""; export LANG
PATH=/bin:/usr/bin:$PATH; export PATH

echo mkdir - rftp
mkdir rftp

echo x - rftp/rftp
cat >rftp/rftp <<'@EOF'
#!/usr/local/bin/perl

#Recursively decend via anon FTP and either get a listing
#or retrieve the tree.
# Usage:
#      rftp [options] host [list-file]
# Options --- 
#   [-s<source_dir>] Specify the root for transfer on remote host (default "/")
#   [-d<dest_dir>] Specify the root for transfer on local host (default ".")
#   [-l] Just a listing, thank you.
#   [-a] ASCII mode transfers (default: BIN)
#   [-g] get files
#   [-p] put files
#   [-u<user>:<passwd>] Specify a userid and passwd
#   [-b] debug mode
# 
#   return value is 0 if ok.
#                   1 if an error occurred during transfer.
#                   2 if login failed.
#                   
#
# Mail bugs or comments to:
#
#  Mike Ferrara M/S 2LRR
#  HP Signal Analysis Div R&D
#  1212 Valley House Drive
#  Rohnert Park, CA 94928
#  (707) 794-4479
#  mikef%hpsadle@hp-sde.sde.hp.com
#  mikef@hpsadle.hp.com

#main
$cd="";
$dirs[1]='/';
$source='.';
$dest='.';
$ftpin="/tmp/ftpin$$";
$ftpout="/tmp/ftpout$$";
$listing=1;
$bin=1;
$reader=0;
$writer=0;
$user='anonymous';
$passwd=`hostname`;
$debug=0;

#Setup signal handler
$SIG{'INT'}='cleanup';
$SIG{'HUP'}='cleanup';
$SIG{'QUIT'}='cleanup';
$SIG{'TERM'}='cleanup';

&parseopts;
if (!($putting)&&!($listing)) {
   if (-d $dest) {
     }
   else {
     system("mkdir -p $dest");
     }
   chdir $dest;
}
&setupcomm;
if ($writer) {
   open(LSOUT,">$lsout");
   open(FTPIN,"| ftp -i -v -n 1>$ftpout 2>&1");
   select(FTPIN);$|=1;select(stdout);
   &sendftp ("open $host");
   &sendftp ("user $user $passwd");
   while (1) { #Read from the socket to see if login worked.
        $_=<NS>;
        last if (/^230\s/);
        exit(2) if (/^530\s/);
        } 
   if ($bin==1) {
      &sendftp("bin");
      }
   undef($lastdirectory);
   if (!$putting) {
      &recurse;
      }
   if ($putting) {
      &putfiles;
      }
   &sendftp("quit");
   close(FTPIN);
   &cleanup;
}
if ($reader) {
   &readloop;
   }

sub putfiles {
   &sendftp("bin") if ($bin);
   open(FIND,"find $source -print |");
   while ($_=<FIND>) {
       chop;
       $destfile="$dest/$_";
       $destfile=~s,/\./,/,g;
       $destfile=~s,//,/,g;
       $destfile=~s,\.$,,;
       $destfile=~s,/$,,;
       $srcfile="$source/$_";
       $srcfile=~s,/\./,/,g;
       $srcfile=~s,//,/,g;
       if (-f $_) {
          &sendftp("put $srcfile $destfile");
          &readsock;
          next;
          }
       if (-d $_) {
          &sendftp("mkdir $destfile");
          }
       }
    close(FIND); 
}          


sub parseopts {
   &Getopts('abs:d:lu:pg');
   $host=shift(@ARGV);
   if (! defined($host)) {
      die "I need you to tell me the hostname!";
      }
   if (defined($opt_s)) {
      $source=$opt_s;
      }
   if (defined($opt_d)) {
      $dest=$opt_d;
      }
   if ($opt_a==1) {
      $bin=0;
      }
   if ($opt_l) {
      $listing=1;
      $bin=0;
      $lsout=shift(@ARGV);
      }
   if (defined($lsout)) {
      }
   else {
      $lsout='-';
      }
   if (defined($opt_u)) {
      ($user,$passwd)=split(":",$opt_u);
      }
   if (defined($opt_g)) {
      $listing=0;
      $putting=0;
      die "What do you want to do? put OR get?" if (defined($opt_p));
      }
   if (defined($opt_p)) {
      $listing=0;
      $putting=1;
      die "What do you want to do? put OR get?" if (defined($opt_g));
      }
   $debug=1 if (defined($opt_b));
}


# getopts.pl - a better getopt.pl
# Usage:
#      do Getopts('a:bc');  # -a takes arg. -b & -c not. Sets opt_* as a
#                           #  side effect.
sub Getopts {
    local($argumentative) = @_;
    local(@args,$_,$first,$rest,$errs);
    local($[) = 0;

    @args = split( / */, $argumentative );
    while(($_ = $ARGV[0]) =~ /^-(.)(.*)/) {
	($first,$rest) = ($1,$2);
	$pos = index($argumentative,$first);
	if($pos >= $[) {
	    if($args[$pos+1] eq ':') {
		shift(@ARGV);
		if($rest eq '') {
		    $rest = shift(@ARGV);
		}
		eval "\$opt_$first = \$rest;";
	    }
	    else {
		eval "\$opt_$first = 1";
		if($rest eq '') {
		    shift(@ARGV);
		}
		else {
		    $ARGV[0] = "-$rest";
		}
	    }
	}
	else {
	    print STDERR "Unknown option: $first\n";
	    ++$errs;
	    if($rest ne '') {
		$ARGV[0] = "-$rest";
	    }
	    else {
		shift(@ARGV);
	    }
	}
    }
    $errs == 0;
}

#
# readloop -- keep reading stuff from the $ftpout file and
# stuffing it over the socket.
#
sub readloop {
     while (1) {
     if (-f $ftpout) {
        open (FTPOUT,$ftpout);      
        while (1) {
           $_=<FTPOUT>;
           if (/^221\sGoodbye/) {
              last;
              }
           print(S $_);
           }
        exit(0);
        }
     }
}

#
# Workhorse subroutine, gets a whole directory or listing
# of a whole directory. It also forms the list of directories
# below the current one.
#
sub readsock {
          $i=1;
          $n=0;
          while (1) {
             $_=<NS>;
             if ($listing==1) {
                print (LSOUT $_) if (!/^\s?$/ && !/^[0-9]*\s/);
                }
             if (/^d/) {
                chop;
                split;
                $dirs[$i]=pop(@_);
                $i=$i+1;
                }
             if (/^-/) {
                chop;
                split;
                $fname[$n]=pop(@_);
                $n=$n+1;
                }           
             if (/^226\s/) {
                last;
                }
             if ((/^5[0-9][0-9]\s/)&&(!/^5[0-9][0-9]\sbytes/i)) {
                print (stderr "A fatal error occurred during transfer:");
                print (stderr $_);
                exit(1);
                }
             }
}

#
# Do the recursion, using getdir as the workhorse.
#
sub recurse {
    local(@dirlist)=@dirs;
    local($currentparent)=shift(@dirlist);
       while (defined($child=shift(@dirlist))) {
          $cd="$source/$currentparent/$child";
          undef @dirs;
          $cd=~s,//,/,g;
          $cd=~s,//,/,g;
          $cd=~s,/$,,;
          if (($cd EQ $lastdirectory) && ($lastdirectory NE "")) {
             die "OOOPS! I'm looping!!";
             }
          &sendftp("dir $cd");
#         print ("dir $cd\n");
          if ($listing==1) {
             print(LSOUT "\n$cd:\n");
             }
          &readsock;
          if ($listing == 0) {
             $ddir="$dest/$currentparent/$child";
             $ddir=~s,//,/,g;
             $ddir=~s,//,/,g;
             system("mkdir -p $ddir");
             while (defined($file=shift(@fname))){
                &sendftp("get $cd/$file $ddir/$file");
                &readsock;
                }
             }
          $lastdirectory=$cd;
          $dirs[0]="$currentparent/$child";
          &recurse;
       }
}


#
# Delete the temporary files, close the output, and leave
#
sub cleanup {
   unlink($ftpout);
   kill 15,$childpid;
   close(LSOUT);
   exit(0);
   }


sub sendftp {
   $line=@_[0];
   $line="$line\n" if (!($line=~m/\n$/));
   print (STDERR "$line") if ($debug);
   print (FTPIN $line);
   }

#
# Setup socket based communication between the child and parent
# and fork 
#
sub setupcomm {
   do 'sys/socket.h' || die "Can't do sys/socket.h";
   $port=$$;
   $sockaddr='S n a4 x8';
   chop($hostname=`hostname`);
   ($name,$aliases,$proto)=getprotobyname('tcp');
   ($name,$aliases,$type,$len,$thisaddr)=gethostbyname($hostname);
   if ($childpid == fork) {
      $reader=1; #The child reads FTP output, client
      sleep 3;
      $client=pack($sockaddr,&AF_INET,0,$thisaddr);
      $server=pack($sockaddr,&AF_INET,$port,$thisaddr);
  
      socket(S,&PF_INET,&SOCK_STREAM,$proto) || die "socket: $!";
      bind(S,$client) || die "bind: $!";
      connect(S,$server) || die "connect: $!";

      select(S);$|=1;select(stdout);
      }
   else {
      $writer=1; #The parent writes FTP input and effects transfers., server.
      $server=pack($sockaddr,&AF_INET,$port,"\0\0\0\0");
      select(NS);$|=1;select(stdout);
  
      socket(S,&PF_INET,&SOCK_STREAM,$proto) || die "socket: $!";
      bind(S,$server) || die "bind: $!";
      listen(S,5) || die "connect: $!";

      select(S);$|=1;select(stdout);
      loop: ($addr=accept(NS,S)) || goto loop;
      ($af,$port,$inetaddr)=unpack($sockaddr,$addr);
      }   
}
@EOF

chmod 755 rftp/rftp

echo x - rftp/rftp.1
cat >rftp/rftp.1 <<'@EOF'
.TH RFTP 1
.ds )H
.ds ]W March 1990
.SH NAME
rftp \- recursively ftp on a remote machine
.SH SYNOPSIS
.B rftp 
[
.I options
]
.I host
[
.I listing-file
]
.SH DESCRIPTION
.I Rftp\^
Drives ftp to recursively descend file trees, either for listings,
or to get or put a bunch of files keeping the tree structure intact.
Rftp is written in perl and requires that berkeley sockets be wired
into it. This generally requires that a "makelib" be run on
"/usr/include/sys/sockets.h" before running rftp. Rftp works as follows:
1) fork a copy of itself, and setup socket based IPC between parent
and child.
2) start ftp, with output redirected to /tmp/ftpout$$.
3) poke stdin of ftp while child reads /tmp/ftpout$$, and stuffs
the output back to the parent over the socket.
.SS Options
.PP
There are several options:
.TP
.B [\-s source-dir]
specifies the source directory for the transfer. Default is "."
.TP
.B  [\-d dest-dir]
specifies the destination directory for the transfer. Default is "."
.TP
.B  [\-a]
force transfer mode to ascii. Default is binary (image).
.TP
.B  [\-l]
for directory listing only. This is the default action. Listings 
will go to stdout or to "listing-file" if specified.
.TP
.B  [\-u username:passwd]
specify a user name and password for logging in. Default is user
"anonymous" password `hostname`.
.TP
.B  [-g] 
get files from remote host.
.TP
.B  [\-p]
send (put) files to remote host.
.TP
.SH DIAGNOSTICS
The program tries to clue you in to common failures.  Source is available.
Return codes are as follows: 0 for normal exit, 1 for a failure during a
transfer, and 2 for a login failure
.SH EXAMPLES
This command gets a directory listing from machine "jupiter" and writes 
it in file "xyzzy".
.IP
rftp jupiter xyzzy
.TP
This command retrieves the tree headed at "pub/pub" into "/users/foo/xx"
on machine "jupiter"
.IP
rftp -spub/pub -d/users/foo/xx -g jupiter
.SH AUTHOR
.I rftp
was developed by Mike Ferrara of Hewlett-Packard (Signal Analysis Division)
mikef%hpsad@hp-sde.sde.hp.com or mikef@hpsadle.hp.com.
.PP
.SH "STANDARDS CONFORMANCE"
.IR None.

@EOF

chmod 666 rftp/rftp.1

echo x - rftp/rftp.nosockets
cat >rftp/rftp.nosockets <<'@EOF'
#!/usr/local/bin/perl

#Recursively decend via anon FTP and either get a listing
#or retrieve the tree.
# Usage:
#      rftp [options] host [list-file]
# Options --- 
#   [-s<source_dir>] Specify the root for transfer on remote host (default "/")
#   [-d<dest_dir>] Specify the root for transfer on local host (default ".")
#   [-l] Just a listing, thank you.
#   [-a] ASCII mode transfers (default: BIN)
# 


;# getopts.pl - a better getopt.pl
;# Usage:
;#      do Getopts('a:bc');  # -a takes arg. -b & -c not. Sets opt_* as a
;#                           #  side effect.
sub Getopts {
    local($argumentative) = @_;
    local(@args,$_,$first,$rest,$errs);
    local($[) = 0;

    @args = split( / */, $argumentative );
    while(($_ = $ARGV[0]) =~ /^-(.)(.*)/) {
	($first,$rest) = ($1,$2);
	$pos = index($argumentative,$first);
	if($pos >= $[) {
	    if($args[$pos+1] eq ':') {
		shift(@ARGV);
		if($rest eq '') {
		    $rest = shift(@ARGV);
		}
		eval "\$opt_$first = \$rest;";
	    }
	    else {
		eval "\$opt_$first = 1";
		if($rest eq '') {
		    shift(@ARGV);
		}
		else {
		    $ARGV[0] = "-$rest";
		}
	    }
	}
	else {
	    print STDERR "Unknown option: $first\n";
	    ++$errs;
	    if($rest ne '') {
		$ARGV[0] = "-$rest";
	    }
	    else {
		shift(@ARGV);
	    }
	}
    }
    $errs == 0;
}

1;


sub getdir {
   $i=1;
   undef @dirs;
   $cd=~s,//,/,g;
   $mcd=$cd;
   $mcd=~s,^/,,;
   if ($cd EQ $lastdirectory) {
      die "OOOPS! I'm looping!!";
      }
   open(FTPIN,">$ftpin");
   print(FTPIN "open $host \n");
   print(FTPIN "user anonymous xx\n");
   print(FTPIN "cd /\n");
   if ($bin==1) {
      print(FTPIN "bin \n");
      }
   print(FTPIN "dir $cd\n");
   if ($listing == 0) {
      system("mkdir -p $dest/$cd");
      print (FTPIN "mget $mcd/* \n");
      }
   print(FTPIN "quit \n");
   close(FTPIN);
#   system("ftp -i -n <$ftpin 1>$ftpout 2>&1");
   system("ftp -i -n <$ftpin 1>$ftpout ");
   open(FTPOUT,"$ftpout");
   if ($listing==1) {
      print(LSOUT "\n$cd:\n");
   }
   $dirs[0]=$cd;
   while($_=<FTPOUT>) {
      if ($listing==1) {
         print (LSOUT $_) if (!/^\s?$/);
         }
      if (/^\s?d/) {
         chop;
         split;
         $dirs[$i]=pop(@_);
         $i=$i+1;
         }
      }
   close(FTPOUT);
   $lastdirectory=$cd;
}

sub recurse {
    local(@dirlist)=@dirs;
    local($currentparent)=shift(@dirlist);
       while (defined($child=shift(@dirlist))) {
          $cd="$currentparent/$child";
          &getdir;
          &recurse;
          }
   }

sub cleanup {
   unlink($ftpin,$ftpout);
   }

#main
$cd="";
$dirs[1]='/';
$dest='.';
$ftpin="/tmp/ftpin$$";
$ftpout="/tmp/ftpout$$";
$listing=0;
$bin=1;

$SIG{'INT'}='cleanup';
$SIG{'HUP'}='cleanup';
$SIG{'QUIT'}='cleanup';
$SIG{'TERM'}='cleanup';

&Getopts('as:d:l');

$host=shift(@ARGV);
if (! defined($host)) {
   die "I need you to tell me the hostname!";
   }


if (defined($opt_s)) {
   $dirs[1]=$opt_s;
   }
if (defined($opt_d)) {
   $dest=$opt_d;
   }
if ($opt_a==1) {
   $bin=0;
   }
if (-d $dest) {
  }
else {
  mkdir($dest,0755) || die "$dest already exists and is not a directory!";
  }
if ($opt_l) {
   $listing=1;
   $bin=0;
   $lsout=shift(@ARGV);
   }

if (defined($lsout)) {
   }
else {
   $lsout='-';
   }
open(LSOUT,">$lsout");


chdir $dest;
&recurse;
&cleanup;
close(LSOUT);
exit(0);




















@EOF

chmod 755 rftp/rftp.nosockets

chmod 755 rftp

exit 0