[comp.sources.d] cut.pl - enhanced cut

bob@dhw68k.cts.com (Bob Best) (07/23/88)

Larry Wall took my original cut(1) for perl, and crafted it into a
fast and fancy utility.  This little piece of code displays some of the
power and ease of programming in perl.  By using the -v (verify) option,
rather than executing the cut, a perl script to perform the task will be
generated and displayed on standard output.  The generated script can
then be executed or tailored to suit a more specialized function.
To speed up character list extraction, I added substr() operator support.
See the cut(1) man page for other options.
----------------cut here----------------
#!/usr/bin/perl
# 
# cut.pl
# perl version of cut(1)
#
# written by Bob Best (bob@dhw68k.cts.com)
#            Larry Wall (lwall@jpl-devvax.jpl.nasa.gov)
#
# Fri Jul 22 20:45:10 PDT 1988
#
# to use space character as delimiter, backslash single quotes (-d\' \')
#
# unlike cut(1), this script lists fields in the specified order
#
# maximum line length for character list extraction can be adjusted
# via '$MAXCHAR' - default is 1024
#
# option '-v' (verify) displays the generated perl script on stdout
#
# see cut(1) man page for other options
#
eval "exec /usr/bin/perl -S $0 $*"
	if $running_under_some_shell;

$MAXCHAR=1024;
$delim = "\t";

while ($_ = $ARGV[0], /^-/) {
	shift;
	/^-c(.*)/ && ($clist=$1) && next;
	/^-f(.*)/ && ($flist=$1) && next;
	/^-d(.)/ && ($delim=$1) && next;
	/^-s/ && ($suppress=1) && next;
	/^-v/ && ($verify=1);
}
if (!$clist && !$flist) {die "must specify either -c or -f\n";}
if ($clist && $flist) {die "specify -c or -f but not both\n";}
if ($flist) {$_=$flist;} else {$_=$clist;}

s/(\d*)-(\d*)/do range($1,$2)/eg;

@list=split(/,/);
$[=1;

$init =
'$[=1;
$\ = "\n";
';

$init .=
"\$, = '$delim';
" if $flist;

if ($flist) {
	$sdelim = $delim;
	$sdelim = '\\' . $sdelim if index('[()?*+$|\\', $delim) >= $[;
	$split =  "split(/$sdelim/)";
	$print = '(print), ' unless $suppress;
}

$printlist = "print ";

if ($flist) {
	if ($toend) {
		$toend = "
	\$end = '';
	for (\$i = $toend; \$i <= \$maxfld; ++\$i) {
		\$end .= \$_[\$i];
		\$end .= '$delim';
	}
	chop(\$end);";
		foreach $i (@list) {
			if ($i eq 'MAX') {
				$printlist .= "\$end, ";
			}
			else {
				$printlist .= "\$_[$i], ";
			}
		}
		$split = "(@_ = $split)";
	}
	else {	#not toend
		$max = 0;
		foreach $i (@list) {
			$printlist .= "\$Fld$i, ";
			$max = $i if $max < $i;
		}
		$splitlist = '';
		for ($i = 1; $i <= $max; ++$i) {
			$splitlist .= "\$Fld$i, ";
		}
		$splitlist =~ s/, $//;
		$split = "(($splitlist) = $split)";
	}
}

if ($clist) {
	$j=1;
	foreach $i (@list) {
		if ($i == '-') {	#range
			$ptr=$offsets[$j];
			$cnt=$lengths[$j++];
			$printlist .= "substr(\$_,$ptr,$cnt), ";
		}
		else {	#char
			$printlist .= "substr(\$_,$i,1), ";
		}
	}
}

$printlist =~ s/, $/;\n/;

if ($flist) {		#flist
	$loop = "
$init
line: while (<>) {
	chop;
	\$maxfld = $split;
	${print}next line if \$maxfld == 1;
	$toend
	$printlist
}
";
}
else { 			#clist
	$loop =
"$init
while (<>) {
	chop;
	$printlist
}
";
}

if ($verify) {
	print $loop;
}
else {
	eval $loop;
}

sub range {
	local($min,$max) = @_;
	$min = 1 unless $min;
	if (!$max) {
		if ($flist) {
			$toend = $min;
			return 'MAX';
		}
		else {
			$max=$MAXCHAR;
		}
	}

	if ($clist) {
		$offsets[$rangecnt]=$min;
		$lengths[$rangecnt++]=$max-$min+1;
	}
	else {	#flist
		for ($i=$min+1; $i<=$max; ++$i) {
			$min .= ",$i";
		}
	}
	$flist ? return $min : return '-';
}
-- 
Bob Best
uucp: ...{trwrb,hplabs}!felix!dhw68k!bob	InterNet: bob@dhw68k.cts.com