goer@SOPHIST.UCHICAGO.EDU (Richard Goerwitz) (07/25/90)
I've been swamped with requests for my list scanning routines.
This was quite unexpected. I sometimes post programs that I
think will be of great general interest, only to get two or
three responses. This last time, I didn't post code because
I couldn't imagine more than two or three people wanting list
scanning routines. The opposite seems to have been the case.
With a somewhat red face, I'm going to do an about-face, and
post the list scanning code.
-Richard L. Goerwitz goer%sophist@uchicago.bitnet
goer@sophist.uchicago.edu rutgers!oddjob!gide!sophist!goer
############################################################################
#
# Name: lscan.icn
#
# Title: Quasi ? scanning routines for lists.
#
# Author: Richard L. Goerwitz
#
# Date: 7/24/90 (version 1.10)
#
############################################################################
#
# Copyright (c) 1990, Richard L. Goerwitz, III
#
# This software is intended for free and unrestricted distribution.
# I place only two conditions on its use: 1) That you clearly mark
# any additions or changes you make to the source code, and 2) that
# you do not delete this message from it. In order to protect
# myself from spurious litigation, it must also be stated here that,
# because this is free software, I, Richard Goerwitz, make no claim
# about the applicability or fitness of this software for any
# purpose, and disclaim any responsibility for any damages that
# might be incurred in conjunction with its use.
#
############################################################################
#
# PURPOSE: String scanning is terriffic, but often I am forced to
# tokenize and work with lists. So as to make operations on these
# lists as close to corresponding string operations as possible, I've
# implemented a series of list analogues to any(), find(), many(),
# match(), move(), pos(), tab(), and upto(). Their names are just
# like corresponding string functions, except with a prepended "l_"
# (e.g. l_any()). Since Icon does not permit me to define new
# control structures via infix operators, list scanning must be
# initialized via a procedure, namely l_scan(l), where l is the list
# to be scanned. Scanning is ended by calling end_l_scan(l).
# Nesting is permitted, but you'd better make sure your indentation
# is correct. Since l_scan() does not represent a real control
# structure, you must call end_l_scan() before "breaking" from an
# l_scan. Otherwise l_POS and l_SUBJ will not be handled properly.
#
# If someone can think of a way to do this more elegantly let me
# know. Personally, I'd like p{a,b,c} to be extended to allow
# p{a;b;c} (where ; = ; or a newline). I could then do something
# like
#
# l_scan {
# a
# b
# c
# }
#
# Now that's a user-defined control structure. (How, though, would
# we work variable args into this scheme?)
#
# I envision l_scan being used (and in fact use it myself) to parse
# trees and compare complicated list structures. One big time-saver
# in parsing natural languages, for instance, is to store partial
# trees. The l_scan routines enable you to take your list structures
# and say, "Give me that portion of list 2 which corresponds
# structurally to list 1." Or you can say, "Tell me if the first
# portion of x list corresponds structurally to y." In effect, the
# structure of the tree can be checked without necessitating a
# reparse.
#
# BUGS: Can you say slow? I thought you could.
#
############################################################################
#
# Here's a trivial example of how one might utilize the lscan routines:
#
# procedure main()
#
# l := ["h","e","l","l","o"," ","t","t","t","h","e","r","e"]
#
# l_scan(l)
# hello_list := l_tab(l_match(["h","e","l","l","o"]))
# every writes(!hello_list)
# write()
#
# l_scan(l_tab(0))
# l_tab(l_many([[" "],["t"]]) - 1)
# every writes(!l_tab(0))
# write()
# end_l_scan()
#
# end_l_scan()
#
# end
#
# The above program simply writes "hello" and "there" on successive
# lines to the standard output.
#
# PITFALLS: In general, note that we are comparing lists here instead
# of strings, so l_find("h", l), for instance, will yield an error
# message (use l_find(["h"], l) instead). The point at which I
# expect this nuance will be most confusing will be in cases where
# one is looking for lists within lists. Suppose we have a list,
#
# l1 := ["junk",[["hello"]," ",["there"]],"!","m","o","r","e","junk"]
#
# and suppose, morover, that we wish to find the position in l1 at
# which the list
#
# [["hello"]," ",["there"]]
#
# occurs. If, say, we assign [["hello"]," ",["there"]] to the
# variable l2, then our l_find() expression will need to look like
#
# l_scan(l1)
# l_tab(l_find([l2]))
# etc.
#
# The reason l2 must be enclosed within a list is that we are
# searching for a single element within l1, not a series of three
# elements. Hence, just as l_find("h") would not work correctly
# above, so l_find(l2) will not work here (although in this latter
# case, no error message is generated; the expression merely fails
# when it finds no sequence of elements in l1 corresponding to the
# sequence of elements occurring in l2).
#
############################################################################
global l_POS
global l_SUBJ
procedure l_scan(l)
if /l_POS then {
l_POS := []
l_SUBJ := []
}
push(l_POS, 1)
push(l_SUBJ, l)
return
end
procedure end_l_scan()
every pop(l_POS|l_SUBJ)
if *l_POS = 0 then {
l_POS := &null
l_SUBJ := &null
}
return
end
procedure l_move(i)
/i & stop("l_move: Null argument.")
if /l_POS | /l_SUBJ
then stop("l_move: Call l_scan first.")
suspend l_SUBJ[1][.l_POS[1]:l_POS[1] <- (0 < (*l_SUBJ[1]+1 >= l_POS[1]+i))]
end
procedure l_tab(i)
/i & stop("l_tab: Null argument.")
if /l_POS | /l_SUBJ
then stop("l_tab: Call l_scan first.")
if i = 0
then suspend l_SUBJ[1][.l_POS[1]:l_POS[1] <- *l_SUBJ[1]+1]
else {
if i < 0
then suspend l_SUBJ[1][.l_POS[1]:l_POS[1] <- (0 < (*l_SUBJ[1]+1) + i)]
else suspend l_SUBJ[1][.l_POS[1]:l_POS[1] <- (*l_SUBJ[1]+1 >= i)]
}
end
procedure l_any(l1,l2,i,j)
# Like any(c,s2,i,j) except that the string & cset arguments are
# replaced by list arguments. l1 must be a list of one-element
# lists, while l2 can be any list (l_SUBJ[1] by default).
local sub_l
/l1 & stop("l_any: Null first argument!")
if /l_POS | /l_SUBJ
then stop("l_any: Call l_scan first.")
if type(l1) == "set" then l1 := sort(l1)
/l2 := l_SUBJ[1]
/i := \l_POS[1] | 1
/j := *l_SUBJ[1]+1
every sub_l := !l1 do {
if not (type(sub_l) == "list", *sub_l = 1) then
stop("l_any: Elements of l1 must be lists of length 1.")
if x := l_match(sub_l,l2,i,i+1)
then return x
}
end
procedure l_match(l1,l2,i,j)
# Analogous to match(s1,s2,i,j), except that s1 and s2 are lists,
# and l_match returns the next position in l2 after that portion
# (if any) which is structurally identical to l1. If a match is not
# found, l_match fails.
if /l1
then stop("l_match: Null first argument!")
if /l_POS | /l_SUBJ
then stop("l_match: Call l_scan first.")
if type(l1) ~== "list"
then stop("l_match: Call me with a list as the first arg.")
/l2 := l_SUBJ[1]
/i := \l_POS[1] | 1
/j := *l_SUBJ[1]+1
if l_comp(l1,l2[i+:*l1])
then return i + *l1
end
procedure l_comp(l1,l2)
# List comparison routine basically taken from Griswold & Griswold
# (1st ed.), p. 174.
local i
/l1 | /l2 & stop("l_comp: Null argument!")
l1 === l2 & return l2
if type(l1) == type(l2) == "list" then {
*l1 ~= *l2 & fail
every i := 1 to *l1
do l_comp(l1[i],l2[i]) | fail
return l2
}
end
procedure l_find(l1,l2,i,j)
local x
/l1 & stop("l_find: Null first argument!")
if /l_POS | /l_SUBJ
then stop("l_find: Call l_scan first.")
/l2 := l_SUBJ[1]
/i := \l_POS[1] | 1
/j := *l_SUBJ[1]+1
every x := i to ((*l2+1) - *l1) do {
if l_match(l1,l2,x,j)
then suspend x
}
end
procedure l_upto(l1,l2,i,j)
local x
/l1 & stop("l_upto: Null first argument!")
if /l_POS | /l_SUBJ
then stop("l_upto: Call l_scan first.")
if type(l1) == "set" then l1 := sort(l1)
/l2 := l_SUBJ[1]
/i := \l_POS[1] | 1
/j := *l_SUBJ[1]+1
every x := i to ((*l2+1) - *l1) do {
if l_any(l1,l2,x,j)
then suspend x
}
end
procedure l_many(l1,l2,i,j)
/l1 & stop("l_many: Null first argument!")
if /l_POS | /l_SUBJ
then stop("l_many: Call l_scan first.")
if type(l1) == "set" then l1 := sort(l1)
/l2 := l_SUBJ[1]
/i := \l_POS[1] | 1
/j := *l_SUBJ[1]+1
l_scan(l2)
while x := l_any(l1,,,j)
do l_tab(x)
end_l_scan(l2)
return \x
end
procedure l_pos(i)
local x
if /l_POS | /l_SUBJ
then stop("l_many: Call l_scan first.")
if i <= 0
then x := 0 < (*l_SUBJ[1]+1 >= (*l_SUBJ[1]+1)+i) | fail
else x := 0 < (*l_SUBJ[1]+1 >= i) | fail
if x = l_POS[1]
then return i
else fail
end