[comp.lang.icon] list scanning; here it is

goer@SOPHIST.UCHICAGO.EDU (Richard Goerwitz) (07/25/90)

I've been swamped with requests for my list scanning routines.
This was quite unexpected.  I sometimes post programs that I
think will be of great general interest, only to get two or
three responses.  This last time, I didn't post code because
I couldn't imagine more than two or three people wanting list
scanning routines.  The opposite seems to have been the case.

With a somewhat red face, I'm going to do an about-face, and
post the list scanning code.


   -Richard L. Goerwitz              goer%sophist@uchicago.bitnet
   goer@sophist.uchicago.edu         rutgers!oddjob!gide!sophist!goer


############################################################################
#
#	Name:	lscan.icn
#
#	Title:	Quasi ? scanning routines for lists.
#
#	Author:	Richard L. Goerwitz
#
#	Date:	7/24/90 (version 1.10)
#
############################################################################
#
#  Copyright (c) 1990, Richard L. Goerwitz, III
#
#  This software is intended for free and unrestricted distribution.
#  I place only two conditions on its use:  1) That you clearly mark
#  any additions or changes you make to the source code, and 2) that
#  you do not delete this message from it.  In order to protect
#  myself from spurious litigation, it must also be stated here that,
#  because this is free software, I, Richard Goerwitz, make no claim
#  about the applicability or fitness of this software for any
#  purpose, and disclaim any responsibility for any damages that
#  might be incurred in conjunction with its use.
#
############################################################################
#
#  PURPOSE: String scanning is terriffic, but often I am forced to
#  tokenize and work with lists.  So as to make operations on these
#  lists as close to corresponding string operations as possible, I've
#  implemented a series of list analogues to any(), find(), many(),
#  match(), move(), pos(), tab(), and upto().  Their names are just
#  like corresponding string functions, except with a prepended "l_"
#  (e.g. l_any()).  Since Icon does not permit me to define new
#  control structures via infix operators, list scanning must be
#  initialized via a procedure, namely l_scan(l), where l is the list
#  to be scanned.  Scanning is ended by calling end_l_scan(l).
#  Nesting is permitted, but you'd better make sure your indentation
#  is correct.  Since l_scan() does not represent a real control
#  structure, you must call end_l_scan() before "breaking" from an
#  l_scan.  Otherwise l_POS and l_SUBJ will not be handled properly.
#
#  If someone can think of a way to do this more elegantly let me
#  know.  Personally, I'd like p{a,b,c} to be extended to allow
#  p{a;b;c} (where ; = ; or a newline).  I could then do something
#  like 
#
#      l_scan {
#          a
#          b
#          c
#      }
#
#  Now that's a user-defined control structure.  (How, though, would
#  we work variable args into this scheme?)
#
#  I envision l_scan being used (and in fact use it myself) to parse
#  trees and compare complicated list structures.  One big time-saver
#  in parsing natural languages, for instance, is to store partial
#  trees.  The l_scan routines enable you to take your list structures
#  and say, "Give me that portion of list 2 which corresponds
#  structurally to list 1."  Or you can say, "Tell me if the first
#  portion of x list corresponds structurally to y."  In effect, the
#  structure of the tree can be checked without necessitating a
#  reparse.
#
#  BUGS:  Can you say slow?  I thought you could.
#
############################################################################
#
#  Here's a trivial example of how one might utilize the lscan routines:
#
#  procedure main()
#
#      l := ["h","e","l","l","o"," ","t","t","t","h","e","r","e"]
#
#      l_scan(l)
#          hello_list := l_tab(l_match(["h","e","l","l","o"]))
#          every writes(!hello_list)
#          write()
#
#	   l_scan(l_tab(0))
#	       l_tab(l_many([[" "],["t"]]) - 1)
#              every writes(!l_tab(0))
#	       write()
#          end_l_scan()
#  
#      end_l_scan()
#  
#  end
#
#  The above program simply writes "hello" and "there" on successive
#  lines to the standard output.
#
#  PITFALLS: In general, note that we are comparing lists here instead
#  of strings, so l_find("h", l), for instance, will yield an error
#  message (use l_find(["h"], l) instead).  The point at which I
#  expect this nuance will be most confusing will be in cases where
#  one is looking for lists within lists.  Suppose we have a list,
#
#      l1 := ["junk",[["hello"]," ",["there"]],"!","m","o","r","e","junk"]
#
#  and suppose, morover, that we wish to find the position in l1 at
#  which the list
#
#      [["hello"]," ",["there"]]
#
#  occurs.  If, say, we assign [["hello"]," ",["there"]] to the
#  variable l2, then our l_find() expression will need to look like
#
#      l_scan(l1)
#          l_tab(l_find([l2]))
#      etc.
#
#  The reason l2 must be enclosed within a list is that we are
#  searching for a single element within l1, not a series of three
#  elements.  Hence, just as l_find("h") would not work correctly
#  above, so l_find(l2) will not work here (although in this latter
#  case, no error message is generated; the expression merely fails
#  when it finds no sequence of elements in l1 corresponding to the
#  sequence of elements occurring in l2).
#
############################################################################

global l_POS
global l_SUBJ


procedure l_scan(l)

    if /l_POS then {
	l_POS  := []
	l_SUBJ := []
    }

    push(l_POS, 1)
    push(l_SUBJ, l)
    return

end



procedure end_l_scan()

    every pop(l_POS|l_SUBJ)
    if *l_POS = 0 then {
	l_POS  := &null
	l_SUBJ := &null
    }
    return

end



procedure l_move(i)

    /i & stop("l_move:  Null argument.")
    if /l_POS | /l_SUBJ
    then stop("l_move:  Call l_scan first.")

    suspend l_SUBJ[1][.l_POS[1]:l_POS[1] <- (0 < (*l_SUBJ[1]+1 >= l_POS[1]+i))]

end



procedure l_tab(i)

    /i & stop("l_tab:  Null argument.")
    if /l_POS | /l_SUBJ
    then stop("l_tab:  Call l_scan first.")

    if i = 0
    then suspend l_SUBJ[1][.l_POS[1]:l_POS[1] <- *l_SUBJ[1]+1]
    else {
	if i < 0
	then suspend l_SUBJ[1][.l_POS[1]:l_POS[1] <- (0 < (*l_SUBJ[1]+1) + i)]
	else suspend l_SUBJ[1][.l_POS[1]:l_POS[1] <- (*l_SUBJ[1]+1 >= i)]
    }

end



procedure l_any(l1,l2,i,j)

    # Like any(c,s2,i,j) except that the string & cset arguments are
    # replaced by list arguments.  l1 must be a list of one-element
    # lists, while l2 can be any list (l_SUBJ[1] by default).

    local sub_l

    /l1 & stop("l_any:  Null first argument!")
    if /l_POS | /l_SUBJ
    then stop("l_any:  Call l_scan first.")
    if type(l1) == "set" then l1 := sort(l1)

    /l2 := l_SUBJ[1]
    /i  := \l_POS[1] | 1
    /j  := *l_SUBJ[1]+1

    every sub_l := !l1 do {
	if not (type(sub_l) == "list", *sub_l = 1) then
	    stop("l_any:  Elements of l1 must be lists of length 1.")
	if x := l_match(sub_l,l2,i,i+1)
	then return x
    }
    
end



procedure l_match(l1,l2,i,j)

    # Analogous to match(s1,s2,i,j), except that s1 and s2 are lists,
    # and l_match returns the next position in l2 after that portion
    # (if any) which is structurally identical to l1.  If a match is not
    # found, l_match fails.

    if /l1
    then stop("l_match:  Null first argument!")
    if /l_POS | /l_SUBJ
    then stop("l_match:  Call l_scan first.")
    if type(l1) ~== "list"
    then stop("l_match:  Call me with a list as the first arg.")

    /l2 := l_SUBJ[1]
    /i  := \l_POS[1] | 1
    /j  := *l_SUBJ[1]+1

    if l_comp(l1,l2[i+:*l1])
    then return i + *l1

end

    

procedure l_comp(l1,l2)

    # List comparison routine basically taken from Griswold & Griswold
    # (1st ed.), p. 174.

    local i

    /l1 | /l2 & stop("l_comp:  Null argument!")
    l1 === l2 & return l2

    if type(l1) == type(l2) == "list" then {
	*l1 ~= *l2 & fail
	every i := 1 to *l1
	do l_comp(l1[i],l2[i]) | fail
	return l2
    }

end



procedure l_find(l1,l2,i,j)

    local x

    /l1 & stop("l_find:  Null first argument!")
    if /l_POS | /l_SUBJ
    then stop("l_find:  Call l_scan first.")

    /l2 := l_SUBJ[1]
    /i  := \l_POS[1] | 1
    /j  := *l_SUBJ[1]+1

    every x := i to ((*l2+1) - *l1) do {
	if l_match(l1,l2,x,j)
	then suspend x
    }
    
end



procedure l_upto(l1,l2,i,j)

    local x

    /l1 & stop("l_upto:  Null first argument!")
    if /l_POS | /l_SUBJ
    then stop("l_upto:  Call l_scan first.")
    if type(l1) == "set" then l1 := sort(l1)

    /l2 := l_SUBJ[1]
    /i  := \l_POS[1] | 1
    /j  := *l_SUBJ[1]+1

    every x := i to ((*l2+1) - *l1) do {
	if l_any(l1,l2,x,j)
	then suspend x
    }
    
end



procedure l_many(l1,l2,i,j)

    /l1 & stop("l_many:  Null first argument!")
    if /l_POS | /l_SUBJ
    then stop("l_many:  Call l_scan first.")
    if type(l1) == "set" then l1 := sort(l1)

    /l2 := l_SUBJ[1]
    /i  := \l_POS[1] | 1
    /j  := *l_SUBJ[1]+1

    l_scan(l2)
    while x := l_any(l1,,,j)
    do l_tab(x)
    end_l_scan(l2)

    return \x

end



procedure l_pos(i)

    local x

    if /l_POS | /l_SUBJ
    then stop("l_many:  Call l_scan first.")

    if i <= 0
    then x := 0 < (*l_SUBJ[1]+1 >= (*l_SUBJ[1]+1)+i) | fail
    else x := 0 < (*l_SUBJ[1]+1 >= i) | fail

    if x = l_POS[1]
    then return i
    else fail

end