[comp.lang.icon] list scanning routines

goer@ellis.uchicago.edu (Richard L. Goerwitz) (05/28/91)
############################################################################
#
#	Name:	lscan.icn
#
#	Title:	Quasi ? scanning routines for lists.
#
#	Author:	Richard L. Goerwitz
#
#	Version: 1.20
#
############################################################################
#
#  PURPOSE: String scanning is terrific, but often I am forced to
#  tokenize and work with lists.  So as to make operations on these
#  lists as close to corresponding string operations as possible, I've
#  implemented a series of list analogues to any(), bal(), find(),
#  many(), match(), move(), pos(), tab(), and upto().  Their names are
#  just like corresponding string functions, except with a prepended
#  "l_" (e.g. l_any()).  Functionally, the list routines parallel the
#  string ones closely, except that in place of strings, l_find and
#  l_match accept lists as their first argument.  L_any(), l_many(),
#  and l_upto() all take either sets of lists or lists of lists (e.g.
#  l_tab(l_upto([["a"],["b"],["j","u","n","k"]])).  Note that l_bal(),
#  unlike the builtin bal(), has no defaults for the first four
#  arguments.  This just seemed appropriate, given that no precise
#  list analogue to &cset, etc. occurs.
#
#  The default subject for list scans (analogous to &subject) is
#  l_SUBJ.  The equivalent of &pos is l_POS.  Naturally, these
#  variables are both global.  They are used pretty much like &subject
#  and &pos, except that they are null until a list scanning
#  expression has been encountered containing a call to l_Bscan() (on
#  which, see below).
#
#  Note that environments cannot be maintained quite as elegantly as
#  they can be for the builtin string-scanning functions.  One must
#  use instead a set of nested procedure calls, as explained in the
#  _Icon Analyst_ 1:6 (June, 1991), p. 1-2.  In particular, one cannot
#  suspend, return, or otherwise break out of the nested procedure
#  calls.  They can only be exited via failure.  The names of these
#  procedures, at least in this implementation, are l_Escan and
#  l_Bscan.  Here is one example of how they might be invoked:
#
#      suspend l_Escan(l_Bscan(some_list_or_other), {
#          l_tab(10 to *l_SUBJ) & {
#              if l_any(l1) | l_match(l2) then
#                  old_l_POS + (l_POS-1)
#          }
#      })
#
#  Note that you cannot do this:
#
#      l_Escan(l_Bscan(some_list_or_other), {
#          l_tab(10 to *l_SUBJ) & {
#              if l_any(l1) | l_match(l2) then
#                  suspend old_l_POS + (l_POS-1)
#          }
#      })
#
#  Remember, it's no fair to use suspend within the list scanning
#  expression.  l_Escan must do all the suspending.  It is perfectly OK,
#  though, to nest well-behaved list scanning expressions.  And they can
#  be reliably used to generate a series of results as well.
#
############################################################################
#
#  Here's another simple example of how one might invoke the l_scan
#  routines:
#
#  procedure main()
#
#      l := ["h","e","l","l","o"," ","t","t","t","h","e","r","e"]
#
#      l_Escan(l_Bscan(l), {
#          hello_list := l_tab(l_match(["h","e","l","l","o"]))
#          every writes(!hello_list)
#          write()
#
#          # Note the nested list-scanning expressions.
#	   l_Escan(l_Bscan(l_tab(0)), {
#	       l_tab(l_many([[" "],["t"]]) - 1)
#              every writes(!l_tab(0))
#	       write()
#          })
#      })
#  
#  end
#
#  The above program simply writes "hello" and "there" on successive
#  lines to the standard output.
#
############################################################################
#
#  PITFALLS: In general, note that we are comparing lists here instead
#  of strings, so l_find("h", l), for instance, will yield an error
#  message (use l_find(["h"], l) instead).  The point at which I
#  expect this nuance will be most confusing will be in cases where
#  one is looking for lists within lists.  Suppose we have a list,
#
#      l1 := ["junk",[["hello"]," ",["there"]],"!","m","o","r","e","junk"]
#
#  and suppose, moreover, that we wish to find the position in l1 at
#  which the list
#
#      [["hello"]," ",["there"]]
#
#  occurs.  If, say, we assign [["hello"]," ",["there"]] to the
#  variable l2, then our l_find() expression will need to look like
#
#      l_find([l2],l1)
#
############################################################################
#
#  Extending scanning to lists is really very difficult.  What I think
#  (at least tonight) is that scanning should never have been
#  restricted to strings.  It should have been designed to operate on
#  all homogenous one-dimensional arrays (vectors, for you LISPers).
#  You should be able, in other words, to scan vectors of ints, longs,
#  characters - any data type that seems useful.  The only question in
#  my mind is how to represent vectors as literals.  Extending scanning
#  to lists goes beyond the bounds of scanning per-se.  This library is
#  therefore something of a stab in the dark.
#
############################################################################


global l_POS
global l_SUBJ
record l_ScanEnvir(subject,pos)

procedure l_Bscan(e1)

    #
    # Prototype list scan initializer.  Based on code published in
    # the _Icon Analyst_ 1:6 (June, 1991), p. 1-2.
    #
    local l_OuterEnvir
    initial {
	l_SUBJ := []
	l_POS := 1
    }

    #
    # Save outer scanning environment.
    #
    l_OuterEnvir := l_ScanEnvir(l_SUBJ, l_POS)

    #
    # Set current scanning environment to subject e1 (arg 1).  Pos
    # defaults to 1.  Suspend the saved environment.  Later on, the
    # l_Escan procedure will need this in case the scanning expres-
    # sion as a whole sends a result back to the outer environment,
    # and the outer environment changes l_SUBJ and l_POS.
    #
    l_SUBJ := e1
    l_POS  := 1
    suspend l_OuterEnvir

    #
    # Restore the saved environment (plus any changes that might have
    # been made to it as noted in the previous run of comments).
    #
    l_SUBJ := l_OuterEnvir.subject
    l_POS := l_OuterEnvir.pos

    #
    # Signal failure of the scanning expression (we're done producing
    # results if we get to here).
    #
    fail

end



procedure l_Escan(l_OuterEnvir, e2)

    local l_InnerEnvir

    #
    # Set the inner scanning environment to the values assigned to it
    # by l_Bscan.  Remember that l_SUBJ and l_POS are global.  They
    # don't need to be passed as parameters from l_Bscan.  What
    # l_Bscan() needs to pass on is the l_OuterEnvir record,
    # containing the values of l_SUBJ and l_POS before l_Bscan() was
    # called.  l_Escan receives this "outer environment" as its first
    # argument, l_OuterEnvir.
    #
    l_InnerEnvir := l_ScanEnvir(l_SUBJ, l_POS)

    #
    # Whatever expression produced e2 has passed us a result.  Now we
    # restore l_SUBJ and l_POS, and send that result back to the outer
    # environment.
    #
    l_SUBJ := l_OuterEnvir.subject
    l_POS := l_OuterEnvir.pos
    suspend e2

    #
    # Okay, we've resumed to (attempt to) produce another result.  Re-
    # store the inner scanning environment (the one we're using in the
    # current scanning expression).  Remember?  It was saved in l_Inner-
    # Envir just above.
    #
    l_SUBJ := l_InnerEnvir.subject
    l_POS := l_InnerEnvir.pos

    #
    # Fail so that the second argument (the one that produced e2) gets
    # resumed.  If it fails to produce another result, then the first
    # argument is resumed, which is l_Bscan().  If l_Bscan is resumed, it
    # will restore the outer environment and fail, causing the entire
    # scanning expression to fail.
    #
    fail

end

    

procedure l_move(i)

    /i & stop("l_move:  Null argument.")
    if /l_POS | /l_SUBJ then
	stop("l_move:  Call l_Bscan() first.")

    #
    # Sets l_POS to l_POS+i; suspends that portion of l_SUBJ extending
    # from the old l_POS to the new one.  Resets l_POS if resumed,
    # just the way matching procedures are supposed to.  Fails if l_POS
    # plus i is larger than l_SUBJ+1 or if l_POS+i is less than 1.
    #
    suspend l_SUBJ[.l_POS:l_POS <- (0 < (*l_SUBJ+1 >= l_POS+i))]

end



procedure l_tab(i)

    /i & stop("l_tab:  Null argument.")
    if /l_POS | /l_SUBJ then
	stop("l_tab:  Call l_Bscan() first.")

    if i <= 0
    then suspend l_SUBJ[.l_POS:l_POS <- 0 < (*l_SUBJ+1 >= (*l_SUBJ+1)+i)]
    else suspend l_SUBJ[.l_POS:l_POS <- 0 < (*l_SUBJ+1 >= i)]

end



procedure l_any(l1,l2,i,j)

    #
    # Like any(c,s2,i,j) except that the string & cset arguments are
    # replaced by list arguments.  l1 must be a list of one-element
    # lists, while l2 can be any list (l_SUBJ by default).
    #

    local sub_l

    /l1 & stop("l_any:  Null first argument!")
    if type(l1) == "set" then l1 := sort(l1)

    /l2 := l_SUBJ
    if \i then {
	if i < 1 then
	    i := *l2 + (i+1)
    }
    else i := \l_POS | 1
    if \j then {
	if j < 1 then
	    j := *l2 + (j+1)
    }
    else j := *l_SUBJ+1

    (i+1) > j & i :=: j
    every sub_l := !l1 do {
	if not (type(sub_l) == "list", *sub_l = 1) then
	    stop("l_any:  Elements of l1 must be lists of length 1.")
	# Let l_match check to see if i+1 is out of range.
	if x := l_match(sub_l,l2,i,i+1) then
	    return x
    }
    
end



procedure l_match(l1,l2,i,j)

    #
    # Analogous to match(s1,s2,i,j), except that s1 and s2 are lists,
    # and l_match returns the next position in l2 after that portion
    # (if any) which is structurally identical to l1.  If a match is not
    # found, l_match fails.
    #

    if /l1
    then stop("l_match:  Null first argument!")
    if type(l1) ~== "list"
    then stop("l_match:  Call me with a list as the first arg.")

    /l2 := l_SUBJ
    if \i then {
	if i < 1 then
	    i := *l2 + (i+1)
    }
    else i := \l_POS | 1
    
    if \j then {
	if j < 1 then
	    j := *l2 + (j+1)
    }
    else j := *l_SUBJ+1

    i + *l1 > j & i :=: j
    i + *l1 > j & fail
    if l_comp(l1,l2[i+:*l1]) then
	return i + *l1

end

    

procedure l_comp(l1,l2)

    #
    # List comparison routine basically taken from Griswold & Griswold
    # (1st ed.), p. 174.
    #

    local i

    /l1 | /l2 & stop("l_comp:  Null argument!")
    l1 === l2 & (return l2)

    if type(l1) == type(l2) == "list" then {
	*l1 ~= *l2 & fail
	every i := 1 to *l1
	do l_comp(l1[i],l2[i]) | fail
	return l2
    }

end



procedure l_find(l1,l2,i,j)

    #
    # Like the builtin find(s1,s2,i,j), but for lists.
    #

    local x

    /l1 & stop("l_find:  Null first argument!")

    /l2 := l_SUBJ
    if \i then {
	if i < 1 then
	    i := *l2 + (i+1)
    }
    else i := \l_POS | 1
    if \j then {
	if j < 1 then
	    j := *l2 + (j+1)
    }
    else j := *l_SUBJ+1

    #
    # See l_upto() below for a discussion of why things have to be done
    # in this manner.
    #
    old_l_POS := l_POS

    suspend l_Escan(l_Bscan(l2[i:j]), {
	l_tab(1 to *l_SUBJ) & {
	    if l_match(l1) then
		old_l_POS + (l_POS-1)
	}
    })
    
end



procedure l_upto(l1,l2,i,j)

    #
    # See l_any() above.  This procedure just moves through l2, calling
    # l_any() for each member of l2[i:j].
    #

    local old_l_POS

    /l1 & stop("l_upto:  Null first argument!")
    if type(l1) == "set" then l1 := sort(l1)

    /l2 := l_SUBJ
    if \i then {
	if i < 1 then
	    i := *l2 + (i+1)
    }
    else i := \l_POS | 1
    if \j then {
	if j < 1 then
	    j := *l2 + (j+1)
    }
    else j := *l_SUBJ+1

    #
    # Save the old pos, then try arb()ing through the list to see if we
    # can do an l_any(l1) at any position.
    #
    old_l_POS := l_POS

    suspend l_Escan(l_Bscan(l2[i:j]), {
	l_tab(1 to *l_SUBJ) & {
	    if l_any(l1) then
		old_l_POS + (l_POS-1)
	}
    })

    #
    # Note that it WILL NOT WORK if you say:
    #
    # l_Escan(l_Bscan(l2[i:j]), {
    #     l_tab(1 to *l_SUBJ) & {
    #         if l_any(l1) then
    #             suspend old_l_POS + (l_POS-1)
    #     }
    # })
    #
    # If we are to suspend a result, l_Escan must suspend that result.
    # Otherwise scanning environments are not saved and/or restored
    # properly.
    #
    
end



procedure l_many(l1,l2,i,j)

    local x, old_l_POS

    /l1 & stop("l_many:  Null first argument!")
    if type(l1) == "set" then l1 := sort(l1)

    /l2 := l_SUBJ
    if \i then {
	if i < 1 then
	    i := *l2 + (i+1)
    }
    else i := \l_POS | 1
    if \j then {
	if j < 1 then
	    j := *l2 + (j+1)
    }
    else j := *l_SUBJ+1

    #
    # L_many(), like many(), is not a generator.  We can therefore
    # save one final result in x, and then later return (rather than
    # suspend) that result.
    #
    old_l_POS := l_POS
    l_Escan(l_Bscan(l2[i:j]), {
	while l_tab(l_any(l1))
	x := old_l_POS + (l_POS-1)
    })

    #
    # Fails if there was no positional change (i.e. l_any() did not
    # succeed even once).
    #
    return old_l_POS ~= x

end



procedure l_pos(i)

    local x

    if /l_POS | /l_SUBJ
    then stop("l_move:  Call l_Bscan() first.")

    if i <= 0
    then x := 0 < (*l_SUBJ+1 >= (*l_SUBJ+1)+i) | fail
    else x := 0 < (*l_SUBJ+1 >= i) | fail

    if x = l_POS
    then return x
    else fail

end



procedure l_bal(l1,l2,l3,l,i,j)

    local l2_count, l3_count, x, position

    /l1 & stop("l_bal:  Null first argument!")
    if type(l1) == "set" then l1 := sort(l1)  # convert to a list
    if type(l2) == "set" then l1 := sort(l2)
    if type(l3) == "set" then l1 := sort(l3)

    /l2 := l_SUBJ
    if \i then {
	if i < 1 then
	    i := *l2 + (i+1)
    }
    else i := \l_POS | 1
    if \j then {
	if j < 1 then
	    j := *l2 + (j+1)
    }
    else j := *l_SUBJ+1

    l2_count := l3_count := 0

    every x := i to j-1 do {

	if l_any(l2, l, x, x+1) then {
	    l2_count +:= 1
	}
	if l_any(l3, l, x, x+1) then {
	    l3_count +:= 1
	}
	if l2_count = l3_count then {
	    if l_any(l1,l,x,x+1)
	    then suspend x
	}
    }

end
-- 

   -Richard L. Goerwitz              goer%sophist@uchicago.bitnet
   goer@sophist.uchicago.edu         rutgers!oddjob!gide!sophist!goer