goer@SOPHIST.UCHICAGO.EDU (Richard Goerwitz) (07/25/90)
I've been swamped with requests for my list scanning routines. This was quite unexpected. I sometimes post programs that I think will be of great general interest, only to get two or three responses. This last time, I didn't post code because I couldn't imagine more than two or three people wanting list scanning routines. The opposite seems to have been the case. With a somewhat red face, I'm going to do an about-face, and post the list scanning code. -Richard L. Goerwitz goer%sophist@uchicago.bitnet goer@sophist.uchicago.edu rutgers!oddjob!gide!sophist!goer ############################################################################ # # Name: lscan.icn # # Title: Quasi ? scanning routines for lists. # # Author: Richard L. Goerwitz # # Date: 7/24/90 (version 1.10) # ############################################################################ # # Copyright (c) 1990, Richard L. Goerwitz, III # # This software is intended for free and unrestricted distribution. # I place only two conditions on its use: 1) That you clearly mark # any additions or changes you make to the source code, and 2) that # you do not delete this message from it. In order to protect # myself from spurious litigation, it must also be stated here that, # because this is free software, I, Richard Goerwitz, make no claim # about the applicability or fitness of this software for any # purpose, and disclaim any responsibility for any damages that # might be incurred in conjunction with its use. # ############################################################################ # # PURPOSE: String scanning is terriffic, but often I am forced to # tokenize and work with lists. So as to make operations on these # lists as close to corresponding string operations as possible, I've # implemented a series of list analogues to any(), find(), many(), # match(), move(), pos(), tab(), and upto(). Their names are just # like corresponding string functions, except with a prepended "l_" # (e.g. l_any()). Since Icon does not permit me to define new # control structures via infix operators, list scanning must be # initialized via a procedure, namely l_scan(l), where l is the list # to be scanned. Scanning is ended by calling end_l_scan(l). # Nesting is permitted, but you'd better make sure your indentation # is correct. Since l_scan() does not represent a real control # structure, you must call end_l_scan() before "breaking" from an # l_scan. Otherwise l_POS and l_SUBJ will not be handled properly. # # If someone can think of a way to do this more elegantly let me # know. Personally, I'd like p{a,b,c} to be extended to allow # p{a;b;c} (where ; = ; or a newline). I could then do something # like # # l_scan { # a # b # c # } # # Now that's a user-defined control structure. (How, though, would # we work variable args into this scheme?) # # I envision l_scan being used (and in fact use it myself) to parse # trees and compare complicated list structures. One big time-saver # in parsing natural languages, for instance, is to store partial # trees. The l_scan routines enable you to take your list structures # and say, "Give me that portion of list 2 which corresponds # structurally to list 1." Or you can say, "Tell me if the first # portion of x list corresponds structurally to y." In effect, the # structure of the tree can be checked without necessitating a # reparse. # # BUGS: Can you say slow? I thought you could. # ############################################################################ # # Here's a trivial example of how one might utilize the lscan routines: # # procedure main() # # l := ["h","e","l","l","o"," ","t","t","t","h","e","r","e"] # # l_scan(l) # hello_list := l_tab(l_match(["h","e","l","l","o"])) # every writes(!hello_list) # write() # # l_scan(l_tab(0)) # l_tab(l_many([[" "],["t"]]) - 1) # every writes(!l_tab(0)) # write() # end_l_scan() # # end_l_scan() # # end # # The above program simply writes "hello" and "there" on successive # lines to the standard output. # # PITFALLS: In general, note that we are comparing lists here instead # of strings, so l_find("h", l), for instance, will yield an error # message (use l_find(["h"], l) instead). The point at which I # expect this nuance will be most confusing will be in cases where # one is looking for lists within lists. Suppose we have a list, # # l1 := ["junk",[["hello"]," ",["there"]],"!","m","o","r","e","junk"] # # and suppose, morover, that we wish to find the position in l1 at # which the list # # [["hello"]," ",["there"]] # # occurs. If, say, we assign [["hello"]," ",["there"]] to the # variable l2, then our l_find() expression will need to look like # # l_scan(l1) # l_tab(l_find([l2])) # etc. # # The reason l2 must be enclosed within a list is that we are # searching for a single element within l1, not a series of three # elements. Hence, just as l_find("h") would not work correctly # above, so l_find(l2) will not work here (although in this latter # case, no error message is generated; the expression merely fails # when it finds no sequence of elements in l1 corresponding to the # sequence of elements occurring in l2). # ############################################################################ global l_POS global l_SUBJ procedure l_scan(l) if /l_POS then { l_POS := [] l_SUBJ := [] } push(l_POS, 1) push(l_SUBJ, l) return end procedure end_l_scan() every pop(l_POS|l_SUBJ) if *l_POS = 0 then { l_POS := &null l_SUBJ := &null } return end procedure l_move(i) /i & stop("l_move: Null argument.") if /l_POS | /l_SUBJ then stop("l_move: Call l_scan first.") suspend l_SUBJ[1][.l_POS[1]:l_POS[1] <- (0 < (*l_SUBJ[1]+1 >= l_POS[1]+i))] end procedure l_tab(i) /i & stop("l_tab: Null argument.") if /l_POS | /l_SUBJ then stop("l_tab: Call l_scan first.") if i = 0 then suspend l_SUBJ[1][.l_POS[1]:l_POS[1] <- *l_SUBJ[1]+1] else { if i < 0 then suspend l_SUBJ[1][.l_POS[1]:l_POS[1] <- (0 < (*l_SUBJ[1]+1) + i)] else suspend l_SUBJ[1][.l_POS[1]:l_POS[1] <- (*l_SUBJ[1]+1 >= i)] } end procedure l_any(l1,l2,i,j) # Like any(c,s2,i,j) except that the string & cset arguments are # replaced by list arguments. l1 must be a list of one-element # lists, while l2 can be any list (l_SUBJ[1] by default). local sub_l /l1 & stop("l_any: Null first argument!") if /l_POS | /l_SUBJ then stop("l_any: Call l_scan first.") if type(l1) == "set" then l1 := sort(l1) /l2 := l_SUBJ[1] /i := \l_POS[1] | 1 /j := *l_SUBJ[1]+1 every sub_l := !l1 do { if not (type(sub_l) == "list", *sub_l = 1) then stop("l_any: Elements of l1 must be lists of length 1.") if x := l_match(sub_l,l2,i,i+1) then return x } end procedure l_match(l1,l2,i,j) # Analogous to match(s1,s2,i,j), except that s1 and s2 are lists, # and l_match returns the next position in l2 after that portion # (if any) which is structurally identical to l1. If a match is not # found, l_match fails. if /l1 then stop("l_match: Null first argument!") if /l_POS | /l_SUBJ then stop("l_match: Call l_scan first.") if type(l1) ~== "list" then stop("l_match: Call me with a list as the first arg.") /l2 := l_SUBJ[1] /i := \l_POS[1] | 1 /j := *l_SUBJ[1]+1 if l_comp(l1,l2[i+:*l1]) then return i + *l1 end procedure l_comp(l1,l2) # List comparison routine basically taken from Griswold & Griswold # (1st ed.), p. 174. local i /l1 | /l2 & stop("l_comp: Null argument!") l1 === l2 & return l2 if type(l1) == type(l2) == "list" then { *l1 ~= *l2 & fail every i := 1 to *l1 do l_comp(l1[i],l2[i]) | fail return l2 } end procedure l_find(l1,l2,i,j) local x /l1 & stop("l_find: Null first argument!") if /l_POS | /l_SUBJ then stop("l_find: Call l_scan first.") /l2 := l_SUBJ[1] /i := \l_POS[1] | 1 /j := *l_SUBJ[1]+1 every x := i to ((*l2+1) - *l1) do { if l_match(l1,l2,x,j) then suspend x } end procedure l_upto(l1,l2,i,j) local x /l1 & stop("l_upto: Null first argument!") if /l_POS | /l_SUBJ then stop("l_upto: Call l_scan first.") if type(l1) == "set" then l1 := sort(l1) /l2 := l_SUBJ[1] /i := \l_POS[1] | 1 /j := *l_SUBJ[1]+1 every x := i to ((*l2+1) - *l1) do { if l_any(l1,l2,x,j) then suspend x } end procedure l_many(l1,l2,i,j) /l1 & stop("l_many: Null first argument!") if /l_POS | /l_SUBJ then stop("l_many: Call l_scan first.") if type(l1) == "set" then l1 := sort(l1) /l2 := l_SUBJ[1] /i := \l_POS[1] | 1 /j := *l_SUBJ[1]+1 l_scan(l2) while x := l_any(l1,,,j) do l_tab(x) end_l_scan(l2) return \x end procedure l_pos(i) local x if /l_POS | /l_SUBJ then stop("l_many: Call l_scan first.") if i <= 0 then x := 0 < (*l_SUBJ[1]+1 >= (*l_SUBJ[1]+1)+i) | fail else x := 0 < (*l_SUBJ[1]+1 >= i) | fail if x = l_POS[1] then return i else fail end