[comp.emacs] 'split' for emacs lisp

bjaspan@ATHENA.MIT.EDU ("Barr3y Jaspan") (05/09/91)

For emacs-loving perl users, or perl-loving emacs users, here is an
implementation of the perl function 'split' for emacs lisp.  It seems
to work for me; your mileage may vary.  Comments are welcome.

(Note: matched-strings-list and matched-strings are used by split.)

Barr3y Jaspan, bjaspan@mit.edu
Watchmaker Computing

--- snip snip ---

;;; split.el --- emacs implementation of Perl's 'split' function.
;;; (C) Copyright 1991 by Barr3y Jaspan (bjaspan@mit.edu)

(defun split (string &optional regexp sep)
  "Split STRING into a list of strings at each match of REGEXP.
If REGEXP does not occur, the entire STRING is the only element of
the returned list.  REGEXP is optional and defaults to [ \\t]+ (a run
of spaces and tabs).  A separator at the very end of STRING yields a
trailing empty string: (split \"a,,\" \",\") --> (\"a\" \"\" \"\").

If optional third arg SEP is non-nil, the text matched by the grouped
subexpressions of REGEXP at each breakpoint, concatenated into one
string (possibly empty), is added to the returned list right after
the field that precedes the breakpoint.

If REGEXP matches the null string \"\" at the start of a field, that
field is the single character found there:
  (split \"foo\" \" *\") --> (\"f\" \"o\" \"o\")
A null match further on ends the current field without consuming any
text, and a null match at the end of STRING is ignored."
  ;; The tab below must be a real tab character.  Backslash is not
  ;; special inside a regexp bracket expression, so a backslash
  ;; followed by the letter t there would make the default split on
  ;; backslashes and on the letter t instead of on tabs.
  (if regexp nil (setq regexp "[ \t]+"))
  (let ((buf (generate-new-buffer "  *tmp")))
    (unwind-protect
        (save-excursion
          ;; split-list carries a dummy head cell so that new elements
          ;; can always be appended with setcdr on split-tail.
          (let* ((split-list (list 'split-list))
                 (split-tail split-list)
                 p end mb me piece done)
            (set-buffer buf)
            (insert string)
            (goto-char (point-min))
            ;; p is the start of the field being collected.
            (setq p (point)
                  end (point-max))
            (while (and (not done) (re-search-forward regexp nil t))
              (setq mb (match-beginning 0)
                    me (match-end 0)
                    piece nil)
              (cond
               ;; Ordinary separator: the field is everything before
               ;; it, and point already sits just after it.
               ((/= mb me)
                (setq piece (buffer-substring p mb)))
               ;; Null match at the end of the text: nothing is left
               ;; to break, the remainder becomes the last field.
               ((>= mb end)
                (setq done t))
               ;; Null match ahead of p: end the field there; point is
               ;; already at mb, so no text is skipped.
               ((> mb p)
                (setq piece (buffer-substring p mb)))
               ;; Null match at p with more than one character left:
               ;; that single character is the field.
               ((< (1+ p) end)
                (setq piece (buffer-substring p (1+ p)))
                (goto-char (1+ p)))
               ;; Null match at the last character: leave it for the
               ;; final field collected after the loop.
               (t
                (setq done t)))
              (if piece
                  (progn
                    (setcdr split-tail (list piece))
                    (setq split-tail (cdr split-tail))
                    ;; Only look at the subexpressions when the caller
                    ;; asked for them.
                    (if sep
                        (progn
                          (setcdr split-tail (list (matched-strings)))
                          (setq split-tail (cdr split-tail))))
                    (setq p (point)))))
            ;; Whatever follows the last breakpoint is the last field.
            (setcdr split-tail (list (buffer-substring p end)))
            (cdr split-list)))
      (kill-buffer buf))))

(defun matched-strings-list ()
  "Return a list of the text matched by each grouped subexpression in
the previous regular-expression search, in group order.
Groups that did not take part in the match are left out.  The text is
read from the current buffer, so that must be the buffer searched."
  (interactive)
  ;; Skip the first pair of positions since it is the ENTIRE matched string.
  ;; mlist carries a dummy head cell so that results can be appended
  ;; with setcdr on mtail.
  (let* ((data (cdr (cdr (match-data))))
         (mlist (list 'mlist))
         (mtail mlist))
    (while data
      ;; XXX (match-data) is supposed to contain markers or nil.
      ;; 18.55.160 on the Sun sometimes returns 0's, too.
      (if (and (car data) (not (= 0 (car data))))
          (progn
            ;; Spelled (car (cdr ...)) rather than cadr, which is not
            ;; built in on older Emacs versions unless cl is loaded.
            (setcdr mtail (list (buffer-substring (car data)
                                                  (car (cdr data)))))
            (setq mtail (cdr mtail))))
      ;; Each group occupies two entries: its start and its end.
      (setq data (cdr (cdr data))))
    (cdr mlist)))

(defun matched-strings ()
  "Return one string made by joining, in group order and with nothing
in between, the text of every parenthesized expression matched in the
previous regular-expression search."
  (interactive)
  ;; Joining with an empty separator is the same as concatenating.
  (mapconcat 'identity (matched-strings-list) ""))