;;; Regexp "fold" combinators -*- scheme -*- ;;; Copyright (c) 1998 by Olin Shivers. ;;; REGEXP-FOLD re kons knil s [finish start] -> value ;;; REGEXP-FOLD-RIGHT re kons knil s [finish start] -> value ;;; REGEXP-FOR-EACH re proc s [start] -> unspecific ;;; Non-R4RS imports: let-optionals :optional error ? ;;; regexp-fold re kons knil s [finish start] -> value ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; The following definition is a bit unwieldy, but the intuition is ;;; simple: this procedure uses the regexp RE to divide up string S into ;;; non-matching/matching chunks, and then "folds" the procedure KONS ;;; across this sequence of chunks. ;;; ;;; Search from START (defaulting to 0) for a match to RE; call ;;; this match M. Let I be the index of the end of the match ;;; (that is, (match:end M 0)). Loop as follows: ;;; (regexp-fold re kons (kons START M knil) s finish I) ;;; If there is no match, return instead ;;; (finish START knil) ;;; FINISH defaults to (lambda (i knil) knil) ;;; ;;; In other words, we divide up S into a sequence of non-matching/matching ;;; chunks: ;;; NM1 M1 NM1 M2 ... NMk Mk NMlast ;;; where NM1 is the initial part of S that isn't matched by the RE, M1 is the ;;; first match, NM2 is the following part of S that isn't matched, M2 is the ;;; second match, and so forth -- NMlast is the final non-matching chunk of ;;; S. We apply KONS from left to right to build up a result, passing it one ;;; non-matching/matching chunk each time: on an application (KONS i m KNIL), ;;; the non-matching chunk goes from I to (match:begin m 0), and the following ;;; matching chunk goes from (match:begin m 0) to (match:end m 0). The last ;;; non-matching chunk NMlast is processed by FINISH. So the computation we ;;; perform is ;;; (final q (kons Jk MTCHk ... (kons J2 MTCH2 (kons J1 MTCH1 knil))...)) ;;; where Ji is the index of the start of NMi, MTCHi is a match value ;;; describing Mi, and Q is the index of the beginning of NMlast. (define-module (scsh rx re-fold) :use-module (scsh let-opt) :use-module (scsh rx cond-package) :use-module (scsh rx re-low) :use-module (scsh rx re-high) ) (export regexp-fold regexp-fold-right regexp-for-each) (define (regexp-fold re kons knil s . maybe-finish+start) (let-optionals maybe-finish+start ((finish (lambda (i x) x)) (start 0)) (if (> start (string-length s)) (error "Illegal START parameter" regexp-fold re kons knil s finish start)) (let lp ((i start) (val knil)) (? ((regexp-search re s i) => (lambda (m) (let ((next-i (match:end m 0))) (if (= next-i (match:start m 0)) (error "An empty-string regexp match has put regexp-fold into an infinite loop." re s start next-i) (lp next-i (kons i m val)))))) (else (finish i val)))))) ;;; regexp-fold-right re kons knil s [finish start] -> value ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; This procedure repeatedly matches regexp RE across string S. ;;; This divides S up into a sequence of matching/non-matching chunks: ;;; NM0 M1 NM1 M2 NM2 ... Mk NMk ;;; where NM0 is the initial part of S that isn't matched by the RE, ;;; M1 is the first match, NM1 is the following part of S that isn't ;;; matched, M2 is the second match, and so forth. We apply KONS from ;;; right to left to build up a result ;;; (final q (kons MTCH1 J1 (kons MTCH2 J2 ...(kons MTCHk JK knil)...))) ;;; where MTCHi is a match value describing Mi, Ji is the index of the end of ;;; NMi (or, equivalently, the beginning of Mi+1), and Q is the index of the ;;; beginning of M1. In other words, KONS is passed a match, an index ;;; describing the following non-matching text, and the value produced by ;;; folding the following text. The FINAL function "polishes off" the fold ;;; operation by handling the initial chunk of non-matching text (NM0, above). ;;; FINISH defaults to (lambda (i knil) knil) (define (regexp-fold-right re kons knil s . maybe-finish+start) (let-optionals maybe-finish+start ((finish (lambda (i x) x)) (start 0)) (if (> start (string-length s)) (error "Illegal START parameter" regexp-fold-right re kons knil s finish start)) (? ((regexp-search re s start) => (lambda (m) (finish (match:start m 0) (let recur ((last-m m)) (? ((regexp-search re s (match:end last-m 0)) => (lambda (m) (let ((i (match:start m 0))) (if (= i (match:end m 0)) (error "An empty-string regexp match has put regexp-fold-right into an infinite loop." re s start i) (kons last-m i (recur m)))))) (else (kons last-m (string-length s) knil))))))) (else (finish (string-length s) knil))))) ;;; regexp-for-each re proc s [start] -> unspecific ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Repeatedly match regexp RE against string S. ;;; Apply PROC to each match that is produced. ;;; Matches do not overlap. (define (regexp-for-each re proc s . maybe-start) (let ((start (:optional maybe-start 0))) (if (> start (string-length s)) (apply error "Illegal START parameter" regexp-for-each re proc s start) (let lp ((i start)) (? ((regexp-search re s i) => (lambda (m) (let ((next-i (match:end m 0))) (if (= (match:start m 0) next-i) (error "An empty-string regexp match has put regexp-for-each into an infinite loop." re proc s start next-i)) (proc m) (lp next-i)))))))))