[[project @ 2000-09-04 18:37:13 by ghouston] ghouston**20000904183714 Ignore-this: 29dde5c69bbf9ef51db246dad1065067 ] addfile ./jar-defrecord.scm rmfile ./re.scm adddir ./lib addfile ./lib/ccp.scm addfile ./lib/list-lib.scm addfile ./lib/string-lib.scm adddir ./rx addfile ./rx/cond-package.scm addfile ./rx/let-match.scm addfile ./rx/oldfuns.scm addfile ./rx/parse.scm addfile ./rx/posixstr.scm addfile ./rx/re-fold.scm addfile ./rx/re-high.scm addfile ./rx/re-low.scm addfile ./rx/re-subst.scm addfile ./rx/re-syntax.scm addfile ./rx/re.scm addfile ./rx/rx-lib.scm addfile ./rx/simp.scm addfile ./rx/spencer.scm hunk ./COPYING 1 -Copyright (c) 1993, 1994 by Richard Kelsey and Jonathan Rees. -Copyright (c) 1994, 1995 by Olin Shivers and Brian D. Carlstrom. - - Use of this program for non-commercial purposes is permitted provided -that such use is acknowledged both in the software itself and in -accompanying documentation. - - Use of this program for commercial purposes is also permitted, but -only if, in addition to the acknowledgement required for -non-commercial users, written notification of such use is provided by -the commercial user to the authors prior to the fabrication and -distribution of the resulting software. - - This software is provided ``as is'' without express or implied warranty. +Copyright (c) 1993-1999 Richard Kelsey and Jonathan Rees +Copyright (c) 1994-1999 by Olin Shivers and Brian D. Carlstrom. +All rights reserved. hunk ./COPYING 5 +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the authors may not be used to endorse or promote products + derived from this software without specific prior written permission. hunk ./COPYING 16 +THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. hunk ./ChangeLog 1 +2000-09-04 Gary Houston + + * README: updated. guile-scsh is now based on scsh 0.5.2. + mention some bugs in the INCOMPAT file: more work is needed. + +2000-08-30 Gary Houston + + * char-set.scm (string-iter): renamed to char-set-string-iter, + so it doesn't conflict with lib/string-lib.scm:string-iter. + should be fixed by module system instead. + only called by char-set-invert! and char-set-intersection! in + char-set.scm. + +2000-08-28 Gary Houston + + * rx/oldfuns.scm: don't redefine Guile primitives. + +2000-08-23 Gary Houston + + * compile-posix-re->c-struct, cre-search, cre-search?: rewritten + using Guile regexp primitives. not particularly optimal nor + necessarily complete. + +2000-08-21 Gary Houston + + * rx/re-low.scm (clean-up-cres): make this a no-op. + +2000-08-20 Gary Houston + + * sighandlers.scm (maybe-define-signal), errno.scm (maybe-define-eno): + use string-downcase, not string-downcase!. + + * init.scm (structure-ref): defined bogusly: scheme48 module system + is not supported. + + * init: load new files. + * lib: ccp.scm, list-lib.scm, string-lib.scm: new files from + scsh-0.5.2. + + * lib: new directory. + + * init.scm (rx, if-sre-form): defined, from scsh old-packages.scm. + + * init.scm: load new files. + + * rx: new subdirectory. + + * jar-defrecord.scm, re-low.scm, re-high.scm, + cond-package.scm let-match.scm oldfuns.scm parse.scm posixstr.scm, + re-fold.scm re-subst.scm re-syntax.scm rx-lib.scm simp.scm, + spencer.scm: new files from scsh-0.5.2. + +2000-08-10 Gary Houston + + * init.scm: don't define index, rrindex. + + * updates from scsh-0.5.2: COPYING, awk.scm, char-set.scm, + defrec.scm, enumconst.scm, fileinfo.scm, filemtch.scm, + filesys.scm, fname.scm, fr.scm, glob.scm, here.scm, network.scm, + procobj.scm, rdelim.scm, re.scm, rw.scm, scsh-condition.scm, + scsh-version.scm, scsh.scm, sighandlers.scm, stringcoll.scm, + syntax-helpers.scm, syntax.scm, syscalls.scm, time.scm, + utilities.scm + hunk ./INCOMPAT 1 -Incompatibilities with scsh 0.4.4: +Bugs: + +extended process forms don't work: + +(run (echo hello) (> out.log))) + +problem with compiling regular expressions: + +compare the posix string in the result of (rx (- alpha ("ae"))) +with the one in (rx (- alpha ("aei"))): the latter contains garbage +and can't be used. + +Incompatibilities with the original scsh: hunk ./INCOMPAT 17 +loading guile-scsh may redefine Guile builtins. e.g., map is +redefined by list-lib.scm. This isn't very good. Maybe use of the +module system will make this more manageable in future. Other +affected procedures are: read-delimited read-delimited! pipe sleep +exit make-fluid and probably open-file. + +the interfaces to the I/O system may be incompatible. + +[check that the following are still true in 0.5.2] + hunk ./INCOMPAT 48 -Several scsh procedures are incompatible with Guile procedures. -In these cases the procedures are redefined when init.scm is -loaded. This isn't a very good solution: hopefully use of the -module system will help. The following procedures are -affected: read-delimited read-delimited! pipe sleep exit make-fluid -and probably open-file. - hunk ./README 1 -This is an incomplete port of the scheme shell (scsh) 0.5.1 to Guile. +This is an incomplete port of the scheme shell (scsh) 0.5.2 to Guile. hunk ./README 3 -The original scsh is available by ftp from -swiss-ftp.ai.mit.edu:/pub/su, which is also the place to go for scsh -documentation. +For the original scsh package and documentation, see: hunk ./README 5 -The Guile port uses a record type to represent the multiple values -returned by many scsh procedures. The values can be retrieved -using call-with-values or receive. +http://www.swiss.ai.mit.edu/ftpdir/scsh/ hunk ./README 7 -The Guile module system is not currently being used. Initialization -can be done by: +To install the package, use something like: hunk ./README 9 -(load-from-path "scsh/init") +configure --prefix=/usr/local +make +make install + +The configure prefix must match the one used for Guile itself. hunk ./README 15 -Note that SLIB must be found in %load-path, e.g., it can be unpacked -in the "site" directory. SLIB can be obtained by ftp from -prep.ai.mit.edu in the pub/gnu/jacal directory. +Alternatively it's sufficient to rename (or link) the main guile-scsh +directory to "scsh" and place it somewhere in the guile load path (type +%load-path in an interactive Guile session to see the current +setting). hunk ./README 20 -The Guile reference manual also has information about installing -and using SLIB and scsh. +The Guile module system is not currently used. The package can be +loaded into a running Guile interpreter with: hunk ./README 23 +(load-from-path "scsh/init") + +Note that SLIB must be available to Guile: see the guile reference +manual for information on installing SLIB. + +The Guile port uses a record type to represent the multiple values +returned by many scsh procedures. The values can be retrieved +using call-with-values or receive. hunk ./THANKS 1 -The Guile SCSH port is the work of volunteer Gary Houston, as well as -the Guile core system call support upon which SCSH rests. Russ -McManus took over from Gary in the summer of 1998. +The Guile SCSH port is the work of Gary Houston, as well as the Guile +core system call support upon which SCSH rests. Russ McManus took +over from Gary in the summer of 1998. hunk ./awk.scm 2 -;;; Copyright (c) 1994 by Olin Shivers. +;;; Copyright (c) 1994 by Olin Shivers. See file COPYING. hunk ./awk.scm 4 -;;; the only change for Guile is the awk definition on the last line. +;;; This uses the new RX SRE syntax. Defines a Clinger-Rees expander for +;;; the old, pre-SRE syntax AWK, and one for the new SRE-syntax AWK. hunk ./awk.scm 7 +;;; Imports: hunk ./awk.scm 11 -;;; - Requires STRING-MATCH from SCSH package. +;;; - Requires STRING-MATCH and STRING-MATCH? from RE-EXPORTS package. +;;; - Requires regexp manipulation stuff from SRE-SYNTAX-TOOLS +;;; - Requires ERROR from ERROR-PACKAGE. +;;; - Requires ANY and FILTER frm SCSH-UTILITIES. +;;; +;;; Needs error-package receiving sre-syntax-tools scsh-utilities +;;; +;;; Exports: +;;; (expand-awk exp r c) Clinger-Rees macro expander, new syntax +;;; (expand-awk/obsolete exp r c) Clinger-Rees macro expander, old syntax +;;; +;;; next-range next-:range These four functions are used in the +;;; next-range: next-:range: code output by the expander. hunk ./awk.scm 25 -;;; This should be hacked to convert regexp strings into regexp structures -;;; at the top of the form, and then just refer to the structs in the -;;; tests. hunk ./awk.scm 35 -;;; ("^[ \t]*;" nlines) ; A comment line. -;;; (else (+ nlines 1))) ; Not a comment line. +;;; ((: bos (* white) ";") nlines) ; A comment line. +;;; (else (+ nlines 1))) ; Not a comment line. hunk ./awk.scm 38 -;;; ;;; Read numbers, counting the evens and odds. +;;; ;;; Read numbers, counting the evens and odds, +;;; ;;; and printing out sign information. hunk ./awk.scm 41 -;;; ((zero? val) (display "zero ") (values evens odds)) ; Tell me about -;;; ((> val 0) (display "pos ") (values evens odds)) ; sign, too. +;;; ((zero? val) (display "zero ") (values evens odds)) +;;; ((> val 0) (display "pos ") (values evens odds)) hunk ./awk.scm 54 +;;; +;;; ::= (ELSE body ...) +;;; | (:RANGE test1 test2 body ...) ; RANGE :RANGE RANGE: :RANGE: +;;; | (AFTER body ...) +;;; | (test => proc) +;;; | (test ==> vars body ...) +;;; | (test body ...) +;;; +;;; test ::= integer | sre | (WHEN exp) | exp +;;; (sre/exp ambiguities resolved in favor of SRE) + hunk ./awk.scm 173 -(define (expand-awk exp r c) +;;; If STRING-REGEXPS? is true, we use the old, obsolete syntax, where +;;; a test form that is a string, such as "shivers|bdc", is treated as +;;; a regular expression in the Posix string syntax. Otherwise, we use the +;;; new SRE syntax, where strings are treated as SRE constants. + +(define (expand-awk exp r c) (really-expand-awk exp r c #f)) +(define (expand-awk/obsolete exp r c) (really-expand-awk exp r c #t)) + +(define (really-expand-awk exp r c string-regexps?) hunk ./awk.scm 184 - (%receive (r 'receive)) - (%values (r 'values)) hunk ./awk.scm 189 - (%make-regexp (r 'make-regexp)) + (%rx (r 'rx)) hunk ./awk.scm 207 - (rec/field-vars (caddr exp)) + ;; Replace #F's with gensym'd variables in the record/field vars. + (rec/field-vars (map (lambda (v) (or v (r (gensym "anon-rfval")))) + (caddr exp))) hunk ./awk.scm 218 - ;; Some analysis: what have we got? - ;; Range clauses, else clauses, line num tests,... - (let* ((recnum-tests? ; Do any of the clauses test the record - (any? (lambda (clause) ; count? (I.e., any integer tests?) - (let ((test (car clause))) - (or (integer? test) - (and (range? clause) - (or (integer? (cadr clause)) - (integer? (caddr clause))))))) - clauses)) + ;; If we are doing the old, obsolete Posix-string syntax, map + ;; the clause tests over to the new syntax. + (let* ((clauses (if string-regexps? + (map (lambda (clause) + (hack-clause-for-posix-string-syntax clause r c)) + clauses) + clauses)) + + ;; Some analysis: what have we got? + ;; Range clauses, else clauses, line num tests,... + (recnum-tests? ; Do any of the clauses test the record + (any (lambda (clause) ; count? (I.e., any integer tests?) + (let ((test (car clause))) + (or (integer? test) + (and (range? clause) + (or (integer? (cadr clause)) + (integer? (caddr clause))))))) + clauses)) hunk ./awk.scm 239 - (else-var (and (any? (lambda (clause) - (c (car clause) %else)) - clauses) - (r 'else))) + (else-var (and (any (lambda (clause) + (c (car clause) %else)) + clauses) + (r 'else-state))) hunk ./awk.scm 244 - ;; We compile all of the regexp patterns into regexp + ;; We compile all of the *static* regexp patterns into regexp hunk ./awk.scm 250 - (cond ((string? test) (list test)) + (cond ((sre-form? test r c) (list test)) hunk ./awk.scm 254 - (append (if (string? t1) + (append (if (sre-form? t1 r c) hunk ./awk.scm 257 - (if (string? t2) - (list t2) - '())))) + (if (sre-form? t2 r c) + (list t2) + '())))) hunk ./awk.scm 271 - ;; An alist matching regexp patterns with the vars to which - ;; we will bind their compiled regexp data structure. - (pats/vars (map (lambda (p) (cons p (r (gensym "re.")))) - patterns)) + (pats-static? (map (lambda (sre) + (static-regexp? (parse-sre sre r c))) + patterns)) hunk ./awk.scm 275 - ;; A LET-list binding the regexp vars to their compiled regexps. - (regexp-inits (map (lambda (p/v) - `(,(cdr p/v) (,%make-regexp ,(car p/v)))) - pats/vars)) + ;; An alist matching each pattern with the exp that refers + ;; to it -- a var if it's static, a Scheme (RX ...) exp otw. + (pats/refs (map (lambda (pat static?) + (cons pat + (if static? + (r (gensym "re.")) + `(,%rx ,pat)))) + patterns pats-static?)) hunk ./awk.scm 284 + ;; A LET-list binding the regexp vars to their + ;; compiled static regexps. + (regexp-inits (apply append + (map (lambda (p/r static?) + (if static? + `((,(cdr p/r) (,%rx ,(car p/r)))) + '())) + pats/refs + pats-static?))) hunk ./awk.scm 339 - clauses pats/vars r c)) + clauses pats/refs r c)) hunk ./awk.scm 362 +;;; This maps a clause in the old, obsolete syntax over to a clause +;;; in the new, SRE syntax. +(define (hack-clause-for-posix-string-syntax clause r c) + (let ((hack-simple-test (lambda (test) + (cond ((string? test) + `(,(r 'posix-string) ,test)) + ((integer? test) test) + (else `(,(r 'when) ,test))))) + (test (car clause))) + (cond ((range-keyword? test r c) + `(,test ,(hack-simple-test (cadr clause)) + ,(hack-simple-test (caddr clause)) + . ,(cdddr clause))) + + ((or (c test (r 'else)) + (c test (r 'after))) + clause) + + (else `(,(hack-simple-test test) . ,(cdr clause)))))) + hunk ./awk.scm 387 - range-vars svars clauses pats/vars r c) + range-vars svars clauses pats/refs r c) hunk ./awk.scm 405 - pats/vars + pats/refs hunk ./awk.scm 419 - pats/vars r c))))) + pats/refs r c))))) hunk ./awk.scm 427 -;;; String s => (regexp-exec s ) +;;; SRE s => (regexp-search ) +;;; (when e) => e hunk ./awk.scm 430 +;;; +;;; If FOR-VALUE? is true, then we do regexp searches with REGEXP-SEARCH, +;;; otherwise, we use the cheaper REGEXP-SEARCH?. hunk ./awk.scm 434 -(define (->simple-clause-test test-form rec-var rec-counter pats/vars r) +(define (->simple-clause-test test-form for-value? rec-var rec-counter pats/refs r c) hunk ./awk.scm 436 - ((string? test-form) - (let ((re-var (cond ((assoc test-form pats/vars) => cdr) - (else (error "Impossible AWK error -- unknown regexp" - test-form pats/vars))))) - `(,(r 'regexp-exec) ,re-var ,rec-var))) + + ((sre-form? test-form r c) + `(,(r (if for-value? 'regexp-search 'regexp-search?)) + ,(cdr (assoc test-form pats/refs)) + ,rec-var)) + + ((and (pair? test-form) + (c (r 'when) (car test-form))) + (if (= 2 (length test-form)) (cadr test-form) + (error "Illegal WHEN test in AWK" test-form))) + hunk ./awk.scm 452 - pats/vars r c) + pats/refs r c) hunk ./awk.scm 454 - (%= (r '=)) - (%string-match (r 'string-match)) hunk ./awk.scm 455 + (%long-arrow (r '==>)) hunk ./awk.scm 457 + (%mss (r 'match:substring)) hunk ./awk.scm 460 - (test (->simple-clause-test test rec-var rec-counter pats/vars r)) + (mktest (lambda (for-value?) + (->simple-clause-test test for-value? rec-var + rec-counter pats/refs r c))) hunk ./awk.scm 468 + ;; How about (test ==> (var ...) body ...)? + (long-arrow? (and (< 3 (length clause)) + (c (cadr clause) %long-arrow))) + hunk ./awk.scm 476 - (core (if arrow? - (let* ((tv (r 'tval)) ; APP is the actual - (app `(,(caddr clause) ,tv))) ; body: (proc tv). - `(,%let ((,tv ,test)) - (,%if ,tv - ,(clause-action (list app) else-var svars r c) - . ,null-clause-list))) + (core (cond (arrow? + (let* ((tv (r 'tval)) ; APP is the actual + (app `(,(caddr clause) ,tv)) ; body: (proc tv). + (test (mktest #t))) + `(,%let ((,tv ,test)) + (,%if ,tv + ,(clause-action (list app) else-var svars r c) + . ,null-clause-list)))) + + (long-arrow? + (let* ((tv (r 'tval)) + (test (mktest #t)) + (bindings ; List of LET bindings for submatches. + (let lp ((i 0) + (vars (caddr clause)) + (bindings '())) + (if (pair? vars) + (let ((var (car vars))) + (lp (+ i 1) (cdr vars) + (if var + `((,var (,%mss ,tv ,i)) . ,bindings) + bindings))) ; #F = "don't-care" + bindings)))) hunk ./awk.scm 500 - `(,%if ,test ,(clause-action (cdr clause) else-var svars r c) - . ,null-clause-list))) + `(,%let ((,tv ,test)) + (,%if ,tv + (,%let ,bindings ; Bind submatches. + . ,(deblock (clause-action (cdddr clause) + else-var svars + r c) + r c)) + . ,null-clause-list)))) + + (else + `(,%if ,(mktest #f) ,(clause-action (cdr clause) + else-var svars r c) + . ,null-clause-list)))) hunk ./awk.scm 525 - pats/vars r c) + pats/refs r c) hunk ./awk.scm 542 - (start-test (->simple-clause-test start-test rec-var - rec-counter pats/vars r)) - (stop-test (->simple-clause-test stop-test rec-var - rec-counter pats/vars r)) + (start-test (->simple-clause-test start-test #f rec-var + rec-counter pats/refs r c)) + (stop-test (->simple-clause-test stop-test #f rec-var + rec-counter pats/refs r c)) hunk ./awk.scm 567 - (%receive (r 'receive)) hunk ./awk.scm 669 +;;; guile hunk ./char-set.scm 4 -;;; ported from MIT Scheme runtime -;;; by Brian D. Carlstrom -;;; Sleazy code. +;;; - ported from MIT Scheme runtime +;;; by Brian D. Carlstrom +;;; - Rehacked & extended by Olin Shivers 6/98. + +;;; This is not great code. Char sets are represented as 256-char +;;; strings. If char i is ASCII 0, then it isn't in the set; if char i +;;; is ASCII 1, then it is in the set. +;;; - Should be rewritten to use bit strings, or at least byte vecs. +;;; - Is ASCII/Latin-1 specific. Would certainly have to be rewritten +;;; for Unicode. +;;; - The standard character sets are not Latin-1 compliant, just ASCII. + +;;; This code uses jar's DEFINE-RECORD-TYPE macro to define the char-set +;;; record type, because the scsh-standard DEFINE-RECORD form automatically +;;; defines a COPY-FOO function, which is not the one we want, being a shallow +;;; copy of the record fields. + +;;; New dfns: +;;; (char-set= cs1 cs2 ...) +;;; (char-set<= cs1 cs2 ...) +;;; (char-set-fold kons knil cs) +;;; (char-set-for-each f cs) +;;; (char-set-copy cs) +;;; (char-set-size cs) +;;; char-set:printing (char-printing? c) +;;; char-set:blank (char-blank? c) +;;; char-set:control (char-control? c) +;;; char-set:hex-digit (char-hex-digit? c) +;;; char-set:ascii (char-ascii? c) +;;; char-set:empty +;;; char-set:full +;;; char-set-every? pred cs +;;; char-set-any pred cs +;;; char-set-adjoin cset char -> cset +;;; char-set-adjoin! cset char -> cset +;;; char-set-delete cset char -> cset +;;; char-set-delete! cset char -> cset hunk ./char-set.scm 43 -(define char:tab (ascii->char 9)) -(define char:linefeed (ascii->char 13)) -(define char:page (ascii->char 12)) -(define char:return (ascii->char 10)) -(define char:space (ascii->char 32)) +(define char:tab (ascii->char 9)) +(define char:vtab (ascii->char 11)) +(define char:page (ascii->char 12)) +(define char:return (ascii->char 10)) +(define char:space (ascii->char 32)) + +(define (string-copy s) (substring s 0 (string-length s))) hunk ./char-set.scm 62 -(define (char-set? object) - (and (string? object) - (= (string-length object) 256))) +;(define-record char-set +; s) ; 256-char string; each char is either ASCII 0 or ASCII 1. + +;;; Use jar's record macro. +(define-record-type char-set :char-set + (make-char-set s) + char-set? + (s char-set:s)) + +(define (char-set-copy cs) (make-char-set (string-copy (char-set:s cs)))) + +;;; The = and <= code is ugly because it's n-ary. + +(define (char-set= cs1 . rest) + (let ((s1 (char-set:s cs1))) + (every (lambda (cs) (string=? s1 (char-set:s cs))) + rest))) + +(define (char-set<= cs1 . rest) + (let lp ((s1 (char-set:s cs1)) + (rest rest)) + (or (not (pair? rest)) + (let ((s2 (char-set:s (car rest))) + (rest (cdr rest))) + (let lp2 ((i 255)) + (if (< i 0) (lp s2 rest) + (and (<= (char->ascii (string-ref s1 i)) + (char->ascii (string-ref s2 i))) + (lp2 (- i 1))))))))) + + +(define (char-set-size cs) + (let ((s (char-set:s cs))) + (let lp ((i 255) (size 0)) + (if (< i 0) size + (lp (- i 1) + (if (= 0 (char->ascii (string-ref s i))) size (+ size 1))))))) + +(define (set-char-set cs in? . chars) + (let ((s (string-copy (char-set:s cs))) + (val (if in? (ascii->char 1) (ascii->char 0)))) + (for-each (lambda (c) (string-set! s (char->ascii c) val)) + chars) + (make-char-set s))) + +(define (set-char-set! cs in? . chars) + (let ((s (char-set:s cs)) + (val (if in? (ascii->char 1) (ascii->char 0)))) + (for-each (lambda (c) (string-set! s (char->ascii c) val)) + chars)) + cs) + +(define (char-set-adjoin cs . chars) (apply set-char-set cs #t chars)) +(define (char-set-adjoin! cs . chars) (apply set-char-set! cs #t chars)) +(define (char-set-delete cs . chars) (apply set-char-set cs #f chars)) +(define (char-set-delete! cs . chars) (apply set-char-set! cs #f chars)) + +(define (char-set-for-each proc cs) + (let ((s (char-set:s cs))) + (let lp ((i 255)) + (cond ((>= i 0) + (if (not (= 0 (char->ascii (string-ref s i)))) + (proc (ascii->char i))) + (lp (- i 1))))))) + +(define (char-set-fold kons knil cs) + (let ((s (char-set:s cs))) + (let lp ((i 255) (ans knil)) + (if (< i 0) ans + (lp (- i 1) + (if (= 0 (char->ascii (string-ref s i))) + ans + (kons (ascii->char i) ans))))))) + +(define reduce-char-set (deprecated-proc char-set-fold 'char-set-fold + "Use char-set-fold instead.")) + +(define (char-set-every? pred cs) + (let ((s (char-set:s cs))) + (let lp ((i 255)) + (or (< i 0) + (if (= 0 (char->ascii (string-ref s i))) + (lp (- i 1)) + (and (pred (ascii->char i)) + (lp (- i 1)))))))) + +(define (char-set-any pred cs) + (let ((s (char-set:s cs))) + (let lp ((i 255)) + (and (>= i 0) + (if (= 0 (char->ascii (string-ref s i))) + (lp (- i 1)) + (or (pred (ascii->char i)) + (lp (- i 1)))))))) + hunk ./char-set.scm 162 - (let ((char-set (make-string 256 (ascii->char 0)))) + (let ((s (make-string 256 (ascii->char 0)))) hunk ./char-set.scm 164 - (string-set! char-set (char->ascii char) (ascii->char 1))) + (string-set! s (char->ascii char) (ascii->char 1))) hunk ./char-set.scm 166 - char-set)) + (make-char-set s))) hunk ./char-set.scm 169 - (let ((char-set (make-string 256 (ascii->char 0)))) + (let ((s (make-string 256 (ascii->char 0)))) hunk ./char-set.scm 171 - ((< i 0) char-set) - (string-set! char-set (char->ascii (string-ref str i)) + ((< i 0) (make-char-set s)) + (string-set! s (char->ascii (string-ref str i)) hunk ./char-set.scm 176 - (let ((char-set (make-string 256 (ascii->char 0)))) - (string-fill-range! char-set lower upper (ascii->char 1)) - char-set)) + (let ((s (make-string 256 (ascii->char 0)))) + (string-fill-range! s lower upper (ascii->char 1)) + (make-char-set s))) hunk ./char-set.scm 181 - (let ((char-set (make-string 256))) - (let loop ((code 0)) - (if (< code 256) - (begin (string-set! char-set code - (if (predicate (ascii->char code)) - (ascii->char 1) - (ascii->char 0))) - (loop (+ 1 code))))) - char-set)) + (let ((s (make-string 256))) + (let lp ((i 255)) + (if (>= i 0) + (begin (string-set! s i (if (predicate (ascii->char i)) + (ascii->char 1) + (ascii->char 0))) + (lp (- i 1))))) + (make-char-set s))) hunk ./char-set.scm 204 -(define (char-set-members char-set) - (define (loop code) - (cond ((>= code 256) '()) - ((zero? (char->ascii (string-ref char-set code))) (loop (+ 1 code))) - (else (cons (ascii->char code) (loop (+ 1 code)))))) - (loop 0)) +(define (char-set-members cs) + (let ((s (char-set:s cs))) + (let lp ((i 255) (ans '())) + (if (< i 0) ans + (lp (- i 1) + (if (zero? (char->ascii (string-ref s i))) ans + (cons (ascii->char i) ans))))))) hunk ./char-set.scm 221 -(define (char-set-contains? char-set char) - (not (zero? (char->ascii (string-ref char-set (char->ascii char)))))) +(define (char-set-contains? cs char) + (not (zero? (char->ascii (string-ref (char-set:s cs) + (char->ascii char)))))) hunk ./char-set.scm 229 -(define (char-set-invert char-set) - (predicate->char-set - (lambda (char) (not (char-set-contains? char-set char))))) - -(define (char-set-union char-set-1 char-set-2) - (predicate->char-set - (lambda (char) - (or (char-set-contains? char-set-1 char) - (char-set-contains? char-set-2 char))))) hunk ./char-set.scm 230 -(define (char-set-intersection char-set-1 char-set-2) - (predicate->char-set - (lambda (char) - (and (char-set-contains? char-set-1 char) - (char-set-contains? char-set-2 char))))) +;;; Set algebra +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; hunk ./char-set.scm 233 -(define (char-set-difference char-set-1 char-set-2) - (predicate->char-set - (lambda (char) - (and (char-set-contains? char-set-1 char) - (not (char-set-contains? char-set-2 char)))))) +(define (char-set-invert cs) + (predicate->char-set (lambda (char) + (not (char-set-contains? cs char))))) hunk ./char-set.scm 237 -;;;; System Character Sets +(define (char-set-union . csets) + (if (pair? csets) + (apply char-set-union! (char-set-copy (car csets)) (cdr csets)) + char-set:empty)) hunk ./char-set.scm 242 -(define char-set:upper-case (ascii-range->char-set #x41 #x5B)) -(define char-set:lower-case (ascii-range->char-set #x61 #x7B)) -(define char-set:numeric (ascii-range->char-set #x30 #x3A)) -(define char-set:graphic (ascii-range->char-set #x20 #x7F)) -(define char-set:not-graphic (char-set-invert char-set:graphic)) -(define char-set:whitespace - (char-set char:newline char:tab char:linefeed - char:page char:return char:space)) -(define char-set:not-whitespace (char-set-invert char-set:whitespace)) -(define char-set:alphabetic - (char-set-union char-set:upper-case char-set:lower-case)) -(define char-set:alphanumeric - (char-set-union char-set:alphabetic char-set:numeric)) -(define char-set:standard - (char-set-union char-set:graphic (char-set char:newline))) +(define (char-set-intersection . csets) + (if (pair? csets) + (apply char-set-intersection! (char-set-copy (car csets)) (cdr csets)) + char-set:full)) hunk ./char-set.scm 247 -(define (char-upper-case? char) - (char-set-contains? char-set:upper-case char)) +(define (char-set-difference cs1 . csets) + (if (pair? csets) + (apply char-set-difference! (char-set-copy cs1) csets) + cs1)) hunk ./char-set.scm 252 -(define (char-lower-case? char) - (char-set-contains? char-set:lower-case char)) hunk ./char-set.scm 253 -(define (char-numeric? char) - (char-set-contains? char-set:numeric char)) +;;; Linear set-algebraic ops +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; These guys are allowed, but not required, to side-effect their first +;;; argument when computing their result. In other words, you must use them +;;; as if they were completely functional, just like their non-! counterparts, +;;; and you must additionally ensure that their first arguments are "dead" +;;; at the point of call. In return, we promise a more efficient result, plus +;;; allowing you to always assume char-sets are unchangeable values. hunk ./char-set.scm 262 -(define (char-graphic? char) - (char-set-contains? char-set:graphic char)) +;;; Apply P to each index and it's char in S: (P I C). +;;; Used by the intersection & difference. hunk ./char-set.scm 265 -(define (char-whitespace? char) - (char-set-contains? char-set:whitespace char)) +;;; char-set- prefix added for Guile to avoid conflict with +;;; string-iter defined in lib/string-lib.scm. +(define (char-set-string-iter p s) + (let lp ((i (- (string-length s) 1))) + (cond ((>= i 0) + (p i (string-ref s i)) + (lp (- i 1)))))) hunk ./char-set.scm 273 -(define (char-alphabetic? char) - (char-set-contains? char-set:alphabetic char)) +(define (char-set-invert! cset) + (let ((s (char-set:s cset))) + (char-set-string-iter (lambda (i c) + (string-set! s i (ascii->char (- 1 (char->ascii c))))) + s)) + cset) hunk ./char-set.scm 280 -(define (char-alphanumeric? char) - (char-set-contains? char-set:alphanumeric char)) +(define (char-set-union! cset1 . csets) + (let ((s (char-set:s cset1))) + (for-each (lambda (cset) + (char-set-for-each (lambda (c) + (string-set! s (char->ascii c) + (ascii->char 1))) + cset)) + csets)) + cset1) hunk ./char-set.scm 290 -(define (char-standard? char) - (char-set-contains? char-set:standard char)) +(define (char-set-intersection! cset1 . csets) + (let ((s (char-set:s cset1))) + (for-each (lambda (cset) + (char-set-string-iter (lambda (i c) + (if (zero? (char->ascii c)) + (string-set! s i (ascii->char 0)))) + (char-set:s cset))) + csets)) + cset1) hunk ./char-set.scm 300 -;;; Bullshit legalese -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;$Header: /cvsroot/guile/guile/guile-scsh/Attic/char-set.scm,v 1.1 1997/01/25 18:26:38 ghouston Exp $ +(define (char-set-difference! cset1 . csets) + (let ((s (char-set:s cset1))) + (for-each (lambda (cset) + (char-set-for-each (lambda (c) + (string-set! s (char->ascii c) + (ascii->char 0))) + cset)) + csets)) + cset1) hunk ./char-set.scm 310 -;Copyright (c) 1988 Massachusetts Institute of Technology hunk ./char-set.scm 311 -;This material was developed by the Scheme project at the Massachusetts -;Institute of Technology, Department of Electrical Engineering and -;Computer Science. Permission to copy this software, to redistribute -;it, and to use it for any purpose is granted, subject to the following -;restrictions and understandings. hunk ./char-set.scm 312 -;1. Any copy made of this software must include this copyright notice -;in full. - -;2. Users of this software agree to make their best efforts (a) to -;return to the MIT Scheme project any improvements or extensions that -;they make, so that these may be included in future releases; and (b) -;to inform MIT of noteworthy uses of this software. +;;;; System Character Sets +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; hunk ./char-set.scm 315 -;3. All materials developed as a consequence of the use of this -;software shall duly acknowledge such use, in accordance with the usual -;standards of acknowledging credit in academic research. +(define char-set:lower-case (ascii-range->char-set #x61 #x7B)) +(define char-set:upper-case (ascii-range->char-set #x41 #x5B)) +(define char-set:alphabetic + (char-set-union char-set:upper-case char-set:lower-case)) +(define char-set:numeric (ascii-range->char-set #x30 #x3A)) +(define char-set:alphanumeric + (char-set-union char-set:alphabetic char-set:numeric)) +(define char-set:graphic (ascii-range->char-set #x21 #x7F)) +(define char-set:printing (ascii-range->char-set #x20 #x7F)) +(define char-set:whitespace (char-set char:tab char:newline char:vtab + char:page char:return char:space)) +(define char-set:blank (char-set char:space char:tab)) +(define char-set:control (char-set-union (ascii-range->char-set 0 32) + (char-set (ascii->char 127)))) +(define char-set:punctuation + (string->char-set "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~")) +(define char-set:hex-digit (string->char-set "0123456789abcdefABCDEF")) +(define char-set:ascii (ascii-range->char-set 0 128)) +(define char-set:empty (char-set)) +(define char-set:full (char-set-invert char-set:empty)) hunk ./char-set.scm 336 -;4. MIT has made no warrantee or representation that the operation of -;this software will be error-free, and MIT is under no obligation to -;provide any services, by way of maintenance, update, or otherwise. hunk ./char-set.scm 337 -;5. In conjunction with products arising from the use of this material, -;there shall be no use of the name of the Massachusetts Institute of -;Technology nor of any adaptation thereof in any advertising, -;promotional, or sales literature without prior written consent from -;MIT in each case. +(define (char-set->pred cs) (lambda (c) (char-set-contains? cs c))) hunk ./char-set.scm 339 +(define char-lower-case? (char-set->pred char-set:lower-case)) +(define char-upper-case? (char-set->pred char-set:upper-case)) +(define char-alphabetic? (char-set->pred char-set:alphabetic)) +(define char-numeric? (char-set->pred char-set:numeric)) +(define char-alphanumeric? (char-set->pred char-set:alphanumeric)) +(define char-graphic? (char-set->pred char-set:graphic)) +(define char-printing? (char-set->pred char-set:printing)) +(define char-whitespace? (char-set->pred char-set:whitespace)) +(define char-blank? (char-set->pred char-set:blank)) +(define char-control? (char-set->pred char-set:control)) +(define char-punctuation? (char-set->pred char-set:punctuation)) +(define char-hex-digit? (char-set->pred char-set:hex-digit)) +(define char-ascii? (char-set->pred char-set:ascii)) hunk ./defrec.scm 1 -;;; Copyright (c) 1993 by Olin Shivers. +;;; Copyright (c) 1993 by Olin Shivers. See file COPYING. hunk ./defrec.scm 40 -;;; - Setter procedures: +;;; - Field-setting procedures: hunk ./defrec.scm 48 +;;; - Field-modifier procedures: +;;; (modify-employee:salary emp (lambda (s) (* 1.03 s))) ; 3% raise +;;; ...similarly for other fields. +;;; +;;; - Record-copy procedure: +;;; (copy-employee emp) -> emp' +;;; hunk ./defrec.scm 73 +;;; Dependencies: +;;; - Code produced by the macro needs the RECORDS package. +;;; - Macro-expander code needs ERROR-PACKAGE and RECEIVING hunk ./defrec.scm 113 + (mod-name (lambda (field-name) + (s-conc "modify-" (s->s name) ":" (s->s field-name)))) + (copy-name (s-conc "copy-" (s->s name))) hunk ./defrec.scm 165 - ,@(map (lambda (spec) - `(,%define ,(field-name (spec-name spec)) - (,%record-accessor ,type-name ',(spec-name spec)))) - field-specs) + ,@(map (lambda (field) + `(,%define ,(field-name field) + (,%record-accessor ,type-name ',field))) + fields) + + ;; Field setters (SET-EMPLOYEE:NAME emp name), ... + ,@(map (lambda (field) + `(,%define ,(set-name field) + (,%record-modifier ,type-name ',field))) + fields) + + ;; Field modifiers (MODIFY-EMPLOYEE:NAME emp proc), ... + ,@(let ((%setter (rename 'setter)); set-ship:name + (%rec (rename 'r)) ; parameter: record to be modified. + (%proc (rename 'proc))) ; parameter: modifying procedure. + (map (lambda (field) + (let ((%setter-proc `(,%record-modifier ,type-name + ',field)) + (%sel-proc `(,%record-accessor ,type-name ',field)) + (%selector (rename 'getter))) + `(,%define ,(mod-name field) + (,%let ((,%setter ,%setter-proc) + (,%selector ,%sel-proc)) + (,%lambda (,%rec ,%proc) + (,%setter ,%rec (,%proc (,%selector ,%rec)))))))) + fields)) hunk ./defrec.scm 192 - ;; Setters (SET-EMPLOYEE:NAME emp name), ... - ,@(map (lambda (spec) - `(,%define ,(set-name (spec-name spec)) - (,%record-modifier ,type-name ',(spec-name spec)))) - field-specs) + ;; Record copy procedure + ,(let ((%rec (rename 'r)) + (accessors (map (lambda (f) (rename (gensym "f"))) fields))) + `(,%define ,copy-name + (,%let ((,maker (,%record-constructor ,type-name ',fields)) + . ,(map (lambda (field accessor) + `(,accessor (,%record-accessor ,type-name + ',field))) + fields accessors)) + (,%lambda (,%rec) + (,maker . ,(map (lambda (a) `(,a ,%rec)) accessors)))))) hunk ./enumconst.scm 1 -;;; Copyright (c) 1994 by Olin Shivers. +;;; Copyright (c) 1994 by Olin Shivers. See file COPYING. hunk ./errno.scm 6 - (string-downcase! + (string-downcase hunk ./fileinfo.scm 1 -;;; Copyright (c) 1993, 1994 by Olin Shivers. - -;;; needs to be modified for Guile. +;;; Copyright (c) 1993, 1994 by Olin Shivers. See file COPYING. hunk ./fileinfo.scm 6 -;;; writeable means (1) file exists & is writeable OR (2) file doesn't exist -;;; but directory is writeable. hunk ./fileinfo.scm 7 +;;; (file-not-accessible? perms fd/port/fname) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; PERMS is 3 bits, not 9. +;;; writeable means (1) file exists & is writeable OR (2) file doesn't exist +;;; and directory is writeable. That is, writeable means writeable or +;;; creatable. +;;; +;;; There's a Posix call, access(), that checks using the *real* uid, not +;;; the effective uid, so that setuid programs can figure out if the luser +;;; has perms. file-not-accessible? is defined in terms of the effective uid, +;;; so we can't use access(). +;;; +;;; This is a kind of bogus function. The only way to do a real check is to +;;; try an open() and see if it flies. Otherwise, there's an obvious atomicity +;;; problem. Also, we special case root, saying root always has all perms. But +;;; not even root can write on a read-only filesystem, such as a CD ROM. In +;;; this case, we'd blithely say the file was writeable -- there's no way to +;;; check for a ROFS without doing an open(). We need a euid analog to +;;; access(). Ah, well. +;;; +;;; I also should define a family of real uid perm-checking calls. +;;; hunk ./fileinfo.scm 30 -;;; #f Accessible +;;; #f Accessible in at least one of the requested ways. hunk ./fileinfo.scm 39 -(define (file-not-accessible? perms fd/port/fname . maybe-chase?) +(define (file-not-accessible? perms fd/port/fname) hunk ./fileinfo.scm 41 - (and (not (zero? uid)) ; Root can do what he likes. - (with-errno-handler ((err data) - ((errno/acces) 'search-denied) - ((errno/noent) 'nonexistent) - ((errno/notdir) 'not-directory)) + (with-errno-handler ((err data) + ((errno/acces) 'search-denied) + ((errno/notdir) 'not-directory) + + ;; If the file doesn't exist, we usually return + ;; 'nonexistent, but we special-case writability + ;; for the directory check. + ((errno/noent) + (and (or (zero? (bitwise-and perms 2)) + ;; This string? test *has* to return #t. + ;; If fd/port/fname is an fd or a port, + ;; we wouldn't get an errno/noent error! + ;; Just being paranoid... + (not (string? fd/port/fname)) + ;; OK, check to see if we can create + ;; files in the directory. + (file-not-accessible? 2 + (directory-as-file-name + (file-name-directory fd/port/fname)))) + 'nonexistent))) + + (and (let* ((info (file-info fd/port/fname)) + (acc (file-info:mode info))) + (cond ((zero? uid) #f) ; Root can do as he wishes. hunk ./fileinfo.scm 66 - (and (let* ((info (apply file-info fd/port/fname maybe-chase?)) - (acc (file-info:mode info))) - (cond ((= (file-info:uid info) (user-effective-uid)) ; User - (zero? (bitwise-and acc (arithmetic-shift perms 6)))) + ((= (file-info:uid info) (user-effective-uid)) ; User + (zero? (bitwise-and acc (arithmetic-shift perms 6)))) hunk ./fileinfo.scm 69 - ((= (file-info:gid info) (user-effective-gid)) ; Group - (zero? (bitwise-and acc (arithmetic-shift perms 3)))) - ((memv (file-info:gid info) (user-supplementary-gids)) - (zero? (bitwise-and acc (arithmetic-shift perms 3)))) + ((or (= (file-info:gid info) (user-effective-gid)) ; Group + (memv (file-info:gid info) (user-supplementary-gids))) + (zero? (bitwise-and acc (arithmetic-shift perms 3)))) hunk ./fileinfo.scm 73 - (else ; Other - (zero? (bitwise-and acc perms))))) - 'permission))))) + (else ; Other + (zero? (bitwise-and acc perms))))) + 'permission)))) hunk ./filemtch.scm 5 - -;;; minor changes for Guile. +;;; See file COPYING. hunk ./filemtch.scm 15 -;;; pattern-list := a list of regular expressions or predicates -;;; Each member of the list corresponds -;;; to one or more levels in a directory. -;;; (A member with embedded "/" characters -;;; corresponds to multiple levels.) -;;; Example: ("foo" "bar" "\\.c$") +;;; pattern-list := a list of +;;; - strings +;;; These are split at /'s and then +;;; treated as Posix regexp strings. +;;; - regexps (typically made with RX macro) +;;; - predicates +;;; Each member of the list corresponds to one +;;; or more levels in a directory. (A string +;;; with embedded "/" characters corresponds +;;; to multiple levels.) +;;; Example: +;;; (file-match "." #f "foo" "bar" "\\.c$") hunk ./filemtch.scm 32 +;;; Here are two more equivalent specs +;;; for the example above: +;;; (file-match "." #f "foo/bar/\\.c$") +;;; (file-match "." #f (rx "foo") (rx "bar") +;;; (rx ".c" eos)) hunk ./filemtch.scm 55 - (let ((patterns (apply append (map split-pat patterns)))) + (let ((patterns (apply append + (map (lambda (p) (if (string? p) + (map posix-string->regexp (split-pat p)) + p)) + patterns)))) hunk ./filemtch.scm 66 - (matcher (cond ((string? pattern) - (let ((re (make-regexp pattern))) - (lambda (f) (regexp-exec re f)))) + (matcher (cond ((regexp? pattern) + (lambda (f) (regexp-search? re f))) hunk ./filemtch.scm 100 - (cond ((rindex pat #\/ i) => + (cond ((string-index-right pat #\/ i) => hunk ./filemtch.scm 104 + hunk ./filesys.scm 3 -;;; Copyright (c) 1993 by Olin Shivers. +;;; Copyright (c) 1993 by Olin Shivers. See file COPYING. hunk ./fname.scm 71 - (cond ((rindex fname #\/) => + (cond ((string-index-right fname #\/) => hunk ./fname.scm 80 - (cond ((rindex fname #\/) => + (cond ((string-index-right fname #\/) => hunk ./fname.scm 93 - ((index fname #\/ start) => + ((string-index fname #\/ start) => hunk ./fname.scm 131 - (let ((dot (rindex fname #\.))) + (let ((dot (string-index-right fname #\.))) hunk ./fname.scm 157 - (cond ((index fname #\/ 1) => + (cond ((string-index fname #\/ 1) => hunk ./fname.scm 166 - (fname (ensure-file-name-is-nondirectory fname)) - (len (string-length fname))) - (if (zero? len) "/" + (fname (ensure-file-name-is-nondirectory fname))) + (if (zero? (string-length fname)) "/" hunk ./fname.scm 224 +(define (absolute-file-name fname . maybe-root) + (let ((fname (ensure-file-name-is-nondirectory fname))) + (if (zero? (string-length fname)) "/" + (simplify-file-name + (if (char=? #\/ (string-ref fname 0)) fname ; Absolute file name. + (let ((root (:optional maybe-root (cwd)))) + (string-append (file-name-as-directory root) fname))))))) + + hunk ./fname.scm 259 - ((index s #\$) => + ((string-index s #\$) => hunk ./fname.scm 267 - (cond ((index s #\}) => + (cond ((string-index s #\}) => hunk ./fname.scm 273 - (let ((i (or (index s #\/) len))) + (let ((i (or (string-index s #\/) len))) hunk ./fr.scm 2 -;;; Copyright (c) 1994 by Olin Shivers. +;;; Copyright (c) 1994 by Olin Shivers. See file COPYING. hunk ./fr.scm 57 -;;; ((suffix-splitter "") "foo") -> #("" "f" "o" "o") +;;; ((suffix-splitter (rx "")) "foo") -> #("" "f" "o" "o") hunk ./fr.scm 60 -;;; ((field-splitter ".") "foo") -> #("f" "o" "o") +;;; ((field-splitter (rx any)) "foo") -> #("f" "o" "o") hunk ./fr.scm 63 - -;;; (join-strings string-list [delimiter grammar]) => string -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Paste strings together using the delimiter string. -;;; -;;; (join-strings '("foo" "bar" "baz") ":") => "foo:bar:baz" -;;; -;;; DELIMITER defaults to a single space " " -;;; GRAMMAR is one of the symbols {infix, suffix} and defaults to 'infix. - -;;; (join-strings strings [delim grammar]) - -(define (join-strings strings . args) - (if (pair? strings) - (let-optionals args ((delim " ") (grammar 'infix)) - (check-arg string? delim join-strings) - (let ((strings (reverse strings))) - (let lp ((strings (cdr strings)) - (ans (case grammar - ((infix) (list (car strings))) - ((suffix) (list (car strings) delim)) - (else (error "Illegal grammar" grammar))))) - (if (pair? strings) - (lp (cdr strings) - (cons (car strings) (cons delim ans))) - - ; All done - (apply string-append ans))))) - - "")) ; Special-cased for infix grammar. - hunk ./fr.scm 72 - (let ((re (cond ((regexp? x) x) ; regexp pattern - ((string? x) (make-regexp x)) ; regexp string - (else (error "Illegal pattern/parser" x))))) - - ;; The matcher proc. + (let ((re (cond ((string? x) (re-string x)) + ((char-set? x) (re-char-set x)) + ((char? x) (re-string (string x))) + ((regexp? x) x) + (else (error "Illegal field-reader delimiter value" x))))) hunk ./fr.scm 78 - (cond ((regexp-exec re s i) => + (cond ((regexp-search re s i) => hunk ./fr.scm 126 -(define default-field-matcher (->delim-matcher "[^ \t\n]+")) +;;; Default field spec is runs of non-whitespace chars. +(define default-field-matcher (->delim-matcher (rx (+ (~ white))))) hunk ./fr.scm 286 -(define default-suffix-matcher (->delim-matcher "[ \t\n]+|$")) -(define default-infix-matcher (->delim-matcher "[ \t\n]+")) +(define default-suffix-matcher (->delim-matcher (rx (| (+ white) eos)))) +(define default-infix-matcher (->delim-matcher (rx (+ white)))) hunk ./fr.scm 381 -;(define (regexp-reduce string start regexp proc . state) -; (let ((end (string-length string)) -; (regexp (if (string? regexp) -; (make-regexp regexp) -; regexp))) -; +;(define (regexp-fold string start regexp proc . state) +; (let ((end (string-length string))) hunk ./fr.scm 385 -; (cond ((and (<= j end) (regexp-exec regexp string j)) => +; (cond ((and (<= j end) (regexp-search regexp string j)) => hunk ./fr.scm 392 -; (reverse (regexp-reduce string 0 regexp -; (lambda (m ans) (cons (match:substring m 0) ans)) -; '()))) +; (reverse (regexp-fold string 0 regexp +; (lambda (m ans) (cons (match:substring m 0) ans)) +; '()))) hunk ./glob.scm 5 +;;; See file COPYING. hunk ./glob.scm 80 - (re (make-regexp (glob->regexp pat)))) - (values (filter (lambda (f) (regexp-exec re f)) candidates) + (re (glob->regexp pat))) + (values (filter (lambda (f) (regexp-search? re f)) candidates) hunk ./glob.scm 91 -(define (glob->regexp pat) +(define glob->regexp + (let ((dot-star (re-repeat 0 #f re-any))) ; ".*" or (* any) + (lambda (pat) + (let ((pat-len (string-length pat)) + + (str-cons (lambda (chars res) ; Reverse CHARS and cons the + (if (pair? chars) ; result string-re onto RES. + (cons (re-string (list->string (reverse chars))) + res) + res)))) + + ;; We accumulate chars into CHARS, and coalesce into a single string + ;; with STR-CONS when we run across a non-char. + (let lp ((chars '()) + (res (list re-bos)) + (i 0)) + (if (= i pat-len) + (re-seq (reverse (str-cons chars res))) + + (let ((c (string-ref pat i)) + (i (+ i 1))) + (case c + ((#\\) (if (< i pat-len) + (lp (cons (string-ref pat i) chars) + res (+ i 1)) + (error "Ill-formed glob pattern -- ends in backslash" pat))) + + ((#\*) (lp '() + (cons dot-star (str-cons chars res)) + i)) + ((#\?) (lp '() + (cons re-any (str-cons chars res)) + i)) + + ((#\[) (receive (cset i) (parse-glob-bracket pat i) + (lp '() + (cons (re-char-set cset) + (str-cons chars res)) + i))) + + (else (lp (cons c chars) res i)))))))))) + + +;;; A glob bracket expression is [...] or [^...]. +;;; The body is a sequence of and - ranges. +;;; A is any character except right-bracket, carat, hypen or backslash, +;;; or a backslash followed by any character at all. + +(define (parse-glob-bracket pat i) hunk ./glob.scm 141 - (let lp ((result '(#\^)) - (i 0) - (state 'normal)) - (if (= i pat-len) + (receive (negate? i) (if (and (< i pat-len) (char=? #\^ (string-ref pat i))) + (values #t (+ i 1)) + (values #f i)) hunk ./glob.scm 145 - (if (eq? state 'normal) - (list->string (reverse (cons #\$ result))) - (error "Illegal glob pattern" pat)) + (let lp ((elts '()) (i i)) + (if (>= i pat-len) + (error "Ill-formed glob pattern -- no terminating close-bracket" pat) hunk ./glob.scm 149 + (let ((c (string-ref pat i)) + (i (+ i 1))) + (case c + ((#\]) + (let ((cset (fold (lambda (elt cset) + (char-set-union + cset + (if (char? elt) + (char-set elt) + (ascii-range->char-set (char->ascii (car elt)) + (+ 1 (char->ascii (cdr elt))))))) + char-set:empty + elts))) + (values (re-char-set (if negate? + (char-set-invert cset) + cset)) + i))) hunk ./glob.scm 167 - (let ((c (string-ref pat i)) - (i (+ i 1))) - (case state - ((char-set) - (lp (cons c result) - i - (if (char=? c #\]) 'normal 'char-set))) + ((#\\) + (if (>= i pat-len) + (error "Ill-formed glob pattern -- ends in backslash" pat) + (lp (cons (string-ref pat i) elts) (+ i 1)))) hunk ./glob.scm 172 - ((escape) - (lp (case c - ((#\$ #\^ #\. #\+ #\? #\* #\| #\( #\) #\[) - (cons c (cons #\\ result))) - (else (cons c result))) - i - 'normal)) + ((#\-) + (cond ((>= i pat-len) + (error "Ill-formed glob pattern -- unterminated range." pat)) + ((or (null? elts) (not (char? (car elts)))) + (error "Ill-formed glob pattern -- range has no beginning." pat)) + (else (lp (cons (cons (car elts) (string-ref pat i)) elts) + (+ i 1))))) hunk ./glob.scm 180 - ;; Normal - (else (case c - ((#\\) (lp result i 'escape)) - ((#\*) (lp (cons #\* (cons #\. result)) i 'normal)) - ((#\?) (lp (cons #\. result) i 'normal)) - ((#\[) (lp (cons c result) i 'char-set)) - ((#\$ #\^ #\. #\+ #\| #\( #\)) - (lp (cons c (cons #\\ result)) i 'normal)) - (else (lp (cons c result) i 'normal)))))))))) + (else (lp (cons c elts) i))))))))) hunk ./glob.scm 186 - (let lp ((i 0) - (escape? #f)) ; Was last char an escape char (backslash)? - (if (= i patlen) - - (if escape? - (error "Ill-formed glob pattern" pattern) - #t) - + (let lp ((i 0)) + (or (= i patlen) hunk ./glob.scm 189 - (if escape? (lp next-i #f) - (case (string-ref pattern i) - ((#\* #\? #\[) #f) - ((#\\) (lp next-i #t)) - (else (lp next-i #f))))))))) + (case (string-ref pattern i) + ((#\\) ; Escape char + (if (= next-i patlen) + (error "Ill-formed glob pattern -- ends in backslash" + pattern) + (lp (+ next-i 1)))) + ((#\* #\? #\[) #f) + (else (lp next-i)))))))) hunk ./here.scm 4 -;;; Copyright (c) 1995 by Olin Shivers. - -;;; minor changes for Guile. +;;; Copyright (c) 1995 by Olin Shivers. See file COPYING. hunk ./here.scm 115 +;; guile hunk ./here.scm 119 -; (read-char port) ; Snarf the first < char. hunk ./init.scm 24 +(defmacro structure-ref (structure symb) + symb) + hunk ./init.scm 39 -;; replace procedures in utilities.scm with guile primitives. -(set! index string-index) -;; note the different convention for rindex starting position. -(set! rindex (lambda (str char . start) - (apply string-rindex str char 0 start))) hunk ./init.scm 52 -(load-from-path "scsh/re.scm") +(load-from-path "scsh/jar-defrecord.scm") +(load-from-path "scsh/char-set.scm") + +(define guile-regexp? regexp?) +(load-from-path "scsh/rx/re-low.scm") +(load-from-path "scsh/rx/re-high.scm") +(load-from-path "scsh/rx/let-match.scm") +(load-from-path "scsh/rx/spencer.scm") +(load-from-path "scsh/rx/oldfuns.scm") +(load-from-path "scsh/rx/cond-package.scm") +(load-from-path "scsh/rx/parse.scm") +(load-from-path "scsh/rx/posixstr.scm") +(load-from-path "scsh/rx/re-fold.scm") +(load-from-path "scsh/rx/re-subst.scm") +(load-from-path "scsh/rx/re-syntax.scm") +(load-from-path "scsh/rx/rx-lib.scm") +(load-from-path "scsh/rx/simp.scm") +(load-from-path "scsh/rx/re.scm") + +(define-syntax rx expand-rx) +(define-syntax if-sre-form + (lambda (exp r c) + (if (sre-form? (cadr exp) r c) + (caddr exp) + (cadddr exp)))) + +(load-from-path "scsh/lib/ccp.scm") +(load-from-path "scsh/lib/list-lib.scm") + +;; replaces string-downcase, string-downcase!, string-upcase, string-upcase! +(load-from-path "scsh/lib/string-lib.scm") + hunk ./init.scm 94 -(load-from-path "scsh/char-set.scm") hunk ./jar-defrecord.scm 1 +; Copyright (c) 1993, 1994 Richard Kelsey and Jonathan Rees. See file COPYING. + + +; This is JAR's define-record-type, which doesn't resemble Richard's. + +; There's no implicit name concatenation, so it can be defined +; entirely using syntax-rules. Example: +; (define-record-type foo :foo +; (make-foo x y) +; foo? - predicate name is optional +; (x foo-x) +; (y foo-y) +; (z foo-z set-foo-z!)) + +(define-syntax define-record-type + (syntax-rules () + ((define-record-type ?id ?type + (?constructor ?arg ...) + (?field . ?field-stuff) + ...) + (begin (define ?type (make-record-type '?id '(?field ...))) + (define ?constructor (record-constructor ?type '(?arg ...))) + (define-accessors ?type (?field . ?field-stuff) ...))) + ((define-record-type ?id ?type + (?constructor ?arg ...) + ?pred + ?more ...) + (begin (define-record-type ?id ?type + (?constructor ?arg ...) + ?more ...) + (define ?pred (record-predicate ?type)))))) + +; Straightforward version +(define-syntax define-accessors + (syntax-rules () + ((define-accessors ?type ?field-spec ...) + (begin (define-accessor ?type . ?field-spec) ...)))) + +(define-syntax define-accessor + (syntax-rules () + ((define-accessor ?type ?field ?accessor) + (define ?accessor (record-accessor ?type '?field))) + ((define-accessor ?type ?field ?accessor ?modifier) + (begin (define ?accessor (record-accessor ?type '?field)) + (define ?modifier (record-modifier ?type '?field)))) + ((define-accessor ?type ?field) + (begin)))) hunk ./lib/ccp.scm 1 +;;; Char->char partial maps -*- Scheme -*- +;;; Copyright (C) 1998 by Olin Shivers. + +;;; CCPs are an efficient data structure for doing simple string transforms, +;;; similar to the kinds of things you would do with the tr(1) program. +;;; +;;; This code is tuned for a 7- or 8-bit character type. Large, 16-bit +;;; character types would need a more sophisticated data structure, tuned +;;; for sparseness. I would suggest something like this: +;;; (define-record ccp +;;; domain ; The domain char-set +;;; map ; Sorted vector of (char . string) pairs +;;; ; specifying the map. +;;; id?) ; If true, mappings not specified by MAP are +;;; ; identity mapping. If false, MAP must +;;; ; specify a mapping for every char in DOMAIN. +;;; +;;; A (char . string) elements in MAP specifies a mapping for the contiguous +;;; sequence of L chars beginning with CHAR (in the sequence of the underlying +;;; char type representation), where L is the length of STRING. These MAP elements +;;; are sorted by CHAR, so that binary search can be used to get from an input +;;; character C to the right MAP element quickly. +;;; +;;; This representation should be reasonably compact for standard mappings on, +;;; say, a Unicode CCP. An implementation might wish to have a cache field +;;; in the record for storing the full 8kb bitset when performing ccp-map +;;; operations. Or, an implementation might want to store the Latin-1 subset +;;; of the map in a dense format, and keep the remainder in a sparse format. + +(define num-chars (char-set-size char-set:full)) ; AKA 256. + +(define-record ccp + domain ; The domain char-set + dshared? ; Is the domain value shared or linear? + map ; 256-elt string + mshared?) ; Is the map string shared or linear? + + +;;; Accessors and setters that manage the linear bookkeeping +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (ccp-domain ccp) + (set-ccp:dshared? ccp #t) + (ccp:domain ccp)) + +;;; CCP is a linear ccp. PROC is a domain->domain function; it must be +;;; linear in its parameter and result. +;;; +;;; Updates the domain of the CCP with PROC, returns the resulting +;;; CCP; reuses the old one to construct the new one. + +(define (restrict-linear-ccp-domain ccp proc) + (let ((new-d (proc (if (ccp:dshared? ccp) + (begin (set-ccp:dshared? ccp #f) + (char-set-copy (ccp:domain ccp))) + (ccp:domain ccp))))) + (set-ccp:domain ccp new-d) + ccp)) + +;;; CCP is a linear CCP. PROC is a domain x cmap -> domain function. +;;; It is passed a linear domain and cmap string. It may side-effect +;;; the cmap string, and returns the resulting updated domain. +;;; We return the resulting CCP, reusing the parameter to construct it. + +(define (linear-update-ccp ccp proc) + (let* ((cmap (if (ccp:mshared? ccp) + (begin (set-ccp:mshared? ccp #f) + (string-copy (ccp:map ccp))) + (ccp:map ccp))) + + (new-d (proc (if (ccp:dshared? ccp) + (begin (set-ccp:dshared? ccp #f) + (char-set-copy (ccp:domain ccp))) + (ccp:domain ccp)) + cmap))) + (set-ccp:domain ccp new-d) + ccp)) + + + +;;; Return CCP's map field, and mark it as shared. CCP functions that +;;; restrict a ccp's domain share map strings, so they use this guy. +(define (ccp:map/shared ccp) + (set-ccp:mshared? ccp #t) + (ccp:map ccp)) + +(define (ccp-copy ccp) (make-ccp (char-set-copy (ccp:domain ccp)) #f + (string-copy (ccp:map ccp)) #f)) + +;;; N-ary equality relation for partial maps + +(define (ccp= ccp1 . rest) + (let ((domain (ccp:domain ccp1)) + (cmap (ccp:map ccp1))) + (every (lambda (ccp2) + (and (char-set= domain (ccp:domain ccp2)) + (let ((cmap2 (ccp:map ccp2))) + (char-set-every? (lambda (c) + (let ((i (char->ascii c))) + (char=? (string-ref cmap i) + (string-ref cmap2 i)))) + domain)))) + rest))) + + +;;; N-ary subset relation for partial maps + +(define (ccp<= ccp1 . rest) + (let lp ((domain1 (ccp:domain ccp1)) + (cmap1 (ccp:map ccp1)) + (rest rest)) + (or (not (pair? rest)) + (let* ((ccp2 (car rest)) + (domain2 (ccp:domain ccp2)) + (cmap2 (ccp:map ccp2)) + (rest (cdr rest))) + (and (char-set<= domain1 domain2) + (let ((cmap2 (ccp:map ccp2))) + (char-set-every? (lambda (c) + (let ((i (char->ascii c))) + (char=? (string-ref cmap1 i) + (string-ref cmap2 i)))) + domain1)) + (lp domain2 cmap2 rest)))))) + + +;;; CCP iterators +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (ccp-fold kons knil ccp) + (let ((cmap (ccp:map ccp))) + (char-set-fold (lambda (c v) (kons c (string-ref cmap (char->ascii c)) v)) + knil + (ccp:domain ccp)))) + +(define (ccp-for-each proc ccp) + (let ((cmap (ccp:map ccp))) + (char-set-for-each (lambda (c) (proc c (string-ref cmap (char->ascii c)))) + (ccp:domain ccp)))) + +(define (ccp->alist ccp) + (ccp-fold (lambda (from to alist) (cons (cons from to) alist)) + '() + ccp)) + + +;;; CCP-RESTRICT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Restrict a ccp's domain. + +(define (ccp-restrict ccp cset) + (make-ccp (char-set-intersection cset (ccp:domain ccp)) + #f + (ccp:map/shared ccp) + #t)) + +(define (ccp-restrict! ccp cset) + (restrict-linear-ccp-domain ccp (lambda (d) (char-set-intersection! d cset)))) + + +;;; CCP-ADJOIN ccp from-char1 to-char1 ... +;;; CCP-DELETE ccp char1 ... +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Add & delete mappings to/from a ccp. + +(define (ccp-delete ccp . chars) + (make-ccp (apply char-set-delete (ccp:domain ccp) chars) + #f + (ccp:map/shared ccp) + #t)) + +(define (ccp-delete! ccp . chars) + (restrict-linear-ccp-domain ccp (lambda (d) (apply char-set-delete! d chars)))) + + +(define (ccp-adjoin ccp . chars) + (let ((cmap (string-copy (ccp:map ccp)))) + (make-ccp (install-ccp-adjoin! cmap (char-set-copy (ccp:domain ccp)) chars) + #f + cmap + #f))) + +(define (ccp-adjoin! ccp . chars) + (linear-update-ccp ccp (lambda (d cmap) (install-ccp-adjoin! cmap d chars)))) + +(define (install-ccp-adjoin! cmap domain chars) + (let lp ((chars chars) (d domain)) + (if (pair? chars) + (let ((from (car chars)) + (to (cadr chars)) + (chars (cddr chars))) + (string-set! cmap (char->ascii from) to) + (lp chars (char-set-adjoin! d from))) + d))) + + +;;; CCP-EXTEND ccp1 ... +;;; CCP-EXTEND! ccp1 ccp2 ... +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Extend ccp1 with ccp2, etc. + +(define (ccp-extend . ccps) + (if (pair? ccps) + (let ((ccp0 (car ccps)) + (ccps (cdr ccps))) + (if (pair? ccps) + (let ((cmap (string-copy (ccp:map ccp0)))) ; Copy cmap. + ;; The FOLD installs each ccp in CCPS into CMAP and produces + ;; the new domain. + (make-ccp (fold (lambda (ccp d) + (install-ccp-extension! cmap d ccp)) + (char-set-copy (ccp:domain ccp0)) + ccps) + #f cmap #f)) + + ccp0)) ; Only 1 parameter + + ccp:0)) ; 0 parameters + +(define (ccp-extend! ccp0 . ccps) + (linear-update-ccp ccp0 + (lambda (domain cmap) + (fold (lambda (ccp d) (install-ccp-extension! cmap d ccp)) + domain + ccps)))) + + +;;; Side-effect CMAP, linear-update and return DOMAIN. +(define (install-ccp-extension! cmap domain ccp) + (let ((cmap1 (ccp:map ccp)) + (domain1 (ccp:domain ccp))) + (char-set-for-each (lambda (c) + (let ((i (char->ascii c))) + (string-set! cmap i (string-ref cmap1 i)))) + domain1) + (char-set-union! domain domain1))) + + +;;; Compose the CCPs. 0-ary case: (ccp-compose) = ccp:1. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; For each character C-IN in the original domain, we push it +;;; through the pipeline of CCPs. If we ever land outside the +;;; domain of a ccp, we punt C-IN. If we push it all the way +;;; through, we add C-IN to our result domain, and add the mapping +;;; into the cmap we are assembling. +;;; +;;; Looping this way avoids building up intermediate temporary +;;; CCPs. If CCP's were small bitsets, we might be better off +;;; slicing the double-nested loops the other way around. + +(define (ccp-compose . ccps) + (cond ((not (pair? ccps)) ccp:1) ; 0 args => ccp:1 + ((not (pair? (cdr ccps))) (car ccps)) ; 1 arg + (else + (let* ((v (list->vector ccps)) + (vlen-2 (- (vector-length v) 2)) + (cmap (make-string num-chars)) + (d1 (ccp:domain (vector-ref v (+ vlen-2 1)))) + (d (char-set-fold (lambda (c-in d) + (let lp ((c c-in) (i vlen-2)) + (if (>= i 0) + (let ((ccp (vector-ref v i))) + (if (char-set-contains? (ccp:domain ccp) c) + (lp (string-ref (ccp:map ccp) + (char->ascii c)) + (- i 1)) + + ;; Lose: remove c-in from d. + (char-set-delete! d c-in))) + + ;; Win: C-IN -> C + (begin (string-set! cmap + (char->ascii c-in) + c) + d)))) + (char-set-copy d1) + d1))) + (make-ccp d #f cmap #f))))) + + + +;;; ALIST->CPP +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (alist->ccp cc-alist . maybe-base-ccp) + (let ((base (:optional maybe-base-ccp ccp:0))) + (if (pair? cc-alist) + (let ((cmap (string-copy (ccp:map base)))) + (make-ccp (install-ccp-alist! cmap + (char-set-copy (ccp:domain base)) + cc-alist) + #f cmap #f)) + base))) + +(define (alist->ccp! alist base) + (linear-update-ccp base (lambda (d cmap) (install-ccp-alist! cmap d alist)))) + +;;; Side-effect CMAP, linear-update and return DOMAIN. +(define (install-ccp-alist! cmap domain alist) + (fold (lambda (from/to d) (let ((from (car from/to)) + (to (cdr from/to))) + (string-set! cmap (char->ascii from) to) + (char-set-adjoin! domain from))) + domain + alist)) + + +;;; PROC->CCP +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; (proc->ccp proc [domain base-ccp]) + +(define (proc->ccp proc . args) + (let-optionals args ((proc-domain char-set:full) + (base ccp:0)) + (let ((cmap (string-copy (ccp:map base)))) + (make-ccp (install-ccp-proc! cmap (char-set-copy (ccp:domain base)) + proc proc-domain) + #f cmap #f)))) + +(define (proc->ccp! proc proc-domain base) + (linear-update-ccp base + (lambda (d cmap) (install-ccp-proc! cmap d proc proc-domain)))) + +(define (install-ccp-proc! cmap domain proc proc-domain) + (char-set-for-each (lambda (c) (string-set! cmap (char->ascii c) (proc c))) + proc-domain) + (char-set-union! domain proc-domain)) + + +;;; CONSTANT-CCP +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; (constant-ccp char [domain base-ccp]) +;;; Extend BASE-CCP with the a map taking every char in DOMAIN to CHAR. +;;; DOMAIN defaults to char-set:full. BASE-CCP defaults to CCP:0. + +(define (constant-ccp char . args) + (let-optionals args ((char-domain char-set:full) (base ccp:0)) + (let ((cmap (string-copy (ccp:map base)))) + (make-ccp (install-constant-ccp! cmap (char-set-copy (ccp:domain base)) + char char-domain) + #f cmap #f)))) + +(define (constant-ccp! char char-domain base) + (linear-update-ccp base + (lambda (d cmap) (install-constant-ccp! cmap d char char-domain)))) + +;;; Install the constant mapping into CMAP0 by side-effect, +;;; linear-update & return DOMAIN0 with the constant-mapping's domain. +(define (install-constant-ccp! cmap0 domain0 char char-domain) + (char-set-for-each (lambda (c) (string-set! cmap0 (char->ascii c) char)) + char-domain) + (char-set-union! domain0 char-domain)) + + +;;; CCP/MAPPINGS +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; (ccp/mappings from1 to1 from2 to2 ...) -> ccp +;;; (extend-ccp/mappings base-ccp from1 to1 from2 to2 ...) -> ccp +;;; (extend-ccp/mappings! base-ccp from1 to1 from2 to2 ...) -> ccp +;;; Each FROM element is either a string or a (lo-char . hi-char) range. +;;; Each TO element is either a string or a lo-char. Strings are replicated +;;; to match the length of the corresponding FROM element. +;;; CCP/MAPPINGS's base CCP is CCP:0 +;;; +;;; Tedious code. + +;;; Internal utility. +;;; Install the FROM->TO mapping pair into DOMAIN & CMAP by side-effect. +;;; Return the new domain. + +(define (install-ccp-mapping-pair! cmap domain from to) + ;; Tedium -- four possibilities here: + ;; str->str, str->lo-char, + ;; range->str, range->lo-char. + (if (string? from) + (if (string? to) + ;; "abc" -> "ABC" + (let ((len1 (string-length from)) + (len2 (string-length to))) + (let lp2 ((i (- len1 1)) + (j (modulo (- len2 1) len1)) + (d domain)) + (if (>= i 0) + (let ((c (string-ref from i))) + (string-set! cmap + (char->ascii c) + (string-ref to i)) + (lp2 (- i 1) + (- (if (> j 0) j len2) 1) + (char-set-adjoin! d c))) + d))) + + ;; "abc" -> #\A + (let lp2 ((i (- (string-length from) 1)) + (j (char->ascii to)) + (d domain)) + (if (>= i 0) + (let ((c (string-ref from i))) + (string-set! cmap + (char->ascii c) + (ascii->char j)) + (lp2 (- i 1) + (- j 1) + (char-set-adjoin! d c))) + d))) + + (let ((from-start (char->ascii (car from))) + (from-end (char->ascii (cdr from)))) + (if (string? to) + (let ((len2-1 (- (string-length to) 1))) + ;; (#\a . #\c) -> "ABC" + (let lp2 ((i from-start) (j 0) (d domain)) + (if (<= i from-end) + (let ((c (string-ref to j))) + (string-set! cmap i c) + (lp2 (+ i 1) + (if (= j len2-1) 0 (+ j 1)) + (char-set-adjoin! d c))) + d))) + + ;; (#\a . #\c) -> #\A + (do ((i from-start (+ i 1)) + (j (char->ascii to) (+ j 1)) + (d domain (begin (string-set! cmap i (ascii->char j)) + (char-set-adjoin d (ascii->char i))))) + ((> i from-end) d)))))) + +;;; Internal utility -- side-effects CMAP; linear-updates & returns DOMAIN. +(define (install-mapping-pairs cmap domain args) + (let lp ((domain domain) (args args)) + (if (pair? args) + (lp (install-ccp-mapping-pair! cmap domain (car args) (cadr args)) + (cddr args)) + domain))) + +(define (ccp/mappings . args) + (let ((cmap (make-string num-chars))) + (make-ccp (install-mapping-pairs (make-string num-chars) + (char-set-copy char-set:empty) + args) + #f cmap #f))) + +(define (extend-ccp/mappings base . args) + (let ((cmap (string-copy (ccp:map base)))) + (make-ccp (install-mapping-pairs cmap (char-set-copy (ccp:domain base)) args) + #f cmap #f))) + +(define (extend-ccp/mappings! base . args) + (linear-update-ccp base (lambda (d cmap) (install-mapping-pairs cmap d args)))) + + +;;; CONSTRUCT-CCP! ccp elt ... +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; The kitchen-sink constructor; static typing be damned. +;;; ELTS are interpreted as follows: +;;; (lo-char . hi-char) to-string|lo-char ; ccp/range +;;; from-string to-string|lo-char ; ccp/range +;;; ccp ; ccp-extend +;;; alist ; alist->ccp +;;; domain char ; ccp-constant +;;; domain proc ; proc->ccp + +(define (construct-ccp! ccp . elts) + (linear-update-ccp ccp (lambda (d cmap) (install-ccp-construct! cmap d elts)))) + +(define (construct-ccp base . elts) + (let ((cmap (string-copy (ccp:map base)))) + (make-ccp (install-ccp-construct! cmap (char-set-copy (ccp:domain base)) elts) + #f cmap #f))) + +;;; Install the mappings into CMAP by side-effect, +;;; linear-update & return DOMAIN with the final domain. + +(define (install-ccp-construct! cmap domain elts) + (let lp ((d domain) (elts elts)) + ;(format #t "d=~s elts=~s\n" d elts) + (if (not (pair? elts)) d + (let ((elt (car elts)) + (elts (cdr elts))) + (cond ((pair? elt) + (cond ((pair? (car elt)) ; ELT is an alist. + (lp (install-ccp-alist! cmap d elt) elts)) + ((char? (car elt)) ; ELT is (lo-char . hi-char) range. + (lp (install-ccp-mapping-pair! cmap d elt (car elts)) + (cdr elts))) + (else (error "Illegal elt to construct-ccp" elt)))) + + ((string? elt) + (lp (install-ccp-mapping-pair! cmap d elt (car elts)) + (cdr elts))) + + ((ccp? elt) (lp (install-ccp-extension! cmap d elt) elts)) + + ((char-set? elt) + (let ((elt2 (car elts)) + (elts (cdr elts))) + (lp (cond ((char? elt2) + (install-constant-ccp! cmap d elt2 elt)) + ((procedure? elt2) + (install-ccp-proc! cmap d elt2 elt)) + (else (error "Illegal elt-pair to construct-ccp" + elt elt2))) + elts))) + + (else (error "Illegal elt to construct-ccp" elt))))))) + + +;;; CCP unfold + +(define (ccp-unfold p f g seed) + (let lp ((seed seed) (ccp (ccp-copy ccp:0))) + (if (p seed) ccp + (lp (g seed) + (receive (from to) (f seed) + (lp (g seed) (ccp-adjoin! ccp from to))))))) + + + +;;; Using CCPs +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; TR ccp string [start end] -> string +;;; CCP-MAP ccp string [start end] -> string +;;; CCP-MAP! ccp string [start end] -> undefined +;;; CCP-APP ccp char -> char or false + +;;; If a char in S is not in CCP's domain, it is dropped from the result. +;;; You can use this to map and delete chars from a string. + +(define (tr ccp s . maybe-start+end) + (let-optionals maybe-start+end ((start 0) (end (string-length s))) + ;; Count up the chars in S that are in the domain, + ;; and allocate the answer string ANS: + (let* ((len (- end start)) + (domain (ccp:domain ccp)) + (ans-len (string-fold (lambda (c numchars) + (if (char-set-contains? domain c) + (+ numchars 1) + numchars)) + 0 s start end)) + (ans (make-string ans-len))) + + ;; Apply the map, installing the resulting chars into ANS: + (string-fold (lambda (c i) (cond ((ccp-app ccp c) => + (lambda (c) + (string-set! ans i c) + (+ i 1))) + (else i))) ; Not in domain -- drop it. + 0 s start end) + ans))) + +(define (ccp-map ccp s . maybe-start+end) + (apply string-map (lambda (c) (ccp-app ccp c)) s maybe-start+end)) + +(define (ccp-map! ccp s . maybe-start+end) + (apply string-map! (lambda (c) (ccp-app ccp c)) s maybe-start+end)) + +(define (ccp-app ccp char) + (and (char-set-contains? (ccp:domain ccp) char) + (string-ref (ccp:map ccp) (char->ascii char)))) + + +;;; Primitive CCPs +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define id-cmap + (let ((m (make-string num-chars))) + (do ((i (- num-chars 1) (- i 1))) + ((< i 0)) + (string-set! m i (ascii->char i))) + m)) + +(define ccp:0 (make-ccp char-set:empty #t id-cmap #t)) +(define ccp:1 (make-ccp char-set:full #t id-cmap #t)) + +(define ccp:upcase (proc->ccp char-upcase char-set:full)) +(define ccp:downcase (proc->ccp char-downcase char-set:full)) hunk ./lib/list-lib.scm 1 +;;; SRFI-1 list-processing library -*- Scheme -*- +;;; Reference implementation +;;; +;;; Copyright (c) 1998, 1999 by Olin Shivers. You may do as you please with +;;; this code as long as you do not remove this copyright notice or +;;; hold me liable for its use. Please send bug reports to shivers@ai.mit.edu. +;;; -Olin + +;;; This is a library of list- and pair-processing functions. I wrote it after +;;; carefully considering the functions provided by the libraries found in +;;; R4RS/R5RS Scheme, MIT Scheme, Gambit, RScheme, MzScheme, slib, Common +;;; Lisp, Bigloo, guile, T, APL and the SML standard basis. It is a pretty +;;; rich toolkit, providing a superset of the functionality found in any of +;;; the various Schemes I considered. + +;;; This implementation is intended as a portable reference implementation +;;; for SRFI-1. See the porting notes below for more information. + +;;; Exported: +;;; xcons tree-copy make-list list-tabulate cons* list-copy +;;; proper-list? circular-list? dotted-list? not-pair? null-list? list= +;;; circular-list length+ +;;; iota +;;; first second third fourth fifth sixth seventh eighth ninth tenth +;;; car+cdr +;;; take drop +;;; take-right drop-right +;;; take! drop-right! +;;; split-at split-at! +;;; last last-pair +;;; zip unzip1 unzip2 unzip3 unzip4 unzip5 +;;; count +;;; append! append-reverse append-reverse! concatenate concatenate! +;;; unfold fold pair-fold reduce +;;; unfold-right fold-right pair-fold-right reduce-right +;;; append-map append-map! map! pair-for-each filter-map map-in-order +;;; filter partition remove +;;; filter! partition! remove! +;;; find find-tail any every list-index +;;; take-while drop-while take-while! +;;; span break span! break! +;;; delete delete! +;;; alist-cons alist-copy +;;; delete-duplicates delete-duplicates! +;;; alist-delete alist-delete! +;;; reverse! +;;; lset<= lset= lset-adjoin +;;; lset-union lset-intersection lset-difference lset-xor lset-diff+intersection +;;; lset-union! lset-intersection! lset-difference! lset-xor! lset-diff+intersection! +;;; +;;; In principle, the following R4RS list- and pair-processing procedures +;;; are also part of this package's exports, although they are not defined +;;; in this file: +;;; Primitives: cons pair? null? car cdr set-car! set-cdr! +;;; Non-primitives: list length append reverse cadr ... cddddr list-ref +;;; memq memv assq assv +;;; (The non-primitives are defined in this file, but commented out.) +;;; +;;; These R4RS procedures have extended definitions in SRFI-1 and are defined +;;; in this file: +;;; map for-each member assoc +;;; +;;; The remaining two R4RS list-processing procedures are not included: +;;; list-tail (use drop) +;;; list? (use proper-list?) + + +;;; A note on recursion and iteration/reversal: +;;; Many iterative list-processing algorithms naturally compute the elements +;;; of the answer list in the wrong order (left-to-right or head-to-tail) from +;;; the order needed to cons them into the proper answer (right-to-left, or +;;; tail-then-head). One style or idiom of programming these algorithms, then, +;;; loops, consing up the elements in reverse order, then destructively +;;; reverses the list at the end of the loop. I do not do this. The natural +;;; and efficient way to code these algorithms is recursively. This trades off +;;; intermediate temporary list structure for intermediate temporary stack +;;; structure. In a stack-based system, this improves cache locality and +;;; lightens the load on the GC system. Don't stand on your head to iterate! +;;; Recurse, where natural. Multiple-value returns make this even more +;;; convenient, when the recursion/iteration has multiple state values. + +;;; Porting: +;;; This is carefully tuned code; do not modify casually. +;;; - It is careful to share storage when possible; +;;; - Side-effecting code tries not to perform redundant writes. +;;; +;;; That said, a port of this library to a specific Scheme system might wish +;;; to tune this code to exploit particulars of the implementation. +;;; The single most important compiler-specific optimisation you could make +;;; to this library would be to add rewrite rules or transforms to: +;;; - transform applications of n-ary procedures (e.g. LIST=, CONS*, APPEND, +;;; LSET-UNION) into multiple applications of a primitive two-argument +;;; variant. +;;; - transform applications of the mapping functions (MAP, FOR-EACH, FOLD, +;;; ANY, EVERY) into open-coded loops. The killer here is that these +;;; functions are n-ary. Handling the general case is quite inefficient, +;;; requiring many intermediate data structures to be allocated and +;;; discarded. +;;; - transform applications of procedures that take optional arguments +;;; into calls to variants that do not take optional arguments. This +;;; eliminates unnecessary consing and parsing of the rest parameter. +;;; +;;; These transforms would provide BIG speedups. In particular, the n-ary +;;; mapping functions are particularly slow and cons-intensive, and are good +;;; candidates for tuning. I have coded fast paths for the single-list cases, +;;; but what you really want to do is exploit the fact that the compiler +;;; usually knows how many arguments are being passed to a particular +;;; application of these functions -- they are usually explicitly called, not +;;; passed around as higher-order values. If you can arrange to have your +;;; compiler produce custom code or custom linkages based on the number of +;;; arguments in the call, you can speed these functions up a *lot*. But this +;;; kind of compiler technology no longer exists in the Scheme world as far as +;;; I can see. +;;; +;;; Note that this code is, of course, dependent upon standard bindings for +;;; the R5RS procedures -- i.e., it assumes that the variable CAR is bound +;;; to the procedure that takes the car of a list. If your Scheme +;;; implementation allows user code to alter the bindings of these procedures +;;; in a manner that would be visible to these definitions, then there might +;;; be trouble. You could consider horrible kludgery along the lines of +;;; (define fact +;;; (let ((= =) (- -) (* *)) +;;; (letrec ((real-fact (lambda (n) +;;; (if (= n 0) 1 (* n (real-fact (- n 1))))))) +;;; real-fact))) +;;; Or you could consider shifting to a reasonable Scheme system that, say, +;;; has a module system protecting code from this kind of lossage. +;;; +;;; This code does a fair amount of run-time argument checking. If your +;;; Scheme system has a sophisticated compiler that can eliminate redundant +;;; error checks, this is no problem. However, if not, these checks incur +;;; some performance overhead -- and, in a safe Scheme implementation, they +;;; are in some sense redundant: if we don't check to see that the PROC +;;; parameter is a procedure, we'll find out anyway three lines later when +;;; we try to call the value. It's pretty easy to rip all this argument +;;; checking code out if it's inappropriate for your implementation -- just +;;; nuke every call to CHECK-ARG. +;;; +;;; On the other hand, if you *do* have a sophisticated compiler that will +;;; actually perform soft-typing and eliminate redundant checks (Rice's systems +;;; being the only possible candidate of which I'm aware), leaving these checks +;;; in can *help*, since their presence can be elided in redundant cases, +;;; and in cases where they are needed, performing the checks early, at +;;; procedure entry, can "lift" a check out of a loop. +;;; +;;; Finally, I have only checked the properties that can portably be checked +;;; with R5RS Scheme -- and this is not complete. You may wish to alter +;;; the CHECK-ARG parameter checks to perform extra, implementation-specific +;;; checks, such as procedure arity for higher-order values. +;;; +;;; The code has only these non-R4RS dependencies: +;;; A few calls to an ERROR procedure; +;;; Uses of the R5RS multiple-value procedure VALUES and the m-v binding +;;; RECEIVE macro (which isn't R5RS, but is a trivial macro). +;;; Many calls to a parameter-checking procedure check-arg: +;;; (define (check-arg pred val caller) +;;; (let lp ((val val)) +;;; (if (pred val) val (lp (error "Bad argument" val pred caller))))) +;;; A few uses of the LET-OPTIONAL and :OPTIONAL macros for parsing +;;; optional arguments. +;;; +;;; Most of these procedures use the NULL-LIST? test to trigger the +;;; base case in the inner loop or recursion. The NULL-LIST? function +;;; is defined to be a careful one -- it raises an error if passed a +;;; non-nil, non-pair value. The spec allows an implementation to use +;;; a less-careful implementation that simply defines NULL-LIST? to +;;; be NOT-PAIR?. This would speed up the inner loops of these procedures +;;; at the expense of having them silently accept dotted lists. + +;;; A note on dotted lists: +;;; I, personally, take the view that the only consistent view of lists +;;; in Scheme is the view that *everything* is a list -- values such as +;;; 3 or "foo" or 'bar are simply empty dotted lists. This is due to the +;;; fact that Scheme actually has no true list type. It has a pair type, +;;; and there is an *interpretation* of the trees built using this type +;;; as lists. +;;; +;;; I lobbied to have these list-processing procedures hew to this +;;; view, and accept any value as a list argument. I was overwhelmingly +;;; overruled during the SRFI discussion phase. So I am inserting this +;;; text in the reference lib and the SRFI spec as a sort of "minority +;;; opinion" dissent. +;;; +;;; Many of the procedures in this library can be trivially redefined +;;; to handle dotted lists, just by changing the NULL-LIST? base-case +;;; check to NOT-PAIR?, meaning that any non-pair value is taken to be +;;; an empty list. For most of these procedures, that's all that is +;;; required. +;;; +;;; However, we have to do a little more work for some procedures that +;;; *produce* lists from other lists. Were we to extend these procedures to +;;; accept dotted lists, we would have to define how they terminate the lists +;;; produced as results when passed a dotted list. I designed a coherent set +;;; of termination rules for these cases; this was posted to the SRFI-1 +;;; discussion list. I additionally wrote an earlier version of this library +;;; that implemented that spec. It has been discarded during later phases of +;;; the definition and implementation of this library. +;;; +;;; The argument *against* defining these procedures to work on dotted +;;; lists is that dotted lists are the rare, odd case, and that by +;;; arranging for the procedures to handle them, we lose error checking +;;; in the cases where a dotted list is passed by accident -- e.g., when +;;; the programmer swaps a two arguments to a list-processing function, +;;; one being a scalar and one being a list. For example, +;;; (member '(1 3 5 7 9) 7) +;;; This would quietly return #f if we extended MEMBER to accept dotted +;;; lists. +;;; +;;; The SRFI discussion record contains more discussion on this topic. + + +;;; Constructors +;;;;;;;;;;;;;;;; + +;;; Occasionally useful as a value to be passed to a fold or other +;;; higher-order procedure. +(define (xcons d a) (cons a d)) + +;;;; Recursively copy every cons. +;(define (tree-copy x) +; (let recur ((x x)) +; (if (not (pair? x)) x +; (cons (recur (car x)) (recur (cdr x)))))) + +;;; Make a list of length LEN. + +(define (make-list len . maybe-elt) + (check-arg (lambda (n) (and (integer? n) (>= n 0))) len make-list) + (let ((elt (cond ((null? maybe-elt) #f) ; Default value + ((null? (cdr maybe-elt)) (car maybe-elt)) + (else (error "Too many arguments to MAKE-LIST" + (cons len maybe-elt)))))) + (do ((i len (- i 1)) + (ans '() (cons elt ans))) + ((<= i 0) ans)))) + + +;(define (list . ans) ans) ; R4RS + + +;;; Make a list of length LEN. Elt i is (PROC i) for 0 <= i < LEN. + +(define (list-tabulate len proc) + (check-arg (lambda (n) (and (integer? n) (>= n 0))) len list-tabulate) + (check-arg procedure? proc list-tabulate) + (do ((i (- len 1) (- i 1)) + (ans '() (cons (proc i) ans))) + ((< i 0) ans))) + +;;; (cons* a1 a2 ... an) = (cons a1 (cons a2 (cons ... an))) +;;; (cons* a1) = a1 (cons* a1 a2 ...) = (cons a1 (cons* a2 ...)) +;;; +;;; (cons first (unfold not-pair? car cdr rest values)) + +(define (cons* first . rest) + (let recur ((x first) (rest rest)) + (if (pair? rest) + (cons x (recur (car rest) (cdr rest))) + x))) + +;;; (unfold not-pair? car cdr lis values) + +(define (list-copy lis) + (let recur ((lis lis)) + (if (pair? lis) + (cons (car lis) (recur (cdr lis))) + lis))) + +;;; IOTA count [start step] (start start+step ... start+(count-1)*step) + +(define (iota count . maybe-start+step) + (check-arg integer? count iota) + (if (< count 0) (error "Negative step count" iota count)) + (let-optionals maybe-start+step ((start 0) (step 1)) + (check-arg number? start iota) + (check-arg number? step iota) + (let ((last-val (+ start (* (- count 1) step)))) + (do ((count count (- count 1)) + (val last-val (- val step)) + (ans '() (cons val ans))) + ((<= count 0) ans))))) + +;;; I thought these were lovely, but the public at large did not share my +;;; enthusiasm... +;;; :IOTA to (0 ... to-1) +;;; :IOTA from to (from ... to-1) +;;; :IOTA from to step (from from+step ...) + +;;; IOTA: to (1 ... to) +;;; IOTA: from to (from+1 ... to) +;;; IOTA: from to step (from+step from+2step ...) + +;(define (%parse-iota-args arg1 rest-args proc) +; (let ((check (lambda (n) (check-arg integer? n proc)))) +; (check arg1) +; (if (pair? rest-args) +; (let ((arg2 (check (car rest-args))) +; (rest (cdr rest-args))) +; (if (pair? rest) +; (let ((arg3 (check (car rest))) +; (rest (cdr rest))) +; (if (pair? rest) (error "Too many parameters" proc arg1 rest-args) +; (values arg1 arg2 arg3))) +; (values arg1 arg2 1))) +; (values 0 arg1 1)))) +; +;(define (iota: arg1 . rest-args) +; (receive (from to step) (%parse-iota-args arg1 rest-args iota:) +; (let* ((numsteps (floor (/ (- to from) step))) +; (last-val (+ from (* step numsteps)))) +; (if (< numsteps 0) (error "Negative step count" iota: from to step)) +; (do ((steps-left numsteps (- steps-left 1)) +; (val last-val (- val step)) +; (ans '() (cons val ans))) +; ((<= steps-left 0) ans))))) +; +; +;(define (:iota arg1 . rest-args) +; (receive (from to step) (%parse-iota-args arg1 rest-args :iota) +; (let* ((numsteps (ceiling (/ (- to from) step))) +; (last-val (+ from (* step (- numsteps 1))))) +; (if (< numsteps 0) (error "Negative step count" :iota from to step)) +; (do ((steps-left numsteps (- steps-left 1)) +; (val last-val (- val step)) +; (ans '() (cons val ans))) +; ((<= steps-left 0) ans))))) + + + +(define (circular-list val1 . vals) + (let ((ans (cons val1 vals))) + (set-cdr! (last-pair ans) ans) + ans)) + +;;; ::= () ; Empty proper list +;;; | (cons ) ; Proper-list pair +;;; Note that this definition rules out circular lists -- and this +;;; function is required to detect this case and return false. + +(define (proper-list? x) + (let lp ((x x) (lag x)) + (if (pair? x) + (let ((x (cdr x))) + (if (pair? x) + (let ((x (cdr x)) + (lag (cdr lag))) + (and (not (eq? x lag)) (lp x lag))) + (null? x))) + (null? x)))) + + +;;; A dotted list is a finite list (possibly of length 0) terminated +;;; by a non-nil value. Any non-cons, non-nil value (e.g., "foo" or 5) +;;; is a dotted list of length 0. +;;; +;;; ::= ; Empty dotted list +;;; | (cons ) ; Proper-list pair + +(define (dotted-list? x) + (let lp ((x x) (lag x)) + (if (pair? x) + (let ((x (cdr x))) + (if (pair? x) + (let ((x (cdr x)) + (lag (cdr lag))) + (and (not (eq? x lag)) (lp x lag))) + (not (null? x)))) + (not (null? x))))) + +(define (circular-list? x) + (let lp ((x x) (lag x)) + (and (pair? x) + (let ((x (cdr x))) + (and (pair? x) + (let ((x (cdr x)) + (lag (cdr lag))) + (or (eq? x lag) (lp x lag)))))))) + +(define (not-pair? x) (not (pair? x))) ; Inline me. + +;;; This is a legal definition which is fast and sloppy: +;;; (define null-list? not-pair?) +;;; but we'll provide a more careful one: +(define (null-list? l) + (cond ((pair? l) #f) + ((null? l) #t) + (else (error "null-pair?: argument out of domain" l)))) + + +(define (list= = . lists) + (or (null? lists) ; special case + + (let lp1 ((list-a (car lists)) (others (cdr lists))) + (or (null? others) + (let ((list-b (car others)) + (others (cdr others))) + (if (eq? list-a list-b) ; EQ? => LIST= + (lp1 list-b others) + (let lp2 ((list-a list-a) (list-b list-b)) + (if (null-list? list-a) + (and (null-list? list-b) + (lp1 list-b others)) + (and (not (null-list? list-b)) + (= (car list-a) (car list-b)) + (lp2 (cdr list-a) (cdr list-b))))))))))) + + + +;;; R4RS, so commented out. +;(define (length x) ; LENGTH may diverge or +; (let lp ((x x) (len 0)) ; raise an error if X is +; (if (pair? x) ; a circular list. This version +; (lp (cdr x) (+ len 1)) ; diverges. +; len))) + +(define (length+ x) ; Returns #f if X is circular. + (let lp ((x x) (lag x) (len 0)) + (if (pair? x) + (let ((x (cdr x)) + (len (+ len 1))) + (if (pair? x) + (let ((x (cdr x)) + (lag (cdr lag)) + (len (+ len 1))) + (and (not (eq? x lag)) (lp x lag len))) + len)) + len))) + +(define (zip list1 . more-lists) (apply map list list1 more-lists)) + + +;;; Selectors +;;;;;;;;;;;;; + +;;; R4RS non-primitives: +;(define (caar x) (car (car x))) +;(define (cadr x) (car (cdr x))) +;(define (cdar x) (cdr (car x))) +;(define (cddr x) (cdr (cdr x))) +; +;(define (caaar x) (caar (car x))) +;(define (caadr x) (caar (cdr x))) +;(define (cadar x) (cadr (car x))) +;(define (caddr x) (cadr (cdr x))) +;(define (cdaar x) (cdar (car x))) +;(define (cdadr x) (cdar (cdr x))) +;(define (cddar x) (cddr (car x))) +;(define (cdddr x) (cddr (cdr x))) +; +;(define (caaaar x) (caaar (car x))) +;(define (caaadr x) (caaar (cdr x))) +;(define (caadar x) (caadr (car x))) +;(define (caaddr x) (caadr (cdr x))) +;(define (cadaar x) (cadar (car x))) +;(define (cadadr x) (cadar (cdr x))) +;(define (caddar x) (caddr (car x))) +;(define (cadddr x) (caddr (cdr x))) +;(define (cdaaar x) (cdaar (car x))) +;(define (cdaadr x) (cdaar (cdr x))) +;(define (cdadar x) (cdadr (car x))) +;(define (cdaddr x) (cdadr (cdr x))) +;(define (cddaar x) (cddar (car x))) +;(define (cddadr x) (cddar (cdr x))) +;(define (cdddar x) (cdddr (car x))) +;(define (cddddr x) (cdddr (cdr x))) + + +(define first car) +(define second cadr) +(define third caddr) +(define fourth cadddr) +(define (fifth x) (car (cddddr x))) +(define (sixth x) (cadr (cddddr x))) +(define (seventh x) (caddr (cddddr x))) +(define (eighth x) (cadddr (cddddr x))) +(define (ninth x) (car (cddddr (cddddr x)))) +(define (tenth x) (cadr (cddddr (cddddr x)))) + +(define (car+cdr pair) (values (car pair) (cdr pair))) + +;;; take & drop + +(define (take lis k) + (check-arg integer? k take) + (let recur ((lis lis) (k k)) + (if (zero? k) '() + (cons (car lis) + (recur (cdr lis) (- k 1)))))) + +(define (drop lis k) + (check-arg integer? k drop) + (let iter ((lis lis) (k k)) + (if (zero? k) lis (iter (cdr lis) (- k 1))))) + +(define (take! lis k) + (check-arg integer? k take!) + (if (zero? k) '() + (begin (set-cdr! (drop lis (- k 1)) '()) + lis))) + +;;; TAKE-RIGHT and DROP-RIGHT work by getting two pointers into the list, +;;; off by K, then chasing down the list until the lead pointer falls off +;;; the end. + +(define (take-right lis k) + (check-arg integer? k take-right) + (let lp ((lag lis) (lead (drop lis k))) + (if (pair? lead) + (lp (cdr lag) (cdr lead)) + lag))) + +(define (drop-right lis k) + (check-arg integer? k drop-right) + (let recur ((lag lis) (lead (drop lis k))) + (if (pair? lead) + (cons (car lag) (recur (cdr lag) (cdr lead))) + '()))) + +;;; In this function, LEAD is actually K+1 ahead of LAG. This lets +;;; us stop LAG one step early, in time to smash its cdr to (). +(define (drop-right! lis k) + (check-arg integer? k drop-right!) + (let ((lead (drop lis k))) + (if (pair? lead) + + (let lp ((lag lis) (lead (cdr lead))) ; Standard case + (if (pair? lead) + (lp (cdr lag) (cdr lead)) + (begin (set-cdr! lag '()) + lis))) + + '()))) ; Special case dropping everything -- no cons to side-effect. + +;(define (list-ref lis i) (car (drop lis i))) ; R4RS + +;;; These use the APL convention, whereby negative indices mean +;;; "from the right." I liked them, but they didn't win over the +;;; SRFI reviewers. +;;; K >= 0: Take and drop K elts from the front of the list. +;;; K <= 0: Take and drop -K elts from the end of the list. + +;(define (take lis k) +; (check-arg integer? k take) +; (if (negative? k) +; (list-tail lis (+ k (length lis))) +; (let recur ((lis lis) (k k)) +; (if (zero? k) '() +; (cons (car lis) +; (recur (cdr lis) (- k 1))))))) +; +;(define (drop lis k) +; (check-arg integer? k drop) +; (if (negative? k) +; (let recur ((lis lis) (nelts (+ k (length lis)))) +; (if (zero? nelts) '() +; (cons (car lis) +; (recur (cdr lis) (- nelts 1))))) +; (list-tail lis k))) +; +; +;(define (take! lis k) +; (check-arg integer? k take!) +; (cond ((zero? k) '()) +; ((positive? k) +; (set-cdr! (list-tail lis (- k 1)) '()) +; lis) +; (else (list-tail lis (+ k (length lis)))))) +; +;(define (drop! lis k) +; (check-arg integer? k drop!) +; (if (negative? k) +; (let ((nelts (+ k (length lis)))) +; (if (zero? nelts) '() +; (begin (set-cdr! (list-tail lis (- nelts 1)) '()) +; lis))) +; (list-tail lis k))) + +(define (split-at x k) + (check-arg integer? k split-at) + (let recur ((lis x) (k k)) + (if (zero? k) (values '() lis) + (receive (prefix suffix) (recur (cdr lis) (- k 1)) + (values (cons (car lis) prefix) suffix))))) + +(define (split-at! x k) + (check-arg integer? k split-at!) + (if (zero? k) (values '() x) + (let* ((prev (drop x (- k 1))) + (suffix (cdr prev))) + (set-cdr! prev '()) + (values x suffix)))) + + +(define (last lis) (car (last-pair lis))) + +(define (last-pair lis) + (check-arg pair? lis last-pair) + (let lp ((lis lis)) + (let ((tail (cdr lis))) + (if (pair? tail) (lp tail) lis)))) + + +;;; Unzippers -- 1 through 5 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (unzip1 lis) (map car lis)) + +(define (unzip2 lis) + (let recur ((lis lis)) + (if (null-list? lis) (values lis lis) ; Use NOT-PAIR? to handle + (let ((elt (car lis))) ; dotted lists. + (receive (a b) (recur (cdr lis)) + (values (cons (car elt) a) + (cons (cadr elt) b))))))) + +(define (unzip3 lis) + (let recur ((lis lis)) + (if (null-list? lis) (values lis lis lis) + (let ((elt (car lis))) + (receive (a b c) (recur (cdr lis)) + (values (cons (car elt) a) + (cons (cadr elt) b) + (cons (caddr elt) c))))))) + +(define (unzip4 lis) + (let recur ((lis lis)) + (if (null-list? lis) (values lis lis lis lis) + (let ((elt (car lis))) + (receive (a b c d) (recur (cdr lis)) + (values (cons (car elt) a) + (cons (cadr elt) b) + (cons (caddr elt) c) + (cons (cadddr elt) d))))))) + +(define (unzip5 lis) + (let recur ((lis lis)) + (if (null-list? lis) (values lis lis lis lis lis) + (let ((elt (car lis))) + (receive (a b c d e) (recur (cdr lis)) + (values (cons (car elt) a) + (cons (cadr elt) b) + (cons (caddr elt) c) + (cons (cadddr elt) d) + (cons (car (cddddr elt)) e))))))) + + +;;; append! append-reverse append-reverse! concatenate concatenate! +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (append! . lists) + ;; First, scan through lists looking for a non-empty one. + (let lp ((lists lists) (prev '())) + (if (not (pair? lists)) prev + (let ((first (car lists)) + (rest (cdr lists))) + (if (not (pair? first)) (lp rest first) + + ;; Now, do the splicing. + (let lp2 ((tail-cons (last-pair first)) + (rest rest)) + (if (pair? rest) + (let ((next (car rest)) + (rest (cdr rest))) + (set-cdr! tail-cons next) + (lp2 (if (pair? next) (last-pair next) tail-cons) + rest)) + first))))))) + +;;; APPEND is R4RS. +;(define (append . lists) +; (if (pair? lists) +; (let recur ((list1 (car lists)) (lists (cdr lists))) +; (if (pair? lists) +; (let ((tail (recur (car lists) (cdr lists)))) +; (fold-right cons tail list1)) ; Append LIST1 & TAIL. +; list1)) +; '())) + +;(define (append-reverse rev-head tail) (fold cons tail rev-head)) + +;(define (append-reverse! rev-head tail) +; (pair-fold (lambda (pair tail) (set-cdr! pair tail) pair) +; tail +; rev-head)) + +;;; Hand-inline the FOLD and PAIR-FOLD ops for speed. + +(define (append-reverse rev-head tail) + (let lp ((rev-head rev-head) (tail tail)) + (if (null-list? rev-head) tail + (lp (cdr rev-head) (cons (car rev-head) tail))))) + +(define (append-reverse! rev-head tail) + (let lp ((rev-head rev-head) (tail tail)) + (if (null-list? rev-head) tail + (let ((next-rev (cdr rev-head))) + (set-cdr! rev-head tail) + (lp next-rev rev-head))))) + + +(define (concatenate lists) (reduce-right append '() lists)) +(define (concatenate! lists) (reduce-right append! '() lists)) + +;;; Fold/map internal utilities +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; These little internal utilities are used by the general +;;; fold & mapper funs for the n-ary cases . It'd be nice if they got inlined. +;;; One the other hand, the n-ary cases are painfully inefficient as it is. +;;; An aggressive implementation should simply re-write these functions +;;; for raw efficiency; I have written them for as much clarity, portability, +;;; and simplicity as can be achieved. +;;; +;;; I use the dreaded call/cc to do local aborts. A good compiler could +;;; handle this with extreme efficiency. An implementation that provides +;;; a one-shot, non-persistent continuation grabber could help the compiler +;;; out by using that in place of the call/cc's in these routines. +;;; +;;; These functions have funky definitions that are precisely tuned to +;;; the needs of the fold/map procs -- for example, to minimize the number +;;; of times the argument lists need to be examined. + +;;; Return (map cdr lists). +;;; However, if any element of LISTS is empty, just abort and return '(). +(define (%cdrs lists) + (call-with-current-continuation + (lambda (abort) + (let recur ((lists lists)) + (if (pair? lists) + (let ((lis (car lists))) + (if (null-list? lis) (abort '()) + (cons (cdr lis) (recur (cdr lists))))) + '()))))) + +(define (%cars+ lists last-elt) ; (append! (map car lists) (list last-elt)) + (let recur ((lists lists)) + (if (pair? lists) (cons (caar lists) (recur (cdr lists))) (list last-elt)))) + +;;; LISTS is a (not very long) non-empty list of lists. +;;; Return two lists: the cars & the cdrs of the lists. +;;; However, if any of the lists is empty, just abort and return [() ()]. + +(define (%cars+cdrs lists) + (call-with-current-continuation + (lambda (abort) + (let recur ((lists lists)) + (if (pair? lists) + (receive (list other-lists) (car+cdr lists) + (if (null-list? list) (abort '() '()) ; LIST is empty -- bail out + (receive (a d) (car+cdr list) + (receive (cars cdrs) (recur other-lists) + (values (cons a cars) (cons d cdrs)))))) + (values '() '())))))) + +;;; Like %CARS+CDRS, but we pass in a final elt tacked onto the end of the +;;; cars list. What a hack. +(define (%cars+cdrs+ lists cars-final) + (call-with-current-continuation + (lambda (abort) + (let recur ((lists lists)) + (if (pair? lists) + (receive (list other-lists) (car+cdr lists) + (if (null-list? list) (abort '() '()) ; LIST is empty -- bail out + (receive (a d) (car+cdr list) + (receive (cars cdrs) (recur other-lists) + (values (cons a cars) (cons d cdrs)))))) + (values (list cars-final) '())))))) + +;;; Like %CARS+CDRS, but blow up if any list is empty. +(define (%cars+cdrs/no-test lists) + (let recur ((lists lists)) + (if (pair? lists) + (receive (list other-lists) (car+cdr lists) + (receive (a d) (car+cdr list) + (receive (cars cdrs) (recur other-lists) + (values (cons a cars) (cons d cdrs))))) + (values '() '())))) + + +;;; count +;;;;;;;;; +(define (count pred list1 . lists) + (check-arg procedure? pred count) + (if (pair? lists) + + ;; N-ary case + (let lp ((list1 list1) (lists lists) (i 0)) + (if (null-list? list1) i + (receive (as ds) (%cars+cdrs lists) + (if (null? as) i + (lp (cdr list1) ds + (if (apply pred (car list1) as) (+ i 1) i)))))) + + ;; Fast path + (let lp ((lis list1) (i 0)) + (if (null-list? lis) i + (lp (cdr lis) (if (pred (car lis)) (+ i 1) i)))))) + + +;;; fold/unfold +;;;;;;;;;;;;;;; + +(define (unfold-right p f g seed . maybe-tail) + (check-arg procedure? p unfold-right) + (check-arg procedure? f unfold-right) + (check-arg procedure? g unfold-right) + (let lp ((seed seed) (ans (:optional maybe-tail '()))) + (if (p seed) ans + (lp (g seed) + (cons (f seed) ans))))) + + +(define (unfold p f g seed . maybe-tail-gen) + (check-arg procedure? p unfold) + (check-arg procedure? f unfold) + (check-arg procedure? g unfold) + (if (pair? maybe-tail-gen) + + (let ((tail-gen (car maybe-tail-gen))) + (if (pair? (cdr maybe-tail-gen)) + (apply error "Too many arguments" unfold p f g seed maybe-tail-gen) + + (let recur ((seed seed)) + (if (p seed) (tail-gen seed) + (cons (f seed) (recur (g seed))))))) + + (let recur ((seed seed)) + (if (p seed) '() + (cons (f seed) (recur (g seed))))))) + + +(define (fold kons knil lis1 . lists) + (check-arg procedure? kons fold) + (if (pair? lists) + (let lp ((lists (cons lis1 lists)) (ans knil)) ; N-ary case + (receive (cars+ans cdrs) (%cars+cdrs+ lists ans) + (if (null? cars+ans) ans ; Done. + (lp cdrs (apply kons cars+ans))))) + + (let lp ((lis lis1) (ans knil)) ; Fast path + (if (null-list? lis) ans + (lp (cdr lis) (kons (car lis) ans)))))) + + +(define (fold-right kons knil lis1 . lists) + (check-arg procedure? kons fold-right) + (if (pair? lists) + (let recur ((lists (cons lis1 lists))) ; N-ary case + (let ((cdrs (%cdrs lists))) + (if (null? cdrs) knil + (apply kons (%cars+ lists (recur cdrs)))))) + + (let recur ((lis lis1)) ; Fast path + (if (null-list? lis) knil + (let ((head (car lis))) + (kons head (recur (cdr lis)))))))) + + +(define (pair-fold-right f zero lis1 . lists) + (check-arg procedure? f pair-fold-right) + (if (pair? lists) + (let recur ((lists (cons lis1 lists))) ; N-ary case + (let ((cdrs (%cdrs lists))) + (if (null? cdrs) zero + (apply f (append! lists (list (recur cdrs))))))) + + (let recur ((lis lis1)) ; Fast path + (if (null-list? lis) zero (f lis (recur (cdr lis))))))) + +(define (pair-fold f zero lis1 . lists) + (check-arg procedure? f pair-fold) + (if (pair? lists) + (let lp ((lists (cons lis1 lists)) (ans zero)) ; N-ary case + (let ((tails (%cdrs lists))) + (if (null? tails) ans + (lp tails (apply f (append! lists (list ans))))))) + + (let lp ((lis lis1) (ans zero)) + (if (null-list? lis) ans + (let ((tail (cdr lis))) ; Grab the cdr now, + (lp tail (f lis ans))))))) ; in case F SET-CDR!s LIS. + + +;;; REDUCE and REDUCE-RIGHT only use RIDENTITY in the empty-list case. +;;; These cannot meaningfully be n-ary. + +(define (reduce f ridentity lis) + (check-arg procedure? f reduce) + (if (null-list? lis) ridentity + (fold f (car lis) (cdr lis)))) + +(define (reduce-right f ridentity lis) + (check-arg procedure? f reduce-right) + (if (null-list? lis) ridentity + (let recur ((head (car lis)) (lis (cdr lis))) + (if (pair? lis) + (f head (recur (car lis) (cdr lis))) + head)))) + + + +;;; Mappers: append-map append-map! pair-for-each map! filter-map map-in-order +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (append-map f lis1 . lists) + (really-append-map append-map append f lis1 lists)) +(define (append-map! f lis1 . lists) + (really-append-map append-map! append! f lis1 lists)) + +(define (really-append-map who appender f lis1 lists) + (check-arg procedure? f who) + (if (pair? lists) + (receive (cars cdrs) (%cars+cdrs (cons lis1 lists)) + (if (null? cars) '() + (let recur ((cars cars) (cdrs cdrs)) + (let ((vals (apply f cars))) + (receive (cars2 cdrs2) (%cars+cdrs cdrs) + (if (null? cars2) vals + (appender vals (recur cars2 cdrs2)))))))) + + ;; Fast path + (if (null-list? lis1) '() + (let recur ((elt (car lis1)) (rest (cdr lis1))) + (let ((vals (f elt))) + (if (null-list? rest) vals + (appender vals (recur (car rest) (cdr rest))))))))) + + +(define (pair-for-each proc lis1 . lists) + (check-arg procedure? proc pair-for-each) + (if (pair? lists) + + (let lp ((lists (cons lis1 lists))) + (let ((tails (%cdrs lists))) + (if (pair? tails) + (begin (apply proc lists) + (lp tails))))) + + ;; Fast path. + (let lp ((lis lis1)) + (if (not (null-list? lis)) + (let ((tail (cdr lis))) ; Grab the cdr now, + (proc lis) ; in case PROC SET-CDR!s LIS. + (lp tail)))))) + +;;; We stop when LIS1 runs out, not when any list runs out. +(define (map! f lis1 . lists) + (check-arg procedure? f map!) + (if (pair? lists) + (let lp ((lis1 lis1) (lists lists)) + (if (not (null-list? lis1)) + (receive (heads tails) (%cars+cdrs/no-test lists) + (set-car! lis1 (apply f (car lis1) heads)) + (lp (cdr lis1) tails)))) + + ;; Fast path. + (pair-for-each (lambda (pair) (set-car! pair (f (car pair)))) lis1)) + lis1) + + +;;; Map F across L, and save up all the non-false results. +(define (filter-map f lis1 . lists) + (check-arg procedure? f filter-map) + (if (pair? lists) + (let recur ((lists (cons lis1 lists))) + (receive (cars cdrs) (%cars+cdrs lists) + (if (pair? cars) + (cond ((apply f cars) => (lambda (x) (cons x (recur cdrs)))) + (else (recur cdrs))) ; Tail call in this arm. + '()))) + + ;; Fast path. + (let recur ((lis lis1)) + (if (null-list? lis) lis + (let ((tail (recur (cdr lis)))) + (cond ((f (car lis)) => (lambda (x) (cons x tail))) + (else tail))))))) + + +;;; Map F across lists, guaranteeing to go left-to-right. +;;; NOTE: Some implementations of R5RS MAP are compliant with this spec; +;;; in which case this procedure may simply be defined as a synonym for MAP. + +(define (map-in-order f lis1 . lists) + (check-arg procedure? f map-in-order) + (if (pair? lists) + (let recur ((lists (cons lis1 lists))) + (receive (cars cdrs) (%cars+cdrs lists) + (if (pair? cars) + (let ((x (apply f cars))) ; Do head first, + (cons x (recur cdrs))) ; then tail. + '()))) + + ;; Fast path. + (let recur ((lis lis1)) + (if (null-list? lis) lis + (let ((tail (cdr lis)) + (x (f (car lis)))) ; Do head first, + (cons x (recur tail))))))) ; then tail. + + +;;; We extend MAP to handle arguments of unequal length. +(define map map-in-order) + + +;;; filter, remove, partition +;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; FILTER, REMOVE, PARTITION and their destructive counterparts do not +;;; disorder the elements of their argument. + +;; This FILTER shares the longest tail of L that has no deleted elements. +;; If Scheme had multi-continuation calls, they could be made more efficient. + +(define (filter pred lis) ; Sleazing with EQ? makes this + (check-arg procedure? pred filter) ; one faster. + (let recur ((lis lis)) + (if (null-list? lis) lis ; Use NOT-PAIR? to handle dotted lists. + (let ((head (car lis)) + (tail (cdr lis))) + (if (pred head) + (let ((new-tail (recur tail))) ; Replicate the RECUR call so + (if (eq? tail new-tail) lis + (cons head new-tail))) + (recur tail)))))) ; this one can be a tail call. + + +;;; Another version that shares longest tail. +;(define (filter pred lis) +; (receive (ans no-del?) +; ;; (recur l) returns L with (pred x) values filtered. +; ;; It also returns a flag NO-DEL? if the returned value +; ;; is EQ? to L, i.e. if it didn't have to delete anything. +; (let recur ((l l)) +; (if (null-list? l) (values l #t) +; (let ((x (car l)) +; (tl (cdr l))) +; (if (pred x) +; (receive (ans no-del?) (recur tl) +; (if no-del? +; (values l #t) +; (values (cons x ans) #f))) +; (receive (ans no-del?) (recur tl) ; Delete X. +; (values ans #f)))))) +; ans)) + + + +;(define (filter! pred lis) ; Things are much simpler +; (let recur ((lis lis)) ; if you are willing to +; (if (pair? lis) ; push N stack frames & do N +; (cond ((pred (car lis)) ; SET-CDR! writes, where N is +; (set-cdr! lis (recur (cdr lis))); the length of the answer. +; lis) +; (else (recur (cdr lis)))) +; lis))) + + +;;; This implementation of FILTER! +;;; - doesn't cons, and uses no stack; +;;; - is careful not to do redundant SET-CDR! writes, as writes to memory are +;;; usually expensive on modern machines, and can be extremely expensive on +;;; modern Schemes (e.g., ones that have generational GC's). +;;; It just zips down contiguous runs of in and out elts in LIS doing the +;;; minimal number of SET-CDR!s to splice the tail of one run of ins to the +;;; beginning of the next. + +(define (filter! pred lis) + (check-arg procedure? pred filter!) + (let lp ((ans lis)) + (cond ((null-list? ans) ans) ; Scan looking for + ((not (pred (car ans))) (lp (cdr ans))) ; first cons of result. + + ;; ANS is the eventual answer. + ;; SCAN-IN: (CDR PREV) = LIS and (CAR PREV) satisfies PRED. + ;; Scan over a contiguous segment of the list that + ;; satisfies PRED. + ;; SCAN-OUT: (CAR PREV) satisfies PRED. Scan over a contiguous + ;; segment of the list that *doesn't* satisfy PRED. + ;; When the segment ends, patch in a link from PREV + ;; to the start of the next good segment, and jump to + ;; SCAN-IN. + (else (letrec ((scan-in (lambda (prev lis) + (if (pair? lis) + (if (pred (car lis)) + (scan-in lis (cdr lis)) + (scan-out prev (cdr lis)))))) + (scan-out (lambda (prev lis) + (let lp ((lis lis)) + (if (pair? lis) + (if (pred (car lis)) + (begin (set-cdr! prev lis) + (scan-in lis (cdr lis))) + (lp (cdr lis))) + (set-cdr! prev lis)))))) + (scan-in ans (cdr ans)) + ans))))) + + + +;;; Answers share common tail with LIS where possible; +;;; the technique is slightly subtle. + +(define (partition pred lis) + (check-arg procedure? pred partition) + (let recur ((lis lis)) + (if (null-list? lis) (values lis lis) ; Use NOT-PAIR? to handle dotted lists. + (let ((elt (car lis)) + (tail (cdr lis))) + (receive (in out) (recur tail) + (if (pred elt) + (values (if (pair? out) (cons elt in) lis) out) + (values in (if (pair? in) (cons elt out) lis)))))))) + + + +;(define (partition! pred lis) ; Things are much simpler +; (let recur ((lis lis)) ; if you are willing to +; (if (null-list? lis) (values lis lis) ; push N stack frames & do N +; (let ((elt (car lis))) ; SET-CDR! writes, where N is +; (receive (in out) (recur (cdr lis)) ; the length of LIS. +; (cond ((pred elt) +; (set-cdr! lis in) +; (values lis out)) +; (else (set-cdr! lis out) +; (values in lis)))))))) + + +;;; This implementation of PARTITION! +;;; - doesn't cons, and uses no stack; +;;; - is careful not to do redundant SET-CDR! writes, as writes to memory are +;;; usually expensive on modern machines, and can be extremely expensive on +;;; modern Schemes (e.g., ones that have generational GC's). +;;; It just zips down contiguous runs of in and out elts in LIS doing the +;;; minimal number of SET-CDR!s to splice these runs together into the result +;;; lists. + +(define (partition! pred lis) + (check-arg procedure? pred partition!) + (if (null-list? lis) (values lis lis) + + ;; This pair of loops zips down contiguous in & out runs of the + ;; list, splicing the runs together. The invariants are + ;; SCAN-IN: (cdr in-prev) = LIS. + ;; SCAN-OUT: (cdr out-prev) = LIS. + (letrec ((scan-in (lambda (in-prev out-prev lis) + (let lp ((in-prev in-prev) (lis lis)) + (if (pair? lis) + (if (pred (car lis)) + (lp lis (cdr lis)) + (begin (set-cdr! out-prev lis) + (scan-out in-prev lis (cdr lis)))) + (set-cdr! out-prev lis))))) ; Done. + + (scan-out (lambda (in-prev out-prev lis) + (let lp ((out-prev out-prev) (lis lis)) + (if (pair? lis) + (if (pred (car lis)) + (begin (set-cdr! in-prev lis) + (scan-in lis out-prev (cdr lis))) + (lp lis (cdr lis))) + (set-cdr! in-prev lis)))))) ; Done. + + ;; Crank up the scan&splice loops. + (if (pred (car lis)) + ;; LIS begins in-list. Search for out-list's first pair. + (let lp ((prev-l lis) (l (cdr lis))) + (cond ((not (pair? l)) (values lis l)) + ((pred (car l)) (lp l (cdr l))) + (else (scan-out prev-l l (cdr l)) + (values lis l)))) ; Done. + + ;; LIS begins out-list. Search for in-list's first pair. + (let lp ((prev-l lis) (l (cdr lis))) + (cond ((not (pair? l)) (values l lis)) + ((pred (car l)) + (scan-in l prev-l (cdr l)) + (values l lis)) ; Done. + (else (lp l (cdr l))))))))) + + +;;; Inline us, please. +(define (remove pred l) (filter (lambda (x) (not (pred x))) l)) +(define (remove! pred l) (filter! (lambda (x) (not (pred x))) l)) + + + +;;; Here's the taxonomy for the DELETE/ASSOC/MEMBER functions. +;;; (I don't actually think these are the world's most important +;;; functions -- the procedural FILTER/REMOVE/FIND/FIND-TAIL variants +;;; are far more general.) +;;; +;;; Function Action +;;; --------------------------------------------------------------------------- +;;; remove pred lis Delete by general predicate +;;; delete x lis [=] Delete by element comparison +;;; +;;; find pred lis Search by general predicate +;;; find-tail pred lis Search by general predicate +;;; member x lis [=] Search by element comparison +;;; +;;; assoc key lis [=] Search alist by key comparison +;;; alist-delete key alist [=] Alist-delete by key comparison + +(define (delete x lis . maybe-=) + (let ((= (:optional maybe-= equal?))) + (filter (lambda (y) (not (= x y))) lis))) + +(define (delete! x lis . maybe-=) + (let ((= (:optional maybe-= equal?))) + (filter! (lambda (y) (not (= x y))) lis))) + +;;; Extended from R4RS to take an optional comparison argument. +(define (member x lis . maybe-=) + (let ((= (:optional maybe-= equal?))) + (find-tail (lambda (y) (= x y)) lis))) + +;;; R4RS, hence we don't bother to define. +;;; The MEMBER and then FIND-TAIL call should definitely +;;; be inlined for MEMQ & MEMV. +;(define (memq x lis) (member x lis eq?)) +;(define (memv x lis) (member x lis eqv?)) + + +;;; right-duplicate deletion +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; delete-duplicates delete-duplicates! +;;; +;;; Beware -- these are N^2 algorithms. To efficiently remove duplicates +;;; in long lists, sort the list to bring duplicates together, then use a +;;; linear-time algorithm to kill the dups. Or use an algorithm based on +;;; element-marking. The former gives you O(n lg n), the latter is linear. + +(define (delete-duplicates lis . maybe-=) + (let ((elt= (:optional maybe-= equal?))) + (check-arg procedure? elt= delete-duplicates) + (let recur ((lis lis)) + (if (null-list? lis) lis + (let* ((x (car lis)) + (tail (cdr lis)) + (new-tail (recur (delete x tail elt=)))) + (if (eq? tail new-tail) lis (cons x new-tail))))))) + +(define (delete-duplicates! lis maybe-=) + (let ((elt= (:optional maybe-= equal?))) + (check-arg procedure? elt= delete-duplicates!) + (let recur ((lis lis)) + (if (null-list? lis) lis + (let* ((x (car lis)) + (tail (cdr lis)) + (new-tail (recur (delete! x tail elt=)))) + (if (eq? tail new-tail) lis (cons x new-tail))))))) + + +;;; alist stuff +;;;;;;;;;;;;;;; + +;;; Extended from R4RS to take an optional comparison argument. +(define (assoc x lis . maybe-=) + (let ((= (:optional maybe-= equal?))) + (find (lambda (entry) (= x (car entry))) lis))) + +(define (alist-cons key datum alist) (cons (cons key datum) alist)) + +(define (alist-copy alist) + (map (lambda (elt) (cons (car elt) (cdr elt))) + alist)) + +(define (alist-delete key alist . maybe-=) + (let ((= (:optional maybe-= equal?))) + (filter (lambda (elt) (not (= key (car elt)))) alist))) + +(define (alist-delete! key alist . maybe-=) + (let ((= (:optional maybe-= equal?))) + (filter! (lambda (elt) (not (= key (car elt)))) alist))) + + +;;; find find-tail take-while drop-while span break any every list-index +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (find pred list) + (cond ((find-tail pred list) => car) + (else #f))) + +(define (find-tail pred list) + (check-arg procedure? pred find-tail) + (let lp ((list list)) + (and (not (null-list? list)) + (if (pred (car list)) list + (lp (cdr list)))))) + +(define (take-while pred lis) + (check-arg procedure? pred take-while) + (let recur ((lis lis)) + (if (null-list? lis) '() + (let ((x (car lis))) + (if (pred x) + (cons x (recur (cdr lis))) + '()))))) + +(define (drop-while pred lis) + (check-arg procedure? pred drop-while) + (let lp ((lis lis)) + (if (null-list? lis) '() + (if (pred (car lis)) + (lp (cdr lis)) + lis)))) + +(define (take-while! pred lis) + (check-arg procedure? pred take-while!) + (if (or (null-list? lis) (not (pred (car lis)))) '() + (begin (let lp ((prev lis) (rest (cdr lis))) + (if (pair? rest) + (let ((x (car rest))) + (if (pred x) (lp rest (cdr rest)) + (set-cdr! prev '()))))) + lis))) + +(define (span pred lis) + (check-arg procedure? pred span) + (let recur ((lis lis)) + (if (null-list? lis) (values '() '()) + (let ((x (car lis))) + (if (pred x) + (receive (prefix suffix) (recur (cdr lis)) + (values (cons x prefix) suffix)) + (values '() lis)))))) + +(define (span! pred lis) + (check-arg procedure? pred span!) + (if (or (null-list? lis) (not (pred (car lis)))) (values '() lis) + (let ((suffix (let lp ((prev lis) (rest (cdr lis))) + (if (null-list? rest) rest + (let ((x (car rest))) + (if (pred x) (lp rest (cdr rest)) + (begin (set-cdr! prev '()) + rest))))))) + (values lis suffix)))) + + +(define (break pred lis) (span (lambda (x) (not (pred x))) lis)) +(define (break! pred lis) (span! (lambda (x) (not (pred x))) lis)) + +(define (any pred lis1 . lists) + (check-arg procedure? pred any) + (if (pair? lists) + + ;; N-ary case + (receive (heads tails) (%cars+cdrs (cons lis1 lists)) + (and (pair? heads) + (let lp ((heads heads) (tails tails)) + (receive (next-heads next-tails) (%cars+cdrs tails) + (if (pair? next-heads) + (or (apply pred heads) (lp next-heads next-tails)) + (apply pred heads)))))) ; Last PRED app is tail call. + + ;; Fast path + (and (not (null-list? lis1)) + (let lp ((head (car lis1)) (tail (cdr lis1))) + (if (null-list? tail) + (pred head) ; Last PRED app is tail call. + (or (pred head) (lp (car tail) (cdr tail)))))))) + + +;(define (every pred list) ; Simple definition. +; (let lp ((list list)) ; Doesn't return the last PRED value. +; (or (not (pair? list)) +; (and (pred (car list)) +; (lp (cdr list)))))) + +(define (every pred lis1 . lists) + (check-arg procedure? pred every) + (if (pair? lists) + + ;; N-ary case + (receive (heads tails) (%cars+cdrs (cons lis1 lists)) + (or (not (pair? heads)) + (let lp ((heads heads) (tails tails)) + (receive (next-heads next-tails) (%cars+cdrs tails) + (if (pair? next-heads) + (and (apply pred heads) (lp next-heads next-tails)) + (apply pred heads)))))) ; Last PRED app is tail call. + + ;; Fast path + (or (null-list? lis1) + (let lp ((head (car lis1)) (tail (cdr lis1))) + (if (null-list? tail) + (pred head) ; Last PRED app is tail call. + (and (pred head) (lp (car tail) (cdr tail)))))))) + +(define (list-index pred lis1 . lists) + (check-arg procedure? pred list-index) + (if (pair? lists) + + ;; N-ary case + (let lp ((lists (cons lis1 lists)) (n 0)) + (receive (heads tails) (%cars+cdrs lists) + (and (pair? heads) + (if (apply pred heads) n + (lp tails (+ n 1)))))) + + ;; Fast path + (let lp ((lis lis1) (n 0)) + (and (not (null-list? lis)) + (if (pred (car lis)) n (lp (cdr lis) (+ n 1))))))) + +;;; Reverse +;;;;;;;;;;; + +;R4RS, so not defined here. +;(define (reverse lis) (fold cons '() lis)) + +;(define (reverse! lis) +; (pair-fold (lambda (pair tail) (set-cdr! pair tail) pair) '() lis)) + +(define (reverse! lis) + (let lp ((lis lis) (ans '())) + (if (null-list? lis) ans + (let ((tail (cdr lis))) + (set-cdr! lis ans) + (lp tail lis))))) + +;;; Lists-as-sets +;;;;;;;;;;;;;;;;; + +;;; This is carefully tuned code; do not modify casually. +;;; - It is careful to share storage when possible; +;;; - Side-effecting code tries not to perform redundant writes. +;;; - It tries to avoid linear-time scans in special cases where constant-time +;;; computations can be performed. +;;; - It relies on similar properties from the other list-lib procs it calls. +;;; For example, it uses the fact that the implementations of MEMBER and +;;; FILTER in this source code share longest common tails between args +;;; and results to get structure sharing in the lset procedures. + +(define (%lset2<= = lis1 lis2) (every (lambda (x) (member x lis2 =)) lis1)) + +(define (lset<= = . lists) + (check-arg procedure? = lset<=) + (or (not (pair? lists)) ; 0-ary case + (let lp ((s1 (car lists)) (rest (cdr lists))) + (or (not (pair? rest)) + (let ((s2 (car rest)) (rest (cdr rest))) + (and (or (eq? s2 s1) ; Fast path + (%lset2<= = s1 s2)) ; Real test + (lp s2 rest))))))) + +(define (lset= = . lists) + (check-arg procedure? = lset=) + (or (not (pair? lists)) ; 0-ary case + (let lp ((s1 (car lists)) (rest (cdr lists))) + (or (not (pair? rest)) + (let ((s2 (car rest)) + (rest (cdr rest))) + (and (or (eq? s1 s2) ; Fast path + (and (%lset2<= = s1 s2) (%lset2<= = s2 s1))) ; Real test + (lp s2 rest))))))) + + +(define (lset-adjoin = lis . elts) + (check-arg procedure? = lset-adjoin) + (fold (lambda (elt ans) (if (member elt ans =) ans (cons elt ans))) + lis elts)) + + +(define (lset-union = . lists) + (check-arg procedure? = lset-union) + (reduce (lambda (lis ans) ; Compute ANS + LIS. + (cond ((null? lis) ans) ; Don't copy any lists + ((null? ans) lis) ; if we don't have to. + ((eq? lis ans) ans) + (else + (fold (lambda (elt ans) (if (any (lambda (x) (= x elt)) ans) + ans + (cons elt ans))) + ans lis)))) + '() lists)) + +(define (lset-union! = . lists) + (check-arg procedure? = lset-union!) + (reduce (lambda (lis ans) ; Splice new elts of LIS onto the front of ANS. + (cond ((null? lis) ans) ; Don't copy any lists + ((null? ans) lis) ; if we don't have to. + ((eq? lis ans) ans) + (else + (pair-fold (lambda (pair ans) + (let ((elt (car pair))) + (if (any (lambda (x) (= x elt)) ans) + ans + (begin (set-cdr! pair ans) pair)))) + ans lis)))) + '() lists)) + + +(define (lset-intersection = lis1 . lists) + (check-arg procedure? = lset-intersection) + (let ((lists (delete lis1 lists eq?))) ; Throw out any LIS1 vals. + (cond ((any null-list? lists) '()) ; Short cut + ((null? lists) lis1) ; Short cut + (else (filter (lambda (x) + (every (lambda (lis) (member x lis =)) lists)) + lis1))))) + +(define (lset-intersection! = lis1 . lists) + (check-arg procedure? = lset-intersection!) + (let ((lists (delete lis1 lists eq?))) ; Throw out any LIS1 vals. + (cond ((any null-list? lists) '()) ; Short cut + ((null? lists) lis1) ; Short cut + (else (filter! (lambda (x) + (every (lambda (lis) (member x lis =)) lists)) + lis1))))) + + +(define (lset-difference = lis1 . lists) + (check-arg procedure? = lset-difference) + (let ((lists (filter pair? lists))) ; Throw out empty lists. + (cond ((null? lists) lis1) ; Short cut + ((memq lis1 lists) '()) ; Short cut + (else (filter (lambda (x) + (every (lambda (lis) (not (member x lis =))) + lists)) + lis1))))) + +(define (lset-difference! = lis1 . lists) + (check-arg procedure? = lset-difference!) + (let ((lists (filter pair? lists))) ; Throw out empty lists. + (cond ((null? lists) lis1) ; Short cut + ((memq lis1 lists) '()) ; Short cut + (else (filter! (lambda (x) + (every (lambda (lis) (not (member x lis =))) + lists)) + lis1))))) + + +(define (lset-xor = . lists) + (check-arg procedure? = lset-xor) + (reduce (lambda (b a) ; Compute A xor B: + ;; Note that this code relies on the constant-time + ;; short-cuts provided by LSET-DIFF+INTERSECTION, + ;; LSET-DIFFERENCE & APPEND to provide constant-time short + ;; cuts for the cases A = (), B = (), and A eq? B. It takes + ;; a careful case analysis to see it, but it's carefully + ;; built in. + + ;; Compute a-b and a^b, then compute b-(a^b) and + ;; cons it onto the front of a-b. + (receive (a-b a-int-b) (lset-diff+intersection = a b) + (cond ((null? a-b) (lset-difference b a =)) + ((null? a-int-b) (append b a)) + (else (fold (lambda (xb ans) + (if (member xb a-int-b =) ans (cons xb ans))) + a-b + b))))) + '() lists)) + + +(define (lset-xor! = . lists) + (check-arg procedure? = lset-xor!) + (reduce (lambda (b a) ; Compute A xor B: + ;; Note that this code relies on the constant-time + ;; short-cuts provided by LSET-DIFF+INTERSECTION, + ;; LSET-DIFFERENCE & APPEND to provide constant-time short + ;; cuts for the cases A = (), B = (), and A eq? B. It takes + ;; a careful case analysis to see it, but it's carefully + ;; built in. + + ;; Compute a-b and a^b, then compute b-(a^b) and + ;; cons it onto the front of a-b. + (receive (a-b a-int-b) (lset-diff+intersection! = a b) + (cond ((null? a-b) (lset-difference! b a =)) + ((null? a-int-b) (append! b a)) + (else (pair-fold (lambda (b-pair ans) + (if (member (car b-pair) a-int-b =) ans + (begin (set-cdr! b-pair ans) b-pair))) + a-b + b))))) + '() lists)) + + +(define (lset-diff+intersection = lis1 . lists) + (check-arg procedure? = lset-diff+intersection) + (cond ((every null-list? lists) (values lis1 '())) ; Short cut + ((memq lis1 lists) (values '() lis1)) ; Short cut + (else (partition (lambda (elt) + (not (any (lambda (lis) (member elt lis =)) + lists))) + lis1)))) + +(define (lset-diff+intersection! = lis1 . lists) + (check-arg procedure? = lset-diff+intersection!) + (cond ((every null-list? lists) (values lis1 '())) ; Short cut + ((memq lis1 lists) (values '() lis1)) ; Short cut + (else (partition! (lambda (elt) + (not (any (lambda (lis) (member elt lis =)) + lists))) + lis1)))) hunk ./lib/string-lib.scm 1 +;;; Scheme Underground string-processing library -*- Scheme -*- +;;; Olin Shivers 11/98 + +;;; SRFI DRAFT -- SRFI DRAFT -- SRFI DRAFT -- SRFI DRAFT -- SRFI DRAFT +;;; This is *draft* code for a SRFI proposal. If you see this notice in +;;; production code, you've got obsolete, bad source -- go find the final +;;; non-draft code on the Net. +;;; SRFI DRAFT -- SRFI DRAFT -- SRFI DRAFT -- SRFI DRAFT -- SRFI DRAFT + +;;; Some of this code had (extremely distant) origins in MIT Scheme's string +;;; lib, and was substantially reworked by Olin Shivers (shivers@ai.mit.edu) +;;; 9/98. As such, it is +;;; Copyright (c) 1988-1994 Massachusetts Institute of Technology. +;;; The copyright terms are essentially open-software terms; +;;; the precise terms are at the end of this file. +;;; +;;; The KMP string-search code was massively rehacked from Stephen Bevan's +;;; code, written for scmlib, and is thus covered by the GPL. If that's a +;;; problem, write one from scratch (there are citations to standard textbooks +;;; in the comments), or rip it out and use the ten-line doubly-nested loop +;;; that's commented out just above this code. +;;; +;;; I wish I could mark definitions in this code to be inlined. +;;; Certain functions could benefit from compiler support. +;;; +;;; My policy on checking start/end substring specs is not uniform. +;;; I avoided doing arg checks when the function directly calls another +;;; lower-level function that will check the start/end specs as well. +;;; This has the advantage of not doing redundant checks, but the disadvantage +;;; is that errors are not reported early, at the highest possible call. +;;; There's not much high-level error checking of the other args, anyway. +;;; -Olin + +;;; Exports: +;;; string-map string-map! +;;; string-fold string-unfold +;;; string-fold-right string-unfold-right +;;; string-tabulate +;;; string-for-each string-iter +;;; string-every string-any +;;; string-compare string-compare-ci +;;; substring-compare substring-compare-ci +;;; string= string< string> string<= string>= string<> +;;; string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<> +;;; substring= substring<> substring-ci= substring-ci<> +;;; substring< substring> substring-ci< substring-ci> +;;; substring<= substring>= substring-ci<= substring-ci>= +;;; string-upper-case? string-lower-case? +;;; capitalize-string capitalize-words string-downcase string-upcase +;;; capitalize-string! capitalize-words! string-downcase! string-upcase! +;;; string-take string-take-right +;;; string-drop string-drop-right +;;; string-pad string-pad-right +;;; string-trim string-trim-right string-trim-both +;;; string-filter string-delete +;;; string-index string-index-right string-skip string-skip-right +;;; string-prefix-count string-prefix-count-ci +;;; string-suffix-count string-suffix-count-ci +;;; substring-prefix-count substring-prefix-count-ci +;;; substring-suffix-count substring-suffix-count-ci +;;; string-prefix? string-prefix-ci? +;;; string-suffix? string-suffix-ci? +;;; substring-prefix? substring-prefix-ci? +;;; substring-suffix? substring-suffix-ci? +;;; substring? substring-ci? +;;; string-fill! string-copy! string-copy substring +;;; string-reverse string-reverse! reverse-list->string +;;; string->list +;;; string-concat string-concat/shared string-append/shared +;;; xsubstring string-xcopy! +;;; string-null? +;;; join-strings +;;; +;;; string? make-string string string-length string-ref string-set! +;;; string-append list->string +;;; +;;; make-kmp-restart-vector +;;; parse-final-start+end +;;; parse-start+end +;;; check-substring-spec + +;;; Imports +;;; This code has the following non-R5RS dependencies: +;;; - (RECEIVE (var ...) mv-exp body ...) multiple-value binding macro +;;; - Various imports from the char-set library +;;; - ERROR +;;; - LET-OPTIONALS and :OPTIONAL macros for handling optional arguments +;;; - The R5RS SUBSTRING function is accessed using the Scheme 48 +;;; STRUCTURE-REF magic accessor. + + +;;; Support for START/END substring specs +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This macro parses optional start/end arguments from arg lists, defaulting +;;; them to 0/(string-length s), and checks them for correctness. + +(define-syntax let-start+end + (syntax-rules () + ((let-start+end (start end) proc s-exp args-exp body ...) + (receive (start end) (parse-final-start+end proc s-exp args-exp) + body ...)))) + + +;;; Returns three values: start end rest + +(define (parse-start+end proc s args) + (let ((slen (string-length s))) + (if (pair? args) + + (let ((start (car args)) + (args (cdr args))) + (if (or (not (integer? start)) (< start 0)) + (error "Illegal substring START spec" proc start s) + (receive (end args) + (if (pair? args) + (let ((end (car args)) + (args (cdr args))) + (if (or (not (integer? end)) (< slen end)) + (error "Illegal substring END spec" proc end s) + (values end args))) + (values slen args)) + (if (<= start end) (values start end args) + (error "Illegal substring START/END spec" + proc start end s))))) + + (values 0 (string-length s) '())))) + +(define (parse-final-start+end proc s args) + (receive (start end rest) (parse-start+end proc s args) + (if (pair? rest) (error "Extra arguments to procedure" proc rest) + (values start end)))) + +(define (check-substring-spec proc s start end) + (if (or (< start 0) + (< (string-length s) end) + (< end start)) + (error "Illegal substring START/END spec." proc s start end))) + + + +;;; substring S START [END] +;;; string-copy S [START END] +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Redefine SUBSTRING so that the END parameter is optional. +;;; SUBSTRINGX is the underlying R5RS SUBSTRING function. All +;;; the code in this file uses the simple SUBSTRINGX, so you can +;;; easily port this code. + +(define substringx (structure-ref scheme substring)) ; Simple R5RS SUBSTRING + +(define (substring s start . maybe-end) ; Our SUBSTRING + (substringx s start (:optional maybe-end (string-length s)))) + +(define (string-copy s . maybe-start+end) + (let-start+end (start end) string-copy s maybe-start+end + (substringx s start end))) + + + +;;; Basic iterators and other higher-order abstractions +;;; (string-map proc s [start end]) +;;; (string-map! proc s [start end]) +;;; (string-fold kons knil s [start end]) +;;; (string-fold-right kons knil s [start end]) +;;; (string-unfold p f g seed) +;;; (string-for-each proc s [start end]) +;;; (string-iter proc s [start end]) +;;; (string-every pred s [start end]) +;;; (string-any pred s [start end]) +;;; (string-tabulate proc len) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; No guarantees about order in MAP, FOR-EACH, EVERY, ANY procs. +;;; +;;; You want compiler support for high-level transforms on fold and unfold ops. +;;; You'd at least like a lot of inlining for clients of these procedures. +;;; Hold your breath. + +(define (string-map proc s . maybe-start+end) + (let-start+end (start end) string-map s maybe-start+end + (let* ((len (- end start)) + (ans (make-string len))) + (do ((i (- end 1) (- i 1)) + (j (- len 1) (- j 1))) + ((< j 0)) + (string-set! ans j (proc (string-ref s i)))) + ans))) + +(define (string-map! proc s . maybe-start+end) + (let-start+end (start end) string-map! s maybe-start+end + (do ((i (- end 1) (- i 1))) + ((< i start)) + (string-set! s i (proc (string-ref s i)))))) + +(define (string-fold kons knil s . maybe-start+end) + (let-start+end (start end) string-fold s maybe-start+end + (let lp ((v knil) (i start)) + (if (< i end) (lp (kons (string-ref s i) v) (+ i 1)) + v)))) + +(define (string-fold-right kons knil s . maybe-start+end) + (let-start+end (start end) string-fold-right s maybe-start+end + (let lp ((v knil) (i (- end 1))) + (if (>= i start) (lp (kons (string-ref s i) v) (- i 1)) + v)))) + +;;; (string-unfold p f g seed) +;;; This is the fundamental constructor for strings. +;;; - G is used to generate a series of "seed" values from the initial seed: +;;; SEED, (G SEED), (G^2 SEED), (G^3 SEED), ... +;;; - P tells us when to stop -- when it returns true when applied to one +;;; of these seed values. +;;; - F maps each seed value to the corresponding character +;;; in the result string. +;;; +;;; In other words, the following (simple, inefficient) definition holds: +;;; (string-unfold p f g seed) = +;;; (if (p seed) "" +;;; (string-append (string (f seed)) +;;; (string-unfold p f g (g seed)))) +;;; +;;; STRING-UNFOLD is a fairly powerful constructor -- you can use it to +;;; reverse a string, copy a string, convert a list to a string, read +;;; a port into a string, and so forth. Examples: +;;; (port->string port) = +;;; (string-unfold (compose eof-object? peek-char) +;;; read-char identity port) +;;; +;;; (list->string lis) = (string-unfold null? car cdr lis) +;;; +;;; (tabulate-string f size) = (string-unfold (lambda (i) (= i size)) f add1 0) + +;;; A problem with the following simple formulation is that it pushes one +;;; stack frame for every char in the result string -- an issue if you are +;;; using it to read a 100kchar string. So we don't use it -- but I include +;;; it to give a clear, straightforward description of what the function +;;; does. + +;(define (string-unfold p f g seed) +; (let recur ((seed seed) (i 0)) +; (if (p seed) (make-string i) +; (let* ((c (f seed)) +; (s (recur (g seed) (+ i 1)))) +; (string-set! s i c) +; s)))) + +;;; This formulation chunks up the constructed string into 1024-char chunks, +;;; popping the stack frames. So it'll reduce stack growth by a factor of +;;; 1024. Marc Feeley alerted me to this issue and its general solution. + +(define (string-unfold p f g seed) + (string-concat/shared + (let recur ((seed seed)) + (receive (s seed done?) + (let recur2 ((seed seed) (i 0)) + (cond ((p seed) (values (make-string i) seed #t)) + ((>= i 1024) (values (make-string i) seed #f)) + (else (let ((c (f seed))) + (receive (s seed done?) + (recur2 (g seed) (+ i 1)) + (string-set! s i c) + (values s seed done?)))))) + + (if done? (list s) + (cons s (recur seed))))))) + + +;;; This is the same as STRING-UNFOLD, but defined for multiple +;;; seed parameters. If you pass N seeds, then +;;; - P maps N parameters to a boolean. +;;; - F maps N parameters to a character. +;;; - G maps N parameters to N return values. +;;; This definition does a lot of consing; it would need a fair amount +;;; of compiler support to be efficient. + +; Not released +;(define (string-unfoldn p f g . seeds) +; (apply string-append +; (let recur ((seeds seeds)) +; (receive (s seeds done?) +; (let recur2 ((seeds seeds) (i 0)) +; (cond ((apply p seeds) (values (make-string i) seeds #t)) +; ((>= i 1024) (values (make-string i) seeds #f)) +; (else (let ((c (apply f seeds))) +; (receive seeds (apply g seeds) +; (receive (s seeds done?) +; (recur2 seeds (+ i 1)) +; (string-set! s i c) +; (values s seeds done?))))))) +; +; (if done? (list s) +; (cons s (recur seeds))))))) + +(define (string-for-each proc s . maybe-start+end) + (let-start+end (start end) string-for-each s maybe-start+end + (do ((i (- end 1) (- i 1))) + ((< i start)) + (proc (string-ref s i))))) + +(define (string-iter proc s . maybe-start+end) + (let-start+end (start end) string-iter s maybe-start+end + (do ((i start (+ i 1))) + ((>= i end)) + (proc (string-ref s i))))) + +(define (string-every pred s . maybe-start+end) + (let-start+end (start end) string-every s maybe-start+end + (let lp ((i (- end 1))) + (or (< i start) + (and (pred (string-ref s i)) + (lp (- i 1))))))) + +(define (string-any pred s . maybe-start+end) + (let-start+end (start end) string-any s maybe-start+end + (let lp ((i (- end 1))) + (and (>= i start) + (or (pred (string-ref s i)) + (lp (- i 1))))))) + + +(define (string-tabulate proc len) + (let ((s (make-string len))) + (do ((i (- len 1) (- i 1))) + ((< i 0)) + (string-set! s i (proc i))) + s)) + + + +;;; string-prefix-count[-ci] s1 s2 +;;; string-suffix-count[-ci] s1 s2 +;;; substring-prefix-count[-ci] s1 start1 end1 s2 start2 end2 +;;; substring-suffix-count[-ci] s1 start1 end1 s2 start2 end2 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Find the length of the common prefix/suffix. +;;; It is not required that the two substrings passed be of equal length. +;;; This was microcode in MIT Scheme -- a very tightly bummed primitive. + +(define (substring-prefix-count s1 start1 end1 s2 start2 end2) + (check-substring-spec substring-prefix-count s1 start1 end1) + (check-substring-spec substring-prefix-count s2 start2 end2) + (let* ((delta (min (- end1 start1) (- end2 start2))) + (end1 (+ start1 delta))) + (let lp ((i start1) (j start2)) + (if (or (>= i end1) + (not (char=? (string-ref s1 i) + (string-ref s2 j)))) + (- i start1) + (lp (+ i 1) (+ j 1)))))) + +(define (substring-suffix-count s1 start1 end1 s2 start2 end2) + (check-substring-spec substring-suffix-count s1 start1 end1) + (check-substring-spec substring-suffix-count s2 start2 end2) + (let* ((delta (min (- end1 start1) (- end2 start2))) + (start1 (- end1 delta))) + (let lp ((i (- end1 1)) (j (- end2 1))) + (if (or (< i start1) + (not (char=? (string-ref s1 i) + (string-ref s2 j)))) + (- (- end1 i) 1) + (lp (- i 1) (- j 1)))))) + +(define (substring-prefix-count-ci s1 start1 end1 s2 start2 end2) + (check-substring-spec substring-prefix-count-ci s1 start1 end1) + (check-substring-spec substring-prefix-count-ci s2 start2 end2) + (let* ((delta (min (- end1 start1) (- end2 start2))) + (end1 (+ start1 delta))) + (let lp ((i start1) (j start2)) + (if (or (>= i end1) + (not (char-ci=? (string-ref s1 i) + (string-ref s2 j)))) + (- i start1) + (lp (+ i 1) (+ j 1)))))) + +(define (substring-suffix-count-ci s1 start1 end1 s2 start2 end2) + (check-substring-spec substring-suffix-count-ci s1 start1 end1) + (check-substring-spec substring-suffix-count-ci s2 start2 end2) + (let* ((delta (min (- end1 start1) (- end2 start2))) + (start1 (- end1 delta))) + (let lp ((i (- end1 1)) (j (- end2 1))) + (if (or (< i start1) + (not (char-ci=? (string-ref s1 i) + (string-ref s2 j)))) + (- (- end1 i) 1) + (lp (- i 1) (- j 1)))))) + + +(define (string-prefix-count s1 s2) + (substring-prefix-count s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (string-suffix-count s1 s2) + (substring-suffix-count s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (string-prefix-count-ci s1 s2) + (substring-prefix-count-ci s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (string-suffix-count-ci s1 s2) + (substring-suffix-count-ci s1 0 (string-length s1) s2 0 (string-length s2))) + + + +;;; string-prefix? s1 s2 +;;; string-suffix? s1 s2 +;;; string-prefix-ci? s1 s2 +;;; string-suffix-ci? s1 s2 +;;; +;;; substring-prefix? s1 start1 end1 s2 start2 end2 +;;; substring-suffix? s1 start1 end1 s2 start2 end2 +;;; substring-prefix-ci? s1 start1 end1 s2 start2 end2 +;;; substring-suffix-ci? s1 start1 end1 s2 start2 end2 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; These are all simple derivatives of the previous counting funs. + +(define (string-prefix? s1 s2) + (substring-prefix? s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (string-suffix? s1 s2) + (substring-suffix-ci? s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (string-prefix-ci? s1 s2) + (substring-prefix-ci? s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (string-suffix-ci? s1 s2) + (substring-suffix-ci? s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (substring-prefix? s1 start1 end1 s2 start2 end2) + (let ((len1 (- end1 start1))) + (and (<= len1 (- end2 start2)) ; Quick check + (= (substring-prefix-count s1 start1 end1 + s2 start2 end2) + len1)))) + +(define (substring-suffix? s1 start1 end1 s2 start2 end2) + (let ((len1 (- end1 start1))) + (and (<= len1 (- end2 start2)) ; Quick check + (= len1 (substring-suffix-count s1 start1 end1 + s2 start2 end2))))) + +(define (substring-prefix-ci? s1 start1 end1 s2 start2 end2) + (let ((len1 (- end1 start1))) + (and (<= len1 (- end2 start2)) ; Quick check + (= len1 (substring-prefix-count-ci s1 start1 end1 + s2 start2 end2))))) + +(define (substring-suffix-ci? s1 start1 end1 s2 start2 end2) + (let ((len1 (- end1 start1))) + (and (<= len1 (- end2 start2)) ; Quick check + (= len1 (substring-suffix-count-ci s1 start1 end1 + s2 start2 end2))))) + + +;;; string-compare s1 s2 lt-proc eq-proc gt-proc +;;; string-compare-ci s1 s2 eq-proc lt-proc gt-proc +;;; substring-compare s1 start1 end1 s2 start2 end2 +;;; lt-proc eq-proc gt-proc +;;; substring-compare-ci s1 start1 end1 s2 start2 end2 +;;; lt-proc eq-proc gt-proc +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Primitive string-comparison functions. +;;; Continuation order is different from MIT Scheme. +;;; Continuations are applied to s1's mismatch index; +;;; in the case of equality, this is END1. + +(define (substring-compare s1 start1 end1 s2 start2 end2 + proc< proc= proc>) + (let ((size1 (- end1 start1)) + (size2 (- end2 start2))) + (let ((match (substring-prefix-count s1 start1 end1 s2 start2 end2))) + (if (= match size1) + ((if (= match size2) proc= proc<) end1) + ((if (= match size2) + proc> + (if (char)) + (+ match start1)))))) + +(define (substring-compare-ci s1 start1 end1 s2 start2 end2 + proc< proc= proc>) + (let ((size1 (- end1 start1)) + (size2 (- end2 start2))) + (let ((match (substring-prefix-count-ci s1 start1 end1 s2 start2 end2))) + (if (= match size1) + ((if (= match size2) proc= proc<) end1) + ((if (= match size2) proc> + (if (char-ci)) + (+ start1 match)))))) + +(define (string-compare s1 s2 proc< proc= proc>) + (substring-compare s1 0 (string-length s1) + s2 0 (string-length s2) + proc< proc= proc>)) + +(define (string-compare-ci s1 s2 proc< proc= proc>) + (substring-compare-ci s1 0 (string-length s1) + s2 0 (string-length s2) + proc< proc= proc>)) + + +;;; string= string<> string-ci= string-ci<> +;;; string< string> string-ci< string-ci> +;;; string<= string>= string-ci<= string-ci>= +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Simple definitions in terms of the previous comparison funs. +;;; Inequality predicates return #f or mismatch index. +;;; I sure hope these defns get integrated. + +(define (string= s1 s2) + (if (eq? s1 s2) (string-length s1) ; Fast path + (string-compare s1 s2 (lambda (i) #f) (lambda (i) i) (lambda (i) #f)))) + +(define (string< s1 s2) + (and (not (eq? s1 s2)) ; Fast path + (string-compare s1 s2 (lambda (i) i) (lambda (i) #f) (lambda (i) #f)))) + +(define (string> s1 s2) + (and (not (eq? s1 s2)) ; Fast path + (string-compare s1 s2 (lambda (i) #f) (lambda (i) #f) (lambda (i) i)))) + +(define (string<= s1 s2) + (if (eq? s1 s2) (string-length s1) ; Fast path + (string-compare s1 s2 (lambda (i) i) (lambda (i) i) (lambda (i) #f)))) + +(define (string>= s1 s2) + (if (eq? s1 s2) (string-length s1) ; Fast path + (string-compare s1 s2 (lambda (i) #f) (lambda (i) i) (lambda (i) i)))) + +(define (string<> s1 s2) + (and (not (eq? s1 s2)) ; Fast path + (string-compare s1 s2 (lambda (i) i) (lambda (i) #f) (lambda (i) i)))) + + +(define (string-ci= s1 s2) + (if (eq? s1 s2) (string-length s1) ; Fast path + (string-compare-ci s1 s2 (lambda (i) #f) (lambda (i) i) (lambda (i) #f)))) + +(define (string-ci< s1 s2) + (and (not (eq? s1 s2)) ; Fast path + (string-compare-ci s1 s2 (lambda (i) i) (lambda (i) #f) (lambda (i) #f)))) + +(define (string-ci> s1 s2) + (and (not (eq? s1 s2)) ; Fast path + (string-compare-ci s1 s2 (lambda (i) #f) (lambda (i) #f) (lambda (i) i)))) + +(define (string-ci<= s1 s2) + (if (eq? s1 s2) (string-length s1) ; Fast path + (string-compare-ci s1 s2 (lambda (i) i) (lambda (i) i) (lambda (i) #f)))) + +(define (string-ci>= s1 s2) + (if (eq? s1 s2) (string-length s1) ; Fast path + (string-compare-ci s1 s2 (lambda (i) #f) (lambda (i) i) (lambda (i) i)))) + +(define (string-ci<> s1 s2) + (and (not (eq? s1 s2)) ; Fast path + (string-compare-ci s1 s2 (lambda (i) i) (lambda (i) #f) (lambda (i) i)))) + + +(define (substring= s1 start1 end1 s2 start2 end2) + (substring-compare s1 start1 end1 + s2 start2 end2 + (lambda (i) #f) + (lambda (i) i) + (lambda (i) #f))) + +(define (substring<> s1 start1 end1 s2 start2 end2) + (substring-compare s1 start1 end1 + s2 start2 end2 + (lambda (i) i) + (lambda (i) #f) + (lambda (i) i))) + +(define (substring< s1 start1 end1 s2 start2 end2) + (substring-compare s1 start1 end1 + s2 start2 end2 + (lambda (i) i) + (lambda (i) #f) + (lambda (i) #f))) + +(define (substring> s1 start1 end1 s2 start2 end2) + (substring< s2 start2 end2 s1 start1 end1)) + +(define (substring<= s1 start1 end1 s2 start2 end2) + (substring-compare s1 start1 end1 + s2 start2 end2 + (lambda (i) i) + (lambda (i) i) + (lambda (i) #f))) + +(define (substring>= s1 start1 end1 s2 start2 end2) + (substring<= s2 start2 end2 s1 start1 end1)) + +(define (substring-ci= s1 start1 end1 s2 start2 end2) + (substring-compare-ci s1 start1 end1 + s2 start2 end2 + (lambda (i) #f) + (lambda (i) i) + (lambda (i) #f))) + +(define (substring-ci<> s1 start1 end1 s2 start2 end2) + (substring-compare-ci s1 start1 end1 + s2 start2 end2 + (lambda (i) i) + (lambda (i) #f) + (lambda (i) i))) + +(define (substring-ci< s1 start1 end1 s2 start2 end2) + (substring-compare-ci s1 start1 end1 + s2 start2 end2 + (lambda (i) i) + (lambda (i) #f) + (lambda (i) #f))) + +(define (substring-ci> s1 start1 end1 s2 start2 end2) + (substring-ci< s2 start2 end2 s1 start1 end1)) + +(define (substring-ci<= s1 start1 end1 s2 start2 end2) + (substring-compare-ci s1 start1 end1 + s2 start2 end2 + (lambda (i) i) + (lambda (i) i) + (lambda (i) #f))) + +(define (substring-ci>= s1 start1 end1 s2 start2 end2) + (substring-ci<= s2 start2 end2 s1 start1 end1)) + + + +;;; Case hacking +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-upper-case? +;;; string-lower-case? +;;; +;;; string-upcase s [start end] +;;; string-upcase! s [start end] +;;; string-downcase s [start end] +;;; string-downcase! s [start end] +;;; +;;; capitalize-string s [start end] +;;; capitalize-string! s [start end] +;;; Uppercase first alphanum char, lowercase rest. +;;; +;;; capitalize-words s [start end] +;;; capitalize-words! s [start end] +;;; Capitalize every contiguous alphanum sequence: uppercase +;;; first char, lowercase rest. + +;;; These two use a different definition of an "upper-/lower-case string" +;;; than MIT Scheme uses: + +(define (string-upper-case? s . maybe-start+end) + (not (apply string-any char-lower-case? s maybe-start+end))) + +(define (string-lower-case? s . maybe-start+end) + (not (apply string-any char-upper-case? s maybe-start+end))) + + +(define (string-upcase s . maybe-start+end) + (apply string-map char-upcase s maybe-start+end)) + +(define (string-upcase! s . maybe-start+end) + (apply string-map! char-upcase s maybe-start+end)) + +(define (string-downcase s . maybe-start+end) + (apply string-map char-downcase s maybe-start+end)) + +(define (string-downcase! s . maybe-start+end) + (apply string-map! char-downcase s maybe-start+end)) + + +;;; capitalize-string s [start end] +;;; capitalize-string! s [start end] +;;; Uppercase first alphanum char, lowercase rest. + +(define (really-capitalize-string! s start end) + (cond ((string-index s char-set:alphanumeric start end) => + (lambda (i) + (string-set! s i (char-upcase (string-ref s i))) + (string-downcase! s i))))) + +(define (capitalize-string! s . maybe-start+end) + (let-start+end (start end) capitalize-string! s maybe-start+end + (really-capitalize-string! s start end))) + +(define (capitalize-string s . maybe-start+end) + (let-start+end (start end) capitalize-string s maybe-start+end + (let ((ans (substringx s start end))) + (really-capitalize-string! ans 0 (- end start)) + ans))) + +;;; capitalize-words s [start end] +;;; capitalize-words! s [start end] +;;; Capitalize every contiguous alphanum sequence: uppercase +;;; first char, lowercase rest. + +(define (really-capitalize-words! s start end) + (let lp ((i start)) + (cond ((string-index s char-set:alphanumeric i end) => + (lambda (i) + (string-set! s i (char-upcase (string-ref s i))) + (let ((i1 (+ i 1))) + (cond ((string-skip s char-set:alphanumeric i1 end) => + (lambda (j) + (string-downcase! s i1 j) + (lp (+ j 1)))) + (else (string-downcase! s i1 end))))))))) + +(define (capitalize-words! s . maybe-start+end) + (let-start+end (start end) capitalize-string! s maybe-start+end + (really-capitalize-words! s start end))) + +(define (capitalize-words s . maybe-start+end) + (let-start+end (start end) capitalize-string! s maybe-start+end + (let ((ans (substringx s start end))) + (really-capitalize-words! ans 0 (- end start)) + ans))) + + + +;;; Cutting & pasting strings +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-take string nchars +;;; string-drop string nchars +;;; +;;; string-pad string k [char start end] +;;; string-pad-right string k [char start end] +;;; +;;; string-trim string [char/char-set/pred start end] +;;; string-trim-right string [char/char-set/pred start end] +;;; string-trim-both string [char/char-set/pred start end] +;;; +;;; These trimmers invert the char-set meaning from MIT Scheme -- you +;;; say what you want to trim. + +(define (string-take s n) + (if (> n 0) + (substringx s 0 n) + (let ((len (string-length s))) + (substringx s (+ len n) len)))) + +(define (string-drop s n) + (let ((len (string-length s))) + (if (> n 0) + (substringx s n len) + (substringx s 0 (+ len n))))) + +(define (string-trim s . args) + (let-optionals args ((criteria char-set:whitespace) + (start 0) + (end (string-length s))) + (cond ((string-skip s criteria start end) => + (lambda (i) (substringx s i end))) + (else "")))) + +(define (string-trim-right s . args) + (let-optionals args ((criteria char-set:whitespace) + (start 0) + (end (string-length s))) + (cond ((string-skip-right s criteria end start) => + (lambda (i) (substringx s 0 (+ 1 i)))) + (else "")))) + +(define (string-trim-both s . args) + (let-optionals args ((criteria char-set:whitespace) + (start 0) + (end (string-length s))) + (cond ((string-skip s criteria start end) => + (lambda (i) (substringx s i (+ 1 (string-skip-right s criteria end))))) + (else "")))) + + +(define (string-pad-right s n . args) + (let-optionals args ((char #\space) (start 0) (end (string-length s))) + (check-substring-spec string-pad-right s start end) + (let ((len (- end start))) + (cond ((= n len) ; No pad. + (if (zero? start) s (substringx s start end))) + + ((< n len) (substringx s start (+ start n))) ; Trim. + + (else (let ((ans (make-string n char))) + (string-copy! ans 0 s start end) + ans)))))) + +(define (string-pad s n . args) + (let-optionals args ((char #\space) (start 0) (end (string-length s))) + (check-substring-spec string-pad s start end) + (let ((len (- end start))) + (cond ((= n len) ; No pad. + (if (zero? start) s (substringx s start end))) + + ((< n len) (substringx s (- end n) end)) ; Trim. + + (else (let ((ans (make-string n char))) + (string-copy! ans (- n len) s start end) + ans)))))) + + + +;;; Filtering strings +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-delete char/char-set/pred string [start end] +;;; string-filter char/char-set/pred string [start end] +;;; +;;; If the filter criteria is a char or char-set, we scan the string twice +;;; with string-fold -- once to determine the length of the result string, +;;; and once to do the filtered copy. +;;; If the filter criteria is a predicate, we don't do this double-scan +;;; strategy, because the predicate might have side-effects or be very +;;; expensive to compute. So we preallocate a temp buffer pessimistically, +;;; and only do one scan over S. This is likely to be faster and more +;;; space-efficient than consing a list. + +(define (string-delete criteria s . maybe-start+end) + (let-start+end (start end) string-delete s maybe-start+end + (if (procedure? criteria) + (let* ((slen (- end start)) + (temp (make-string slen)) + (ans-len (string-fold (lambda (c i) + (if (criteria c) i + (begin (string-set! temp i c) + (+ i 1)))) + 0 s start end))) + (if (= ans-len slen) temp (substringx temp 0 ans-len))) + + (let* ((cset (cond ((char-set? criteria) criteria) + ((char? criteria) (char-set criteria)) + (else (error "string-delete criteria not predicate, char or char-set" criteria)))) + (len (string-fold (lambda (c i) (if (char-set-contains? cset c) + i + (+ i 1))) + 0 s start end)) + (ans (make-string len))) + (string-fold (lambda (c i) (if (char-set-contains? cset c) + i + (begin (string-set! ans i c) + (+ i 1)))) + 0 s start end) + ans)))) + +(define (string-filter criteria s . maybe-start+end) + (let-start+end (start end) string-filter s maybe-start+end + (if (procedure? criteria) + (let* ((slen (- end start)) + (temp (make-string slen)) + (ans-len (string-fold (lambda (c i) + (if (criteria c) + (begin (string-set! temp i c) + (+ i 1)) + i)) + 0 s start end))) + (if (= ans-len slen) temp (substringx temp 0 ans-len))) + + (let* ((cset (cond ((char-set? criteria) criteria) + ((char? criteria) (char-set criteria)) + (else (error "string-delete criteria not predicate, char or char-set" criteria)))) + + (len (string-fold (lambda (c i) (if (char-set-contains? cset c) + (+ i 1) + i)) + 0 s start end)) + (ans (make-string len))) + (string-fold (lambda (c i) (if (char-set-contains? cset c) + (begin (string-set! ans i c) + (+ i 1)) + i)) + 0 s start end) + ans)))) + + + +;;; String search +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-index string char/char-set/pred [start end] +;;; string-index-right string char/char-set/pred [end start] +;;; string-skip string char/char-set/pred [start end] +;;; string-skip-right string char/char-set/pred [end start] +;;; Note the odd start/end ordering of index-right and skip-right params. +;;; There's a lot of replicated code here for efficiency. +;;; For example, the char/char-set/pred discrimination has +;;; been lifted above the inner loop of each proc. + +(define (string-index str criteria . maybe-start+end) + (let-start+end (start end) string-index str maybe-start+end + (cond ((char? criteria) + (let lp ((i start)) + (and (< i end) + (if (char=? criteria (string-ref str i)) i + (lp (+ i 1)))))) + ((char-set? criteria) + (let lp ((i start)) + (and (< i end) + (if (char-set-contains? criteria (string-ref str i)) i + (lp (+ i 1)))))) + ((procedure? criteria) + (let lp ((i start)) + (and (< i end) + (if (criteria (string-ref str i)) i + (lp (+ i 1)))))) + (else (error "Second param is neither char-set, char, or predicate procedure." + string-index criteria))))) + +(define (string-index-right str criteria . maybe-end+start) + (let-optionals maybe-end+start ((start 0) (end (string-length str))) + (check-substring-spec string-index-right str start end) + (cond ((char? criteria) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (char=? criteria (string-ref str i)) i + (lp (- i 1)))))) + ((char-set? criteria) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (char-set-contains? criteria (string-ref str i)) i + (lp (- i 1)))))) + ((procedure? criteria) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (criteria (string-ref str i)) i + (lp (- i 1)))))) + (else (error "Second param is neither char-set, char, or predicate procedure." + string-index-right criteria))))) + +(define (string-skip str criteria . maybe-start+end) + (let-start+end (start end) string-skip str maybe-start+end + (cond ((char? criteria) + (let lp ((i start)) + (and (< i end) + (if (char=? criteria (string-ref str i)) + (lp (+ i 1)) + i)))) + ((char-set? criteria) + (let lp ((i start)) + (and (< i end) + (if (char-set-contains? criteria (string-ref str i)) + (lp (+ i 1)) + i)))) + ((char-set? criteria) + (let lp ((i start)) + (and (< i end) + (if (criteria (string-ref str i)) (lp (+ i 1)) + i)))) + (else (error "Second param is neither char-set, char, or predicate procedure." + string-skip criteria))))) + +(define (string-skip-right str criteria . maybe-end+start) + (let-optionals maybe-end+start ((start 0) (end (string-length str))) + (check-substring-spec string-index-right str start end) + (cond ((char? criteria) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (char=? criteria (string-ref str i)) + (lp (- i 1)) + i)))) + ((char-set? criteria) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (char-set-contains? criteria (string-ref str i)) + (lp (- i 1)) + i)))) + ((procedure? criteria) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (criteria (string-ref str i)) (lp (- i 1)) + i)))) + (else (error "CRITERIA param is neither char-set or char." + string-skip-right criteria))))) + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-fill! string char [start end] +;;; +;;; string-copy! to tstart from [fstart fend] +;;; Guaranteed to work, even if s1 eq s2. + +(define (string-fill! s char . maybe-start+end) + (let-start+end (start end) string-fill! s maybe-start+end + (do ((i (- end 1) (- i 1))) + ((< i start)) + (string-set! s i char)))) + +(define (string-copy! to tstart from . maybe-fstart+fend) + (let-start+end (fstart fend) string-copy! from maybe-fstart+fend + (let ((tend (+ tstart (- fend fstart)))) + (check-substring-spec string-copy! to tstart tend) + (if (> fstart tstart) + (do ((i fstart (+ i 1)) + (j tstart (+ j 1))) + ((>= i fend)) + (string-set! to j (string-ref from i))) + + (do ((i (- fend 1) (- i 1)) + (j (- tend 1) (- j 1))) + ((< i fstart)) + (string-set! to j (string-ref from i))))))) + + + +;;; Returns starting-position or #f if not true. +;;; This implementation is slow & simple. See below for KMP. +;;; Boyer-Moore would be nice. +;(define (substring? substring string . maybe-start+end) +; (let-start+end (start end) string substring? maybe-start+end +; (if (string-null? substring) start +; (let* ((len (string-length substring)) +; (i-bound (- end len)) +; (char1 (string-ref substring start))) +; (let lp ((i 0)) +; (cond ((string-index string char1 i i-bound) => +; (lambda (i) +; (if (substring= substring 0 len string i (+ i len)) +; i +; (lp (+ i 1))))) +; (else #f))))))) + + +;;; Searching for an occurence of a substring +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This uses the KMP algorithm +;;; "Fast Pattern Matching in Strings" +;;; SIAM J. Computing 6(2):323-350 1977 +;;; D. E. Knuth, J. H. Morris and V. R. Pratt +;;; also described in +;;; "Pattern Matching in Strings" +;;; Alfred V. Aho +;;; Formal Language Theory - Perspectives and Open Problems +;;; Ronald V. Brook (editor) +;;; This algorithm is O(m + n) where m and n are the +;;; lengths of the pattern and string respectively +;;; Original version of this code by bevan; I have substantially rehacked it. + +(define (substring? pattern source . maybe-start+end) + (let-start+end (start end) substring? source maybe-start+end + (really-substring? char=? pattern source start end))) + +(define (substring-ci? pattern source . maybe-start+end) + (let-start+end (start end) substring-ci? source maybe-start+end + (really-substring? char-ci=? pattern source start end))) + +;;; Compute the Knuth-Morris-Pratt restart vector RV for string PATTERN. If +;;; we have matched chars 0..i-1 of PATTERN against a search string S, and +;;; PATTERN[i] doesn't match S[k], then reset i := RV[i], and try again to +;;; match S[k]. If RV[i] = -1, then punt S[k] completely, and move on to +;;; S[k+1] and PATTERN[0]. +;;; +;;; In other words, if you have matched the first i chars of PATTERN, but +;;; the i+1'th char doesn't match, RV[i] tells you what the next-longest +;;; prefix of PATTERN is that you have matched. +;;; +;;; C= is the character comparator -- usefully CHAR= or CHAR-CI=. +;;; +;;; I've split this out as a separate function in case other constant-string +;;; searchers might want to use it. + +(define (make-kmp-restart-vector pattern c=) + (let* ((plen (string-length pattern)) + (rv (make-vector plen))) + (if (> plen 0) + (let ((plen-1 (- plen 1))) + (vector-set! rv 0 -1) + (let lp ((i 0) (j -1)) + (if (< i plen-1) + (if (or (= j -1) + (c= (string-ref pattern i) + (string-ref pattern j))) + (let ((i (+ 1 i)) + (j (+ 1 j))) + (vector-set! rv i j) + (lp i j)) + (lp i (vector-ref rv j))))))) + rv)) + +(define (really-substring? c= pattern source start end) + (let ((plen (string-length pattern)) + (rv (make-kmp-restart-vector pattern c=))) + + ;; The search loop. SJ & PJ are redundant state. + (let lp ((si start) (pi 0) + (sj (- end start)) ; (- end si) -- how many chars left. + (pj plen)) ; (- plen pi) -- how many chars left. + + (if (= pi plen) (- si plen) ; Win. + + (and (<= pj sj) ; Lose. + + (if (c= (string-ref source si) ; Search. + (string-ref pattern pi)) + (lp (+ 1 si) (+ 1 pi) (- sj 1) (- pj 1)) ; Advance. + + (let ((pi (vector-ref rv pi))) ; Retreat. + (if (= pi -1) + (lp (+ si 1) 0 (- sj 1) plen) ; Punt. + (lp si pi sj (- plen pi)))))))))) + + + +;;; Misc +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; (string-reverse s [start end]) +;;; (string-reverse! s [start end]) +;;; (string-null? s) + +(define (string-null? s) (zero? (string-length s))) + +(define (string-reverse s . maybe-start+end) + (let-start+end (start end) string-reverse s maybe-start+end + (let ((ans (make-string (- end start)))) + (do ((i (- end 1) (- i 1)) + (j start (+ j 1))) + ((< i j)) + (string-set! ans i (string-ref s j)) + (string-set! ans j (string-ref s i))) + ans))) + +(define (string-reverse! s . maybe-start+end) + (let-start+end (start end) string-reverse! s maybe-start+end + (do ((i (- end 1) (- i 1)) + (j start (+ j 1))) + ((<= i j)) + (let ((ci (string-ref s i))) + (string-set! s i (string-ref s j)) + (string-set! s j ci))))) + + +(define (reverse-list->string clist) + (let* ((len (length clist)) + (s (make-string len))) + (do ((i (- len 1) (- i 1)) (clist clist (cdr clist))) + ((not (pair? clist))) + (string-set! s i (car clist))) + s)) + + +;(define (string->list s . maybe-start+end) +; (let-start+end (start end) string->list s maybe-start+end +; (do ((i (- end 1) (- i 1)) +; (ans '() (cons (string-ref s i) ans))) +; ((< i start) ans)))) + +(define (string->list s . maybe-start+end) + (apply string-fold-right s cons '() maybe-start+end)) + + + +;;; string-concat string-list -> string +;;; string-concat/shared string-list -> string +;;; string-append/shared s ... -> string +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; STRING-APPEND/SHARED has license to return a string that shares storage +;;; with any of its arguments. In particular, if there is only one non-empty +;;; string amongst its parameters, it is permitted to return that string as +;;; its result. STRING-APPEND, by contrast, always allocates new storage. +;;; +;;; STRING-CONCAT & STRING-CONCAT/SHARED are passed a list of strings, +;;; which they concatenate into a result string. STRING-CONCAT always +;;; allocates a fresh string; STRING-CONCAT/SHARED may (or may not) return +;;; a result that shares storage with any of its arguments. In particular, +;;; if it is applied to a singleton list, it is permitted to return the +;;; car of that list as its value. +;;; +;;; This is portable code, but could be much more efficient w/compiler +;;; support. Especially the n-ary guys. + +;;; We delete the empty strings from the parameter list before handing +;;; off to string-concat/shared. I wrote the recursion out by hand instead +;;; of using list-lib's FILTER or FILTER! to minimize non-R5RS dependencies. + +(define (string-append/shared . strings) (string-concat/shared strings)) + +(define (string-concat/shared strings) + (let ((strings (let recur ((strings strings)) ; Delete empty strings. + (if (pair? strings) + (let ((s (car strings)) + (tail (recur (cdr strings)))) + (if (string-null? s) tail (cons s tail))) + '())))) + + (cond ((not (pair? strings)) "") ; () => "". + ((not (pair? (cdr strings))) (car strings)) ; (s) => s. + (else (string-concat strings))))) ; Allocate & concat. + +; Alas, Scheme 48's APPLY blows up if you have many, many arguments. +;(define (string-concat strings) (apply string-append strings)) + +;;; Here it is written out. I avoid using REDUCE to add up string lengths +;;; to avoid non-R5RS dependencies. +(define (string-concat strings) + (let* ((total (do ((strings strings (cdr strings)) + (i 0 (+ i (string-length (car strings))))) + ((not (pair? strings)) i))) + (ans (make-string total))) + (let lp ((i 0) (strings strings)) + (if (pair? strings) + (let ((s (car strings))) + (string-copy! ans i s) + (lp (+ i (string-length s)) (cdr strings))))) + ans)) + + + + +;;; xsubstring s from [to start end] -> string +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; S is a string; START and END are optional arguments that demarcate +;;; a substring of S, defaulting to 0 and the length of S (e.g., the whole +;;; string). Replicate this substring up and down index space, in both the +;; positive and negative directions. For example, if S = "abcdefg", START=3, +;;; and END=6, then we have the conceptual bidirectionally-infinite string +;;; ... d e f d e f d e f d e f d e f d e f d e f ... +;;; ... -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 ... +;;; XSUBSTRING returns the substring of this string beginning at index FROM, +;;; and ending at TO (which defaults to FROM+(END-START)). +;;; +;;; You can use XSUBSTRING in many ways: +;;; - To rotate a string left: (xsubstring "abcdef" 2) => "cdefab" +;;; - To rotate a string right: (xsubstring "abcdef" -2) => "efabcd" +;;; - To replicate a string: (xsubstring "abc" 0 7) => "abcabca" +;;; +;;; Note that +;;; - The FROM/TO indices give a half-open range -- the characters from +;;; index FROM up to, but not including index TO. +;;; - The FROM/TO indices are not in terms of the index space for string S. +;;; They are in terms of the replicated index space of the substring +;;; defined by S, START, and END. +;;; +;;; It is an error if START=END -- although this is allowed by special +;;; dispensation when FROM=TO. + +(define (xsubstring s from . maybe-to+start+end) + (receive (to start end) + (if (pair? maybe-to+start+end) + (let-start+end (start end) xsubstring s (cdr maybe-to+start+end) + (values (car maybe-to+start+end) start end)) + (let ((slen (string-length s))) + (values (+ from slen) 0 slen))) + (let ((slen (- end start)) + (anslen (- to from))) + (cond ((< anslen 0) + (error "Illegal FROM/TO spec passed to xsubstring -- FROM > TO." + s from to start end)) + + ((zero? anslen) "") + ((zero? slen) (error "Empty (sub)string passed to xsubstring" + s from to start end)) + + ((= 1 slen) ; Fast path for 1-char replication. + (make-string anslen (string-ref s start))) + + ;; Selected text falls entirely within one span. + ((= (floor (/ from slen)) (floor (/ to slen))) + (substringx s (+ start (modulo from slen)) + (+ start (modulo to slen)))) + + ;; Selected text requires multiple spans. + (else (let ((ans (make-string anslen))) + (multispan-repcopy! ans 0 s from to start end) + ans)))))) + + +;;; string-xcopy! target tstart s sfrom [sto start end] -> unspecific +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Exactly the same as xsubstring, but the extracted text is written +;;; into the string TARGET starting at index TSTART. +;;; This operation is not defined if (EQ? TARGET S) -- you cannot copy +;;; a string on top of itself. + +(define (string-xcopy! target tstart s sfrom . maybe-sto+start+end) + (receive (sto start end) + (if (pair? maybe-sto+start+end) + (let-start+end (start end) string-xcopy! s (cdr maybe-sto+start+end) + (values (car maybe-sto+start+end) start end)) + (let ((slen (string-length s))) + (values (+ sfrom slen) 0 slen))) + + (let* ((tocopy (- sto sfrom)) + (tend (+ tstart tocopy)) + (slen (- end start))) + (check-substring-spec string-xcopy! target tstart tend) + (cond ((< tocopy 0) + (error "Illegal FROM/TO spec passed to string-xcopy! -- FROM > TO." + target tstart s sfrom sto start end)) + ((zero? tocopy)) + ((zero? slen) (error "Empty (sub)string passed to string-xcopy!" + target tstart s sfrom sto start end)) + + ((= 1 slen) ; Fast path for 1-char replication. + (string-fill! target (string-ref s start) tstart tend)) + + ;; Selected text falls entirely within one span. + ((= (floor (/ sfrom slen)) (floor (/ sto slen))) + (string-copy! target tstart s + (+ start (modulo sfrom slen)) + (+ start (modulo sto slen)))) + + ;; Multi-span copy. + (else (multispan-repcopy! target tstart s sfrom sto start end)))))) + +;;; This is the core copying loop for XSUBSTRING and STRING-XCOPY! +;;; Internal -- not exported, no careful arg checking. +(define (multispan-repcopy! target tstart s sfrom sto start end) + (let* ((slen (- end start)) + (i0 (+ start (modulo sfrom slen))) + (total-chars (- sto sfrom))) + + ;; Copy the partial span @ the beginning + (string-copy! target tstart s i0 end) + + (let* ((ncopied (- end i0)) ; We've copied this many. + (nleft (- total-chars ncopied)) ; # chars left to copy. + (nspans (quotient nleft slen))) ; # whole spans to copy + + ;; Copy the whole spans in the middle. + (do ((i (+ tstart ncopied) (+ i slen)) ; Current target index. + (nspans nspans (- nspans 1))) ; # spans to copy + ((zero? nspans) + ;; Copy the partial-span @ the end & we're done. + (string-copy! target i s start (+ start (- total-chars (- i tstart))))) + + (string-copy! target i s start end))))) ; Copy a whole span. + + + +;;; (join-strings string-list [delimiter grammar]) => string +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Paste strings together using the delimiter string. +;;; +;;; (join-strings '("foo" "bar" "baz") ":") => "foo:bar:baz" +;;; +;;; DELIMITER defaults to a single space " " +;;; GRAMMAR is one of the symbols {infix, suffix} and defaults to 'infix. + +;;; (join-strings strings [delim grammar]) + +(define (join-strings strings . args) + (if (pair? strings) + (let-optionals args ((delim " ") (grammar 'infix)) + (let ((strings (reverse strings))) + (let lp ((strings (cdr strings)) + (ans (case grammar + ((infix) (list (car strings))) + ((suffix) (list (car strings) delim)) + (else (error "Illegal join-strings grammar" grammar))))) + (if (pair? strings) + (lp (cdr strings) + (cons (car strings) (cons delim ans))) + + ; All done + (string-concat ans))))) + + "")) ; Special-cased for infix grammar. + + + +;;; MIT Scheme copyright terms +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This material was developed by the Scheme project at the Massachusetts +;;; Institute of Technology, Department of Electrical Engineering and +;;; Computer Science. Permission to copy and modify this software, to +;;; redistribute either the original software or a modified version, and +;;; to use this software for any purpose is granted, subject to the +;;; following restrictions and understandings. +;;; +;;; 1. Any copy made of this software must include this copyright notice +;;; in full. +;;; +;;; 2. Users of this software agree to make their best efforts (a) to +;;; return to the MIT Scheme project any improvements or extensions that +;;; they make, so that these may be included in future releases; and (b) +;;; to inform MIT of noteworthy uses of this software. +;;; +;;; 3. All materials developed as a consequence of the use of this +;;; software shall duly acknowledge such use, in accordance with the usual +;;; standards of acknowledging credit in academic research. +;;; +;;; 4. MIT has made no warrantee or representation that the operation of +;;; this software will be error-free, and MIT is under no obligation to +;;; provide any services, by way of maintenance, update, or otherwise. +;;; +;;; 5. In conjunction with products arising from the use of this material, +;;; there shall be no use of the name of the Massachusetts Institute of +;;; Technology nor of any adaptation thereof in any advertising, +;;; promotional, or sales literature without prior written consent from +;;; MIT in each case. hunk ./network.scm 4 +;;; See file COPYING. hunk ./network.scm 7 -;;; numerous changes to interface with Guile primitives. hunk ./network.scm 11 + "#include " hunk ./network.scm 16 - "extern int errno;" hunk ./network.scm 51 - (connect-socket sock addr) - sock)) + ;; Close the socket and free the file-descriptors + ;; if the connect fails: + (let ((connected #f)) + (dynamic-wind + (lambda () #f) + (lambda () (connect-socket sock addr) (set! connected #t)) + (lambda () + (if (not connected) + (close-socket sock)))) + (if connected + sock + #f)))) hunk ./network.scm 280 + no-declare ; for Linux hunk ./procobj.scm 2 -;;; Copyright (c) 1993, 1994, 1995 by Olin Shivers. - -;;; parts rewritten for Guile. +;;; Copyright (c) 1993, 1994, 1995 by Olin Shivers. See file COPYING. hunk ./procobj.scm 58 -(define (pid/proc? x) (or (proc? x) (and (integer? x) (>= pid 0)))) +(define (pid/proc? x) (or (proc? x) (and (integer? x) (>= x 0)))) hunk ./procobj.scm 314 - (reverse (reduce (lambda (result wptr) - (let ((val (weak-pointer-ref wptr))) - (if (and val (pred val)) - (cons wptr result) - result))) - '() - lis))) + (fold-right (lambda (wptr result) (let ((val (weak-pointer-ref wptr))) + (if (and val (pred val)) + (cons wptr result) + result))) + '() + lis)) hunk ./rdelim.scm 34 -;;; The C primitive %READ-DELIMITED-FDPORT!/ERRNO relies on knowing the -;;; representation of character sets. If these are changed from their -;;; current representation as 256-element strings, this code must be changed -;;; as well. +;;; The C primitives %READ-DELIMITED-FDPORT!/ERRNO and +;;; %SKIP-CHAR-SET-FDPORT/ERRNO rely on knowing the representation of +;;; character sets. If these are changed from their current representation, +;;; this code must be changed as well. hunk ./rdelim.scm 195 -; (let ((delims (->char-set delims))) +; (let* ((delims (->char-set delims)) +; (sdelims (char-set:s delims))) + hunk ./rdelim.scm 204 -; (%read-delimited-fdport!/errno delims buf gobble? +; (%read-delimited-fdport!/errno sdelims buf gobble? hunk ./rdelim.scm 254 - (let ((port (:optional maybe-port (current-input-port))) - (cset (->char-set skip-chars))) + (let* ((port (:optional maybe-port (current-input-port))) + (cset (->char-set skip-chars)) + (scset (char-set:s cset))) hunk ./rdelim.scm 264 -; (receive (err num-read) (%skip-char-set-fdport/errno cset port) +; (receive (err num-read) (%skip-char-set-fdport/errno scset port) hunk ./rdelim.scm 295 -(define blank-line-regexp (make-regexp "^[ \t]*\n$")) +(define blank-line-regexp (rx bos (* white) #\newline eos)) hunk ./rdelim.scm 306 - ((regexp-exec blank-line-regexp line) (lp)) + ((regexp-search? blank-line-regexp line) (lp)) hunk ./rdelim.scm 313 - (not (regexp-exec blank-line-regexp line))) + (not (regexp-search? blank-line-regexp line))) hunk ./rw.scm 2 -;;; Copyright (c) 1993 by Olin Shivers. -;;; modified to use Guile primitives. +;;; Copyright (c) 1993 by Olin Shivers. See file COPYING. hunk ./rx/cond-package.scm 1 +(define-structure conditionals + (export (define-simple-syntax :syntax) + (when :syntax) + (unless :syntax) + (? :syntax) + (switchq :syntax) + (switch :syntax) + (prog0 :syntax) + (land* :syntax)) + (open scheme) + (begin + +;;; (define-simple-syntax (name subforms ...) expansion) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define-syntax define-simple-syntax + (syntax-rules () + ((define-simple-syntax (name subforms ...) expansion) + (define-syntax name (syntax-rules () ((name subforms ...) expansion)))))) + + +;;; ? = COND +;;; (WHEN test body ...) (SWITCHQ = key clause ...) +;;; (UNLESS test body ...) (SWITCH = key clause ...) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Handy conditional forms. ? is so short that it renders WHEN pretty +;;; much useless. + +(define-simple-syntax (when test body ...) + (if test (begin body ...))) + +(define-simple-syntax (unless test body ...) + (if (not test) (begin body ...))) + +;;; ? is synonym for COND. +(define-simple-syntax (? clause ...) (cond clause ...)) + + +;;; (PROG0 val-exp exp ...) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define-simple-syntax (prog0 val-exp exp ...) + (let ((v val-exp)) exp ... v)) + + +;;; (land* (clause ...) body ...) -*- Scheme -*- +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Evaluate each clause. If any clause returns false, land* stops and +;;; returns false. If all the clauses evaluate to a true value, return +;;; the value of the body. +;;; +;;; The difference between LAND* and AND is that LAND* binds names to +;;; the values of its clauses, which may be used by subsequent clauses. +;;; Clauses are of the form +;;; (var exp) ; binds VAR to the value of EXP. +;;; (exp) ; No binding. +;;; var ; Reference -- no binding. +;;; +;;; Example: +;;; (land* ((probe (assq key alist))) +;;; (cdr probe)) +;;; +;;; LAND* is due to Oleg Kiselyov (http://pobox.com/~oleg); I wrote this +;;; simple implementation as a high-level R5RS DEFINE-SYNTAX macro. +;;; Olin 98/9/29 + +(define-syntax land* + (syntax-rules () + ((land* () body ...) (begin body ...)) + + ((land* ((var exp) clause ...) body ...) + (let ((var exp)) (and var (land* (clause ...) body ...)))) + + ((land* ((#f exp) clause ...) body ...) + (and exp (land* (clause ...) body ...))) + + ((land* ((exp) clause ...) body ...) + (and exp (land* (clause ...) body ...))) + + ((land* (var clause ...) body ...) + (and var (land* (clause ...) body ...))))) + + + +;;; Like CASE, but you specify the key-comparison procedure. +;;; SWITCH evaluates its keys each time through the conditional. +;;; SWITCHQ keys are not evaluated -- are simply constants. +;;; (switchq string=? (vector-ref vec i) +;;; (("plus" "minus") ...) +;;; (("times" "div") ...) +;;; (else ...)) + +(define-simple-syntax (switchq compare key clause ...) + (let ((k key) ; Eval KEY and COMPARE + (c compare)) ; just once, then call %switch. + (%switchq c k clause ...))) ; C, K are vars, hence replicable. + +(define-syntax %switchq + (syntax-rules (else) + ((%switchq compare key ((key1 ...) body1 body2 ...) rest ...) + (if (or (compare key 'key1) ...) + (begin body1 body2 ...) + (%switchq compare key rest ...))) + + ((%switchq compare key ((key1 ...)) rest ...) ; Null body. + (if (not (or (compare key 'key1) ...)) + (%switchq compare key rest ...))) + + ((%switchq compare key (else body ...)) + (begin body ...)) + + ((%switchq compare key) '#f))) + + +(define-simple-syntax (switch compare key clause ...) + (let ((k key) ; Eval KEY and COMPARE + (c compare)) ; just once, then call %switch. + (%switch c k clause ...))) ; C, K are vars, hence replicable. + +(define-syntax %switch + (syntax-rules (else) + ((%switch compare key ((key1 ...) body1 body2 ...) rest ...) + (if (or (compare key key1) ...) + (begin body1 body2 ...) + (%switch compare key rest ...))) + + ((%switch compare key ((key1 ...)) rest ...) ; Null body. + (if (not (or (compare key key1) ...)) + (%switch compare key rest ...))) + + ((%switch compare key (else body ...)) + (begin body ...)) + + ((%switch compare key) '#f))) + +;;; I can't get this to work -- S48 complains "too many ...'s". +;(define-syntax switchq +; (syntax-rules (else) +; ((switchq compare key clause ...) +; (letrec-syntax ((%switchq (syntax-rules (else) +; ((%switchq compare key +; ((key1 ...) body1 body2 ...) rest ...) +; (if (or (compare key 'key1) ...) +; (begin body1 body2 ...) +; (%switchq compare key rest ...))) +; +; ; Null body. +; ((%switchq compare key ((key1 ...)) rest ...) +; (if (not (or (compare key 'key1) ...)) +; (%switchq compare key rest ...))) +; +; ((%switchq compare key (else body ...)) +; (begin body ...)) +; +; ((%switchq compare key) '#f)))) +; +; (let ((k key) ; Eval KEY and COMPARE +; (c compare)) ; just once, then call %switch. +; (%switchq c k clause ...)))))); C, K are vars, hence replicable. +)) hunk ./rx/let-match.scm 1 +;;; These are some macros to support using regexp matching. + +(define-structure let-match-package + (export (let-match :syntax) + (if-match :syntax) + (match-cond :syntax)) + (for-syntax (open scheme + signals)) ; For ERROR + + (open scsh scheme) + (access signals) ; for ERROR + + (begin + +;;; (let-match m mvars body ...) +;;; Bind the vars in MVARS to the match & submatch strings of match data M, +;;; and eval the body forms. #F is allowed in the MVARS list, as a don't-care +;;; parameter. +;;; +;;; (if-match m mvars conseq alt) +;;; The same as LET-MATCH -- eval the CONSEQ form in the scope of the +;;; bound MVARS. However, if the match data M evaluates to false, instead +;;; of blowing up, we execute the ALT form instead. + +(define-syntax let-match + (lambda (exp r c) + (if (< (length exp) 3) + (error "No match-vars list in LET-MATCH" exp)) + (let ((m (cadr exp)) ; The match expression + (mvars (caddr exp)) ; The match vars + (body (cdddr exp)) ; The expression's body forms + + (%begin (r 'begin)) + (%match:substring (r 'match:substring)) + (%let* (r 'let*))) + + (cond ((null? mvars) `(,%begin ,@body)) + + ((pair? mvars) + (let* ((msv (or (car mvars) (r 'match-val))) ; "match-struct var" + (sm-bindings (let recur ((i 0) (vars (cdr mvars))) + (if (pair? vars) + (let ((var (car vars)) + (bindings (recur (+ i 1) (cdr vars)))) + (if var + (cons `(,var (,%match:substring ,msv ,i)) + bindings) + bindings)) + '())))) + `(,%let* ((,msv ,m) ,@sm-bindings) ,@body))) + + + (else (error "Illegal match-vars list in LET-MATCH" mvars exp)))))) + +(define-syntax if-match + (syntax-rules () + ((if-match match-exp mvars on-match no-match) + (cond (match-exp => (lambda (m) (let-match m mvars on-match))) + (else no-match))))) + +;;; (MATCH-COND ( ...) +;;; (TEST ...) +;;; (TEST => ) +;;; (ELSE ...)) +;;; +;;; The first clause is as-in IF-MATCH; the next three clauses are as-in COND. +;;; +;;; It would be slicker if we could *add* extra clauses to the syntax +;;; of COND, but Scheme macros aren't extensible this way. + +;;; Two defs. The other expander produces prettier output -- one COND +;;; rather than a mess of nested IF's. +;(define-syntax match-cond +; (syntax-rules (else test =>) +; ((match-cond (else body ...) clause2 ...) (begin body ...)) +; +; ((match-cond) (cond)) +; +; ((match-cond (test exp => proc) clause2 ...) +; (let ((v exp)) (if v (proc v) (match-cond clause2 ...)))) +; +; ((match-cond (test exp body ...) clause2 ...) +; (if exp (begin body ...) (match-cond clause2 ...))) +; +; ((match-cond (test exp) clause2 ...) +; (or exp (match-cond clause2 ...))) +; +; ((match-cond (match-exp mvars body ...) clause2 ...) +; (if-match match-exp mvars (begin body ...) +; (match-cond clause2 ...))))) + +(define-syntax match-cond + (syntax-rules () + ((match-cond clause ...) (match-cond-aux () clause ...)))) + +(define-syntax match-cond-aux + (syntax-rules (test else) + + ;; No more clauses. + ((match-cond-aux (cond-clause ...)) + (cond cond-clause ...)) + + ;; (TEST . ) + ((match-cond-aux (cond-clause ...) + (test . another-cond-clause) clause2 ...) + (match-cond-aux (cond-clause ... another-cond-clause) + clause2 ...)) + + ;; (ELSE ...) + ((match-cond-aux (cond-clause ...) + (else body ...) clause2 ...) + (match-cond-aux (cond-clause ... (else body ...)))) + + ;; ( ...) + ((match-cond-aux (cond-clause ...) + (match-exp mvars body ...) clause2 ...) + (match-cond-aux (cond-clause ... (match-exp => (lambda (m) + (let-match m mvars + body ...)))) + clause2 ...)))) +)) hunk ./rx/oldfuns.scm 1 +;;; These functions were dropped from the regexp API when I shifted scsh's +;;; regexps over to SREs. They are retained for backwards compatibility. +;;; -Olin 8/98 + +;(define (string-match re str . maybe-start) +; (apply regexp-search (->regexp re) str maybe-start)) + +;(define make-regexp posix-string->regexp) + +;(define regexp-exec regexp-search) + +(define (->regexp str-or-re) + (cond ((string? str-or-re) (posix-string->regexp str-or-re)) + ((regexp? str-or-re) str-or-re) + (else (error ->regexp + "Value must be either a Posix regexp string or a regexp value" + str-or-re)))) + +;(define (regexp-quote str) +; (receive (s lev pcount tvec) (regexp->posix-string (re-string str)) +; s)) hunk ./rx/parse.scm 1 +;;; Regexp support for Scheme +;;; Olin Shivers, January 1997, May 1998. + +;;; Todo: +;;; - Better unparsers for (word ...) and (word+ ...). +;;; - Unparse char-sets into set-diff SREs -- find a char set that's a +;;; tight bound, then get the difference. This would really pretty up +;;; things like (- alpha "aeiou") + +;;; Exports: +;;; (sre->regexp sre) SRE->ADT parser +;;; (regexp->sre re) ADT->SRE unparser +;;; +;;; Procedures that parse sexp regexps and translate ADTs for low-level macros: +;;; (parse-sre sre rename compare) +;;; (parse-sres sres rename compare) +;;; (regexp->scheme re rename) +;;; +;;; (char-set->in-pair cset) Char-set unparsing utility + +;;; Character-set dependencies: +;;; The only stuff in here dependent on the implementation's character type +;;; is the char-set parsing and unparsing, which deal with ranges of +;;; characters. We assume an 8-bit ASCII superset. + +;;; Imports: +;;; ? for COND, and SWITCHQ conditional form. +;;; every + +;;; This code is much hairier than it would otherwise be because of the +;;; the presence of , forms, which put a static/dynamic duality over +;;; a lot of the processing -- we have to be prepared to handle either +;;; re's or Scheme epressions that produce re's; char-sets or Scheme +;;; expressions that produce char-sets. It's a pain. +;;; +;;; See comments in re.scm ADT code about building regexp trees that have +;;; code in the record fields instead of values. +;;; +;;; The macro expander works by parsing the regexp form into an re record, +;;; and simplifying it. If the record is completely static, it is then +;;; translated, at macro-expand time, into a Posix regex string. If the +;;; regexp needs runtime values -- e.g, the computed from and to fields in +;;; (** "ha, " (- min 1) (+ max 1)) +;;; -- the expander instead produces Scheme ADT constructors to build +;;; the regexp at run-time. + + +;;; Parser +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; Is a parsed regexp completely determined statically, or does it +;;; have dynamic components (e.g., a ,@ or a computed char-set) +;;; in the form of embedded code in some of the regexp's fields? + +(define (static-regexp? re) + (? ((re-seq? re) (every static-regexp? (re-seq:elts re))) + ((re-choice? re) (every static-regexp? (re-choice:elts re))) + + ((re-char-set? re) (char-set? (re-char-set:cset re))) ; Might be code. + + ((re-repeat? re) ; FROM & TO fields might be code. + (let ((to (re-repeat:to re))) + (and (integer? (re-repeat:from re)) + (or (not to) (integer? to)) + (static-regexp? (re-repeat:body re))))) + + ((re-dsm? re) (static-regexp? (re-dsm:body re))) + ((re-submatch? re) (static-regexp? (re-submatch:body re))) + + (else (or (re-bos? re) (re-eos? re) ; Otw, if it's not + (re-bol? re) (re-eol? re) ; one of these, + (re-bow? re) (re-eow? re) ; then it's Scheme code. + (re-string? re))))) + + +;;; Two useful standard char sets +(define nonl-chars (char-set-invert (char-set #\newline))) +(define word-chars (char-set-union (char-set #\_) char-set:alphanumeric)) + +;;; Little utility that should be moved to scsh's utilities.scm +(define (partition pred lis) + (let recur ((in '()) (out '()) (lis lis)) + (if (pair? lis) + (let ((head (car lis)) + (tail (cdr lis))) + (if (pred head) + (recur (cons head in) out tail) + (recur in (cons head out) tail))) + (values in out)))) + + +(define (sre->regexp sre) + (parse-sre sre (lambda (x) x) equal?)) + + +;;; Parse a sexp regexp into a regexp value, which may be "dynamic" -- +;;; i.e., some slots may be filled with the Scheme code that will produce +;;; their true vaues. +;;; +;;; R & C are rename and compare functions for low-level macro expanders. + +;;; These two guys are little front-ends for the main routine. + +(define (parse-sre sre r c) (parse-sre/context sre #t #f r c)) + +(define (parse-sres sres r c) + (re-seq (map (lambda (sre) (parse-sre sre r c)) sres))) + + +;;; (parse-sre/context sre case-sensitive? cset? r c) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This is the main entry point. Parse SRE, given the lexical case-sensitivity +;;; flag CASE-SENSITIVE?. If CSET? is true, SRE *must* be parseable as a +;;; char-class SRE, and this function returns a character set, *not* a +;;; regexp value. If CSET? is false, SRE can be any SRE, and this function +;;; returns a regexp value. R and C are low-level macro rename and compare +;;; functions. + +(define (parse-sre/context sre case-sensitive? cset? r c) + (let ((%bos (r 'bos)) (%eos (r 'eos)) + (%bol (r 'bol)) (%eol (r 'eol)) + (%bow (r 'bow)) (%eow (r 'eow)) + + (%word (r 'word)) + + (%flush-submatches (r 'flush-submatches)) + (%coerce-dynamic-charset (r 'coerce-dynamic-charset)) + (%coerce-dynamic-regexp (r 'coerce-dynamic-regexp))) + + (let recur ((sre sre) + (case-sensitive? case-sensitive?) + (cset? cset?)) + + ;; Parse the sequence of regexp expressions SEQ with a lexical + ;; case-sensitivity context of CS?. + (define (parse-seq/context seq cs?) + (if cset? + (if (= 1 (length seq)) + (recur (car sre) cs? #t) + (error "Non-singleton sequence not allowed in char-class context." + seq)) + (re-seq (map (lambda (sre) (recur sre cs? cset?)) + seq)))) + + (define (parse-seq seq) (parse-seq/context seq case-sensitive?)) + (define (parse-char-class sre) (recur sre case-sensitive? #t)) + + (define (non-cset) ; Blow up if cset? is true. + (if cset? (error "Illegal SRE in char-class context." sre))) + + (? ((char? sre) (parse-char-re sre case-sensitive? cset?)) + ((string? sre) (parse-string-re sre case-sensitive? cset?)) + + ((c sre %bos) (non-cset) re-bos) + ((c sre %eos) (non-cset) re-eos) + + ((c sre %bol) (non-cset) re-bol) + ((c sre %eol) (non-cset) re-eol) + + ((c sre %bow) (non-cset) re-bow) + ((c sre %eow) (non-cset) re-eow) + ((c sre %word) (non-cset) re-word) + + ((pair? sre) + (case (car sre) + ((*) (non-cset) (re-repeat 0 #f (parse-seq (cdr sre)))) + ((+) (non-cset) (re-repeat 1 #f (parse-seq (cdr sre)))) + ((?) (non-cset) (re-repeat 0 1 (parse-seq (cdr sre)))) + ((=) (non-cset) (let ((n (cadr sre))) + (re-repeat n n (parse-seq (cddr sre))))) + ((>=) (non-cset) (re-repeat (cadr sre) #f (parse-seq (cddr sre)))) + ((**) (non-cset) (re-repeat (cadr sre) (caddr sre) + (parse-seq (cdddr sre)))) + + ;; Choice is special wrt cset? because it's "polymorphic". + ;; Note that RE-CHOICE guarantees to construct a char-set + ;; or single-char string regexp if all of its args are char + ;; classes. + ((| or) (let ((elts (map (lambda (sre) + (recur sre case-sensitive? cset?)) + (cdr sre)))) + (if cset? + (assoc-cset-op char-set-union 'char-set-union elts r) + (re-choice elts)))) + + ((: seq) (non-cset) (parse-seq (cdr sre))) + + ((word) (non-cset) (parse-seq `(,%bow ,@(cdr sre) ,%eow))) + ((word+) + (recur `(,(r 'word) (,(r '+) (,(r '&) (,(r '|) ,(r 'alphanum) "_") + (,(r '|) . ,(cdr sre))))) + case-sensitive? + cset?)) + + ((submatch) (non-cset) (re-submatch (parse-seq (cdr sre)))) + ((dsm) (non-cset) (re-dsm (parse-seq (cdddr sre)) + (cadr sre) + (caddr sre))) + + ;; We could be more aggressive and push the uncase op down into + ;; partially-static regexps, but enough is enough. + ((uncase) + (let ((re-or-cset (parse-seq (cdr sre)))) ; Depending on CSET?. + (if cset? + + (if (re-char-set? re-or-cset) ; A char set or code + (uncase-char-set re-or-cset) ; producing a char set. + `(,(r 'uncase) ,re-or-cset)) + + (if (static-regexp? re-or-cset) ; A regexp or code + (uncase re-or-cset) ; producing a regexp. + `(,(r 'uncase) + ,(regexp->scheme (simplify-regexp re-or-cset) r)))))) + + ;; These just change the lexical case-sensitivity context. + ((w/nocase) (parse-seq/context (cdr sre) #f)) + ((w/case) (parse-seq/context (cdr sre) #t)) + + ;; , and ,@ + ((unquote) + (let ((exp (cadr sre))) + (if cset? + `(,%coerce-dynamic-charset ,exp) + `(,%flush-submatches (,%coerce-dynamic-regexp ,exp))))) + ((unquote-splicing) + (let ((exp (cadr sre))) + (if cset? + `(,%coerce-dynamic-charset ,exp) + `(,%coerce-dynamic-regexp ,exp)))) + + ((~) (let* ((cs (assoc-cset-op char-set-union 'char-set-union + (map parse-char-class (cdr sre)) + r)) + (cs (if (char-set? cs) + (char-set-invert cs) + `(,(r 'char-set-invert) ,cs)))) + (if cset? cs (make-re-char-set cs)))) + + ((&) (let ((cs (assoc-cset-op char-set-intersection 'char-set-intersection + (map parse-char-class (cdr sre)) + r))) + (if cset? cs (make-re-char-set cs)))) + + ((-) (if (pair? (cdr sre)) + (let* ((cs1 (parse-char-class (cadr sre))) + (cs2 (assoc-cset-op char-set-union 'char-set-union + (map parse-char-class (cddr sre)) + r)) + (cs (if (and (char-set? cs1) (char-set? cs2)) + (char-set-difference cs1 cs2) + `(,(r 'char-set-difference) + ,(if (char-set? cs1) + (char-set->scheme cs1 r) + cs1) + . ,(if (char-set? cs2) + (list (char-set->scheme cs2 r)) + (cdr cs2)))))) + (if cset? cs (make-re-char-set cs))) + (error "SRE set-difference operator (- ...) requires at least one argument"))) + + ((/) (let ((cset (range-class->char-set (cdr sre) case-sensitive?))) + (if cset? cset (make-re-char-set cset)))) + + ((posix-string) + (if (and (= 1 (length (cdr sre))) + (string? (cadr sre))) + (posix-string->regexp (cadr sre)) + (error "Illegal (posix-string ...) SRE body." sre))) + + (else (if (every string? sre) ; A set spec -- ("wxyz"). + (let* ((cs (apply char-set-union + (map string->char-set sre))) + (cs (if case-sensitive? cs (uncase-char-set cs)))) + (if cset? cs (make-re-char-set cs))) + + (error "Illegal SRE" sre))))) + + ;; It must be a char-class name (ANY, ALPHABETIC, etc.) + (else (let ((cs (case sre + ((any) char-set:full) + ((nonl) nonl-chars) + ((lower-case lower) char-set:lower-case) + ((upper-case upper) char-set:upper-case) + ((alphabetic alpha) char-set:alphabetic) + ((numeric digit num) char-set:numeric) + ((alphanumeric alnum alphanum) char-set:alphanumeric) + ((punctuation punct) char-set:punctuation) + ((graphic graph) char-set:graphic) + ((blank) char-set:blank) + ((whitespace space white) char-set:whitespace) + ((printing print) char-set:printing) + ((control cntrl) char-set:control) + ((hex-digit xdigit hex) char-set:hex-digit) + ((ascii) char-set:ascii) + (else (error "Illegal regular expression" sre))))) + (if cset? cs (make-re-char-set cs)))))))) + + +;;; In a CSET? true context, S must be a 1-char string; convert to a char set +;;; according to CASE-SENSITIVE? setting. +;;; In a CSET? false context, convert S to a string re (CASE-SENSITIVE? true), +;;; or a sequence of char-sets (CASE-SENSITIVE? false). + +(define (parse-string-re s case-sensitive? cset?) + (if (= 1 (string-length s)) + (parse-char-re (string-ref s 0) case-sensitive? cset?) + (if cset? + (error "Non-singleton string not allowed in char-class context." s) + ((if case-sensitive? make-re-string uncase-string) s)))) + +(define (parse-char-re c case-sensitive? cset?) + (if case-sensitive? + (if cset? (char-set c) (make-re-string (string c))) + (let ((cset (char-set (char-upcase c) (char-downcase c)))) + (if cset? cset (make-re-char-set cset))))) + + +;;; "Apply" the associative char-set function OP to the char-sets ELTS. +;;; If any of the ELTS is Scheme code instead of a real char set, then +;;; we instead produce Scheme code for the op, using OP-NAME as the name +;;; of the function, and R for the macro renamer function. + +(define (assoc-cset-op op op-name elts r) + (receive (csets code-chunks) (partition char-set? elts) + (if (pair? code-chunks) + (? ((pair? csets) + `(,(r op-name) ,(char-set->scheme (apply op csets) r) + . ,code-chunks)) + ((pair? (cdr code-chunks)) `(,(r op-name) . ,code-chunks)) + (else (car code-chunks))) ; Just one. + (apply op csets)))) + +;;; Parse a (/ ...) char-class into a character set in +;;; case-sensitivity context CS?. +;;; Each can be a character or a string of characters. + +(define (range-class->char-set range-specs cs?) + (let* ((specs (apply string-append + (map (lambda (spec) (if (char? spec) (string spec) spec)) + range-specs))) + (len (string-length specs)) + (cset (char-set-copy char-set:empty))) + (if (odd? len) + (error "Unmatched range specifier" range-specs) + (let lp ((i (- len 1)) (cset cset)) + (if (< i 0) + (if cs? cset (uncase-char-set cset)) ; Case fold if necessary. + (lp (- i 2) + (char-set-union! + cset + (ascii-range->char-set (char->ascii (string-ref specs (- i 1))) + (+ 1 (char->ascii (string-ref specs i))))))))))) + +;;; (regexp->scheme re r) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Translate a regexp value RE into raw Scheme code that will create it, with +;;; calls to the regexp ADT constructor functions. R is a renaming function +;;; provided by low-level macro expanders. + +(define (regexp->scheme re r) + (let ((%re-bos (r 're-bos)) (%re-eos (r 're-eos)) + (%re-bol (r 're-bol)) (%re-eol (r 're-eol)) + (%re-bow (r 're-bow)) (%re-eow (r 're-eow)) + (%list (r 'list))) + + (let recur ((re re)) + ;; If (fetch-posix re) = #f, produce (OP . ARGS); + ;; Otherwise, produce (OP/POSIX ,@ARGS '). + (define (doit op op/posix args fetch-posix) + (? ((fetch-posix re) => + (lambda (psx) `(,(r op/posix) ,@args + ',(cre:string psx) ',(cre:tvec psx)))) + + (else `(,(r op) . ,args)))) + + (? ((re-string? re) (if (re-trivial? re) (r 're-trivial) ; Special hack + (doit 'make-re-string 'make-re-string/posix + `(,(re-string:chars re)) + re-string:posix))) + + ((re-seq? re) (doit '%make-re-seq '%make-re-seq/posix + `((,%list . ,(map recur (re-seq:elts re))) + ,(re-seq:tsm re)) + re-seq:posix)) + + ((re-choice? re) (doit '%make-re-choice '%make-re-choice/posix + `((,%list . ,(map recur (re-choice:elts re))) + ,(re-choice:tsm re)) + re-choice:posix)) + + ((re-char-set? re) (if (re-any? re) (r 're-any) ; Special hack for ANY. + (doit 'make-re-char-set 'make-re-char-set/posix + `(,(char-set->scheme (re-char-set:cset re) r)) + re-char-set:posix))) + + ((re-repeat? re) (doit '%make-re-repeat '%make-re-repeat/posix + `(,(re-repeat:from re) + ,(re-repeat:to re) + ,(recur (re-repeat:body re)) + ,(re-repeat:tsm re)) + re-repeat:posix)) + + ((re-dsm? re) (doit '%make-re-dsm '%make-re-dsm/posix + `(,(recur (re-dsm:body re)) + ,(re-dsm:pre-dsm re) + ,(re-dsm:tsm re)) + re-dsm:posix)) + + ((re-submatch? re) (doit '%make-re-submatch '%make-re-submatch/posix + `(,(recur (re-submatch:body re)) + ,(re-submatch:pre-dsm re) + ,(re-submatch:tsm re)) + re-submatch:posix)) + + ((re-bos? re) %re-bos) + ((re-eos? re) %re-eos) + ((re-bol? re) %re-bol) + ((re-eol? re) %re-eol) + ((re-bow? re) %re-bow) + ((re-eow? re) %re-eow) + + (else re))))) + + + +;;; Classify a character set. +;;; We pass in a char set CS and 15 parameters, one for each of the +;;; standard char sets. If we can classify CS as any of these char +;;; sets, we return the corresponding parameter's value, otw #f. +;;; +;;; This is gratuitously optimised by probing cset with a couple of +;;; witness chars (a,A,1,space), and doing an initial filter based +;;; on these witnesses. + +(define (try-classify-char-set cs + full nonl lower upper alpha num alphanum + punct graph white print ctl hex blank ascii) + (let ((a (char-set-contains? cs #\a)) + (biga (char-set-contains? cs #\A)) + (one (char-set-contains? cs #\1)) + (space (char-set-contains? cs #\space))) + + (if a + (if biga + (if space + (and one (switch char-set= cs + ((char-set:full) full) + ((nonl-chars) nonl) + ((char-set:printing) print) + ((char-set:ascii) ascii) + (else #f))) + (if one + (switch char-set= cs + ((char-set:alphanumeric) alphanum) + ((char-set:graphic) graph) + ((char-set:hex-digit) hex) + (else #f)) + (and (char-set= cs char-set:alphabetic) alpha))) + (and (char-set= cs char-set:lower-case) lower)) ; a, not A + + (if biga + (and (not space) (char-set= cs char-set:upper-case) upper) + (if one + (and (not space) (char-set= cs char-set:numeric) num) + (if space + (switch char-set= cs + ((char-set:whitespace) white) + ((char-set:blank) blank) + (else #f)) + (switch char-set= cs + ((char-set:punctuation) punct) + ((char-set:control) ctl) + (else #f)))))))) + + +(define (char-set->scheme cs r) + (let ((try (lambda (cs) + (try-classify-char-set cs + 'char-set:full 'nonl-chars + 'char-set:lower-case 'char-set:upper-case + 'char-set:alphabetic 'char-set:numeric + 'char-set:alphanumeric 'char-set:punctuation + 'char-set:graphic 'char-set:whitespace + 'char-set:printing 'char-set:control + 'char-set:hex-digit 'char-set:blank + 'char-set:ascii)))) + (? ((not (char-set? cs)) cs) ; Dynamic -- *already* Scheme code. + ((char-set-empty? cs) (r 'char-set:empty)) + ((try cs) => r) + ((try (char-set-invert cs)) => + (lambda (name) `(,(r 'char-set-invert) ,name))) + + (else + (receive (loose+ ranges+) (char-set->in-pair cs) + (receive (loose- ranges-) (char-set->in-pair (char-set-invert cs)) + (let ((makeit (r 'spec->char-set))) + (if (< (+ (length loose-) (* 12 (length ranges-))) + (+ (length loose+) (* 12 (length ranges+)))) + `(,makeit #f ,(list->string loose-) ',ranges-) + `(,makeit #t ,(list->string loose+) ',ranges+))))))))) + + + +;;; This code needs work. + +(define (char-set->sre cs r) + (if (char-set? cs) + (let ((try (lambda (cs) + (try-classify-char-set cs + 'any 'nonl + 'lower-case 'upper-case + 'alphabetic 'numeric + 'alphanumeric 'punctuation + 'graphic 'whitespace + 'printing 'control + 'hex-digit 'blank + 'ascii))) + (nchars (char-set-size cs))) + (? ((zero? nchars) `(,(r '|))) + ((= 1 nchars) (apply string (char-set-members cs))) + ((try cs) => r) + ((try (char-set-invert cs)) => + (lambda (name) `(,(r '~) ,name))) + (else (receive (cs rp comp?) (char-set->in-sexp-spec cs) + (let ((args (append (? ((string=? cs "") '()) + ((= 1 (string-length cs)) `(,cs)) + (else `((,cs)))) + (if (string=? rp "") '() + (list `(,(r '/) ,rp)))))) + (if (and (= 1 (length args)) (not comp?)) + (car args) + `(,(r (if comp? '~ '|)) . ,args))))))) + + `(,(r 'unquote) ,cs))) ; dynamic -- , + + +;;; Unparse an re into a *list* of SREs (representing a sequence). +;;; This is for rendering the bodies of DSM, SUBMATCH, **, *, =, >=, and &'s, +;;; that is, forms whose body is an implicit sequence. + +(define (regexp->sres/renamer re r) + (if (re-seq? re) + (let ((elts (re-seq:elts re))) + (if (pair? elts) + (map (lambda (re) (regexp->sre/renamer re r)) elts) + (let ((tsm (re-seq:tsm re)) + (%dsm (r 'dsm))) + (if (zero? tsm) '() `((,%dsm ,tsm 0)))))) ; Empty sequence + (list (regexp->sre/renamer re r)))) ; Not a seq + + +(define (regexp->sre/renamer re r) + (let recur ((re re)) + (? ((re-string? re) (re-string:chars re)) + + ((re-seq? re) `(,(r ':) . ,(regexp->sres/renamer re r))) + + ((re-choice? re) + (let ((elts (re-choice:elts re)) + (%| (r '|))) + (if (pair? elts) + `(,%| . ,(map recur elts)) + (let ((tsm (re-choice:tsm re))) + (if (zero? tsm) `(,%|) `(,(r 'dsm) ,tsm 0 (,%|))))))) + + ((re-char-set? re) (char-set->sre (re-char-set:cset re) r)) + + ((re-repeat? re) + (let ((from (re-repeat:from re)) + (to (re-repeat:to re)) + (bodies (regexp->sres/renamer (re-repeat:body re) r))) + (? ((and (eqv? from 0) (not to)) `(,(r '*) . ,bodies)) + ((and (eqv? from 0) (eqv? to 1)) `(,(r '?) . ,bodies)) + ((and (eqv? from 1) (not to)) `(,(r '+) . ,bodies)) + ((eqv? from to) `(,(r '=) ,to . bodies)) + (to `(,(r '**) ,from ,to . ,bodies)) + (else `(,(r '>=) ,from . ,bodies))))) + + ((re-dsm? re) + `(,(r 'dsm) ,(re-dsm:pre-dsm re) ,(re-dsm:post-dsm re) + . ,(regexp->sres/renamer (re-dsm:body re) r))) + + ((re-submatch? re) + `(,(r 'submatch) . ,(regexp->sres/renamer (re-submatch:body re) r))) + + ((re-bos? re) (r 'bos)) + ((re-eos? re) (r 'eos)) + ((re-bol? re) (r 'bol)) + ((re-eol? re) (r 'eol)) + ((re-bow? re) (r 'bow)) + ((re-eow? re) (r 'eow)) + + (else re)))) ; Presumably it's code. + +(define (regexp->sre re) (regexp->sre/renamer re (lambda (x) x))) + +;;; Character class unparsing +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This is the code that takes char-sets and converts them into forms suitable +;;; for char-class SRE's or [...] Posix strings. + +;;; Map a char-set to an (| ("...") (/"...")) or (~ ("...") (/"...")) SRE. +;;; We try it both ways, and return whichever is shortest. +;;; We return three values: +;;; - a string of chars that are members in the set; +;;; - a string of chars that, taken in pairs specifying ranges, +;;; give the rest of the members of the set. +;;; - A boolean COMP?, which says whether the set should be complemented +;;; (~ ...) or taken as-is (| ...). +;;; +;;; E.g., ["!?.", "AZaz09", #t] + +(define (char-set->in-sexp-spec cset) + (let ((->sexp-pair (lambda (cset) + (receive (loose ranges) (char-set->in-pair cset) + (values (apply string loose) + (apply string + (fold-right (lambda (r lis) + `(,(car r) ,(cdr r) . ,lis)) + '() ranges))))))) + (receive (cs+ rp+) (->sexp-pair cset) + (receive (cs- rp-) (->sexp-pair (char-set-invert cset)) + (if (< (+ (string-length cs-) (string-length rp-)) + (+ (string-length cs+) (string-length rp+))) + (values cs- rp- #t) + (values cs+ rp+ #f)))))) + +;;; Return 2 values characterizing the char set in a run-length encoding: +;;; - LOOSE List of singleton chars -- elts of the set. +;;; - RANGES List of (from . to) char ranges. +;;; +;;; E.g., [(#\! #\? #\.) +;;; ((#\A . #\Z) (#\a . #\z) (#\0 . #\9))] + +(define (char-set->in-pair cset) + (let ((add-range (lambda (from to loose ranges) + (if from (case (- to from) + ((0) (values (cons (ascii->char from) loose) + ranges)) + ((1) (values `(,(ascii->char from) + ,(ascii->char to) + . ,loose) + ranges)) + ((2) (values `(,(ascii->char from) + ,(ascii->char (+ from 1)) + ,(ascii->char to) + . ,loose) + ranges)) + (else (values loose + `((,(ascii->char from) . + ,(ascii->char to)) + . ,ranges)))) + (values loose ranges))))) + + (let lp ((i 127) (from #f) (to #f) (loose '()) (ranges '())) + (if (< i 0) + (add-range from to loose ranges) + + (let ((i-1 (- i 1))) + (if (char-set-contains? cset (ascii->char i)) + (if from + (lp i-1 i to loose ranges) ; Continue the run. + (lp i-1 i i loose ranges)) ; Start a new run. + + ;; If there's a run going, finish it off. + (receive (loose ranges) (add-range from to loose ranges) + (lp i-1 #f #f loose ranges)))))))) hunk ./rx/posixstr.scm 1 +;;; Regexp-ADT -> Posix-string translator. +;;; Olin Shivers January 1997, May 1998. + +;;; - If the regexp value contains nul character constants, or character sets +;;; that contain the nul character, they will show up in the Posix string +;;; we produce. Spencer's C regexp engine can handle regexp strings that +;;; contain nul bytes, but this might blow up other implementations -- that +;;; is, the nul byte might prematurely terminate the C string passed to the +;;; regexp engine. +;;; +;;; - The code is ASCII-specific in only one place: the expression for +;;; a regexp that matches nothing is the 6-char pattern "[^\000-\177]", +;;; which assumes a 7-bit character code. Note that the static simplifier +;;; can remove *all* occurences of this "empty regexp" except for the +;;; un-simplifiable case of a single, top-level empty regexp, e.g. +;;; (rx (in)) +;;; We can handle this one special case specially, so we shouldn't *ever* +;;; have to produce this ASCII-specific pattern. + +;;; Exports: regexp->posix-string + +;;; Todo: A dumb, simple char-set renderer. + +;;; These functions translate static regular expressions into Posix regexp +;;; strings. They generally return four values: +;;; - string (regexp) +;;; +;;; - syntax level: 0 parenthesized exp, 1 piece, 2 branch, 3 top +;;; ("piece", "branch" and "top" are Spencer's terms): +;;; + A parenthesized exp is syntactically equivalent to a piece. +;;; (But it's useful to know when an exp is parenthesized for +;;; eliminating redundant submatch-generated parens.) +;;; + A piece is something that would bind to a following * +;;; ("a" but not "aa"). +;;; + A branch is a sequence of pieces -- something that would bind to a | +;;; ("ab*d" but not "ab*|d"). That is, a branch is not allowed to contain +;;; top-level |'s. +;;; + Top is for a sequence of branches -- "a|b*c|d". +;;; +;;; - paren count in the returned string. +;;; +;;; [This is a newer description; is it correct?] +;;; - A vector mapping submatches (vector index 0 is submatch 1) +;;; to the paren for that submatch (the first paren is paren #1). +;;; +;;; [This is my original description.] +;;; - Vector of parens numbers used for submatching. The first paren is +;;; numbered 1. #F means a dead submatch -- one we can tell statically +;;; will never match anything. + +;;; Non-R4RS imports: +;;; ? = COND +;;; Multiple-value return: VALUES RECEIVE CALL-WITH-VALUES +;;; SORT-LIST + + +;;; Useful little utility -- pad vector V with +;;; PRE initial and POST following #f's. + +(define (pad-vector pre post v) + (if (= pre post 0) v + (let* ((vlen (vector-length v)) + (alen (+ pre post vlen)) + (ans (make-vector alen #f))) + (do ((from (- vlen 1) (- from 1)) + (to (+ pre vlen -1) (- to 1))) + ((< from 0)) + (vector-set! ans to (vector-ref v from))) + ans))) + +(define (n-falses n) (make-vector n #f)) + + +;;; There's no representation for regexps that never match anything (e.g., +;;; (|)) in strict Posix notation. When we get one of these, we treat it +;;; specially, producing [#f #f #f #f]. +;;; +;;; We can always detect these empty regexps, because they always simplify +;;; to one of these two values: +;;; - (make-re-char-set char-set:empty) +;;; - (dsm m n (make-re-char-set char-set:empty)) + +(define (simple-empty-re? re) + (or (and (re-char-set? re) + (char-set-empty? (re-char-set:cset re))) + (and (re-dsm? re) + (simple-empty-re? (re-dsm:body re))))) + + +;;; Top-level +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (regexp->posix-string re) + ;; We *must* simplify, to guarantee correct translation. + (let ((re (simplify-regexp re))) + (if (simple-empty-re? re) (values #f #f #f #f) + (translate-regexp re)))) + + +(define (translate-regexp re) + (? ((re-string? re) (translate-string (re-string:chars re))) + + ((re-repeat? re) (translate-repeat re)) + ((re-choice? re) (translate-choice re)) + ((re-seq? re) (translate-seq re)) + ((re-char-set? re) (translate-char-set (re-char-set:cset re))) + + ((re-submatch? re) (translate-submatch re)) + + ((re-bos? re) (values "^" 1 0 '#())) + ((re-eos? re) (values "$" 1 0 '#())) + + ((re-bol? re) (error "Beginning-of-line regexp not supported in this implementation.")) + ((re-eol? re) (error "End-of-line regexp not supported in this implementation.")) + + ((re-bow? re) (values "[[:<:]]" 1 0 '#())) ; These two are + ((re-eow? re) (values "[[:>:]]" 1 0 '#())) ; Spencer-specific. + + ((re-dsm? re) (let ((pre-dsm (re-dsm:pre-dsm re)) + (body (re-dsm:body re))) + (translate-dsm body pre-dsm + (- (re-dsm:tsm re) + (+ pre-dsm (re-tsm body)))))) + + (else (error "Illegal regular expression" re)))) + + +;;; Translate reloc-elt ELT = (N . RE) from a sequence or choice +;;; into a Posix string. +;;; - Relocate the submatch indices by PREV-PCOUNT. +;;; (That is, assume rendering preceding elts used PREV-PCOUNT parens.) +;;; - Assume preceding elements allocated PREV-SMCOUNT submatches +;;; (we may have to pad our returned submatches string with some +;;; initial #F's to account for dead submatches PREV-SMCOUNT through N.) +;;; - If SUB-LEV3? is true, the result string is guaranteed to be < level 3. +;;; This is used by the & and | translators. +;;; - Returns the usual 4 values plus the final submatch count including +;;; this regexp. + +(define (translate-elt elt prev-pcount prev-smcount sub-lev3?) + (let ((offset (car elt)) + (re (cdr elt))) + + (receive (s level pcount submatches) (translate-regexp re) + + ;; Relocate submatch indices by OFFSET and force level <3, if needed: + (receive (s level pcount submatches) + (if (and sub-lev3? (= level 3)) + (values (string-append "(" s ")") + 0 + (+ pcount 1) + (mapv (lambda (sm) (and sm (+ prev-pcount 1 sm))) + submatches)) + (values s level pcount + (mapv (lambda (sm) (and sm (+ prev-pcount sm))) + submatches))) + + ;; Tack onto submatches as many initial #F's as needed to bump + ;; the previous submatches count from PREV-SMCOUNT to OFFSET. + (values s level pcount + (pad-vector (- offset prev-smcount) 0 submatches) + (+ offset (re-tsm re))))))) + + + +;;; Force the string to be level < 3 by parenthesizing it if necessary. + +(define (paren-if-necessary s lev pcount submatches) + (if (< lev 3) + (values s lev pcount submatches) + (values (string-append "(" s ")") + 0 + (+ pcount 1) + (mapv (lambda (sm) (and sm (+ 1 sm))) + submatches)))) + + + +;;; (: re1 ... ren) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (translate-seq re) + (let ((elts (re-seq:elts re)) + (tsm (re-seq:tsm re))) + (let recur ((elts elts) (prev-pcount 0) (prev-smcount 0)) + ;; Render a sequence tail ELTS, assuming the previous elements translated + ;; to a string with PREV-PCOUNT parens, and allocated PREV-SMCOUNT + ;; submatches. + (if (pair? elts) + (let* ((elt (car elts)) + (elts (cdr elts))) + + (receive (s1 level1 pcount1 submatches1) + (translate-regexp elt) + + (receive (s1 level1 pcount1 submatches1) + (paren-if-necessary s1 level1 pcount1 submatches1) + + (receive (s level pcount submatches) + (recur elts + (+ pcount1 prev-pcount) + (+ prev-smcount (re-tsm elt))) + + (values (string-append s1 s) + 2 + (+ pcount1 pcount) + (vector-append (mapv (lambda (p) (and p (+ p prev-pcount))) + submatches1) + submatches)))))) + + (values "" 2 0 '#()))))) ; Empty seq + + + +;;; (| re1 ... ren) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (translate-choice re) + (let ((elts (re-choice:elts re)) + (tsm (re-choice:tsm re))) + (if (pair? elts) + (let recur ((elts elts) (prev-pcount 0) (prev-smcount 0)) + ;; ELTS is a non-empty choice tail. Render it, assuming the + ;; previous elements translated to a string with PREV-PCOUNT parens, + ;; and allocated PREV-SMCOUNT submatches. + (let ((elt (car elts)) (tail (cdr elts))) + (receive (s1 level1 pcount1 submatches1) (translate-regexp elt) + (let ((submatches1 (mapv (lambda (sm) (and sm (+ sm prev-pcount))) + submatches1))) + (if (pair? tail) + (receive (s level pcount submatches) + (recur tail + (+ pcount1 prev-pcount) + (+ prev-smcount (re-tsm elt))) + (values (string-append s1 "|" s) 3 + (+ pcount1 pcount) + (vector-append submatches1 submatches))) + + (values s1 level1 pcount1 submatches1)))))) + + (values "[^\000-\377]" 1 0 (n-falses tsm))))) ; Empty choice. + + + +;;; Repeated cases: * + ? and {n,m} ranges. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (translate-repeat re) + (let ((from (re-repeat:from re)) + (to (re-repeat:to re)) + (body (re-repeat:body re)) + (tsm (re-repeat:tsm re))) + + (? ((and to (> from to)) ; Unsatisfiable + (values "[^\000-\377]" 1 0 (n-falses tsm))) + + ((and to (= from to 1)) (translate-seq body)) ; RE{1,1} => RE + + ((and to (= to 0)) ; RE{0,0} => "" + (values "" 2 0 (n-falses tsm))) + + (else ; General case + (receive (s level pcount submatches) (translate-regexp body) + (receive (s level pcount submatches) ; Coerce S to level <2. + (if (> level 1) + (values (string-append "(" s ")") + 0 + (+ pcount 1) + (mapv (lambda (i) (and i (+ i 1))) submatches)) + (values s level pcount submatches)) + + (values (if to + (? ((and (= from 0) (= to 1)) (string-append s "?")) + ((= from to) + (string-append s "{" (number->string to) "}")) + (else + (string-append s "{" (number->string from) + "," (number->string to) "}"))) + (? ((= from 0) (string-append s "*")) + ((= from 1) (string-append s "+")) + (else (string-append s "{" (number->string from) ",}")))) + 1 pcount submatches))))))) + + + +;;; Submatch +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (translate-submatch re) + (let ((body (re-submatch:body re)) + (pre-dsm (re-submatch:pre-dsm re))) + + ;; Translate the body, along with any leading or trailing dead submatches. + (receive (s level pcount submatches) + (translate-dsm body + pre-dsm + (- (re-submatch:tsm re) + (+ 1 pre-dsm (re-tsm body)))) + + ;; If the whole expression isn't already wrapped in a paren, wrap it. + ;; This outer paren becomes the new submatch -- add to submatches list. + (if (= level 0) + (values s 0 pcount (vector-append '#(1) submatches)) + (values (string-append "(" s ")") + 0 + (+ pcount 1) + (mapv! (lambda (i) (and i (+ i 1))) ; Excuse me. + (vector-append '#(0) submatches))))))) + +;;; Translating DSM +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Translate the body, and paste enough #F's before and after the submatches +;;; list to account for extra dead submatches. + +(define (translate-dsm body pre-dsm post-dsm) + (receive (s level pcount submatches) (translate-regexp body) + (values s level pcount (pad-vector pre-dsm post-dsm submatches)))) + +;;; Constant regexps +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Convert a string into a regexp pattern that matches that string exactly -- +;;; quote the special chars with backslashes. + +(define translate-string + (let ((specials (string->char-set "[.*?()|\\$^+"))) + (lambda (s) + (let ((len (string-length s))) + (if (zero? len) + (values "()" 0 1 '#()) ; Special case "" + + (let* ((len2 (string-fold (lambda (c len) ; Length of answer str + (+ len (if (char-set-contains? specials c) 2 1))) + 0 s)) + (s2 (make-string len2))) ; Answer string + + ;; Copy the chars over to S2. + (string-fold (lambda (c i) + ;; Write char C at index I, return the next index. + (let ((i (cond ((char-set-contains? specials c) + (string-set! s2 i #\\) + (+ i 1)) + (else i)))) + (string-set! s2 i c) + (+ i 1))) + 0 s) + (values s2 (if (= len 1) 1 2) + 0 '#()))))))) + + + +;;; Translating char-sets to [...] strings +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This is the nastiest code in the system. We make an effort to return +;;; succinct encodings of the char-sets, in the event these encodings are +;;; being shown to humans. +;;; - A singleton set is rendered as that char. +;;; - A full set is rendered as "." +;;; - An empty set is rendered as [^\000-\177]. +;;; - Otherwise, render it both as a [...] and as a [^...] spec, and +;;; take whichever is shortest. + +;;; Take a char set, and return the standard +;;; [regexp-string, level, pcount, submatches] +;;; quadruple. +;;; + +(define (translate-char-set cset) + (if (char-set-full? cset) (values "." 1 0 '#()) ; Full set + + (let ((nchars (char-set-size cset)) + (->bracket-string (lambda (cset in?) + (receive (loose ranges) (char-set->in-pair cset) + (hack-bracket-spec loose ranges in?))))) + + (? ((= 0 nchars) (values "[^\000-\177]" 1 0 '#())) ; Empty set + + ((= 1 nchars) ; Singleton set + (translate-string (string (car (char-set-members cset))))) + + ;; General case. Try both [...] and [^...]. + (else (let ((s- (->bracket-string cset #t)) + (s+ (->bracket-string (char-set-invert cset) #f))) + (values (if (< (string-length s-) (string-length s+)) + s- s+) + 1 0 '#()))))))) + + +;;; Commentary +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; Hacking special chars in character-class strings: +;;; ] - ^ ]...^- +;;; ] - ]...- +;;; ] ^ ]...^ +;;; ] ]... +;;; - ^ ...^- (or doubleton screw-case) +;;; - ...- +;;; ^ ...^ (or singleton screw-case) +;;; +;;; Two screw cases: +;;; "^-" must be converted to "-^" for IN. +;;; "^" must be converted to non-class "^" for IN. + +;;; Rendering a general char-set into a correct Posix [...] bracket expression +;;; is a complete mess. +;;; +;;; The rules on bracket expressions: +;;; - ] terminates the exp unless it is the first char +;;; (after an optional leading ^). +;;; - .*[\ are not special in bracket expressions. +;;; - However, [. [= and [: *are* special, so you can't follow an +;;; open bracket by one of .=: -- argh. See below. +;;; - ^ isn't special unless it's the first char. +;;; - - is special unless it's first (after an optional ^), last, +;;; or as the ending char in a range (e.g., a--). + +;;; This means: +;;; - You must ensure that ] doesn't begin or terminate a range. +;;; - You must ensure that .=: don't follow [ +;;; + This can happen in the loose char list; +;;; + This can happen in the range list -- consider the pair of +;;; ranges "x-[.-%" Handle this by prohibiting [ as a range-terminator. +;;; + It can happen at the loose/range boundary: %[:-? + +;;; First, run-length encode the set into loose and range-pairs. +;;; If the set is a singleton set, then punt the whole [...] effort, +;;; and do it as a simple char. + +;;; Repeat until stable: +;;; - Sort the ranges in this order: +;;; 1. other ranges; +;;; 2. ranges that begin with ^ (not priority) +;;; 3. ranges that begin with .=: (priority) +;;; 4. ranges that end with [ (priority) +;;; This eliminates [. [= [: problems in the ranges, and +;;; minimises the chances of the problem at the loose/range boundary. +;;; and problems with initial ^ chars. +;;; - Sort the loose chars so that ] is first, then -, then .=:, then [, +;;; then others, then ^. This eliminates [. [= [: problems in the loose +;;; chars, and minimises the chances of the problem at the loose/range +;;; boundary. +;;; - Shrink ranges by moving an opening or closing range char into the +;;; loose-char set: +;;; + If ] opens or closes a range, shrink it out. +;;; + If any range opens with -, shrink it out. +;;; + If the first range opens with .=:, and the last loose char is [, +;;; shrink it out. +;;; + If there are no loose chars, the first range begins with ^, and +;;; we're doing an IN range, shrink out the ^. +;;; + Shrinking a range down to <3 chars means move it's elts into the +;;; loose char set. +;;; - If both [ and - are in the loose char set, +;;; pull - out as special end-hypen. + +;;; Finally, we have to hack things so that ^ doesn't begin an IN sequence. +;;; - If it's a NOT-IN sequence, no worries. +;;; - If ^ is the opening loose char, then it's the only loose char. +;;; If there are ranges, move it to the end of the string. +;;; If there are no ranges, then just punt the char-class and convert +;;; it to a singleton ^. In fact, do this up-front, for any singleton +;;; set. +;;; +;;; If the special end-hyphen flag is set, add - to the end of the string. + +;;; This general approach -- starting out with maximal ranges, and then +;;; shrinking them to avoid other syntax violations -- has the advantage +;;; of not relying on the details of the ASCII encodings. + +;;; Ordering ranges: +;;; 1. other ranges (ordered by start char) +;;; 2. ranges that begin with ^ (not priority) +;;; 3. ranges that begin with .=: +;;; 4. ranges that end with [ (priority over #2 & #3) + +(define (range< r1 r2) + (let ((r1-start (car r1)) (r1-end (cdr r1)) + (r2-start (car r2)) (r2-end (cdr r2))) + (or (char=? r2-end #\[) ; Range ending with [ comes last. + (and (not (char=? r1-end #\[)) + + ;; Range begin with one of .=: comes next-to-last + (or (char=? r2-start #\.) (char=? r2-start #\=) (char=? r2-start #\:) + (and (not (char=? r1-start #\.)) + (not (char=? r1-start #\=)) + (not (char=? r1-start #\:)) + + ;; Range beginning with ^ comes before that. + (or (char=? r1-start #\^) + (and (not (char=? r2-start #\^)) + + ;; Other ranges are ordered by start char. + (< (char->ascii r1-start) + (char->ascii r2-start)))))))))) + +;;; Order loose chars: +;;; ] is first, +;;; - is next, +;;; .=: are next, +;;; [ is next, +;;; then others (ordered by ascii val) +;;; ^ is last. + + +(define (loose<= c1 c2) + (or (char=? c1 #\]) ; ] is first, + (and (not (char=? c2 #\])) + + (or (char=? c1 #\-) ; - is next, + (and (not (char=? c2 #\-)) + + ;; .=: are next, + (or (char=? c1 #\.) (char=? c1 #\=) (char=? c1 #\:) + (and (not (char=? c2 #\.)) + (not (char=? c2 #\=)) + (not (char=? c2 #\:)) + + (or (char=? c1 #\[) ; [ is next, + (and (not (char=? c2 #\[)) + + (or (char=? c2 #\^) ; ^ is last, + (and (not (char=? c1 #\^)) + + ;; other chars by ASCII. + (<= (char->ascii c1) + (char->ascii c2))))))))))))) + +;;; Returns (1) a list of 0-3 loose chars, (2) a list of 0 or 1 ranges. + +(define (shrink-range-start r) + (let ((start (char->ascii (car r))) + (end (char->ascii (cdr r)))) + (shrink-range-finish-up start (+ start 1) end))) + +(define (shrink-range-end r) + (let ((start (char->ascii (car r))) + (end (char->ascii (cdr r)))) + (shrink-range-finish-up end start (- end 1)))) + +(define (shrink-range-finish-up c start end) + (? ((> start end) (values (list (ascii->char c)) '())) ; Empty range + + ((= start end) ; Collapse singleton range. + (values (list (ascii->char c) (ascii->char start)) + '())) + + ((= (+ start 1) end) ; Collapse doubleton range. + (values (list (ascii->char c) (ascii->char start) (ascii->char end)) + '())) + + (else (values (list (ascii->char c)) + (list (cons (ascii->char start) (ascii->char end))))))) + + +;;; We assume the bracket-spec is not a singleton, not empty, and not complete. +;;; (These cases get rendered as the letter, [^\000-\177], and ".", +;;; respectively.) We assume the loose chars and the ranges are all disjoint. + +(define (hack-bracket-spec loose ranges in?) + (let lp ((loose0 loose) (ranges0 ranges) (end-hyphen? #f)) + ;; Repeat until stable: + (let ((loose (sort-list loose0 loose<=)) ; Sort loose chars and ranges. + (ranges (sort-list ranges0 range<))) + + ;; If ] opens or closes a range, shrink it out. + ;; If - opens a range, shrink it out. + (receive (loose ranges) + (let recur ((ranges ranges)) + (if (pair? ranges) + (let* ((range (car ranges)) + (start (car range)) + (end (cdr range)) + (ranges (cdr ranges))) + (receive (new-loose new-ranges) (recur ranges) + (receive (new-loose0 new-ranges0) + (? ((char=? #\] start) + (shrink-range-start range)) + + ((char=? #\] end) + (shrink-range-end range)) + + ((char=? #\- start) + (shrink-range-start range)) + + (else (values '() (list range)))) + (values (append new-loose0 new-loose) + (append new-ranges0 new-ranges))))) + (values loose '()))) + + (? ((or (not (equal? loose0 loose)) ; Loop if anything changed. + (not (equal? ranges0 ranges))) + (lp loose ranges end-hyphen?)) + + ;; If the first range opens with .=:, and the last loose char is [, + ;; shrink it out & loop. + ((and (pair? ranges) + (memv (caar ranges) '(#\. #\= #\:)) + (pair? loose) + (char=? #\[ (car (reverse loose)))) + (receive (new-loose new-ranges) + (shrink-range-start (car ranges)) + (lp (append new-loose loose) (append new-ranges (cdr ranges)) end-hyphen?))) + + ;; If there are no loose chars, the first range begins with ^, and + ;; we're doing an IN range, shrink out the ^. + ((and in? (null? loose) (pair? ranges) (char=? #\^ (caar ranges))) + (receive (new-loose new-ranges) (shrink-range-start (car ranges)) + (lp (append new-loose loose) (append new-ranges ranges) end-hyphen?))) + + ;; If both [ and - are in the loose char set, + ;; pull - out as special end-hypen. + ((and (pair? loose) + (pair? (cdr loose)) + (char=? (car loose) #\[) + (char=? (car loose) #\-)) + (lp (cons (car loose) (cddr loose)) ranges #t)) + + ;; No change! Build the answer... + (else (string-append (if in? "[" "[^") + (list->string loose) + (apply string-append + (map (lambda (r) (string (car r) #\- (cdr r))) + ranges)) + "]"))))))) hunk ./rx/re-fold.scm 1 +;;; Regexp "fold" combinators -*- scheme -*- +;;; Copyright (c) 1998 by Olin Shivers. + +;;; REGEXP-FOLD re kons knil s [finish start] -> value +;;; REGEXP-FOLD-RIGHT re kons knil s [finish start] -> value +;;; REGEXP-FOR-EACH re proc s [start] -> unspecific + +;;; Non-R4RS imports: let-optionals :optional error ? + +;;; regexp-fold re kons knil s [finish start] -> value +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; The following definition is a bit unwieldy, but the intuition is +;;; simple: this procedure uses the regexp RE to divide up string S into +;;; non-matching/matching chunks, and then "folds" the procedure KONS +;;; across this sequence of chunks. +;;; +;;; Search from START (defaulting to 0) for a match to RE; call +;;; this match M. Let I be the index of the end of the match +;;; (that is, (match:end M 0)). Loop as follows: +;;; (regexp-fold re kons (kons START M knil) s finish I) +;;; If there is no match, return instead +;;; (finish START knil) +;;; FINISH defaults to (lambda (i knil) knil) +;;; +;;; In other words, we divide up S into a sequence of non-matching/matching +;;; chunks: +;;; NM1 M1 NM1 M2 ... NMk Mk NMlast +;;; where NM1 is the initial part of S that isn't matched by the RE, M1 is the +;;; first match, NM2 is the following part of S that isn't matched, M2 is the +;;; second match, and so forth -- NMlast is the final non-matching chunk of +;;; S. We apply KONS from left to right to build up a result, passing it one +;;; non-matching/matching chunk each time: on an application (KONS i m KNIL), +;;; the non-matching chunk goes from I to (match:begin m 0), and the following +;;; matching chunk goes from (match:begin m 0) to (match:end m 0). The last +;;; non-matching chunk NMlast is processed by FINISH. So the computation we +;;; perform is +;;; (final q (kons Jk MTCHk ... (kons J2 MTCH2 (kons J1 MTCH1 knil))...)) +;;; where Ji is the index of the start of NMi, MTCHi is a match value +;;; describing Mi, and Q is the index of the beginning of NMlast. + +(define (regexp-fold re kons knil s . maybe-finish+start) + (let-optionals maybe-finish+start ((finish (lambda (i x) x)) + (start 0)) + (if (> start (string-length s)) + (error "Illegal START parameter" + regexp-fold re kons knil s finish start)) + (let lp ((i start) (val knil)) + (? ((regexp-search re s i) => + (lambda (m) + (let ((next-i (match:end m 0))) + (if (= next-i (match:start m 0)) + (error "An empty-string regexp match has put regexp-fold into an infinite loop." + re s start next-i) + (lp next-i (kons i m val)))))) + (else (finish i val)))))) + +;;; regexp-fold-right re kons knil s [finish start] -> value +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This procedure repeatedly matches regexp RE across string S. +;;; This divides S up into a sequence of matching/non-matching chunks: +;;; NM0 M1 NM1 M2 NM2 ... Mk NMk +;;; where NM0 is the initial part of S that isn't matched by the RE, +;;; M1 is the first match, NM1 is the following part of S that isn't +;;; matched, M2 is the second match, and so forth. We apply KONS from +;;; right to left to build up a result +;;; (final q (kons MTCH1 J1 (kons MTCH2 J2 ...(kons MTCHk JK knil)...))) +;;; where MTCHi is a match value describing Mi, Ji is the index of the end of +;;; NMi (or, equivalently, the beginning of Mi+1), and Q is the index of the +;;; beginning of M1. In other words, KONS is passed a match, an index +;;; describing the following non-matching text, and the value produced by +;;; folding the following text. The FINAL function "polishes off" the fold +;;; operation by handling the initial chunk of non-matching text (NM0, above). +;;; FINISH defaults to (lambda (i knil) knil) + +(define (regexp-fold-right re kons knil s . maybe-finish+start) + (let-optionals maybe-finish+start ((finish (lambda (i x) x)) + (start 0)) + (if (> start (string-length s)) + (error "Illegal START parameter" regexp-fold-right re kons knil s + finish start)) + + (? ((regexp-search re s start) => + (lambda (m) + (finish (match:start m 0) + (let recur ((last-m m)) + (? ((regexp-search re s (match:end last-m 0)) => + (lambda (m) + (let ((i (match:start m 0))) + (if (= i (match:end m 0)) + (error "An empty-string regexp match has put regexp-fold-right into an infinite loop." + re s start i) + (kons last-m i (recur m)))))) + (else (kons last-m (string-length s) knil))))))) + (else (finish (string-length s) knil))))) + +;;; regexp-for-each re proc s [start] -> unspecific +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Repeatedly match regexp RE against string S. +;;; Apply PROC to each match that is produced. +;;; Matches do not overlap. + +(define (regexp-for-each re proc s . maybe-start) + (let ((start (:optional maybe-start 0))) + (if (> start (string-length s)) + (apply error "Illegal START parameter" regexp-for-each re proc s start) + (let lp ((i start)) + (? ((regexp-search re s i) => + (lambda (m) + (let ((next-i (match:end m 0))) + (if (= (match:start m 0) next-i) + (error "An empty-string regexp match has put regexp-for-each into an infinite loop." + re proc s start next-i)) + (proc m) + (lp next-i))))))))) hunk ./rx/re-high.scm 1 +;;; Regular expression matching for scsh +;;; Copyright (c) 1998 by Olin Shivers. + + +;;; Translates the re to a Posix string, and returns a CRE record, +;;; but doesn't actually compile the Posix string into a C regex_t struct. +;;; Uses the :POSIX field to cache the CRE record. + +(define (compile-regexp re) + (let* ((compile (lambda () (receive (s lev pcount tvec) + (regexp->posix-string re) + (new-cre s tvec)))) + + (check-cache (lambda (fetch set) + (or (fetch re) ; Already cached. + (let ((cre (compile))) ; Compile it, + (set re cre) ; cache it, + cre))))) ; and return it. + + (? ((re-seq? re) + (check-cache re-seq:posix set-re-seq:posix)) + ((re-choice? re) + (check-cache re-choice:posix set-re-choice:posix)) + ((re-repeat? re) + (check-cache re-repeat:posix set-re-repeat:posix)) + ((re-char-set? re) + (check-cache re-char-set:posix set-re-char-set:posix)) + ((re-string? re) + (check-cache re-string:posix set-re-string:posix)) + ((re-submatch? re) + (check-cache re-submatch:posix set-re-submatch:posix)) + ((re-dsm? re) + (check-cache re-dsm:posix set-re-dsm:posix)) + + ((re-bos? re) (or bos-cre (set! bos-cre (compile)))) + ((re-eos? re) (or eos-cre (set! eos-cre (compile)))) + + ((re-bol? re) (error "BOL regexp not supported in this implementation.")) + ((re-eol? re) (error "EOL regexp not supported in this implementation.")) + + ((re-bow? re) (or bow-cre (set! bow-cre (compile)))) + ((re-eow? re) (or eow-cre (set! eow-cre (compile)))) + + (else (error "compile-regexp -- not a regexp" re))))) + +(define bos-cre #f) +(define eos-cre #f) +(define bow-cre #f) +(define eow-cre #f) + + + +(define (regexp-search re str . maybe-start) + (let* ((tsm (re-tsm re)) + (svec (make-vector (+ 1 tsm) #f)) + (evec (make-vector (+ 1 tsm) #f)) + (cre (compile-regexp re))) + (cre-search cre svec evec str (:optional maybe-start 0)))) + + +(define (regexp-search? re str . maybe-start) + (cre-search? (compile-regexp re) str (:optional maybe-start 0))) hunk ./rx/re-low.scm 1 +;;; Regular expression matching for scsh +;;; Copyright (c) 1994 by Olin Shivers. + +(foreign-source + "/* Make sure foreign-function stubs interface to the C funs correctly: */" + "#include " + "#include \"../regexp/regex.h\"" + "#include \"re1.h\"" + "" "" + ) + +;;; Match data for regexp matches. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define-record regexp-match + string ; The string against which we matched + start ; vector of starting indices + end) ; vector of ending indices + +(define (match:start match . maybe-index) + (vector-ref (regexp-match:start match) + (:optional maybe-index 0))) + +(define (match:end match . maybe-index) + (vector-ref (regexp-match:end match) + (:optional maybe-index 0))) + +(define (match:substring match . maybe-index) + (let* ((i (:optional maybe-index 0)) + (start (vector-ref (regexp-match:start match) i))) + (and start (substring (regexp-match:string match) + start + (vector-ref (regexp-match:end match) i))))) + +;;; Compiling regexps +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; There's no legal Posix string expressing the empty match (e.g., (|)) +;;; that will never match anything. So when we have one of these, we set +;;; the STRING field to #f. The matchers will spot this case and handle it +;;; specially. + +;;; We compile the string two ways, on demand -- one for cre-search, and +;;; one for cre-search?. + +(define-record cre ; A compiled regular expression + string ; The Posix string form of the regexp or #F. + max-paren ; Max paren in STRING needed for submatches. + (bytes #f) ; Pointer to the compiled form, in the C heap, or #F. + (bytes/nm #f) ; Same as BYTES, but compiled with no-submatch. + tvec ; Translation vector for the submatches + ((disclose self) (list "cre" (cre:string self)))) + +(define (new-cre str tvec) (make-cre str (max-live-posix-submatch tvec) tvec)) + +(define (max-live-posix-submatch tvec) + (vfold (lambda (sm mlpsm) (if sm (max mlpsm sm) mlpsm)) 0 tvec)) + +; (define (compile-posix-re->c-struct re-string sm?) +; (receive (errcode c-struct) (%compile-re re-string sm?) +; (if (zero? errcode) c-struct +; (error errcode (%regerror-msg errcode c-struct) +; compile-posix-re->c-struct re-string sm?)))) + +(define (compile-posix-re->c-struct re-string sm?) + ;; Guile make-regexp, regexp-exec can't handle REG_NOSUB so ignore "sm?" + ;; for now. + (make-regexp re-string)) + +(define-foreign %compile-re (compile_re (string-desc pattern) (bool submatches?)) + integer ; 0 or error code + (C regex_t*)) + + +;;; Searching with compiled regexps +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; cre-search returns match info; cre-search? is just a predicate. + +; (define (cre-search cre start-vec end-vec str start) +; (let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match. +; (and re-str +; (let* ((C-bytes (or (cre:bytes cre) +; (let ((C-bytes (compile-posix-re->c-struct re-str #t))) +; (set-cre:bytes cre C-bytes) +; (register-re-c-struct cre C-bytes) +; C-bytes))) +; (retcode (%cre-search C-bytes str start +; (cre:tvec cre) +; (cre:max-paren cre) +; start-vec end-vec))) +; (if (integer? retcode) +; (error retcode (%regerror-msg retcode C-bytes) +; cre-search cre start-vec end-vec str start) +; (and retcode (make-regexp-match str start-vec end-vec))))))) + +(define (cre-search cre start-vec end-vec str start) + (let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match. + (and re-str + (let* ((C-bytes (or (cre:bytes cre) + (let ((C-bytes (compile-posix-re->c-struct + re-str #t))) + (set-cre:bytes cre C-bytes) + C-bytes))) + (retcode (regexp-exec C-bytes str start)) + (tvec (cre:tvec cre))) + (cond (retcode + (vector-set! start-vec 0 (car (vector-ref retcode 1))) + (vector-set! end-vec 0 (cdr (vector-ref retcode 1))) + (do ((i (- (vector-length start-vec) 2) (- i 1))) + ((< i 0)) + (let ((j-scm (vector-ref tvec i))) + (cond (j-scm + (let ((k (car (vector-ref retcode j-scm))) + (l (cdr (vector-ref retcode j-scm)))) + (vector-set! start-vec (+ i 1) + (if (= k -1) #f k)) + (vector-set! end-vec (+ i 1) + (if (= l -1) #f l))))))))) + (and retcode (make-regexp-match str start-vec end-vec)))))) + +; (define (cre-search? cre str start) +; (let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match. +; (and re-str +; (let* ((C-bytes (or (cre:bytes/nm cre) +; (let ((C-bytes (compile-posix-re->c-struct re-str #f))) +; (set-cre:bytes/nm cre C-bytes) +; (register-re-c-struct cre C-bytes) +; C-bytes))) +; (retcode (%cre-search C-bytes str start '#() -1 '#() '#()))) +; (if (integer? retcode) +; (error retcode (%regerror-msg retcode C-bytes) +; cre-search? cre str start) +; retcode))))) + +(define (cre-search? cre str start) + (let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match. + (and re-str + (let* ((C-bytes (or (cre:bytes cre) + (let ((C-bytes (compile-posix-re->c-struct + re-str #t))) + (set-cre:bytes cre C-bytes) + C-bytes))) + (retcode (regexp-exec C-bytes str start))) + (and retcode #t))))) + +(define-foreign %cre-search + (re_search ((C "const regex_t *~a") compiled-regexp) + (string-desc str) + (integer start) + (vector-desc tvec) (integer max-psm) + (vector-desc svec) (vector-desc evec)) + desc) ; 0 success, #f no-match, or non-zero int error code. + + +;;; Generate an error msg from an error code. + +(define-foreign %regerror-msg (re_errint2str (integer errcode) + ((C "const regex_t *~a") re)) + string) + + +;;; Reclaiming compiled regexp storage +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Avert your eyes from the unsightly crock. +;;; +;;; S48 0.36 doesn't have finalizers, so we don't have a way to free +;;; the C regexp_t structure when its CRE record is gc'd. So our current +;;; lame approximation is to keep track of all the CRE's with a list of +;;; (cre-weak-pointer . regex_t*) +;;; pairs. From time to time, we should walk the list. If we deref the +;;; weak pointer and discover the CRE's been GC'd, we free the regex_t +;;; struct. +;;; +;;; Note this code is completely thread unsafe. + +;;; Free the space used by a compiled regexp. +(define-foreign %free-re (free_re ((C regex_t*) re)) ignore) + +;(define *master-cre-list* '()) + +;;; Whenever we make a new CRE, use this proc to add it to the master list. +;(define (register-re-c-struct cre c-bytes) +; (set! *master-cre-list* (cons (cons (make-weak-pointer cre) c-bytes) +; *master-cre-list*))) + +(define (clean-up-cres) #t) + +; (define (clean-up-cres) +; (set! *master-cre-list* +; (fold (lambda (elt lis) +; (if (weak-pointer-ref (car elt)) ; Still alive +; (cons elt lis) +; (begin (%free-re (cdr elt)) +; lis))) +; '() +; *master-cre-list*))) hunk ./rx/re-subst.scm 1 +;;; Substitution ops with regexps +;;; Copyright (c) 1998 by Olin Shivers. + +;;; These function have to be in a separate package because they use +;;; the scsh I/O function WRITE-STRING. The rest of the regexp system +;;; has no dependencies on scsh system code, and is defined independently +;;; of scsh -- which scsh, in turn, relies upon: pieces of scsh-level-0 +;;; use the regexp basics. So we have to split this code out to avoid +;;; a circular dependency in the modules: scsh-level-0 needs the regexp +;;; package which needs WRITE-STRING, which comes from the regexp package. + +(define (regexp-substitute port match . items) + (let* ((str (regexp-match:string match)) + (sv (regexp-match:start match)) + (ev (regexp-match:end match)) + (range (lambda (item) ; Return start & end of + (cond ((integer? item) ; ITEM's range in STR. + (values (vector-ref sv item) + (vector-ref ev item))) + ((eq? 'pre item) (values 0 (vector-ref sv 0))) + ((eq? 'post item) (values (vector-ref ev 0) + (string-length str))) + (else (error "Illegal substitution item." + item + regexp-substitute)))))) + (if port + + ;; Output port case. + (for-each (lambda (item) + (if (string? item) (write-string item port) + (receive (si ei) (range item) + (write-string str port si ei)))) + items) + + ;; Here's the string case. Make two passes -- one to + ;; compute the length of the target string, one to fill it in. + (let* ((len (fold (lambda (item i) + (+ i (if (string? item) (string-length item) + (receive (si ei) (range item) (- ei si))))) + 0 items)) + (ans (make-string len))) + + (fold (lambda (item index) + (cond ((string? item) + (string-copy! ans index item) + (+ index (string-length item))) + (else (receive (si ei) (range item) + (string-copy! ans index str si ei) + (+ index (- ei si)))))) + 0 items) + ans)))) + + + +(define (regexp-substitute/global port re str . items) + (let ((str-len (string-length str)) + (range (lambda (start sv ev item) ; Return start & end of + (cond ((integer? item) ; ITEM's range in STR. + (values (vector-ref sv item) + (vector-ref ev item))) + ((eq? 'pre item) (values start (vector-ref sv 0))) + (else (error "Illegal substitution item." + item + regexp-substitute/global))))) + (num-posts (fold (lambda (item count) + (+ count (if (eq? item 'post) 1 0))) + 0 items))) + + (if (and port (< num-posts 2)) + + ;; Output port case, with zero or one POST items. + (let recur ((start 0)) + (if (<= start str-len) + (let ((match (regexp-search re str start))) + (if match + (let* ((sv (regexp-match:start match)) + (ev (regexp-match:end match)) + (s (vector-ref sv 0)) + (e (vector-ref ev 0)) + (empty? (= s e))) + (for-each (lambda (item) + (cond ((string? item) (write-string item port)) + + ((procedure? item) (write-string (item match) port)) + + ((eq? 'post0 item) + (if (and empty? (< s str-len)) + (write-char (string-ref str s) port))) + + ((eq? 'post item) + (recur (if empty? (+ 1 e) e))) + + (else (receive (si ei) + (range start sv ev item) + (write-string str port si ei))))) + items)) + + (write-string str port start))))) ; No match. + + ;; Either we're making a string, or >1 POST. + (let* ((pieces (let recur ((start 0)) + (if (> start str-len) '() + (let ((match (regexp-search re str start)) + (cached-post #f)) + (if match + (let* ((sv (regexp-match:start match)) + (ev (regexp-match:end match)) + (s (vector-ref sv 0)) + (e (vector-ref ev 0)) + (empty? (= s e))) + (fold (lambda (item pieces) + (cond ((string? item) + (cons item pieces)) + + ((procedure? item) + (cons (item match) pieces)) + + ((eq? 'post0 item) + (if (and empty? (< s str-len)) + (cons (string (string-ref str s)) + pieces) + pieces)) + + ((eq? 'post item) + (if (not cached-post) + (set! cached-post + (recur (if empty? (+ e 1) e)))) + (append cached-post pieces)) + + (else (receive (si ei) + (range start sv ev item) + (cons (substring str si ei) + pieces))))) + '() items)) + + ;; No match. Return str[start,end]. + (list (if (zero? start) str + (substring str start (string-length str))))))))) + + (pieces (reverse pieces))) + (if port (for-each (lambda (p) (write-string p port)) pieces) + (apply string-append pieces)))))) hunk ./rx/re-syntax.scm 1 +;;; SRE syntax support for regular expressions +;;; Olin Shivers, June 1998. + +;;; Export SRE-FORM?, EXPAND-RX + +;;; Is the form an SRE expression? +;;; We only shallowly check the initial keyword of a compound form. + +(define (sre-form? exp r same?) ; An SRE is + (let ((kw? (lambda (x kw) (same? x (r kw))))) + (or (string? exp) ; "foo" + (and (pair? exp) + (let ((head (car exp))) + (or (every string? exp) ; ("aeiou") + (kw? head '*) ; (* re ...) + (kw? head '+) ; (+ re ...) + (kw? head '?) ; (? re ...) + (kw? head '=) ; (= n re ...) + (kw? head '>=) ; (>= n re ...) + (kw? head '**) ; (** m n re ...) + + (kw? head '|) ; (| re ...) + (kw? head 'or) ; (| re ...) + (kw? head ':) ; (: re ...) + (kw? head 'seq) ; (: re ...) + + (kw? head '-) ; (- re ...) + (kw? head '&) ; (& re ...) + (kw? head '~) ; (~ re ...) + + (kw? head 'submatch) ; (submatch re ...) + (kw? head 'dsm) ; (dsm pre post re ...) + + (kw? head 'uncase) ; (uncase re ...) + (kw? head 'w/case) ; (w/case re ...) + (kw? head 'w/nocase) ; (w/nocase re ...) + + (kw? head 'unquote) ; ,exp + (kw? head 'unquote-splicing) ; ,@exp + + (kw? head 'posix-string) ; (posix-string string) + + (kw? head 'word+) ; (word+ re ...) + (kw? head 'word)))) ; (word re ...) + + (kw? exp 'any) ; any + (kw? exp 'nonl) ; nonl + (kw? exp 'word) ; word + (kw? exp 'bos) (kw? exp 'eos) ; bos / eos + (kw? exp 'bol) (kw? exp 'eol) ; bol / eol + (kw? exp 'bow) (kw? exp 'eow) ; bow / eow + + (kw? exp 'lower-case) (kw? exp 'lower); The char class names + (kw? exp 'upper-case) (kw? exp 'upper) + (kw? exp 'alphabetic) (kw? exp 'alpha) + (kw? exp 'numeric) (kw? exp 'num) (kw? exp 'digit) + (kw? exp 'alphanumeric) (kw? exp 'alphanum) (kw? exp 'alnum) + (kw? exp 'blank) + (kw? exp 'control) (kw? exp 'cntrl) + (kw? exp 'printing) (kw? exp 'print) + (kw? exp 'punctuation) (kw? exp 'punct) + (kw? exp 'hex-digit) (kw? exp 'hex) (kw? exp 'xdigit) + (kw? exp 'graphic) (kw? exp 'graph) + (kw? exp 'whitespace) (kw? exp 'white) (kw? exp 'space) + (kw? exp 'ascii)))) + + +;;; (if-sre-form form conseq-form alt-form) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; If FORM is an SRE, expand into CONSEQ-FORM, otherwise ALT-FORM. +;;; This is useful for expanding a subform of a macro that can +;;; be either a regexp or something else, e.g. +;;; (if-sre-form test ; If TEST is a regexp, +;;; (regexp-search? (rx test) line) ; match it against the line, +;;; (test line)) ; otw it's a predicate. + +;;; The macro is actually defined directly in the module file. +;;; (define-syntax if-sre-form +;;; (lambda (exp r c) +;;; (if (sre-form? (cadr exp) r c) +;;; (caddr exp) +;;; (cadddr exp)))) + + +;;; (RX re ...) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; The basic SRE form. + +(define (expand-rx exp r c) + (let ((re (simplify-regexp (parse-sres (cdr exp) r c)))) + + ;; If it's static, pre-compute the Posix string & tvec now, + ;; so the re->scheme unparser will find it and toss it into + ;; the constructor. We do this only for the top-level regexp. + (if (static-regexp? re) (compile-regexp re)) + + (regexp->scheme re r))) + + +;(define-syntax rx (syntax-rules () ((rx stuff ...) (really-rx stuff ...)))) +;(define-syntax really-rx +; (syntax-rules () ((really-rx stuff ...) (rx/cs stuff ...)))) +; +;(define-syntax rx/cs (lambda (exp r c) (expand-rx exp #t r c))) +;(define-syntax rx/ci (lambda (exp r c) (expand-rx exp #f r c))) +; +;(define-syntax case-sensitive +; (lambda (exp r c) +; (let ((%ls (r 'let-syntax)) +; (%really-rx (r 'really-rx)) +; (%sr (r 'syntax-rules)) +; (%rx/cs (r 'rx/cs))) +; `(,ls ((,%really-rx (,sr () ((,%really-rx stuff ...) (,%rx/cs stuff ...))))) +; . ,(cdr exp))))) + hunk ./rx/re.scm 1 +;;; The regexp data type +;;; Olin Shivers, January 1997, May 1998. + +;;; A DSM around a choice gets absorbed into the choice's first elt. +;;; But this prevents it from being moved out into a containing +;;; choice or seq elt, or outer DSM. Fix. + +;;; A regexp is a: dsm, submatch, seq, choice, repeat, +;;; char-set, string, bos, eos + +;;; Deleted sub-match regexp +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This stands for a regexp containing TSM submatches, of which +;;; PRE-DSM come first as dead submatches, then the regexp BODY with its +;;; submatches, then POST-DSM as dead submatches. + +(define-record-type re-dsm :re-dsm + (%%make-re-dsm body pre-dsm tsm posix) + re-dsm? + (body re-dsm:body) ; A Regexp + (pre-dsm re-dsm:pre-dsm) ; Integer -- initial dead submatches + (tsm re-dsm:tsm) ; Total submatch count + (posix re-dsm:posix set-re-dsm:posix)) ; Posix bits + +(define (%make-re-dsm body pre-dsm tsm) (%%make-re-dsm body pre-dsm tsm #f)) + +;;; This is only used in code that the (RX ...) macro produces +;;; for static regexps. +(define (%make-re-dsm/posix body pre-dsm tsm posix-str tvec) + (%%make-re-dsm body pre-dsm tsm (new-cre posix-str tvec))) + +(define (make-re-dsm body pre-dsm post-dsm) + (%make-re-dsm body pre-dsm (+ post-dsm pre-dsm (re-tsm body)))) + +;;; "Virtual field" for the RE-DSM record -- how many dead submatches +;;; come after the body: + +(define (re-dsm:post-dsm re) ; Number of post-body DSM's = + (- (re-dsm:tsm re) ; total submatches + (+ (re-dsm:pre-dsm re) ; minus pre-body dead submatches + (re-tsm (re-dsm:body re))))) ; minus body's submatches. + +;;; Slightly smart DSM constructor: +;;; - Absorb this DSM into an inner dsm, or submatch. +;;; - Punt unnecessary DSM's. + +(define (re-dsm body pre-dsm post-dsm) + (let ((tsm (+ pre-dsm (re-tsm body) post-dsm))) + (receive (body1 pre-dsm1) (open-dsm body) + (let ((pre-dsm (+ pre-dsm pre-dsm1))) + + (? ((= tsm (re-tsm body1)) body1) ; Trivial DSM + + ((re-submatch? body1) ; Absorb into submatch. + (%make-re-submatch (re-submatch:body body1) + (+ pre-dsm (re-submatch:pre-dsm body1)) + tsm)) + + (else (%make-re-dsm body1 pre-dsm tsm))))))) ; Non-trivial DSM + +;;; Take a regexp RE and return an equivalent (re', pre-dsm) pair of values. +;;; Recurses into DSM records. It is the case that +;;; (<= (+ pre-dsm (re-tsm re')) (re-tsm re)) +;;; The post-dsm value is (- (re-tsm re) (re-tsm re') pre-dsm). + +(define (open-dsm re) + (let lp ((re re) (pre-dsm 0)) + (if (re-dsm? re) + (lp (re-dsm:body re) (+ pre-dsm (re-dsm:pre-dsm re))) + (values re pre-dsm)))) + + + +;;; Sequence: (& re ...) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define-record-type re-seq :re-seq + (%%make-re-seq elts tsm posix) + re-seq? + (elts re-seq:elts) ; Regexp list + (tsm re-seq:tsm) ; Total submatch count + (posix re-seq:posix set-re-seq:posix)) ; Posix record + +(define (%make-re-seq elts tsm) (%%make-re-seq elts tsm #f)) + +;;; This is only used in code that (RE ...) macro produces for static regexps. +(define (%make-re-seq/posix elts tsm posix-str tvec) + (%%make-re-seq elts tsm (new-cre posix-str tvec))) + +(define (make-re-seq res) + (%make-re-seq res + (fold (lambda (re sm-count) (+ (re-tsm re) sm-count)) + 0 res))) + +;;; Slightly smart sequence constructor: +;;; - Flattens nested sequences +;;; - Drops trivial "" elements +;;; - Empty sequence => "" +;;; - Singleton sequence is reduced to its one element. +;;; - We don't descend into DSM's; too much work for this routine. + +(define (re-seq res) + (let ((res (let recur ((res res)) ; Flatten nested seqs & drop ""'s. + (if (pair? res) + (let* ((re (car res)) + (tail (recur (cdr res)))) + (? ((re-seq? re) ; Flatten nested seqs + (append (recur (re-seq:elts re)) tail)) + ((re-trivial? re) tail) ; Drop trivial elts + (else (cons re tail)))) + '())))) + + (if (pair? res) + (if (pair? (cdr res)) + (make-re-seq res) ; General case + (car res)) ; Singleton sequence + re-trivial))) ; Empty seq -- "" + + +;;; Choice: (| re ...) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define-record-type re-choice :re-choice + (%%make-re-choice elts tsm posix) + re-choice? + (elts re-choice:elts) ; List of rel-items + (tsm re-choice:tsm) ; Total submatch count + (posix re-choice:posix set-re-choice:posix)) ; Posix string + +(define (%make-re-choice elts tsm) (%%make-re-choice elts tsm #f)) + +;;; This is only used in code that (RE ...) macro produces for static regexps. +(define (%make-re-choice/posix elts tsm posix-str tvec) + (%%make-re-choice elts tsm (new-cre posix-str tvec))) + +(define (make-re-choice res) + (%make-re-choice res + (fold (lambda (re sm-count) (+ (re-tsm re) sm-count)) + 0 res))) + +;;; Slightly smart choice constructor: +;;; - Flattens nested choices +;;; - Drops empty (impossible) elements +;;; - Empty choice => empty-match +;;; - Singleton choice is reduced to its one element. +;;; - We don't descend into DSM's; too much work for this routine. +;;; +;;; This routine guarantees to preserve char-classness -- if it is applied +;;; to a list of char-class regexps (char-set and singleton-string re's), +;;; it will return a char-class regexp. + +(define (re-choice res) + (let ((res (let recur ((res res)) ; Flatten nested choices + (if (pair? res) ; & drop empty re's. + (let* ((re (car res)) + (tail (recur (cdr res)))) + (? ((re-choice? re) ; Flatten nested choices + (append (recur (re-choice:elts re)) tail)) + ((re-empty? re) tail) ; Drop empty re's. + (else (cons re tail)))) + '())))) + ;; If all elts are char-class re's, fold them together. + (if (every static-char-class? res) + (let ((cset (apply char-set-union + (map (lambda (elt) + (if (re-char-set? elt) + (re-char-set:cset elt) + (string->char-set (re-string:chars elt)))) + res)))) + (if (= 1 (char-set-size cset)) + (make-re-string (apply string (char-set-members cset))) + (make-re-char-set cset))) + + (if (pair? res) + (if (pair? (cdr res)) + (make-re-choice res) ; General case + (car res)) ; Singleton sequence + re-empty)))) ; Empty choice = ("") + +;;; Repetition (*,?,+,=,>=,**) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; The repeat record's body contains all of the repeat record's submatches -- +;;; there is no pre-dsm field allowing for initial & trailing dead submatches. +;;; This is not a limit on expressiveness because repeat commutes with dsm -- +;;; we can always move submatches that come before and after body to an outer +;;; DSM. Hence +;;; (= (re-repeat:tsm re) (re-tsm (re-repeat:body re))) + +(define-record-type re-repeat :re-repeat + (%%make-re-repeat from to body tsm posix) + re-repeat? + (from re-repeat:from) ; Integer (Macro expander abuses.) + (to re-repeat:to) ; Integer or #f for infinite (Macro expander abuses.) + (body re-repeat:body) ; Regexp + (tsm re-repeat:tsm) ; Total submatch count + (posix re-repeat:posix set-re-repeat:posix)) ; Posix record + +(define (%make-re-repeat from to body tsm) + (%%make-re-repeat from to body tsm #f)) + +;;; This is only used in code that (RE ...) macro produces for static regexps. +(define (%make-re-repeat/posix from to body tsm posix-str tvec) + (%%make-re-repeat from to body tsm (new-cre posix-str tvec))) + +(define (make-re-repeat from to body) + (%make-re-repeat (check-arg (lambda (from) + (or (not (integer? from)) ; Dynamic + (>= from 0))) + from + make-re-repeat) + (check-arg (lambda (to) + (or (not (integer? to)) ; #f or dynamic + (and (integer? to) (>= to 0)))) + to + make-re-repeat) + body + (re-tsm body))) + +;;; Slightly smart repeat constructor +;;; - Flattens nested repeats. +;;; - re{1,1}, re{0,0}, and re{m,n} where m>n reduced. +;;; - If re is empty-match: from=0 => "", from>0 => empty-match. +;;; - If re is eos, bos, or "", and to <= from, reduce to simply re. +;;; - Commutes into DSM records. + +(define (re-repeat from to body) + (receive (re pre-dsm) (reduce-repeat from to body 0) + (re-dsm re pre-dsm (- (re-tsm body) (+ pre-dsm (re-tsm re)))))) + +;;; This guy does all the work (and is also called by the repeat simplifier) + +(define (reduce-repeat from to body pre-dsm) + (receive (from to body1 pre-dsm) + ;; Collapse nested repeats and dsm's: + (let iter ((from from) (to to) (body body) (dsm0 pre-dsm)) + (receive (body body-dsm0) (open-dsm body) + (let ((dsm0 (+ dsm0 body-dsm0))) + (if (and (integer? from) ; Stop if FROM or TO + (or (not to) (integer? to)) ; are code. + (re-repeat? body)) + (let ((bfrom (re-repeat:from body)) + (bto (re-repeat:to body)) + (bbody (re-repeat:body body))) + (if (or (not (integer? bfrom)) ; Stop if bfrom or + (and bto (not (integer? bto)))) ; bto are code. + (values from to body dsm0) + (iter (* from bfrom) + (and to bto (* to bto)) + bbody + dsm0))) + (values from to body dsm0))))) + + (? ((and (eqv? from 1) (eqv? to 1)) ; re{1,1} => re + (values body1 pre-dsm)) + + ((and (eqv? from 0) (eqv? to 0)) ; re{0,0} => "" + (values re-trivial (+ (re-tsm body1) pre-dsm))) + + ;; re{m,n} => re-empty when m>n: + ((and (integer? from) (integer? to) (> from to)) + (values re-empty (+ (re-tsm body1) pre-dsm))) + + ;; Reduce the body = re-empty case. + ((and (re-empty? body1) (integer? from)) ; (+ (in)) => (in) + (values (if (> from 0) re-empty re-trivial) ; (* (in)) => "" + pre-dsm)) + + ;; If BODY1 is eos, bos, or "", and m<=n, reduce to simply BODY1. + ((and (integer? from) + (or (and (integer? to) (<= from to)) (not to)) + (or (re-eos? body1) + (re-bos? body1) + (and (re-string? body1) + (string=? "" (re-string:chars body1))))) + (values body1 pre-dsm)) + + (else (values (make-re-repeat from to body1) ; general case + pre-dsm))))) + + + +;;; Submatch +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; A submatch record introduces a new submatch. This is followed by +;;; PRE-DSM dead submatches (caused by simplifying the body), then the +;;; BODY, then perhaps more dead submatches, all for a total of TSM +;;; submatches. + +(define-record-type re-submatch :re-submatch + (%%make-re-submatch body pre-dsm tsm posix) + re-submatch? + (body re-submatch:body) ; Regexp + (pre-dsm re-submatch:pre-dsm) ; Deleted submatches preceding the body + (tsm re-submatch:tsm) ; Total submatch count for the record + (posix re-submatch:posix set-re-submatch:posix)) ; Posix string + +(define (%make-re-submatch body pre-dsm tsm) + (%%make-re-submatch body pre-dsm tsm #f)) + +;;; This is only used in code that (RE ...) macro produces for static regexps. +(define (%make-re-submatch/posix body pre-dsm tsm posix-str tvec) + (%%make-re-submatch body pre-dsm tsm (new-cre posix-str tvec))) + + +;;; "Virtual field" for the RE-SUBMATCH record -- how many dead submatches +;;; come after the body: + +(define (re-submatch:post-dsm re) ; Number of post-body DSM's = + (- (re-submatch:tsm re) ; total submatches + (+ 1 ; minus *this* submatch + (re-submatch:pre-dsm re) ; minus pre-body dead submatches + (re-tsm (re-submatch:body re))))); minus body's submatches. + +(define (make-re-submatch body . maybe-pre+post-dsm) + (let-optionals maybe-pre+post-dsm ((pre-dsm 0) (post-dsm 0)) + (%make-re-submatch body pre-dsm (+ pre-dsm 1 (re-tsm body) post-dsm)))) + +;;; Slightly smart submatch constructor +;;; - DSM's unpacked +;;; - If BODY is the re-empty, we'll never match, so just produce a DSM. + +(define (re-submatch body . maybe-pre+post-dsm) + (let-optionals maybe-pre+post-dsm ((pre-dsm 0) (post-dsm 0)) + (let ((tsm (+ 1 pre-dsm (re-tsm body) post-dsm))) + (receive (body1 pre-dsm1) (open-dsm body) + (if (re-empty? body1) + (re-dsm re-empty tsm 0) + (%make-re-submatch body1 (+ pre-dsm pre-dsm1) tsm)))))) + + + +;;; Other regexps : string, char-set, bos & eos +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Also, re-empty and re-trivial. + +(define-record re-string + chars ; String + (posix #f) ; Posix record + ((disclose self) (list "re-string" (re-string:chars self)))) + +(define re-string make-re-string) ; For consistency w/other re makers. + +;;; This is only used in code that (RE ...) macro produces for static regexps. +(define (make-re-string/posix chars posix-str tvec) + (let ((re (make-re-string chars))) + (set-re-string:posix re (new-cre posix-str tvec)) + re)) + +;;; Matches the empty string anywhere. +(define re-trivial (make-re-string/posix "" "" '#())) + +(define (re-trivial? re) + (and (re-string? re) (zero? (string-length (re-string:chars re))))) + +(define-record re-char-set + cset ; A character set (Macro expander abuses.) + (posix #f)) ; Posix record + +(define re-char-set make-re-char-set) ; For consistency w/other re makers. + +;;; This is only used in code that (RE ...) macro produces for static regexps. +(define (make-re-char-set/posix cs posix-str tvec) + (let ((re (make-re-char-set cs))) + (set-re-char-set:posix re (new-cre posix-str tvec)) + re)) + +;;; Never matches +;;; NEED TO OPTIMIZE - PRE-SET POSIX FIELD. +(define re-empty (make-re-char-set char-set:empty)) + +(define (re-empty? re) + (and (re-char-set? re) + (let ((cs (re-char-set:cset re))) + (and (char-set? cs) ; Might be code... + (char-set-empty? cs))))) + +(define-record re-bos) (define re-bos (make-re-bos)) +(define-record re-eos) (define re-eos (make-re-eos)) + +(define-record re-bol) (define re-bol (make-re-bol)) +(define-record re-eol) (define re-eol (make-re-eol)) + +(define-record re-bow) (define re-bow (make-re-bow)) +(define-record re-eow) (define re-eow (make-re-eow)) + + +(define re-any (make-re-char-set/posix char-set:full "." '#())) + +(define (re-any? re) + (and (re-char-set? re) + (let ((cs (re-char-set:cset re))) + (and (char-set? cs) ; Might be code... + (char-set-full? cs))))) + +(define re-nonl + (make-re-char-set/posix (char-set-invert (char-set #\newline)) + "[^\n]" + '#())) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (regexp? x) + (or (re-seq? x) (re-choice? x) (re-repeat? x) + (re-char-set? x) (re-string? x) + (re-bos? x) (re-eos? x) + (re-bol? x) (re-eol? x) + (re-bow? x) (re-eow? x) + (re-submatch? x) (re-dsm? x))) + + +;;; Return the total number of submatches bound in RE. + +(define (re-tsm re) + (? ((re-seq? re) (re-seq:tsm re)) + ((re-choice? re) (re-choice:tsm re)) + ((re-repeat? re) (re-repeat:tsm re)) + ((re-dsm? re) (re-dsm:tsm re)) + ((re-submatch? re) (re-submatch:tsm re)) + (else 0))) + + +(define re-word + (let ((wcs (char-set-union char-set:alphanumeric ; Word chars + (char-set #\_)))) + (make-re-seq (list re-bow + (make-re-repeat 1 #f (make-re-char-set wcs)) + re-eow)))) + + +;;; (flush-submatches re) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Return regular expression RE with all submatch-binding elements +;;; stripped out -- (= 0 (re-tsm (flush-submatches re))). + +(define (flush-submatches re) + (? ((zero? (re-tsm re)) re) ; RE has no submatches. + + ((re-seq? re) (re-seq (map flush-submatches (re-seq:elts re)))) + ((re-choice? re) (re-choice (map flush-submatches (re-choice:elts re)))) + + ((re-repeat? re) (re-repeat (re-repeat:from re) + (re-repeat:to re) + (flush-submatches (re-repeat:body re)))) + + ((re-submatch? re) (flush-submatches (re-submatch:body re))) + ((re-dsm? re) (flush-submatches (re-dsm:body re))) + + (else re))) + + +;;; Map F over ELTS. (F x) returns two values -- the "real" return value, +;;; and a "changed?" flag. If CHANGED? is false, then the "real" return value +;;; should be identical to the original argument X. MAP/CHANGED constructs +;;; the mapped list sharing as long an unchanged tail as possible with the +;;; list ELTS; if F changes no argument, MAP/CHANGED returns exactly the list +;;; ELTS. MAP/CHANGED returns two values: the mapped list, and a changed? +;;; flag for the entire list. + +(define (map/changed f elts) + (let recur ((elts elts)) + (if (pair? elts) + (let ((elt (car elts))) + (receive (new-elts elts-changed?) (recur (cdr elts)) + (receive (new-elt elt-changed?) (f elt) + (if (or elts-changed? elt-changed?) + (values (cons new-elt new-elts) #t) + (values elts #f))))) + (values '() #f)))) + + +(define (uncase re) + (receive (new-re changed?) + (let recur ((re re)) + (? ((re-seq? re) + (let ((elts (re-seq:elts re))) + (receive (new-elts elts-changed?) + (map/changed recur elts) + (if elts-changed? + (values (%make-re-seq new-elts (re-seq:tsm re)) #t) + (values re #f))))) + + ((re-choice? re) + (let ((elts (re-choice:elts re))) + (receive (new-elts elts-changed?) + (map/changed recur elts) + (if elts-changed? + (values (re-choice new-elts) #t) + (values re #f))))) + + ((re-char-set? re) + (let* ((cs (re-char-set:cset re)) + (new-cs (uncase-char-set cs))) ; Better not be code. + (if (char-set= cs new-cs) + (values re #f) + (values (make-re-char-set new-cs) #t)))) + + ((re-repeat? re) + (receive (new-body body-changed?) (recur (re-repeat:body re)) + (if body-changed? + (values (re-repeat (re-repeat:from re) + (re-repeat:to re) + new-body) + #t) + (values re #f)))) + + ((re-submatch? re) + (receive (new-body body-changed?) (recur (re-submatch? re)) + (if body-changed? + (values (%make-re-submatch new-body + (re-submatch:pre-dsm re) + (re-submatch:tsm re)) + #t) + (values re #f)))) + + ((re-string? re) + (let ((cf-re (uncase-string (re-string:chars re)))) + (if (re-string? cf-re) + (values re #f) + (values cf-re #t)))) + + (else (values re #f)))) + new-re)) + + +;;; (uncase-char-set cs) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Return a char-set cs' such that cs' contains every char c in cs in both +;;; its upcase and downcase form. + +(define (uncase-char-set cs) + (char-set-fold (lambda (c new-cset) + (char-set-adjoin! new-cset + (char-downcase c) + (char-upcase c))) + (char-set-copy char-set:empty) + cs)) + + +;;; I actually make an effort to keep this a re-string +;;; if possible (if the string contains no case-sensitive +;;; characters). Returns a regexp matching the string in +;;; a case-insensitive fashion. + +(define (uncase-string s) + ;; SEQ is a list of chars and doubleton char-sets. + (let* ((seq (string-fold-right (lambda (c lis) + (cons (? ((char-lower-case? c) (char-set c (char-upcase c))) + ((char-upper-case? c) (char-set c (char-downcase c))) + (else c)) + lis)) + '() s)) + + ;; Coalesce adjacent chars together into a string. + (fixup (lambda (chars seq) + (if (pair? chars) + (cons (make-re-string (list->string (reverse chars))) + seq) + seq))) + + (new-seq (let recur ((seq seq) (chars '())) + (if (pair? seq) + (let ((elt (car seq)) + (seq (cdr seq))) + (if (char? elt) + (recur seq (cons elt chars)) + (fixup chars (cons (make-re-char-set elt) + (recur seq '()))))) + (fixup chars '()))))) + + (if (= 1 (length new-seq)) (car new-seq) + (make-re-seq new-seq)))) + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(define char-set-full? + (let ((allchars-nchars (char-set-size char-set:full))) + (lambda (cs) (= allchars-nchars (char-set-size cs))))) + +(define (char-set-empty? cs) (zero? (char-set-size cs))) + + +;;; A "char-class" re is either a char-set re or a string re whose string +;;; has only one character. + +(define (re-char-class? re) + (or (re-char-set? re) + (and (re-string? re) + (= 1 (string-length (re-string:chars re)))))) + +(define (static-char-class? re) + (or (and (re-char-set? re) + (char-set? (re-char-set:cset re))) ; This might be code. + (and (re-string? re) ; But never this, so no check. + (= 1 (string-length (re-string:chars re)))))) hunk ./rx/rx-lib.scm 1 +;;; Procedures that appear in code produced by (RX ...). + +;;; In sexp syntax, a , or ,@ form may evaluate to a string, char, +;;; char-set, or regexp value. Coerce one of these to a regexp value. + +(define (coerce-dynamic-regexp x) + (? ((string? x) (make-re-string x)) + ((char? x) (make-re-string (string x))) + ((char-set? x) (make-re-char-set x)) + ((regexp? x) x) + (else (error "Cannot coerce value to regular expression." x)))) + +;;; In a char-set context (e.g., as an operand of the SRE - operator), +;;; a , or form must be coercable to a char-set. + +(define (coerce-dynamic-charset x) + (? ((string? x) + (if (= 1 (string-length x)) (string->char-set x) + (error "Multi-char string not allowed as , or ,@ SRE in char-class context." + x))) + ((char? x) (char-set x)) + ((char-set? x) x) + ((re-char-set? x) (re-char-set:cset x)) + (else (error "Cannot coerce value to character set" x)))) + + +(define (spec->char-set in? loose ranges) + (let ((doit (lambda (loose ranges) + (fold (lambda (r cset) + (let ((from (char->ascii (car r))) + (to (char->ascii (cdr r)))) + (do ((i from (+ i 1)) + (cs cset (char-set-adjoin! cs (ascii->char i)))) + ((> i to) cs)))) + (string->char-set loose) + ranges)))) + (if in? + (doit loose ranges) + (char-set-invert! (doit loose ranges))))) + hunk ./rx/simp.scm 1 +;;; Olin Shivers, June 1998 +;;; Copyright (c) 1998 by the Scheme Underground. + +;;; One export: (simplify-regexp re) -> re + +;;; Regexp simplifier +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; (| (in c1 ...) (in c2 ...) re ...) => (| (in c1 ... c2 ...) re ...) +;;; (| (not-in c1 ...) (not-in c2 ...)) => (| (not-in [intersect (c1 ...) +;;; (c2 ...)]) +;;; A run of BOS's or a run of EOS's in a sequence may be elided. +;;; Nested exponents can be collapsed (*, +, ?) -- multiply the "from's" +;;; together; multiply the "to's" together. +;;; Exponent range [1,1] simplifies, as does [0,0]. +;;; Uniquify branches +;;; Adjacent literals in a sequence can be collapsed +;;; A singleton-char char class can be collapsed into a constant +;;; Nested choices can be collapsed +;;; Nested sequences can be collapsed +;;; An empty sequence (:) can be turned into an empty-string match "". +;;; Singleton choices and sequences can be reduced to their body. +;;; +;;; The simplifier is carefully written so that it won't blow up +;;; when applied to a dynamic regexp -- that is, +;;; - a chunk of Scheme code that produces a regexp instead of +;;; an actual regexp value; +;;; - a repeat regexp whose FROM or TO fields are chunks of Scheme code +;;; rather than integers; +;;; - a char-set regexp whose CSET field is a chunk of Scheme code rather +;;; than an actual char-set value. +;;; This is useful because the RX macro can build such a regexp as part +;;; of its expansion process. + +(define (simplify-regexp re) + (receive (simp-re pre-dsm) (simp-re re) + (re-dsm simp-re pre-dsm (- (re-tsm re) (+ (re-tsm simp-re) pre-dsm))))) + +(define (simp-re re) + (? ((re-string? re) (values re 0)) + ((re-seq? re) (simp-seq re)) + ((re-choice? re) (simp-choice re)) + + ;; Singleton char-sets reduce to the character. + ;; Bear in mind the cset field might be Scheme code instead + ;; of an actual char set if the regexp is dynamic. + ((re-char-set? re) + (values (let ((cs (re-char-set:cset re))) + (if (and (char-set? cs) + (= 1 (char-set-size cs))) + (make-re-string (string (car (char-set-members cs)))) + re)) + 0)) + + ((re-repeat? re) (simp-repeat re)) + + ((re-submatch? re) (simp-submatch re)) + ((re-dsm? re) (simp-dsm re)) + + (else (values re 0)))) + + + +;;; If the body of a submatch is the empty re, reduce it to the empty re. + +(define (simp-submatch re) + (let ((tsm (re-submatch:tsm re)) + (pre-dsm (re-submatch:pre-dsm re))) + (receive (body1 pre-dsm1) (simp-re (re-submatch:body re)) + (if (re-empty? body1) + (values re-empty tsm) + (values (%make-re-submatch body1 (+ pre-dsm pre-dsm1) tsm) + 0))))) + +;;; - Flatten nested DSM's. +;;; - Return pre-dsm field and body field as the two return values. + +(define (simp-dsm re) + (receive (body pre-dsm1) (simp-re (re-dsm:body re)) + (values body (+ (re-dsm:pre-dsm re) pre-dsm1)))) + + + +;;; Simplifying sequences +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; - Collapse nested sequences and DSM's. +;;; - Merge adjacent strings, identical adjacent anchors (bos, eos, etc.). +;;; - Bubble DSM's forwards past elts that don't contain live submatches. +;;; (Going past live submatches would switch the submatch indexes around, +;;; which would be an error). This helps to coalesce DSMs and if we bring +;;; them all the way to the front, we can pop them off and make them a +;;; pre-dsm for the entire seq record. +;;; - If an elt is the re-empty, reduce the whole re to the empty re. +;;; - Reduce singleton and empty seq. + +(define (simp-seq re) + (let ((tsm (re-seq:tsm re)) + (elts (map simplify-regexp (re-seq:elts re)))) + (if (pair? elts) + + (call-with-current-continuation + (lambda (abort) + (receive (pre-dsm head tail) (simp-seq1 elts abort tsm) + (values (if (pair? tail) + (%make-re-seq (cons head tail) (- tsm pre-dsm)) + head) ; Singleton seq + pre-dsm)))) + + (values re-trivial 0)))) ; Empty seq + + +;;; Simplify the non-empty sequence ELTS. +;;; - Return the result split out into three values: +;;; [head-elt-pre-dsm, head-elt, tail]. +;;; - If any elt is the empty (impossible) re, abort by calling +;;; (abort elt tsm). TSM is otherwise unused. + +(define (simp-seq1 elts abort tsm) + (let recur ((elt (car elts)) (elts (cdr elts))) + (receive (elt pre-dsm) (open-dsm elt) + (? ((re-seq? elt) ; Flatten nested seqs. + (let ((sub-elts (re-seq:elts elt))) + (recur (re-dsm (car sub-elts) pre-dsm 0) + (append (cdr sub-elts) elts)))) + + ((re-empty? elt) (abort elt tsm)) ; Bomb out on the empty + ; (impossible) re. + ((pair? elts) + (receive (next-pre-dsm next tail) ; Simplify the tail, + (recur (car elts) (cdr elts)) ; then think about + ; the head: + ;; This guy is called when we couldn't find any other + ;; simplification. If ELT contains live submatches, then + ;; there really is nothing to be done at this step -- just + ;; assemble the pieces together and return them. If ELT + ;; *doesn't* contain any live submatches, do the same, but + ;; bubble its following next-pre-dsm submatches forwards. + (define (no-simp) + (if (has-live-submatches? elt) + (values pre-dsm elt (cons (re-dsm next next-pre-dsm 0) tail)) + (values (+ pre-dsm next-pre-dsm) elt (cons next tail)))) + + ;; Coalesces two adjacent bol's, two adjacent eol's, etc. + (define (coalesce-anchor anchor?) + (if (and (anchor? elt) (anchor? next)) + (values (+ pre-dsm next-pre-dsm) elt tail) + (no-simp))) + + (? ((re-trivial? elt) ; Drop trivial re's. + (values (+ pre-dsm next-pre-dsm) next tail)) + + ;; Coalesce adjacent strings + ((re-string? elt) + (if (re-string? next) + (values (+ pre-dsm next-pre-dsm) + (make-re-string (string-append (re-string:chars elt) + (re-string:chars next))) + tail) + (no-simp))) + + ;; Coalesce adjacent bol/eol/bos/eos/bow/eow's. + ((re-bol? elt) (coalesce-anchor re-bol?)) + ((re-eol? elt) (coalesce-anchor re-eol?)) + ((re-bos? elt) (coalesce-anchor re-bos?)) + ((re-eos? elt) (coalesce-anchor re-eos?)) + ((re-bow? elt) (coalesce-anchor re-bow?)) + ((re-eow? elt) (coalesce-anchor re-eow?)) + (else (no-simp))))) + + (else (values pre-dsm elt '())))))) + + + +;;; Simplifying choices +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; - Collapse nested choices and DSM's. +;;; - Delete re-empty's. +;;; - Merge sets; merge identical anchors (bos, eos, etc.). +;;; But you can't merge across an element that contains a live submatch, +;;; see below. +;;; - A singleton string "c" is included into the char-set merge as a +;;; singleton set. +;;; - Bubble DSM's forwards past elts that don't contain live submatches. +;;; (Going past live submatches would switch the submatch indexes around, +;;; which would be an error). This helps to coalesce DSMs and if we bring +;;; them all the way to the front, we can pop them off and make them a +;;; pre-dsm for the entire seq record. +;;; - Reduce singleton and empty choice. +;;; +;;; You have to be careful simplifying choices -- you can't merge two sets +;;; that appear on different sides of an element containing a live submatch. +;;; The problem is that the assignment of submatches breaks ties left-to-right. +;;; So these aren't the same: +;;; (| (submatch "x") any) (| any (submatch "x")) +;;; The first assigns the submatch, the second doesn't -- the ANY gets credit. +;;; We want to collapse multiple char-sets, bos's, and eos's, but we have +;;; to deal with this issue. So +;;; - When we coalesce anchors, we retain the *leftmost* one. +;;; - We coalesce sets that appear between live-submatch boundaries. +;;; When we do this, we subtract from the set any char that was in +;;; an earlier coalesced char-set. If this gets us down to the empty set, +;;; we drop it. If it gets us down to a singleton set, we convert it into +;;; a singleton string. +;;; Whew. I had to think about this one. + +(define (simp-choice re) + (let ((tsm (re-choice:tsm re))) + + (receive (pre-dsm cset bos? eos? bol? eol? bow? eow? tail) + (simp-choice1 (map simplify-regexp (re-choice:elts re))) + + (let ((tail (assemble-boundary-tail char-set:empty cset + bos? eos? bol? eol? bow? eow? + #f #f #f #f #f #f + tail))) + (values (if (pair? tail) + (if (pair? (cdr tail)) + (%make-re-choice tail (- tsm pre-dsm)) + (car tail)) ; Singleton choice + re-empty) ; Empty choice + pre-dsm))))) + + + +;;; Given the return values from simp-choice1, this tacks all +;;; the various pieces (CSET, BOS?, EOS?, etc.) onto the front of +;;; TAIL. However, elements are not added onto TAIL that are already +;;; described by PREV-CSET, PREV-BOS?, etc. -- they will be added onto +;;; some earlier bit of the final result. + +(define (assemble-boundary-tail prev-cset cset + bos? eos? bol? eol? bow? eow? + prev-bos? prev-eos? + prev-bol? prev-eol? + prev-bow? prev-eow? + tail) + (let* ((cset (char-set-difference cset prev-cset)) + (numchars (char-set-size cset)) + (tail (if (and eos? (not prev-eos?)) (cons re-eos tail) tail)) + (tail (if (and eol? (not prev-eol?)) (cons re-eol tail) tail)) + (tail (if (and eow? (not prev-eow?)) (cons re-eow tail) tail)) + (tail (if (and bow? (not prev-bow?)) (cons re-bow tail) tail)) + (tail (if (and bol? (not prev-bol?)) (cons re-bol tail) tail)) + (tail (if (and bos? (not prev-bos?)) (cons re-bos tail) tail)) + (tail (? ((zero? numchars) tail) ; Drop empty char set. + ((= 1 numchars) ; {c} => "c" + (cons (make-re-string (string (car (char-set-members cset)))) + tail)) + (else (cons (make-re-char-set cset) tail))))) + tail)) + + +;;; Simplify the non-empty list of choices ELTS. +;;; Return the result split out into the values +;;; [pre-dsm, cset, bos?, eos?, bol?, eol?, bow?, eow?, tail] + +(define (simp-choice1 elts) + (let recur ((elts elts) + + (prev-cset char-set:empty) ; Chars we've already seen. + + (prev-bos? #f) (prev-eos? #f) ; These flags say if we've + (prev-bol? #f) (prev-eol? #f) ; already seen one of these + (prev-bow? #f) (prev-eow? #f)) ; anchors. + + + (if (pair? elts) + (let ((elt (car elts)) + (elts (cdr elts))) + (receive (elt pre-dsm) (open-dsm elt) + (if (re-choice? elt) + + ;; Flatten nested choices. + (let ((sub-elts (re-seq:elts elt))) + (receive (tail-pre-dsm cset bos? eos? bol? eol? bow? eow? tail) + (recur (append sub-elts elts) + prev-cset + prev-bos? prev-eos? + prev-bol? prev-eol? + prev-bow? prev-eow?) + (values (+ pre-dsm tail-pre-dsm) + cset bos? eos? bol? eol? bow? eow? tail))) + + ;; Simplify the tail, then think about the head. + (receive (tail-pre-dsm cset bos? eos? bol? eol? bow? eow? tail) + (recur elts + (? ((and (re-string? elt) + (= 1 (string-length (re-string:chars elt)))) + (char-set-union prev-cset + (string->char-set (re-string:chars elt)))) + + ;; The cset might be a Scheme exp. + ((and (re-char-set? elt) + (char-set? (re-char-set:cset elt))) + (char-set-union prev-cset + (re-char-set:cset elt))) + + (else prev-cset)) + (or prev-bos? (re-bos? elt)) + (or prev-eos? (re-eos? elt)) + (or prev-bol? (re-bol? elt)) + (or prev-eol? (re-eol? elt)) + (or prev-bow? (re-bow? elt)) + (or prev-eow? (re-eow? elt))) + + ;; This guy is called when we couldn't find any other + ;; simplification. If ELT contains live submatches, then we + ;; are at a merge boundary, and have to take all the + ;; TAIL-PRE-DSM, CSET, BOS?, EOS?, ... stuff we've collected + ;; and tack them onto TAIL as elements, then put ELT on + ;; front. Otherwise, we can commute TAIL-PRE-DSM, CSET, + ;; BOS?, etc. with ELT, since it contains no live + ;; submatches, so just tack ELT onto TAIL. + + (define (no-simp) + (if (has-live-submatches? elt) + (let ((tail (assemble-boundary-tail prev-cset cset + bos? eos? + bol? eol? + bow? eow? + prev-bos? prev-eos? + prev-bol? prev-eol? + prev-bow? prev-eow? + tail))) + (values pre-dsm char-set:empty #f #f #f #f #f #f + (if (pair? tail) + ;; Tack tail-pre-dsm onto + ;; TAIL's first elt. + (cons elt + (cons (re-dsm (car tail) + tail-pre-dsm 0) + (cdr tail))) + + ;; Squirrel case: TAIL is empty, so use + ;; TAIL-PRE-DSM as ELT's post-dsm. + (list (re-dsm elt 0 tail-pre-dsm))))) + + ;; ELT has no live submatches, so we can commute all + ;; the recursion state forwards past it. + (values (+ pre-dsm tail-pre-dsm) + cset bos? eos? bol? eol? bow? eow? + (cons elt tail)))) + + (? ((and (re-char-set? elt) + (char-set? (re-char-set:cset elt))) ; Might be Scheme code + (values (+ pre-dsm tail-pre-dsm) + (char-set-union cset (re-char-set:cset elt)) + bos? eos? bol? eol? bow? eow? tail)) + + ;; Treat a singleton string "c" as a singleton set {c}. + ((and (re-string? elt) (= 1 (string-length (re-string:chars elt)))) + (values (+ pre-dsm tail-pre-dsm) + (char-set-union cset (string->char-set (re-string:chars elt))) + bos? eos? bol? eol? bow? eow? tail)) + + ;; Coalesce bol/eol/bos/eos/bow/eow's. + ((re-bos? elt) (values (+ pre-dsm tail-pre-dsm) cset + #t eos? bol? eol? bow? eow? tail)) + ((re-eos? elt) (values (+ pre-dsm tail-pre-dsm) cset + bos? #t bol? eol? bow? eow? tail)) + ((re-bol? elt) (values (+ pre-dsm tail-pre-dsm) cset + bos? eos? #t eol? bow? eow? tail)) + ((re-eol? elt) (values (+ pre-dsm tail-pre-dsm) cset + bos? eos? bol? #t bow? eow? tail)) + ((re-bow? elt) (values (+ pre-dsm tail-pre-dsm) cset + bos? eos? bol? eol? #t eow? tail)) + ((re-eow? elt) (values (+ pre-dsm tail-pre-dsm) cset + bos? eos? bol? eol? bow? #t tail)) + + (else (no-simp))))))) + + (values 0 char-set:empty #f #f #f #f #f #f '())))) + + + +(define (simp-repeat re) + (let ((from (re-repeat:from re)) + (to (re-repeat:to re)) + (body (re-repeat:body re))) + (receive (simp-body pre-dsm) (simp-re body) ; Simplify body. + ;; The fancy reductions are all handled by REDUCE-REPEAT. + (reduce-repeat from to simp-body pre-dsm)))) + + + +;;; Does RE contain a live submatch? +;;; If RE is dynamic, we can't tell, so we err conservatively, +;;; which means we say "yes." + +(define (has-live-submatches? re) + (or (re-submatch? re) + (? ((re-seq? re) (every has-live-submatches? (re-seq:elts re))) + ((re-choice? re) (every has-live-submatches? (re-choice:elts re))) + ((re-repeat? re) (has-live-submatches? (re-repeat:body re))) + ((re-dsm? re) (has-live-submatches? (re-dsm:body re))) + + ;; If it's not one of these things, then this isn't a regexp -- it's + ;; a chunk of Scheme code producing a regexp, and we conservatively + ;; return #T -- the expression *might* produce a regexp containing + ;; a live submatch: + (else (not (or (re-char-set? re) (re-string? re) + (re-bos? re) (re-eos? re) + (re-bol? re) (re-eol? re) + (re-bow? re) (re-eow? re))))))) hunk ./rx/spencer.scm 1 +;;; Parse Spencer-style regexps into the regexp ADT. +;;; Olin Shivers, July 1998. + +;;; One export: (posix-string->regexp s) + +;;; Need better error checking on {m,n} brace parsing. + +(define (parse-posix-regexp-string s) + (receive (re i) (parse-posix-exp s 0) + (if (= i (string-length s)) re + (error "Illegal Posix regexp -- terminated early" s i)))) + +(define posix-string->regexp parse-posix-regexp-string) + +;;; An complete expression is a sequence of |-separated branches. + +(define (parse-posix-exp s i) + (let ((len (string-length s))) + (if (< i len) + (let lp ((i i) (branches '())) + (receive (branch i) (parse-posix-branch s i) + (let ((branches (cons branch branches))) + (if (and (< i len) + (char=? #\| (string-ref s i))) + (lp (+ i 1) branches) + (values (re-choice (reverse branches)) i))))) + (values re-trivial i)))) + + +;;; A branch is a sequence of pieces -- stuff that goes in-between |'s. + +(define (parse-posix-branch s i) + (let ((len (string-length s))) + (let lp ((i i) (pieces '())) + (if (< i len) + (receive (piece i) (parse-posix-piece s i) + (let ((pieces (cons piece pieces))) + (if (< i len) + (case (string-ref s i) + ((#\) #\|) (values (re-seq (reverse pieces)) i)) + (else (lp i pieces))) + (values (re-seq (reverse pieces)) i)))) + + (values (re-seq (reverse pieces)) i))))) + + +;;; A piece is an atom possibly followed by a * ? + or {...} multiplier. +;;; I.e. an element of a branch sequence. + +(define (parse-posix-piece s i) + (let ((len (string-length s))) + (receive (atom i) (parse-posix-atom s i) + (if (< i len) + (case (string-ref s i) + ((#\* #\+ #\?) + (receive (from to) (case (string-ref s i) + ((#\*) (values 0 #f)) + ((#\+) (values 1 #f)) + ((#\?) (values 0 1))) + (values (re-repeat from to atom) (+ i 1)))) + + ((#\{) (receive (from to i) (parse-posix-braces s (+ i 1)) + (values (re-repeat from to atom) i))) + + (else (values atom i))) + + (values atom i))))) + + +;;; An atom is something that would bind to a following * operator -- +;;; a letter, [...] charset, ^, $, or (...). + +(define (parse-posix-atom s i) + (let ((len (string-length s))) + (if (< i (string-length s)) + (let ((c (string-ref s i))) + (case c + ((#\^) (values re-bos (+ i 1))) + ((#\$) (values re-eos (+ i 1))) + ((#\.) (values re-any (+ i 1))) + + ((#\[) (parse-posix-bracket s (+ i 1))) + + ((#\() (receive (re i) (parse-posix-exp s (+ i 1)) + (if (and (< i len) (char=? #\) (string-ref s i))) + (values (re-submatch re) (+ i 1)) + (error "Regexp subexpression has no terminating close parenthesis" s i)))) + + ((#\\) (let ((i (+ i 1))) + (if (< i len) + (values (make-re-string (string (string-ref s i))) + (+ i 1)) + (error "Regexps may not terminate with a backslash" s)))) + + ((#\) #\| #\* #\+ #\? #\{) (values re-trivial i)) + + (else (values (make-re-string (string c)) (+ i 1))))) + + (values re-trivial i)))) + + +;;; Parse a [...] or [^...] bracket expression into a regexp. +;;; I is the index of the char following the left bracket. + +(define db-cset (char-set #\. #\= #\:)) ; Not allowed after a #\[. + +(define (parse-posix-bracket s i) + (let ((len (string-length s))) + (if (>= i len) (error "Missing close right bracket in regexp" s i) + + (receive (negate? i0) (let ((c (string-ref s i))) + (if (char=? c #\^) + (values #t (+ i 1)) + (values #f i))) + (let lp ((i i0) (cset (char-set-copy char-set:empty))) + (if (>= i len) (error "Missing close right bracket in regexp" s i) + + (let ((c (string-ref s i)) + (i1 (+ i 1))) + (case c + ((#\[) + ;; We don't handle [..] [==] [::] frobs. + (if (and (< i1 len) + (char-set-contains? db-cset (string-ref s i1))) + (error "double-bracket regexps not supported." s i) + (lp i1 (char-set-adjoin! cset #\[)))) + + ((#\]) (if (= i i0) + (lp i1 (char-set-adjoin! cset #\])) + (let ((cset (if negate? + (char-set-invert! cset) + cset))) + (values (make-re-char-set cset) i1)))) + + ((#\-) (if (or (= i i0) ; first char or last char + (and (< i1 len) + (char=? #\] (string-ref s i1)))) + (lp i1 (char-set-adjoin! cset #\-)) + (error "Illegal - in [...] regexp" s i))) + + ;; Regular letter -- either alone, or startpoint of a range. + (else (if (and (< (+ i1 1) len) + (char=? #\- (string-ref s i1))) + + ;; Range + (let* ((i-tochar (+ i1 1)) + (to (char->ascii (string-ref s i-tochar)))) + (do ((j (char->ascii c) (+ j 1)) + (cset cset (char-set-adjoin! cset (ascii->char j)))) + ((> j to) (lp (+ i-tochar 1) cset)))) + + ;; Just a letter + (lp i1 (char-set-adjoin! cset c)))))))))))) + + +;;; Parse out a [from,to] repetition pair from a {m,n} {m} or {m,} expression. +;;; I is the index of the char following the left brace. + +(define (parse-posix-braces s i) + (let ((comma (string-index s #\, i)) + (rb (string-index s #\} i))) + (if rb + (if (and comma (< comma rb)) + (values (string->number (substring s i comma)) + (and (not (= (+ comma 1) rb)) + (string->number (substring s (+ comma 1) rb))) + (+ rb 1)) + (let ((m (string->number (substring s i rb)))) + (values m m (+ rb 1)))) + (error "Missing close brace in regexp" s i)))) + hunk ./scsh-condition.scm 1 -;;; Copyright (c) 1994 by Olin Shivers +;;; Copyright (c) 1994 by Olin Shivers. See file COPYING. hunk ./scsh-condition.scm 4 -;;; modified for Guile. - hunk ./scsh-version.scm 3 -(define scsh-version-string "0.5") +(define scsh-version-string "0.5.2") hunk ./scsh.scm 2 -;;; Copyright (c) 1992 by Olin Shivers. +;;; Copyright (c) 1992-1999 by Olin Shivers. hunk ./scsh.scm 4 - -;;; modified for Guile. +;;; See file COPYING. hunk ./scsh.scm 11 -;;; (with-continuation #f (lambda () (thunk) (exit 0)))) +;;; (with-continuation (loophole :escape #f) ; Bogus +;;; (lambda () (thunk) (exit 0)))) hunk ./scsh.scm 130 - (cond ((index clist #\: i) => + (cond ((string-index clist #\: i) => hunk ./scsh.scm 212 - (new-env (reduce (lambda (alist key/val) - (alist-update (car key/val) (cdr key/val) alist)) - (env->alist) - alist-delta))) + (new-env (fold (lambda (key/val alist) + (alist-update (car key/val) (cdr key/val) alist)) + (env->alist) + alist-delta))) hunk ./scsh.scm 440 -;;; (reduce-port port reader op . seeds) +;;; (port-fold port reader op . seeds) hunk ./scsh.scm 444 -;;; On eof, return the seeds. -;;; PORT->LIST is just (REDUCE-PORT PORT READ CONS '()) +;;; On eof, return the seeds: (apply value SEEDS). +;;; PORT->LIST is just (PORT-FOLD PORT READ CONS '()) hunk ./scsh.scm 510 -(define (reduce-port port reader op . seeds) - (letrec ((reduce (lambda seeds +(define (port-fold port reader op . seeds) + (letrec ((fold (lambda seeds hunk ./scsh.scm 515 - reduce)))))) - (apply reduce seeds))) + fold)))))) + (apply fold seeds))) + +(define reduce-port + (deprecated-proc port-fold 'reduce-port "Use port-fold instead.")) hunk ./scsh.scm 681 - (if (index prog #\/) + (if (string-index prog #\/) hunk ./sighandlers.scm 1 -;;; Copyright (c) 1993 by Olin Shivers. +;;; Copyright (c) 1993 by Olin Shivers. See file COPYING. hunk ./sighandlers.scm 36 - "extern int errno;" + "#include " hunk ./sighandlers.scm 81 - (string-downcase! + (string-downcase hunk ./stringcoll.scm 1 -;;; Copyright (c) 1994 by Olin Shivers +;;; Copyright (c) 1994 by Olin Shivers. See file COPYING. hunk ./stringcoll.scm 135 - (set-string-collector:chunk-left (- chunk-left 1))) + (set-string-collector:chunk-left sc (- chunk-left 1))) hunk ./stringcoll.scm 142 - (set-string-collector:chunk-left 127) + (set-string-collector:chunk-left sc 127) hunk ./syntax-helpers.scm 6 -;;; Copyright (c) 1993 by Olin Shivers. - -;; modified for Guile. process forms not ported yet. +;;; Copyright (c) 1993 by Olin Shivers. See file COPYING. hunk ./syntax.scm 3 -;;; Copyright (c) 1993 by Olin Shivers. +;;; Copyright (c) 1993 by Olin Shivers. See file COPYING. hunk ./syscalls.scm 2 -;;; Copyright (c) 1993 by Olin Shivers. +;;; Copyright (c) 1993 by Olin Shivers. See file COPYING. hunk ./syscalls.scm 4 -;; Rewritten for Guile in places, incomplete. +;;; Scheme48 implementation. hunk ./syscalls.scm 15 + "#include " hunk ./syscalls.scm 27 - "extern int errno;" - "" hunk ./syscalls.scm 300 - (fchown (integer fd) (uid_t uid) (gid_t gid)) + (fchown (integer fd) (uid_t uid) (gid_t gid)) no-declare ; for NT Cygwin hunk ./syscalls.scm 791 -;;; For Guile it's done in Scheme. hunk ./syscalls.scm 792 -(define-foreign %filter-C-strings! - (filter_stringvec (string pattern) ((C "char const ** ~a") cvec)) - static-string ; error message -- #f if no error. - integer) ; number of files that pass the filter. +;;; 99/7: No one is using this function, so I'm commenting it out. +;;; Later, we could tune up the globber or regexp file-matcher to use +;;; it (but should shift it into the rx directory). But I should also go +;;; to a file-at-a-time cursor model for directory fetching. -Olin + +;(define-foreign %filter-C-strings! +; (filter_stringvec (string-desc pattern) ((C "char const ** ~a") cvec)) +; integer) ; number of files that pass the filter. + +; guile version. +; (define (%filter-C-strings! pattern vec) +; (let ((rx (make-regexp pattern)) +; (len (vector-length vec))) +; (let loop ((i 0) (j 0)) +; (if (= i len) +; (values #f j) +; (loop (+ i 1) +; (if (regexp-exec rx (vector-ref vec i)) +; (begin +; (vector-set! vec j (vector-ref vec i)) +; (+ j 1)) +; j)))))) + hunk ./syscalls.scm 816 -(define (%filter-C-strings! pattern vec) - (let ((rx (make-regexp pattern)) - (len (vector-length vec))) - (let loop ((i 0) (j 0)) - (if (= i len) - (values #f j) - (loop (+ i 1) - (if (regexp-exec rx (vector-ref vec i)) - (begin - (vector-set! vec j (vector-ref vec i)) - (+ j 1)) - j)))))) +;(define (match-files regexp . maybe-dir) +; (let ((dir (:optional maybe-dir "."))) +; (check-arg string? dir match-files) +; (receive (err cvec numfiles) +; (%open-dir (ensure-file-name-is-nondirectory dir)) +; (if err (errno-error err match-files regexp dir)) +; (receive (numfiles) (%filter-C-strings! regexp cvec) +; ;(if err (error err match-files)) +; (%sort-file-vector cvec numfiles) +; (let ((files (C-string-vec->Scheme&free cvec numfiles))) +; (vector->list files)))))) hunk ./syscalls.scm 835 - (let ((i (index var=val #\=))) + (let ((i (string-index var=val #\=))) hunk ./time.scm 2 -;;; Copyright (c) 1994 by Olin Shivers. - -;;; Modified to use Guile primitives. +;;; Copyright (c) 1994 by Olin Shivers. See file COPYING. hunk ./time.scm 68 +(define modify-date:seconds modify-%date:seconds) +(define modify-date:minute modify-%date:minute) +(define modify-date:hour modify-%date:hour) +(define modify-date:month-day modify-%date:month-day) +(define modify-date:month modify-%date:month) +(define modify-date:year modify-%date:year) +(define modify-date:tz-name modify-%date:tz-name) +(define modify-date:tz-secs modify-%date:tz-secs) +(define modify-date:summer? modify-%date:summer?) +(define modify-date:week-day modify-%date:week-day) +(define modify-date:year-day modify-%date:year-day) + hunk ./time.scm 316 - (let* ((offset (modulo offset 86400)) - (h (quotient offset 3600)) + (let* ((offset (modulo offset 86400)) ; seconds/day + (h (quotient offset 3600)) ; seconds/hour hunk ./time.scm 326 - name sign (two-digits h) (two-digits m) (two-digits s))))))) + name sign + (two-digits h) (two-digits m) (two-digits s))))))) hunk ./utilities.scm 2 -;;; Copyright (c) 1993 by Olin Shivers. +;;; Copyright (c) 1993 by Olin Shivers. See file COPYING. hunk ./utilities.scm 19 -(define (index str c . maybe-start) - (let ((start (max 0 (:optional maybe-start 0))) - (len (string-length str))) - (do ((i start (+ 1 i))) - ((or (>= i len) - (char=? c (string-ref str i))) - (and (< i len) i))))) +(define (fold kons knil lis) + (let lp ((lis lis) (ans knil)) + (if (pair? lis) + (lp (cdr lis) (kons (car lis) ans)) + ans))) hunk ./utilities.scm 25 -(define (rindex str c . maybe-start) - (let* ((len (string-length str)) - (start (min (:optional maybe-start len) - len))) - (do ((i (- start 1) (- i 1))) - ((or (< i 0) - (char=? c (string-ref str i))) - (and (>= i 0) i))))) +(define (fold-right kons knil lis) + (let recur ((lis lis)) + (if (pair? lis) + (let ((head (car lis))) ; Won't need LIS after RECUR call. + (kons head (recur (cdr lis)))) + knil))) hunk ./utilities.scm 32 -;;; (f (f (f zero x1) x2) x3) -;;; [Richard's does (f x3 (f x2 (f x1 zero))) -(define (reduce f zero l) - (letrec ((lp (lambda (val rest) - (if (pair? rest) (lp (f val (car rest)) (cdr rest)) - val)))) - (lp zero l))) - hunk ./utilities.scm 53 -(define any first) - hunk ./utilities.scm 63 -(define any? first?) +(define any first?) + +(define (every pred list) + (or (not (pair? list)) + (let lp ((head (car list)) (tail (cdr list))) + (if (pair? tail) + (and (pred head) (lp (car tail) (cdr tail))) + (pred head))))) ; Tail-call the last PRED call. hunk ./utilities.scm 72 -(define (every? pred list) - (letrec ((lp (lambda (list) - (or (not (pair? list)) - (and (pred (car list)) - (lp (cdr list))))))) - (lp list))) hunk ./utilities.scm 77 - ((= i len) ans) + ((>= i len) ans) hunk ./utilities.scm 83 - ((= i len) v) + ((>= i len) v) hunk ./utilities.scm 105 +(define (vector-append . vecs) + (let* ((vlen (fold (lambda (v len) (+ (vector-length v) len)) 0 vecs)) + (ans (make-vector vlen))) + (let lp1 ((vecs vecs) (to 0)) + (if (pair? vecs) + (let* ((vec (car vecs)) + (len (vector-length vec))) + (let lp2 ((from 0) (to to)) + (cond ((< from len) + (vector-set! ans to (vector-ref vec from)) + (lp2 (+ from 1) (+ to 1))) + (else (lp1 (cdr vecs) to))))))) + ans)) + + +(define (vfold kons knil v) + (let ((len (vector-length v))) + (do ((i 0 (+ i 1)) + (ans knil (kons (vector-ref v i) ans))) + ((>= i len) ans)))) + +(define (vfold-right kons knil v) + (do ((i (- (vector-length v) 1) (- i 1)) + (ans knil (kons (vector-ref v i) ans))) + ((< i 0) ans))) + + +;;; We loophole the call to ERROR -- the point is that perhaps the +;;; user will interact with a breakpoint, and proceed with a new +;;; value, which we will then pass to a new invocation of CHECK-ARG +;;; for approval. hunk ./utilities.scm 138 - (check-arg pred (error "Bad argument" val pred caller) caller))) + (check-arg pred + (loophole :value (error "Bad argument" val pred caller)) + caller))) hunk ./utilities.scm 193 - -;;; Copy string SOURCE into TARGET[start,...] - -(define (string-replace! target start source) - (let ((len (string-length source))) - (do ((i (+ start len -1) (- i 1)) - (j (- len 1) (- j 1))) - ((< j 0) target) - (string-set! target i (string-ref source j))))) - - -;;; Copy SOURCE[source-start, source-end) into TARGET[start,) - -(define (substring-replace! target start source source-start source-end) - (do ((i (+ start (- source-end source-start) -1) (- i 1)) - (j (- source-end 1) (- j 1))) - ((< j source-start) target) - (string-set! target i (string-ref source j)))) - - -;;; Compute (... (f (f (f zero c0) c1) c2) ...) - -(define (string-reduce f zero s) - (let ((len (string-length s))) - (let lp ((v zero) (i 0)) - (if (= i len) - v - (lp (f v (string-ref s i)) (+ i 1))))))