[[project @ 1997-01-25 18:26:36 by ghouston] ghouston**19970125182641 Ignore-this: 56306c29dcab68a5a4ae11a819307ea * init.scm: load char-set.scm and rdelim.scm. * Makefile.am (subpkgdata_DATA): add char-set.scm and rdelim.scm. * rdelim.scm: new file from scsh. Some procedures have been implemented in libguile/ice-9 (based on the scsh interfaces), so the versions here just modify them to use scsh char-sets and multiple values. * rw.scm (generic-read-string!/partial, generic-read-string!, generic-write-string/partial, generic-write-string: adapt to change in uniform-array-read/write arguments. * network.scm (generic-receive-message!, generic-receive-message!/partial): adapt to changes in recvfrom! primitive. * char-set.scm: new file from scsh. ] addfile ./char-set.scm addfile ./rdelim.scm hunk ./ChangeLog 1 +Sat Jan 25 01:03:03 1997 Gary Houston + + * init.scm: load char-set.scm and rdelim.scm. + * Makefile.am (subpkgdata_DATA): add char-set.scm and rdelim.scm. + + * rdelim.scm: new file from scsh. Some procedures have been + implemented in libguile/ice-9 (based on the scsh interfaces), so + the versions here just modify them to use scsh char-sets and + multiple values. + + * rw.scm (generic-read-string!/partial, generic-read-string!, + generic-write-string/partial, generic-write-string: adapt + to change in uniform-array-read/write arguments. + + * network.scm (generic-receive-message!, + generic-receive-message!/partial): adapt to changes in recvfrom! + primitive. + + * char-set.scm: new file from scsh. + hunk ./INCOMPAT 17 +%read-delimited! takes a string for its "set of delimiters" parameter. +If the buffer fills, it doesn't peek ahead to check whether the next +character is a delimiter or EOF, since this a) seems to require extra +code or comparisons, b) makes read-delimted! and read-line! more complex +c) doesn't gain much anyway. + hunk ./Makefile.am 6 -subpkgdata_DATA = defrec.scm errno.scm init.scm let-opt.scm netconst.scm \ - network.scm receive.scm rw.scm syntax.scm utilities.scm +subpkgdata_DATA = char-set.scm \ + defrec.scm errno.scm init.scm let-opt.scm netconst.scm \ + network.scm rdelim.scm receive.scm rw.scm syntax.scm utilities.scm hunk ./Makefile.in 48 -subpkgdata_DATA = defrec.scm errno.scm init.scm let-opt.scm netconst.scm \ - network.scm receive.scm rw.scm syntax.scm utilities.scm +subpkgdata_DATA = char-set.scm \ + defrec.scm errno.scm init.scm let-opt.scm netconst.scm \ + network.scm rdelim.scm receive.scm rw.scm syntax.scm utilities.scm hunk ./char-set.scm 1 +;;; -*-Scheme-*- +;;; +;;; Character Sets package +;;; ported from MIT Scheme runtime +;;; by Brian D. Carlstrom +;;; Sleazy code. + +(define char:newline (ascii->char 13)) +(define char:tab (ascii->char 9)) +(define char:linefeed (ascii->char 13)) +(define char:page (ascii->char 12)) +(define char:return (ascii->char 10)) +(define char:space (ascii->char 32)) + +(define (string-fill-range! str lower upper ch) + (do ((index lower (+ index 1))) + ((>= index upper) str) + (string-set! str index ch))) + +(define (char-ascii? char) + (let ((maybe-ascii (char->ascii char))) + (and (<= 0 maybe-ascii 127) maybe-ascii))) + +;;;; Character Sets + +(define (char-set? object) + (and (string? object) + (= (string-length object) 256))) + +(define (char-set . chars) + (chars->char-set chars)) + +(define (chars->char-set chars) + (let ((char-set (make-string 256 (ascii->char 0)))) + (for-each (lambda (char) + (string-set! char-set (char->ascii char) (ascii->char 1))) + chars) + char-set)) + +(define (string->char-set str) + (let ((char-set (make-string 256 (ascii->char 0)))) + (do ((i (- (string-length str) 1) (- i 1))) + ((< i 0) char-set) + (string-set! char-set (char->ascii (string-ref str i)) + (ascii->char 1))))) + +(define (ascii-range->char-set lower upper) + (let ((char-set (make-string 256 (ascii->char 0)))) + (string-fill-range! char-set lower upper (ascii->char 1)) + char-set)) + +(define (predicate->char-set predicate) + (let ((char-set (make-string 256))) + (let loop ((code 0)) + (if (< code 256) + (begin (string-set! char-set code + (if (predicate (ascii->char code)) + (ascii->char 1) + (ascii->char 0))) + (loop (+ 1 code))))) + char-set)) + + +;;; {string, char, char-set, char predicate} -> char-set + +(define (->char-set x) + (cond ((char-set? x) x) + ((string? x) (string->char-set x)) + ((char? x) (char-set x)) + ((procedure? x) (predicate->char-set x)) + (else (error "->char-set: Not a charset, string, char, or predicate." + x)))) + + +;;;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- + +(define (char-set-members char-set) + (define (loop code) + (cond ((>= code 256) '()) + ((zero? (char->ascii (string-ref char-set code))) (loop (+ 1 code))) + (else (cons (ascii->char code) (loop (+ 1 code)))))) + (loop 0)) + +;;; De-releasing CHAR-SET-MEMBER? +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; No other way to do it. MIT Scheme defines it (c-s-m? cset char); scsh 0.3 +;;; defined it (c-s-m? char cset). MIT Scheme's arg order is not consistent +;;; with the MEMBER? procedure or common math notation, but they were here +;;; first, so I didn't want to just silently invert their arg order -- could +;;; break code. I ended up just choosing a new proc name that consistent with +;;; its arg order -- (CHAR-SET-CONTAINS? cset char). + +(define (char-set-contains? char-set char) + (not (zero? (char->ascii (string-ref char-set (char->ascii char)))))) + +;;; This actually isn't exported. Just CYA. +(define (char-set-member? . args) + (error "CHAR-SET-MEMBER? is no longer provided. Use CHAR-SET-CONTAINS? instead.")) + +(define (char-set-invert char-set) + (predicate->char-set + (lambda (char) (not (char-set-contains? char-set char))))) + +(define (char-set-union char-set-1 char-set-2) + (predicate->char-set + (lambda (char) + (or (char-set-contains? char-set-1 char) + (char-set-contains? char-set-2 char))))) + +(define (char-set-intersection char-set-1 char-set-2) + (predicate->char-set + (lambda (char) + (and (char-set-contains? char-set-1 char) + (char-set-contains? char-set-2 char))))) + +(define (char-set-difference char-set-1 char-set-2) + (predicate->char-set + (lambda (char) + (and (char-set-contains? char-set-1 char) + (not (char-set-contains? char-set-2 char)))))) + +;;;; System Character Sets + +(define char-set:upper-case (ascii-range->char-set #x41 #x5B)) +(define char-set:lower-case (ascii-range->char-set #x61 #x7B)) +(define char-set:numeric (ascii-range->char-set #x30 #x3A)) +(define char-set:graphic (ascii-range->char-set #x20 #x7F)) +(define char-set:not-graphic (char-set-invert char-set:graphic)) +(define char-set:whitespace + (char-set char:newline char:tab char:linefeed + char:page char:return char:space)) +(define char-set:not-whitespace (char-set-invert char-set:whitespace)) +(define char-set:alphabetic + (char-set-union char-set:upper-case char-set:lower-case)) +(define char-set:alphanumeric + (char-set-union char-set:alphabetic char-set:numeric)) +(define char-set:standard + (char-set-union char-set:graphic (char-set char:newline))) + +(define (char-upper-case? char) + (char-set-contains? char-set:upper-case char)) + +(define (char-lower-case? char) + (char-set-contains? char-set:lower-case char)) + +(define (char-numeric? char) + (char-set-contains? char-set:numeric char)) + +(define (char-graphic? char) + (char-set-contains? char-set:graphic char)) + +(define (char-whitespace? char) + (char-set-contains? char-set:whitespace char)) + +(define (char-alphabetic? char) + (char-set-contains? char-set:alphabetic char)) + +(define (char-alphanumeric? char) + (char-set-contains? char-set:alphanumeric char)) + +(define (char-standard? char) + (char-set-contains? char-set:standard char)) + +;;; Bullshit legalese +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;$Header: /cvsroot/guile/guile/guile-scsh/Attic/char-set.scm,v 1.1 1997/01/25 18:26:38 ghouston Exp $ + +;Copyright (c) 1988 Massachusetts Institute of Technology + +;This material was developed by the Scheme project at the Massachusetts +;Institute of Technology, Department of Electrical Engineering and +;Computer Science. Permission to copy this software, to redistribute +;it, and to use it for any purpose is granted, subject to the following +;restrictions and understandings. + +;1. Any copy made of this software must include this copyright notice +;in full. + +;2. Users of this software agree to make their best efforts (a) to +;return to the MIT Scheme project any improvements or extensions that +;they make, so that these may be included in future releases; and (b) +;to inform MIT of noteworthy uses of this software. + +;3. All materials developed as a consequence of the use of this +;software shall duly acknowledge such use, in accordance with the usual +;standards of acknowledging credit in academic research. + +;4. MIT has made no warrantee or representation that the operation of +;this software will be error-free, and MIT is under no obligation to +;provide any services, by way of maintenance, update, or otherwise. + +;5. In conjunction with products arising from the use of this material, +;there shall be no use of the name of the Massachusetts Institute of +;Technology nor of any adaptation thereof in any advertising, +;promotional, or sales literature without prior written consent from +;MIT in each case. + hunk ./init.scm 42 +(load-from-path "scsh/rdelim.scm") +(load-from-path "scsh/char-set.scm") hunk ./network.scm 421 - (let* ((rv (recvfrom sockfd (list s i end) flags)) - (nread (cadr rv)) - (addr (caddr rv))) + (let* ((rv (recvfrom! sockfd s flags i end)) + (nread (car rv)) + (addr (cdr rv))) hunk ./network.scm 473 - (let* ((rv (recvfrom sockfd (list s start end) flags)) - (nread (cadr rv)) - (addr (caddr rv))) + (let* ((rv (recvfrom! sockfd s flags start end)) + (nread (car rv)) + (addr (cdr rv))) hunk ./rdelim.scm 1 +;;; Delimited readers +;;; for guile: read-delimited and read-delimited! are implemented in guile and +;;; modified below to use scsh char-sets and multiple values. +;;; read-line is redefined below. +;;; read-paragraph could be fixed. +;;; skip-char-set isn't mentioned in the scsh manual. + +(if (not (defined? 'guile-read-delimited)) + (define guile-read-delimited read-delimited)) +(set! read-delimited + (lambda (delims . args) + (let ((rv + (apply guile-read-delimited (list->string + (char-set-members delims)) args))) + (if (pair? rv) + (values (car rv) (cdr rv)) + rv)))) + +(if (not (defined? 'guile-read-delimited!)) + (define guile-read-delimited! read-delimited!)) +(set! read-delimited! + (lambda (delims . args) + (let ((rv + (apply guile-read-delimited! (list->string + (char-set-members delims)) args))) + (if (pair? rv) + (values (car rv) (cdr rv)) + rv)))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; These procedures run their inner I/O loop in a C primitive, so they +;;; should be quite fast. +;;; +;;; N.B.: +;;; The C primitive %READ-DELIMITED-FDPORT!/ERRNO relies on knowing the +;;; representation of character sets. If these are changed from their +;;; current representation as 256-element strings, this code must be changed +;;; as well. + +;;; (read-delimited delims [port delim-action]) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Returns a string or the EOF object. DELIM-ACTION determines what to do +;;; with the terminating delimiter: +;;; - PEEK +;;; Leave it in the input stream for later reading. +;;; - TRIM (the default) +;;; Drop it on the floor. +;;; - CONCAT +;;; Append it to the returned string. +;;; - SPLIT +;;; Return it as a second return value. +;;; +;;; We repeatedly allocate a buffer and fill it with READ-DELIMITED! +;;; until we hit a delimiter or EOF. Each time through the loop, we +;;; double the total buffer space, so the loop terminates with a log +;;; number of reads, but uses at most double the optimal buffer space. + +;(define (read-delimited delims . args) +; (let-optionals args ((port (current-input-port)) +; (delim-action 'trim)) +; (let ((substr (lambda (s end) ; Smart substring. +; (if (= end (string-length s)) s +; (substring s 0 end)))) +; (delims (->char-set delims)) +; (gobble? (not (eq? delim-action 'peek)))) + +; ;; BUFLEN is total amount of buffer space allocated to date. +; (let lp ((strs '()) (buflen 80) (buf (make-string 80))) +; (receive (terminator num-read) +; (%read-delimited! delims buf gobble? port) +; (if terminator + +; ;; We are done. NUM-READ is either a read count or EOF. +; (let ((retval (if (and (zero? num-read) +; (eof-object? terminator) +; (null? strs)) +; terminator ; EOF -- got nothing. + +; ;; Got something. Stick all the strings +; ;; together, plus the terminator if the +; ;; client said 'CONCAT. +; (let ((s (substr buf num-read))) +; (cond ((and (eq? delim-action 'concat) +; (char? terminator)) +; (apply string-append +; (reverse `(,(string terminator) +; ,s . ,strs)))) + +; ((null? strs) s) ; Gratuitous opt. +; (else (apply string-append +; (reverse (cons s strs))))))))) +; (if (eq? delim-action 'split) +; (values retval terminator) +; retval)) + +; ;; We are not done. Loop and read in some more. +; (lp (cons buf strs) +; (+ buflen buflen) +; (make-string buflen)))))))) + + +;;; (read-delimited! delims buf [port delim-action start end]) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Returns: +;;; - EOF if at end of file, and a non-zero read was requested. +;;; - Integer j if that many chars read into BUF. +;;; - #f if the buffer was filled w/o finding a delimiter. +;;; +;;; DELIM-ACTION determines what to do with the terminating delimiter; +;;; it is as in READ-DELIMITED. +;;; +;;; In determining the return value, there is an ambiguous case: when the +;;; buffer is full, *and* the following char is a delimiter char or EOF. +;;; Ties are broken favoring termination over #f -- after filling the buffer, +;;; READ-DELIMITED! won't return #f until it has peeked one past the end +;;; of the buffer to ensure the next char doesn't terminate input (or is EOF). +;;; However, this rule is relaxed with delim-action = CONCAT -- if the buffer +;;; is full, READ-DELIMITED! won't wait around trying to peek at the following +;;; char to determine whether or not it is a delimiter char, since it doesn't +;;; have space to store the character anyway. It simply immediately returns #f; +;;; a following read can pick up the delimiter char. + +;(define (read-delimited! delims buf . args) ; [port delim-action start end] +; (let-optionals args ((port (current-input-port)) +; (delim-action 'trim) +; (start 0) +; (end (string-length buf))) +; (receive (terminator num-read) +; (%read-delimited! delims buf +; (not (eq? delim-action 'peek)) ;Gobble delim? +; port +; start +; (if (eq? delim-action 'concat) +; (- end 1) ; Room for terminator. +; end)) + +; (if terminator ; Check for buffer overflow. +; (let ((retval (if (and (zero? num-read) +; (eof-object? terminator)) +; terminator ; EOF -- got nothing. +; num-read))) ; Got something. + +; (case delim-action +; ((peek trim) retval) +; ((split) (values retval terminator)) +; ((concat) (cond ((char? terminator) +; (string-set! buf (+ start num-read) terminator) +; (+ num-read 1)) +; (else retval))))) + +; ;; Buffer overflow. +; (case delim-action +; ((peek trim) #f) +; ((split) (values #f #f)) +; ((concat) (let ((last (read-char port))) +; (if (char? last) +; (string-set! buf (+ start num-read) last)) +; (and (or (eof-object? last) +; (char-set-contains? (->char-set delims) +; last)) +; (+ num-read 1))))))))) + + +;;; (%read-delimited! delims buf gobble? [port start end]) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This low-level routine uses a different interface. It returns two values: +;;; - TERMINATOR: A value describing why the read was terminated: +;;; + character or eof-object => read terminated by this value; +;;; + #f => filled buffer w/o terminating read. +;;; - NUM-READ: Number of chars read into buf. +;;; +;;; Note: +;;; - Invariant: TERMINATOR = #f => NUM-READ = END - START. +;;; - Invariant: TERMINATOR = eof-object and NUM-READ = 0 => at EOF. +;;; - When determining the TERMINATOR return value, ties are broken +;;; favoring character or the eof-object over #f. That is, if the buffer +;;; fills up, %READ-DELIMITED! will peek at one more character from the +;;; input stream to determine if it terminates the input. If so, that +;;; is returned, not #f. +;;; +;;; If GOBBLE? is true, then a terminator character is removed from +;;; the input stream. Otherwise, it is left in place for a following input +;;; operation. + +;(define (%read-delimited! delims buf gobble? . args) +; (let-optionals args ((port (current-input-port)) +; (start 0) +; (end (string-length buf))) + +; (check-arg input-port? port %read-delimited!) ; Arg checking. +; (check-arg char-set? delims %read-delimited!) ; Required, since +; (if (bogus-substring-spec? buf start end) ; we're calling C. +; (error "Illegal START/END substring indices" +; buf start end %read-delimited!)) + +; (let ((delims (->char-set delims))) + +; (if (fdport? port) + +; ;; Direct C support for Unix file ports -- zippy quick. +; (let lp ((start start) (total 0)) +; (receive (terminator num-read) +; (%read-delimited-fdport!/errno delims buf gobble? +; port start end) +; (let ((total (+ num-read total))) +; (cond ((not (integer? terminator)) (values terminator total)) +; ((= terminator errno/intr) (lp (+ start num-read) total)) +; (else (errno-error terminator %read-delimited! +; num-read total +; delims buf gobble? port start end)))))) + +; ;; This is the code for other kinds of ports. +; ;; Mighty slow -- we read each char twice (peek first, then read). +; (let lp ((i start)) +; (let ((c (peek-char port))) +; (cond ((or (eof-object? c) ; Found terminating char or eof +; (char-set-contains? delims c)) +; (if gobble? (read-char port)) +; (values c (- i start))) + +; ((>= i end) ; Filled the buffer. +; (if gobble? (read-char port)) +; (values #f (- i start))) + +; (else (string-set! buf i (read-char port)) +; (lp (+ i 1)))))))))) + + +;(foreign-source +; "#include " +; "" +; "/* Make sure foreign-function stubs interface to the C funs correctly: */" +; "#include \"fdports1.h\"" +; "" "") + +;(define-foreign %read-delimited-fdport!/errno (read_delim (string delims) +; (var-string buf) +; (bool gobble?) +; (desc port) +; (fixnum start) +; (fixnum end)) +; desc ; int => errno; char => terminating char; eof-object; #f => buf ovflow +; fixnum) ; number of chars read into BUF. + + +;(define-foreign %skip-char-set-fdport/errno (skip_chars (string skip-set) +; (desc port)) +; desc ; int => errno; #f => win. +; fixnum) ; number of chars skipped. + + +;(define (skip-char-set skip-chars . maybe-port) +; (let ((port (:optional maybe-port (current-input-port))) +; (cset (->char-set skip-chars))) + +; (cond ((not (input-port? port)) +; (error "Illegal value -- not an input port." port)) + +; ;; Direct C support for Unix file ports -- zippy quick. +; ((fdport? port) +; (let lp ((total 0)) +; (receive (err num-read) (%skip-char-set-fdport/errno cset port) +; (let ((total (+ total num-read))) +; (cond ((not err) total) +; ((= errno/intr err) (lp total)) +; (errno-error err skip-char-set cset port total)))))) + +; ;; This is the code for other kinds of ports. +; ;; Mighty slow -- we read each char twice (peek first, then read). +; (else (let lp ((i 0)) +; (let ((c (peek-char port))) +; (cond ((and (char? c) (char-set-contains? cset c)) +; (read-char port) +; (lp (+ i 1))) +; (else i)))))))) + + + + +;;; (read-line [port delim-action]) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Read in a line of data. Input is terminated by either a newline or EOF. +;;; The newline is trimmed from the string by default. + +(define charset:newline (char-set #\newline)) + +(define (read-line . rest) (apply read-delimited charset:newline rest)) + + +;;; (read-paragraph [port handle-delim]) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;(define blank-line-regexp (make-regexp "^[ \t]*\n$")) + +;(define (read-paragraph . args) +; (let-optionals args ((port (current-input-port)) +; (handle-delim 'trim)) +; ;; First, skip all blank lines. +; (let lp () +; (let ((line (read-line port 'concat))) +; (cond ((eof-object? line) +; (if (eq? handle-delim 'split) (values line line) line)) + +; ((regexp-exec blank-line-regexp line) (lp)) + +; ;; Then, read in non-blank lines. +; (else +; (let lp ((lines (list line))) +; (let ((line (read-line port 'concat))) +; (if (and (string? line) +; (not (regexp-exec blank-line-regexp line))) + +; (lp (cons line lines)) + +; ;; Return the paragraph +; (let ((->str (lambda (lns) (apply string-append (reverse lns))))) +; (case handle-delim +; ((trim) (->str lines)) + +; ((concat) +; (->str (if (eof-object? line) lines (cons line lines)))) + +; ((split) +; (values (->str lines) line)) + +; (else (error "Illegal HANDLE-DELIM parameter to READ-PARAGRAPH"))))))))))))) hunk ./rw.scm 24 - (let ((nread (reader s source start (- end start)))) + (let ((nread (reader s source start end))) hunk ./rw.scm 62 - (let ((nread (reader s source i (- end i)))) + (let ((nread (reader s source i end))) hunk ./rw.scm 101 - (let ((nwritten (writer s target start (- end start)))) + (let ((nwritten (writer s target start end start))) hunk ./rw.scm 130 - (let ((nwritten (writer s target i (- end i)))) - + (let ((nwritten (writer s target i end i)))