;;; @Package uri.scm ;;; @Subtitle Web Uniform Resource Identifiers (URI) in Scheme ;;; @HomePage http://www.neilvandyke.org/uri-scm/ ;;; @Author Neil W. Van Dyke ;;; @AuthorEmail neil@@neilvandyke.org ;;; @Version 0.1 ;;; @Date 18 August 2004 ;; $Id: uri.scm,v 1.436 2004/08/18 10:53:37 neil Exp $ ;;; @legal ;;; Copyright @copyright{} 2004 Neil W. Van Dyke. This program is Free ;;; Software; you can redistribute it and/or modify it under the terms of the ;;; GNU Lesser General Public License as published by the Free Software ;;; Foundation; either version 2.1 of the License, or (at your option) any ;;; later version. This program is distributed in the hope that it will be ;;; useful, but without any warranty; without even the implied warranty of ;;; merchantability or fitness for a particular purpose. See the GNU Lesser ;;; General Public License [LGPL] for details. ;;; @end legal ;;; @section Introduction ;;; ;;; @i{Note: This version of the library has endured some testing, and is being ;;; used in at least one production application, but be advised that some ;;; design refinements and API changes are expected. Especially, we are ;;; reconsidering the use of immutable strings and pairs, would like to add ;;; more of the extensibility of [UriFrame], and need to look at the ;;; forthcoming @acronym{IETF-W3C} standards.} ;;; ;;; @code{uri.scm} is a Scheme code library for parsing, representing, and ;;; transforming Web Uniform Resource Identifiers (URI) [RFC2396], which ;;; includes Uniform Resource Locators (URL) and Uniform Resource Names (URN). ;;; It supports absolute and relative URIs and URI references. ;;; ;;; The library provides two separate interfaces, based on the two supported ;;; representations: a convenient verbatim string representation, and a parsed ;;; representation. From an interface standpoint, URI are immutable objects, ;;; and all operations are functions yielding the same or new immutable URI ;;; objects. Functionality specific to individual URI schemes is generally ;;; outside the scope of this library, and is better supported via separate ;;; companion libraries. ;;; ;;; This library has been designed after experience with [UriFrame], which was ;;; specific to PLT Scheme and dependent on a heavyweight object system. This ;;; library appears in some ways simliar to the @code{uri} module of [SLIB], ;;; but is intended to provide additional functionality. ;;; ;;; The current version of this library is specific to PLT Scheme, but it has ;;; been written with the intention of being portable shortly to most R5RS ;;; Scheme implementations that support popular SRFIs. It officially requires ;;; [SRFI-6], [SRFI-8], [SRFI-9], [SRFI-13] (for @code{string-downcase}), ;;; [SRFI-16], [SRFI-23], and [SRFI-39].@footnote{Scheme implementators are ;;; encouraged to support SRFI-0 and SRFI-7, and to define one or more SRFI-0 ;;; features that identify their language variant and implementation. The ;;; lukewarm popularity of these two SRFIs is a barrier to maintaining portable ;;; versions of this library.} It also requires some regular expression ;;; operations that can be provided by the [Pregexp] library if the ;;; implementation does not provide appropriate native operations. If the ;;; implementation provides immutable strings and pairs, the library will take ;;; advantage of them; on implementations that do not provide these, copying ;;; and hopeful wishes will be used. (require (lib "8.ss" "srfi") (lib "9.ss" "srfi") (lib "13.ss" "srfi")) (define uri-internal:version "0.1") ;; Misc. Portability and Utilities: (define uri-internal:identity (lambda (n) n)) (define (uri-internal:gosc os) (let ((str (get-output-string os))) (close-output-port os) str)) ;; Error Portability and Utilities: (define-syntax uri-internal:error (syntax-rules () ((_ ?p ?m ?o ...) ;; Bigloo: ;; (error ?p ?m (list ?o ...)) ;; else: (error (string-append ?p " : " ?m) ?o ...) ))) ;; Character Portability and Utilities: (define uri-internal:char->ascii char->integer) (define uri-internal:ascii->char integer->char) (define (uri-internal:hex-char->integer c) (case c ((#\0) 0) ((#\1) 1) ((#\2) 2) ((#\3) 3) ((#\4) 4) ((#\5) 5) ((#\6) 6) ((#\7) 7) ((#\8) 8) ((#\9) 9) ((#\A #\a) 10) ((#\B #\b) 11) ((#\C #\c) 12) ((#\D #\d) 13) ((#\E #\e) 14) ((#\F #\f) 15) (else #f))) (define (uri-internal:two-hex-char->ascii-char str k) ;; TODO: Should this be UTF-8? (uri-internal:ascii->char (+ (* 16 (uri-internal:hex-char->integer (string-ref str k))) (uri-internal:hex-char->integer (string-ref str (+ 1 k)))))) ;; Immutability Portability: (define uri-internal:immutable-empty-string (string->immutable-string "")) (define uri-internal:cons-i cons-immutable) (define uri-internal:list-i list-immutable) (define uri-internal:immutable? immutable?) (define (uri-internal:list->list-i lst) ;; Note: currently only works with proper, non-cyclic lists. (let loop ((rest lst)) (cond ((null? rest) lst) ((pair? rest) (if (uri-internal:immutable? rest) (loop (cdr rest)) (let loop ((rest lst)) (if (null? rest) '() (uri-internal:cons-i (car rest) (loop (cdr rest))))))) (else (uri-internal:error "uri-internal:list->list-i" "not a proper list" lst))))) (define (uri-internal:map-i proc . lsts) (uri-internal:list->list-i (apply map proc lsts))) (define (uri-internal:reverse-i lst) (let loop ((new-head '()) (old-rest lst)) (if (null? old-rest) new-head (loop (uri-internal:cons-i (car old-rest) new-head) (cdr old-rest))))) (define uri-internal:string->string-i string->immutable-string) (define (uri-internal:append-i . args) ;; TODO: make sure final arg is immutable? (let loop ((head (car args)) (rest (cdr args))) (cond ((null? rest) head) ((null? head) (loop (car rest) (cdr rest))) (else (let loop2 ((head2 (car head)) (rest2 (cdr head))) (uri-internal:cons-i head2 (if (null? rest2) (loop (car rest) (cdr rest)) (loop2 (car rest2) (cdr rest2))))))))) (define (uri-internal:string-append-i . args) (uri-internal:string->string-i (apply string-append args))) (define (uri-internal:string-i . args) (uri-internal:string->string-i (apply string args))) (define (uri-internal:substring-i str start-k end-k) (uri-internal:string->string-i (if (and (= start-k 0) (or (not end-k) (= end-k (string-length str)))) str (if end-k (substring str start-k end-k) (substring str start-k))))) (define (uri-internal:get-output-string-i . args) (uri-internal:string->string-i (apply get-output-string args))) (define (uri-internal:gosc-i os) (let ((str (uri-internal:get-output-string-i os))) (close-output-port os) str)) ;; Regexp Portability and Utilities: (define uri-internal:make-rx regexp) (define (uri-internal:rxpos-str pos str) (if pos (substring str (car pos) (cdr pos)) #f)) (define (uri-internal:rxpos-lowstr pos str) (let ((s (uri-internal:rxpos-str pos str))) (if s (string-downcase s) #f))) (define (uri-internal:rxpos-lowsym pos str) (let ((s (uri-internal:rxpos-lowstr pos str))) (if s (string->symbol s) s))) (define (uri-internal:rxpos-num pos str) (let ((s (uri-internal:rxpos-str pos str))) (if s (string->number s) #f))) (define (uri-internal:rxpos-str-i pos str) (if pos (uri-internal:substring-i str (car pos) (cdr pos)) #f)) (define (uri-internal:rxpos-lowstr-i pos str) (if pos (uri-internal:string->string-i (string-downcase (substring str (car pos) (cdr pos)))) #f)) (define (uri-internal:rxpos-subtract pos offset) (if (and pos offset (not (zero? offset))) (cons (- (car pos) offset) (- (cdr pos) offset)) pos)) ;; (define (uri-internal:rxpos->rxpos-i pos) ;; (if pos ;; (uri-internal:cons-i (car pos) (cdr pos)) ;; #f)) (define (uri-internal:make-rx-replace-map-posns empty-string os-result-proc substring-result-proc orig-result-proc) (let ((fixed (lambda (rx str proc start end) (let ((end (let ((len (string-length str))) (if (and end (<= end len)) end len)))) (if (>= start end) empty-string (let ((os #f)) (let loop ((beg start)) (let ((rxmatch (regexp-match-positions rx str beg end))) (if rxmatch (begin (if (not os) (set! os (open-output-string))) (let ((skip-end (car (car rxmatch)))) (if (> skip-end beg) (display (substring str beg skip-end) os))) (display (apply proc rxmatch) os) (let ((new-beg (cdr (car rxmatch)))) (if (< new-beg end) (loop new-beg)))) (if os (display (substring str beg end) os))))) (cond (os => os-result-proc) ((or (not (zero? start)) end) (substring-result-proc str start end)) (else (orig-result-proc str))))))))) (case-lambda ((rx str proc start end) (fixed rx str proc start end)) ((rx str proc start) (fixed rx str proc start #f)) ((rx str proc) (fixed rx str proc 0 #f))))) (define uri-internal:rx-replace-map-posns (uri-internal:make-rx-replace-map-posns "" uri-internal:gosc substring string-copy)) (define uri-internal:rx-replace-map-posns-i (uri-internal:make-rx-replace-map-posns uri-internal:immutable-empty-string uri-internal:gosc-i uri-internal:substring-i uri-internal:string->string-i)) (define uri-internal:rx-replace-map-posns/shareok (uri-internal:make-rx-replace-map-posns "" uri-internal:gosc substring uri-internal:identity)) (define (uri-internal:make-rx-replacers/m-i-shareok rx proc) (let ((make-proc (lambda (rx-replace-map-posns) (let ((fixed (lambda (str start end) (rx-replace-map-posns rx str (lambda args (apply proc str args)) start end)))) (case-lambda ((str start end) (fixed str start end)) ((str start) (fixed str start #f)) ((str) (fixed str 0 #f))))))) (values (make-proc uri-internal:rx-replace-map-posns) (make-proc uri-internal:rx-replace-map-posns-i) (make-proc uri-internal:rx-replace-map-posns/shareok)))) (define-syntax uri-internal:with-rx-match-positions (syntax-rules (else) ((_ ?rmp ?matched) (uri-internal:with-rx-match-positions ?rmp ?matched (else (uri-internal:error "[uri-internal:with-rx-match-positions]" "regexp match failed")))) ((_ (?rmp-args ...) ((?vars ...) ?matched-exps ...) (else ?else-exps ...)) (let ((rxmatch (regexp-match-positions ?rmp-args ...))) (if rxmatch (apply (lambda (?vars ...) ?matched-exps ...) rxmatch) (begin ?else-exps ...)))))) (define uri-internal:rxpos-tag (string->symbol "*uri-internal:rxpos*")) (define (uri-internal:make-tagged-rxpos rxpos) (cons uri-internal:rxpos-tag rxpos)) (define (uri-internal:tagged-rxpos? v) (and (pair? v) (eq? (car v) uri-internal:rxpos-tag))) (define (uri-internal:tagged-rxpos-values-or-value v proc) (if (uri-internal:tagged-rxpos? v) (proc (cadr v) (cddr v)) v)) (define (uri-internal:rxpos-subtract-and-tag pos offset) (uri-internal:make-tagged-rxpos (uri-internal:rxpos-subtract pos offset))) (define-syntax uri-internal:uriobj-rxpos-field-proc (syntax-rules () ((_ ?get-proc ?parse-proc ?set-proc) (lambda (uriobj) (let ((v (?get-proc uriobj))) (if (uri-internal:tagged-rxpos? v) (let ((pos (cdr v))) ;; TODO: This is kinda weird to be setting the value to #f. We ;; have to make sure that all the parsed value representations ;; are OK with that. We should probably have a "/pos" form of ;; the procedures instead. (let ((o (if pos (?parse-proc (uriobj->string uriobj) (car pos) (cdr pos)) #f))) (?set-proc uriobj o) o)) v)))))) ;; Utilities: (define (for-each/between each-proc between-proc lst) ;; TODO: Lose this procedure. (if (not (null? lst)) (let loop ((head (car lst)) (rest (cdr lst))) (each-proc head) (if (null? rest) (if #f #f) (begin (between-proc) (loop (car rest) (cdr rest))))))) (define (uri-internal:string-or-f->string-i-or-f str) (if str (uri-internal:string->string-i str) #f)) ;;; @section Escaping and Unescaping ;;; Several procedures to support escaping and unescaping of URI component ;;; strings, as described in [RFC2396 sec. 2.4], are provided. Also provided ;;; are escaping and unescaping procedures that also support @code{+} as an ;;; encoding of a space character, as is used in some HTTP encodings of HTML ;;; forms. ;;; ;;; These procedures have multiple variants, concerning mutability of the ;;; strings they yield, and following the naming convention: ;;; ;;; @table @code ;;; ;;; @item @r{@i{foo}} ;;; Always yields a new, mutable string. ;;; ;;; @item @r{@i{foo}}-i ;;; Always yields an immutable string (or a new string, if the Scheme ;;; implementation does not support immutable string). ;;; ;;; @item @r{@i{foo}}/shareok ;;; If the output is equal to the input, might yield the input string rather ;;; than yielding a copy of it. ;;; ;;; @end table ;;; ;;; Many applications will not call these procedures directly, since most of ;;; this library's interface automatically escapes and unescapes strings as ;;; appropriate. ;;; @defproc uri-escape str [start [end]] @result{} string ;;; @defprocx uri-escape-i str [start [end]] @result{} string ;;; @defprocx uri-escape/shareok str [start [end]] @result{} string ;;; ;;; Yields a URI-escaped encoding of string @var{str}. If @var{start} and ;;; @var{end} are given, then they designate the substring of @var{str} to use. ;;; All characters are escaped, except alphanumerics, minus, underscore, ;;; period, and tilde. For example. ;;; ;;; @lisp ;;; (uri-escape "a = b/c + d") @result{} "a%20%3D%20b%2Fc%20%2B%20d" ;;; @end lisp (define-values (uri-escape uri-escape-i uri-escape/shareok) (uri-internal:make-rx-replacers/m-i-shareok (uri-internal:make-rx "[^-_.~a-zA-Z0-9]") (lambda (str pos) (char->uri-escaped-string (string-ref str (car pos)))))) ;;; @defproc uri-plusescape str [start [end]] @result{} string ;;; @defprocx uri-plusescape-i str [start [end]] @result{} string ;;; @defprocx uri-plusescape/shareok str [start [end]] @result{} string ;;; ;;; Like @code{uri-escape}, except encodes space characters as @code{"+"} ;;; instead of @code{"%20"}. This should generally only be used to mimic the ;;; encoding some Web browsers do of HTML form values. For example: ;;; ;;; @lisp ;;; (uri-plusescape "a = b/c + d") @result{} "a+%3D+b%2Fc+%2B+d" ;;; @end lisp (define-values (uri-plusescape uri-plusescape-i uri-plusescape/shareok) (uri-internal:make-rx-replacers/m-i-shareok (uri-internal:make-rx "( )|[^-_.~a-zA-Z0-9 ]") (lambda (str pos space-pos) (if space-pos "+" (char->uri-escaped-string (string-ref str (car pos))))))) ;;; @defproc uri-unescape str [start [end]] @result{} string ;;; @defprocx uri-unescape-i str [start [end]] @result{} string ;;; @defprocx uri-unescape/shareok str [start [end]] @result{} string ;;; ;;; Yields an URI-unescaped string from the encoding in string @code{str}. If ;;; @var{start} and @var{end} are given, then they designate the substring of ;;; @var{str} to use. For example: ;;; ;;; @lisp ;;; (uri-unescape "a%20b+c%20d") @result{} "a b+c d" ;;; @end lisp (define-values (uri-unescape uri-unescape-i uri-unescape/shareok) (uri-internal:make-rx-replacers/m-i-shareok (uri-internal:make-rx "%([0-9a-fA-F][0-9a-fA-F])?") (lambda (str pos hex-pos) (if hex-pos (uri-internal:two-hex-char->ascii-char str (car hex-pos)) ;; TODO: Benchmark above against: ;; (uri-internal:ascii->char ;; (string->number (substring str (car hex-pos) (cdr hex-pos)) 16)) "%")))) ;;; @defproc uri-unplusescape str [start [end]] @result{} string ;;; @defprocx uri-unplusescape-i str [start [end]] @result{} string ;;; @defprocx uri-unplusescape/shareok str [start [end]] @result{} string ;;; ;;; Like @code{uri-unescape}, but also decodes the plus (@code{+}) character as ;;; to space character. For example: ;;; ;;; @lisp ;;; (uri-unplusescape "a%20b+c%20d") @result{} "a b c d" ;;; @end lisp (define-values (uri-unplusescape uri-unplusescape-i uri-unplusescape/shareok) (uri-internal:make-rx-replacers/m-i-shareok (uri-internal:make-rx "(\\+)|%([0-9a-fA-F][0-9a-fA-F])?") (lambda (str pos plus-pos hex-pos) (cond (plus-pos " ") (hex-pos (uri-internal:two-hex-char->ascii-char str (car hex-pos))) ;; TODO: Benchmark above against: ;; (hex-pos ;; (uri-internal:ascii->char ;; (string->number (substring str (car hex-pos) (cdr hex-pos)) 16) ;; 16)) (else "%"))))) ;;; @defproc char->uri-escaped-string chr @result{} string ;;; @defprocx char->uri-escaped-string-i chr @result{} string ;;; ;;; Yields a URI-escaped of character @var{chr}. For example: ;;; ;;; @lisp ;;; (char->uri-escaped-string #\/) @result{} "%2F" ;;; @end lisp (define-values (char->uri-escaped-string char->uri-escaped-string-i) (let* ((hex #(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\A #\B #\C #\D #\E #\F)) (make-proc (lambda (string) (lambda (chr) (let ((n (uri-internal:char->ascii chr))) (string #\% (vector-ref hex (quotient n 16)) (vector-ref hex (remainder n 16)))))))) (values (make-proc string) (make-proc uri-internal:string-i)))) ;; @section URI Objects and URI Strings (define-record-type uri-internal:uriobj (uri-internal:make-uriobj string scheme opaque-k pound-k auth path query) uriobj? (string uriobj->string uri-internal:set-uriobj-string!) (scheme uriobj-scheme uri-internal:set-uriobj-scheme!) (opaque-k uri-internal:uriobj-opaque-k uri-internal:set-uriobj-opaque-k!) (pound-k uri-internal:uriobj-pound-k uri-internal:set-uriobj-pound-k!) (auth uri-internal:uriobj-auth uri-internal:set-uriobj-auth!) (path uri-internal:uriobj-path uri-internal:set-uriobj-path!) (query uri-internal:uriobj-query uri-internal:set-uriobj-query!)) ;;; @section String URI API ;;; This section describes the ``URI string'' API, while the next section ;;; describes the ``URI object,'' (@code{uriobj}) API. All procedures in this ;;; section yield URIs using immutable strings, and accept URIs as strings ;;; (immutable or mutable) or as the opaque objects described in the next ;;; section. ;;; @subsection Writing URIs to Ports and Converting URIs to Strings ;;; @defproc display-uri uri port @result{} undef ;;; @defprocx display-uri/nofragment uri port @result{} undef ;;; ;;; Displays @var{uri} to output port @var{port}. For example: ;;; ;;; @lisp ;;; (display-uri "http://s/foo#bar" (current-output-port)) ;;; @print{} http://s/foo#bar ;;; (display-uri/nofragment "http://s/foo#bar" (current-output-port)) ;;; @print{} http://s/foo ;;; @end lisp (define (display-uri uri port) (display (uri->string uri) port)) (define (display-uri/nofragment uri port) ;; TODO: Do a faster and simpler version, using pound-k and substring (display (uri-without-fragment uri) port)) ;;; @defproc uri->string uri @result{} string ;;; ;;; Yields the full string representation of URI @var{uri}. Of course this is ;;; not needed when using only the string representation of URI, but using this ;;; procedure in libraries permits the @code{uriobj} to also be used. For ;;; example: ;;; ;;; @lisp ;;; (define my-uriobj (string->uriobj "http://www/")) ;;; my-uriobj @result{} # ;;; (uri->string my-uriobj) @result{} "http://www/" ;;; (uri->string "http://www/") @result{} "http://www/" ;;; @end lisp (define (uri->string uri) (cond ((uriobj? uri) (uriobj->string uri)) ((string? uri) uri) (else (uri-internal:error "uri->string" "expected uriobj or string, got:" uri)))) ;;; @subsection URI Schemes ;;; URI schemes are currently represented as lowercase Scheme symbols and ;;; associated data. ;;; @defvar ftp-uri-scheme @result{} urischeme ;;; @defvarx gopher-uri-scheme @result{} urischeme ;;; @defvarx http-uri-scheme @result{} urischeme ;;; @defvarx https-uri-scheme @result{} urischeme ;;; @defvarx imap-uri-scheme @result{} urischeme ;;; @defvarx ipp-uri-scheme @result{} urischeme ;;; @defvarx news-uri-scheme @result{} urischeme ;;; @defvarx nfs-uri-scheme @result{} urischeme ;;; @defvarx telnet-uri-scheme @result{} urischeme ;;; ;;; Some common URI scheme symbols, as a convenience for Scheme code that must ;;; be portable to Scheme implementations with case-insensitive readers. For ;;; example, in some Scheme implementations: ;;; ;;; @lisp ;;; 'ftp @result{} FTP ;;; ftp-uri-scheme @result{} ftp ;;; @end lisp (define ftp-uri-scheme (string->symbol "ftp")) (define gopher-uri-scheme (string->symbol "gopher")) (define http-uri-scheme (string->symbol "http")) (define https-uri-scheme (string->symbol "https")) (define imap-uri-scheme (string->symbol "imap")) (define ipp-uri-scheme (string->symbol "ipp")) (define news-uri-scheme (string->symbol "news")) (define nfs-uri-scheme (string->symbol "nfs")) (define telnet-uri-scheme (string->symbol "telnet")) ;;; @defproc uri-scheme uri @result{} urischeme ;;; ;;; Yields the URI scheme of @var{uri}, or @code{#f} if none can be determined. ;;; For example: ;;; ;;; @lisp ;;; (uri-scheme "Http://www") @result{} http ;;; @end lisp (define (uri-scheme uri) (uriobj-scheme (uri->uriobj uri))) ;; ;;; @defproc uri-with-scheme uri urischeme @result{} string ;; ;;; ;; ;;; TODO: in the current version oo this library, just does a string ;; ;;; replacement of the uri scheme. no sensitivity to default port numbers ;; ;;; of uri schemes (partly because we are getting into territory where we ;; ;;; need to know whether an authority is a server authority. ;; ;; (define (uri-with-scheme uri urischeme) ;; ;; TODO: Just do a string operation here, if it's a string for which we ;; ;; don't have a uriobj. ;; ;; ;; ;; TODO: Maybe force urischeme to a valid one, so that they can use quoted ;; ;; symbols in a case-insensitive Scheme implementation safely. ;; (uriobj->string (uriobj-with-scheme (uri->uriobj uri) urischeme))) (define uri-internal:default-portnums (list (cons ftp-uri-scheme 21) (cons gopher-uri-scheme 70) (cons http-uri-scheme 80) (cons https-uri-scheme 443) (cons imap-uri-scheme 143) (cons ipp-uri-scheme 631) (cons news-uri-scheme 119) (cons nfs-uri-scheme 2049) (cons telnet-uri-scheme 23))) ;;; @defproc register-uri-scheme-default-portnum sym portnum @result{} undef ;;; ;;; Registers integer @var{portnum} as the default port number for the server ;;; authority component of URI scheme @var{sym}. ;;; ;;; @lisp ;;; (define x-foo-uri-scheme (string->symbol "x-foo")) ;;; (register-uri-scheme-default-portnum x-foo-uri-scheme 007) ;;; (register-uri-scheme-default-portnum x-foo-uri-scheme 666) ;;; @error{} cannot change uri scheme default portnum: x-foo 7 666 ;;; @end lisp (define (register-uri-scheme-default-portnum urischeme portnum) ;; TODO: Ideally, we would have a mutex lock around the list. ;; But wait til we make it a hashtable with other URI-scheme data. (let ((old-portnum (urischeme-default-portnum urischeme))) (if old-portnum (if (not (equal? portnum old-portnum)) (uri-internal:error "register-urischeme-default-portnum" "cannot change uri scheme default portnum:" urischeme old-portnum portnum)) (set! uri-internal:default-portnums (cons (cons urischeme portnum) uri-internal:default-portnums))))) ;;; @defproc register-uri-scheme-hierarchical sym @result{} undef ;;; ;;; Registers URI scheme @var{sym} as having a ``hierarchical'' form as ;;; described in [RFC2396 sec. 3]. (define (register-urischeme-hierarchical urischeme) ;; TODO: Mutex it, although pretty benign if we don't. (if (memq urischeme uri-internal:hierarchical-schemes) (if #f #f) (begin (set! uri-internal:hierarchical-schemes (cons urischeme uri-internal:hierarchical-schemes)) (if #f #f)))) ;;; @subsection URI Reference Fragment Identifiers ;;; @defproc uri-fragment uri @result{} string-or-f ;;; @defprocx uri-fragment/escaped uri @result{} string-or-f ;;; ;;; Yields the fragment identifier component of URI (or URI reference) ;;; @var{uri} as a string, or @code{#f} if there is no fragment. ;;; @code{uri-fragment} yields the fragment in unescaped form, and ;;; @code{uri-fragment/escaped} yields an escaped form in the unusual case that ;;; is desired. For example: ;;; ;;; @lisp ;;; (uri-fragment "foo#a%20b") @result{} "a b" ;;; (uri-fragment/escaped "foo#a%20b") @result{} "a%20b" ;;; @end lisp (define (uri-fragment/escaped uri) (uriobj-fragment/escaped (uri->uriobj uri))) (define (uri-fragment uri) (uriobj-fragment (uri->uriobj uri))) ;;; @defproc uri-without-fragment uri @result{} string ;;; ;;; Yields @var{uri} without the fragment component. For example: ;;; ;;; @lisp ;;; (uri-without-fragment "http://w/#bar") @result{} "http://w/" ;;; @end lisp (define (uri-without-fragment uri) (uri-with-fragment uri #f)) ;;; @defproc uri-with-fragment uri fragment @result{} string ;;; @defprocx uri-with-fragment/escaped uri fragment @result{} string ;;; ;;; Yields a URI that is like @var{uri} except with the fragment @var{fragment} ;;; (or no fragment if @var{fragment} is @code{#f}). For example: ;;; ;;; @lisp ;;; (uri-with-fragment "http://w/" "foo") @result{} "http://w/#foo" ;;; (uri-with-fragment "http://w/#foo" "bar") @result{} "http://w/#bar" ;;; (uri-with-fragment "http://w/#bar" #f) @result{} "http://w/" ;;; @end lisp ;;; ;;; The @code{uri-with-fragment/escaped} variant can be used when the desired ;;; fragment string is already in URI-escaped form: ;;; ;;; @lisp ;;; (uri-with-fragment "foo" "a b") @result{} "foo#a%20b" ;;; (uri-with-fragment/escaped "foo" "a%20b") @result{} "foo#a%20b" ;;; @end lisp (define (uri-with-fragment uri fragment) (uriobj->string (uriobj-with-fragment (uri->uriobj uri) fragment))) (define (uri-with-fragment/escaped uri fragment) (uriobj->string (uriobj-with-fragment/escaped (uri->uriobj uri) fragment))) ;;; @subsection Hierarchical URIs ;;; This and some of the following subsections concern ``hierarchical'' generic ;;; URI syntax as described in [RFC2396 sec. 3]. ;;; @defproc uri-hierarchical? uri @result{} boolean ;;; ;;; Yields a Boolean value for whether or not the URI scheme of URI @var{uri} ;;; is known to have a ``hierarchical'' generic URI layout. For example: ;;; ;;; @lisp ;;; (uri-hierarchical? "http://www/") @result{} #t ;;; (uri-hierarchical? "mailto://www/") @result{} #f ;;; (uri-hierarchical? "//www/") @result{} #f ;;; @end lisp (define (uri-hierarchical? uri) (uriobj-hierarchical? (uri->uriobj uri))) ;;; @subsection Server-Based Naming Authorities ;;; Several procedures extract the server authority values from URIs [RFC2396 ;;; sec. 3.2.2]. ;;; @defproc uri-server-userinfo+host+portnum uri @result{} (string-or-f, string-or-f, integer-or-f) ;;; ;;; Yields three values for the server authority of URI @var{uri}: the userinfo ;;; as a string (or @code{#f}), the host as a string (or @code{#f}), and the ;;; effective port number as an integer (or @code{#f}). The effective port ;;; number of a server authority defaults to the default of the URI scheme ;;; unless overridden. For example (note the effective port number is 21, the ;;; default for the @code{ftp} scheme): ;;; ;;; @lisp ;;; (uri-server-userinfo+host+portnum "ftp://anon@@ftp.foo.bar/") ;;; @result{} "anon" "ftp.foo.bar" 21 ;;; @end lisp (define (uri-server-userinfo+host+portnum uri) (uriobj-userinfo+host+portnum (uri->uriobj uri))) ;;; @defproc uri-server-userinfo uri @result{} string-of-f ;;; @defprocx uri-server-host uri @result{} string-of-f ;;; @defprocx uri-server-portnum uri @result{} integer-or-f ;;; ;;; Yield the respective part of the server authority of @var{uri}. See the ;;; discussion of @code{uri-server-userinfo+host+portnum}. (define (uri-server-userinfo uri) (uriserver-userinfo (uriobj-uriserver (uri->uriobj uri)))) (define (uri-server-host uri) (uriserver-host (uriobj-uriserver (uri->uriobj uri)))) (define (uri-server-portnum uri) (uriobj-portnum (uri->uriobj uri))) ;;; @subsection Hierarchical Paths ;;; A parsed hierarchical path [RFC2396 sec. 3] is represented in ;;; @code{uri.scm} as a tuple of a list of path segments and an @dfn{upcount}. ;;; The list of path segments does not contain any ``@code{.}'' or ;;; ``@code{..}'' relative components, as those are removed during parsing. ;;; The upcount is either @code{#f}, meaning an absolute path, or an integer 0 ;;; or greater, meaning a relative path of that many levels ``up.'' A path ;;; segment without any parameters is represented as either a string or, if ;;; empty, @code{#f}. For example: ;;; ;;; @lisp ;;; (uri-path-upcount+segments "/a/b/") @result{} #f ("a" "b" #f) ;;; (uri-path-upcount+segments "/a/b/c") @result{} #f ("a" "b" "c") ;;; (uri-path-upcount+segments "/a/../../../b/c") @result{} 2 ("b" "c") ;;; @end lisp ;;; ;;; @noindent ;;; and: ;;; ;;; @lisp ;;; (uri-path-upcount+segments "/.") @result{} #f () ;;; (uri-path-upcount+segments "/") @result{} #f (#f) ;;; (uri-path-upcount+segments ".") @result{} 0 () ;;; (uri-path-upcount+segments "") @result{} 0 (#f) ;;; (uri-path-upcount+segments "./") @result{} 0 (#f) ;;; (uri-path-upcount+segments "..") @result{} 1 () ;;; (uri-path-upcount+segments "/..") @result{} 1 () ;;; (uri-path-upcount+segments "../") @result{} 1 (#f) ;;; @end lisp ;;; ;;; A path segment with parameters is represented as a list, with the first ;;; element a string or @code{#f} for the path name, and the remaining elements ;;; strings for the parameters. For example: ;;; ;;; @lisp ;;; (uri-path-segments "../../a/b;p1/c/d;p2;p3/;p4") ;;; @result{} ("a" ("b" "p1") "c" ("d" "p2" "p3") (#f "p4")) ;;; @end lisp ;;; ;;; In the current version of @code{uri.scm}, parsed paths are actually ;;; represented in reverse, which simplifies path resolution and permits list ;;; tails to be shared among potentially large numbers of long paths. For ;;; example (@code{uripath} is a concept of the ``object URI'' API): ;;; ;;; @lisp ;;; (let ((base (string->uripath "/a/b/c/index.html"))) ;;; (map (lambda (n) ;;; (resolved-uripath (string->uripath n) base)) ;;; '("x.html" "y/y.html" "../z/z.html"))) ;;; @result{} ;;; (("x.html" . #0=("c" . #1=("b" "a"))) ;;; ("y.html" "y" . #0#) ;;; ("z.html" "z" . #1#)) ;;; @end lisp ;;; @defproc uri-path-upcount+segments uri @result{} (integer-or-f, list-of-urisegment) ;;; @defprocx uri-path-upcount+segments/reverse uri @result{} (integer-or-f, list-of-urisegment) ;;; ;;; Yields the path upcount and the segments of @var{uri} as two values. The ;;; segments list should be considered immutable, as it might be shared ;;; elsewhere. @code{uri-path-upcount+segments/reverse} yields the segments ;;; list in reverse order, and is the more efficient of the two procedures. ;;; ;;; @lisp ;;; (uri-path-upcount+segments/reverse "../a/../../b/./c") ;;; @result{} 2 ("c" "b") ;;; (uri-path-upcount+segments "../a/../../b/./c") ;;; @result{} 2 ("b" "c") ;;; @end lisp (define (uri-path-upcount+segments uri) (uripath-upcount+segments (uriobj-uripath (uri->uriobj uri)))) (define (uri-path-upcount+segments/reverse uri) (uripath-upcount+segments/reverse (uriobj-uripath (uri->uriobj uri)))) ;;; @defproc uri-path-upcount uri @result{} integer-or-f ;;; @defprocx uri-path-segments uri @result{} list-of-urisegment ;;; @defprocx uri-path-segments/reverse uri @result{} list-of-urisegment ;;; ;;; See the documentation for @code{uri-path-upcount+segments}. ;;; ;;; @lisp ;;; (uri-path-upcount "../a/../../b/./c") @result{} 2 ;;; (uri-path-segments "../a/../../b/./c") @result{} ("b" "c") ;;; (uri-path-segments/reverse "../a/../../b/./c") @result{} ("c" "b") ;;; @end lisp (define (uri-path-upcount uri) (uripath-upcount (uriobj-uripath (uri->uriobj uri)))) (define (uri-path-segments uri) (uripath-segments (uriobj-uripath (uri->uriobj uri)))) (define (uri-path-segments/reverse uri) (uripath-segments/reverse (uriobj-uripath (uri->uriobj uri)))) ;;; @defproc urisegment-name urisegment @result{} string-or-f ;;; @defprocx urisegment-params urisegment @result{} list ;;; @defprocx urisegment-name+params urisegment @result{} (string-or-f, list) ;;; @defprocx urisegment-has-params? urisegment @result{} boolean ;;; ;;; Yield the components of a parsed URI segment. The values should be ;;; considered immutable. For example: ;;; ;;; @lisp ;;; (urisegment-name+params "foo") @result{} "foo" () ;;; (urisegment-name+params #f) @result{} #f () ;;; (urisegment-name+params '("foo" "p1" "p2")) @result{} "foo" ("p1" "p2") ;;; (urisegment-name+params '(#f "p1" "p2")) @result{} #f ("p1" "p2") ;;; @end lisp (define (urisegment-name segment) (if (pair? segment) (car segment) segment)) (define (urisegment-params segment) (if (pair? segment) (cdr segment) '())) (define (urisegment-name+params segment) (if (pair? segment) (values (car segment) (cdr segment)) (values segment '()))) (define (urisegment-has-params? segment) (pair? segment)) (define uri-internal:parse-uri-path-params (let ((rx (uri-internal:make-rx "^([^;]+)?(;)?"))) (lambda (str start end) (uri-internal:with-rx-match-positions (rx str start end) ((whole param semi) (uri-internal:cons-i (if param (uri-unescape-i str (car param) (cdr param)) #f) (if semi (uri-internal:parse-uri-path-params str (cdr whole) end) '()))))))) ;; ;;; @defproc uri-path-string uri @result{} string ;; ;; ;;; @defproc uri-without-scheme-and-authority uri @result{} string ;;; @subsection Attribute-Value Queries ;;; This library provides support for parsing the URI query component [RFC2396 ;;; sec. 3.4], as attribute-value lists in the manner of @code{http} URI scheme ;;; queries. Parsed queries are represented as association lists, in which the ;;; @dfn{car} of each pair is the attribute name as a string, and the @dfn{cdr} ;;; is either the attribute value as a string or @code{#t} if no value given. ;;; All strings are URI-unescaped. For example: ;;; ;;; @lisp ;;; (uri-query "?q=fiendish+scheme&case&x=&y=1%2B2") ;;; @result{} ;;; (("q" . "fiendish scheme") ("case" . #t) ("x" . "") ("y" . "1+2")) ;;; @end lisp ;; Note: not so good for @code{imap} URI scheme [RFC2192]. ;;; @defproc uri-query uri @result{} uriquery ;;; ;;; Yields the parsed attribute-value query of @var{uri}, or @code{#f} if no ;;; query. For example: ;;; ;;; @lisp ;;; (uri-query "?x=42&y=1%2B2") @result{} (("x" . "42") ("y" . "1+2")) ;;; @end lisp ;; TODO: Rename this to distinguish from other kinds of queries. (define (uri-query uri) (uriobj-uriquery (uri->uriobj uri))) ;;; @defproc uri-query-value uri attr @result{} string-or-t-or-f ;;; ;;; Yields the value of attribute @var{attr} in @var{uri}'s query, or @code{#f} ;;; if @var{uri} has no query component or no @var{attr} attribute. If the ;;; attribute appears multiple times in the query, the value of the first ;;; occurrence is used. For example: ;;; ;;; @lisp ;;; (uri-query-value "?x=42&y=1%2B2" "y") @result{} "1+2" ;;; @end lisp (define (uri-query-value uri attr) (let ((query (uri-query uri))) (if query (uriquery-value query attr) #f))) ;;; @defproc uriquery-value uriquery attr @result{} string-or-t-or-f ;;; ;;; Yields the value of attribute @var{attr} in @var{uriquery}, or @code{#f} if ;;; there is no such attribute. If the attribute appears multiple times in the ;;; query, the value of the first occurrence is used. (define (uriquery-value uriquery attr) (let ((pair (assoc attr uriquery))) (and pair (cdr pair)))) ;;; @subsection Resolving Relative URI ;;; This subsection concerns resolving relative URI. ;;; @defproc absolute-uri? uri @result{} boolean ;;; ;;; Yields a Boolean value for whether or not URI @var{uri} is @emph{known} by ;;; the library's criteria to be absolute. (define (absolute-uri? uri) (absolute-uriobj? (uri->uriobj uri))) ;;; @defproc resolved-uri uri base-uri @result{} string ;;; ;;; Yields a URI string that is URI @var{uri} possibly resolved with respect to ;;; URI @var{base-uri}, but not necessarily absolute. As an extension to ;;; [RFC2396] rules for resolution, @var{base-uri} may be a relative URI. ;;; ;;; @lisp ;;; (resolved-uri "x.html" "http://w/a/b/c.html") ;;; @result{} "http://w/a/b/x.html" ;;; (resolved-uri "//www:80/" "http:") ;;; @result{} "http://www/" ;;; @end lisp (define (resolved-uri uri base-uri) (cond ((not base-uri) (uri->string uri)) ((uriobj? uri) (uri->string (resolved-uriobj/base-uriobj uri (uri->uriobj base-uri)))) ((equal? uri "") (uri->string base-uri)) (else ;; TODO: If "uri" is a string URI for which we don't yet have a uriobj, we ;; could use "string/base->uriobj", which in the future might be more ;; efficient. (uri->string (resolved-uriobj/base-uriobj (uri->uriobj uri) (uri->uriobj base-uri)))))) ;;; @defproc absolute-uri uri @result{} string ;;; ;;; Yields a URI that may be a variation on @var{uri} that has been forced to ;;; absolute (by, e.g., dropping relative path components, or supplying a ;;; missing path). The result might not be an absolute URI, however, due to ;;; limitations of the library or insufficient information in the URI. For ;;; example: ;;; ;;; @lisp ;;; (absolute-uri "http://w/../a") @result{} "http://w/a" ;;; (absolute-uri "http://w") @result{} "http://w/" ;;; @end lisp (define (absolute-uri uri) (uri->string (absolute-uriobj (uri->uriobj uri)))) ;;; @defproc normalized-uri uri @result{} string ;;; ;;; Yields a possibly ``normalized'' variation on URI @var{uri}, such as by ;;; consistent use of escaping. The exact behavior of this procedure will ;;; change in future versions of the library. (define (normalized-uri uri) (uri->string (normalized-uri (uri->uriobj uri)))) ;;; @section Object URI API ;;; @i{Note: The Object URI API is only sparsely documented, although many of ;;; its procedures have analogues in the String URI API, which is documented in ;;; the preceding section.} ;;; @subsection Predicate ;;; @defproc uriobj? v ;;; @subsection Converting Strings to URI Objects ;; TODO: Add URI extraction from strings, and distinguish from string ;; conversion. The normal *string*->uri* procedures maybe shouldn't strip ;; leading and trailing garbage. Have "extract-uri" and "extract-uriobj" ;; procedures? ;;; @defproc string->uriobj str @result{} uriobj ;;; @defprocx string/base->uriobj str base-uri @result{} uriobj ;;; @defprocx string/base-uriobj->uriobj str base-uriobj @result{} uriobj ;; ;; Note: The value of @code{(uri->string (string->uriobj @var{S}))} will NOT ;; always be equal to @var{S}. (define (string->uriobj str) (substring->uriobj str 0 #f)) (define (string/base-uriobj->uriobj str base-uriobj) (substring/base-uriobj->uriobj str 0 #f base-uriobj)) (define (string/base->uriobj str base-uri) (substring/base-uriobj->uriobj str 0 #f (if base-uri (uri->uriobj base-uri) #f))) ;;; @defproc substring->uriobj str start end @result{} uriobj ;;; @defprocx substring/base->uriobj str start end base-uri @result{} uriobj ;;; @defprocx substring/base-uriobj->uriobj str start end base-uriobj @result{} uriobj (define substring->uriobj (let ((rx (regexp (string-append "^[ \t<]*" "(" ; <1 used "(?:" ; <: scheme-colon "([a-zA-Z][-+.a-zA-Z0-9]*)" ; =2 scheme ":)?" ; >: scheme-colon "(?://" ; <: slashslash-authority "([^<>?/#]*)" ; =3 authority ")?" ; >: slashslash-authority "([^<>?#]*)" ; =4 path "(?:\\?" ; <: question-query "([^<>#]*)" ; =5 query ")?" ; >: question-query "(#[^>]*)?" ; =6 pound-fragment ")" ; >1 used )))) (lambda (str start end) (uri-internal:with-rx-match-positions (rx str start end) ((whole used scheme authority path query pound-fragment) (let ((used-start (car used))) (uri-internal:make-uriobj ;; string (uri-internal:substring-i str used-start (cdr used)) ;; scheme (uri-internal:rxpos-lowsym scheme str) ;; opaque-k (if scheme (+ 1 (cdr (uri-internal:rxpos-subtract scheme used-start))) 0) ;; pound-k (and pound-fragment (car (uri-internal:rxpos-subtract pound-fragment used-start))) ;; auth, path, query (uri-internal:rxpos-subtract-and-tag authority used-start) (uri-internal:rxpos-subtract-and-tag path used-start) (uri-internal:rxpos-subtract-and-tag query used-start)))))))) (define (substring/base-uriobj->uriobj str start end base-uriobj) ;; TODO: Make a version of this that doesn't call substring->uriobj, ;; which will save some allocations in a potentially much-called ;; procedure (for example, parsing an HTML Web page with 1000 URLs while ;; resolving against a known base URI). (if base-uriobj (let ((uriobj (substring->uriobj str start end))) (resolved-uriobj uriobj base-uriobj) uriobj))) (define (substring/base->uriobj str start end base-uri) (substring/base-uriobj->uriobj str start end (if base-uri (uri->uriobj base-uri) #f))) ;;; @defproc uri->uriobj uri @result{} uriobj (define (uri->uriobj uri) (cond ((uriobj? uri) uri) ((string? uri) (string->uriobj uri)) (else (uri-internal:error "uri->uriobj" "expected uriobj or string, got:" uri)))) ;;; @subsection Writing URIs to Ports and Converting URIs to Strings ;;; @defproc display-uriobj uriobj port @result{} undef ;;; @defprocx display-uriobj/nofragment uriobj port @result{} undef (define (display-uriobj uriobj port) (display (uriobj->string uriobj) port)) (define (display-uriobj/nofragment uriobj port) (display (uriobj->string/nofragment uriobj) port)) ;;; @defproc uriobj->string uriobj @result{} string ;;; @defprocx uriobj->string/nofragment uriobj @result{} string (define (uriobj->string/nofragment uriobj) (let ((pound-k (uri-internal:uriobj-pound-k uriobj))) (if pound-k (uri-internal:substring-i (uriobj->string uriobj) 0 pound-k) (uriobj->string uriobj)))) ;;; @subsection URI Schemes ;;; @defproc uriobj-scheme uriobj @result{} urischeme ;;; @defproc uriobj-with-scheme uriobj urischeme @result{} uriobj ;; TODO: Is that right, above? (define (uriobj-with-scheme uriobj urischeme) ;; TODO: what about port numbers on URIs with server authorities? we should ;; at least double check that if we have the default port number under old ;; scheme, we have the default under the new scheme (we currently do, but ;; errors, or a default scheme registered after parsing might break that). ;; we should also decide what it means when the server authority has a ;; non-default port number for the old scheme -- just keep it, probably. ;; Actually, this procedure mainly makes sense only for URIs that don't ;; already have schemes. (if (eq? urischeme (uriobj-scheme uriobj)) uriobj ;; TODO: Make this reuse parsed authority/path/query values, if the new ;; scheme has the same format as the old scheme. (string->uriobj (let ((old-str (uriobj->string uriobj)) (opaque-k (uri-internal:uriobj-opaque-k uriobj))) (if (zero? opaque-k) (uri-internal:string-append-i (urischeme->string urischeme) ":" old-str) (uri-internal:string-append-i (urischeme->string urischeme) (substring old-str (- opaque-k 1) (string-length old-str)))))))) ;;; @defproc string->urischeme str @result{} urischeme ;;; @defprocx symbol->urischeme sym @result{} urischeme ;; ;; ensures is lowercase symbol (define (string->urischeme str) (string->symbol (string-downcase str))) (define (symbol->urischeme sym) (string->symbol (string-downcase (symbol->string sym)))) ;;; @defproc urischeme->string @result{} string (define (urischeme->string urischeme) (symbol->string urischeme)) ;;; @defproc urischeme-hierarchical? urischeme (define (urischeme-hierarchical? urischeme) (and (memq urischeme uri-internal:hierarchical-schemes) #t)) ;;; @defproc urischeme-default-portnum urischeme @result{} integer-or-f (define (urischeme-default-portnum urischeme) (let ((pair (assq urischeme uri-internal:default-portnums))) (if pair (cdr pair) #f))) (define uri-internal:hierarchical-schemes (list ftp-uri-scheme gopher-uri-scheme http-uri-scheme https-uri-scheme imap-uri-scheme ipp-uri-scheme nfs-uri-scheme)) ;;; @subsection URI Reference Fragments ;;; @defproc uriobj-fragment uriobj @result{} string-or-f ;;; @defprocx uriobj-fragment/escaped uriobj @result{} string-or-f (define (uriobj-fragment/escaped uriobj) ;; TODO: Do we want to escape better if the original fragment wasn't? (let ((pound-k (uri-internal:uriobj-pound-k uriobj))) (if pound-k (uri-internal:substring-i (uriobj->string uriobj) (+ 1 pound-k) #f) #f))) (define (uriobj-fragment uriobj) ;; Note: We make this always immutable because some strings we yield will be ;; immutable, so we should be consistent. (let ((pound-k (uri-internal:uriobj-pound-k uriobj))) (if pound-k (uri-unescape-i (uriobj->string uriobj) (+ 1 pound-k)) #f))) ;;; @defproc uriobj-with-fragment uriobj fragment @result{} uriobj ;;; @defprocx uriobj-with-fragment/escaped uriobj fragment @result{} uriobj (define (uriobj-with-fragment uriobj fragment) (uriobj-with-fragment/escaped uriobj (if fragment (uri-escape-i fragment) #f))) (define (uriobj-with-fragment/escaped uriobj fragment) ;; TODO: Don't use string->uriobj and friends here. Or at least copy over ;; any parsed server, path, and query info. (let ((old-pound-k (uri-internal:uriobj-pound-k uriobj))) (if fragment (if old-pound-k (if (equal? (uriobj-fragment/escaped uriobj) fragment) uriobj (string->uriobj (uri-internal:string-append-i (uriobj->string/nofragment uriobj) "#" fragment))) (string->uriobj (uri-internal:string-append-i (uriobj->string uriobj) "#" fragment))) (if old-pound-k (string->uriobj (uriobj->string/nofragment uriobj)) uriobj)))) ;;; @subsection Hierarchical URIs (define (uri-internal:make-hierarchical-uriobj urischeme uriserver uripath uriquery fragment) (receive (string opaque-k pound-k) (uri-internal:build-hierarchical-uriobj-string+opaque+pound urischeme uriserver uripath uriquery fragment) (uri-internal:make-uriobj string urischeme opaque-k pound-k uriserver uripath uriquery))) (define (uri-internal:build-hierarchical-uriobj-string+opaque+pound urischeme uriserver uripath uriquery fragment) ;; Note: this procedure is currently only used when the uri scheme is known, ;; but will probably evolve into a procedure that works with unknown uri ;; schemes, so some code has been written to support that. ;; ;; TODO: fragment should already be *un*escaped (let ((str (let ((os (open-output-string))) (if urischeme (begin (display urischeme os) (write-char #\: os))) (if uriserver (begin (display "//" os) (write-uriserver uriserver os) (if uripath (write-uripath/leading-slash uripath os))) (if uripath (write-uripath uripath os))) (if (and uriquery (not (null? uriquery))) (begin (write-char #\? os) (write-uriquery uriquery os))) (if fragment (begin (write-char #\# os) (display (uri-escape/shareok fragment) os))) (uri-internal:gosc-i os)))) (values str (if urischeme (+ 1 (string-length (symbol->string urischeme))) #f) (if fragment (- (string-length str) (string-length fragment) 1) #f)))) ;; TODO: maybe make the hierarchical-uri parsing functions document that they ;; might only be meaningful on hierarchical uri. maybe refuse to work with ;; known urischemes not known to be hierarchical. ;;; @defproc uriobj-hierarchical? uriobj @result{} boolean (define (uriobj-hierarchical? uriobj) (urischeme-hierarchical? (uriobj-scheme uriobj))) ;;; @subsection Server Authorities ;; ;; uriserver represented as #f, string, or list of 3 elements ;; ;; uri-scheme default port number *not* included in uriserver representation ;;; @defproc uriobj-uriserver uriobj @result{} uriserver (define (uriobj-uriserver uriobj) (let ((substring->uriserver (lambda (str start end) (substring/default-portnum->uriserver str start end (urischeme-default-portnum (uri-scheme uriobj)))))) ((uri-internal:uriobj-rxpos-field-proc uri-internal:uriobj-auth substring->uriserver uri-internal:set-uriobj-auth!) uriobj))) ;;;@defproc uriobj-uriserver+path+query uri @result{} (uriserver, uripath, uriquery) (define (uriobj-uriserver+path+query uriobj) (values (uriobj-uriserver uriobj) (uriobj-uripath uriobj) (uriobj-uriquery uriobj))) ;;; @defproc uri-uriserver uri @result{} uriserver (define (uri-uriserver uri) (uriobj-uriserver (uri->uriobj uri))) ;;;@defproc uri-uriserver+uripath+uriquery uri @result{} (uriserver, uripath, uriquery) ;; ;; for server authorities, not just any authority (define (uri-uriserver+uripath+uriquery uri) (uriobj-uriserver+path+query (uri->uriobj uri))) ;;; @defproc uriobj-userinfo+host+portnum uriobj @result{} (string-or-f, string-or-f, integer-or-f) (define (uriobj-userinfo+host+portnum uriobj) (receive (userinfo host portnum) (uriserver-userinfo+host+portnum (uriobj-uriserver uriobj)) (values userinfo host (or portnum (uriobj-portnum uriobj))))) ;;; @defproc uriobj-portnum uriobj @result{} integer-or-f (define (uriobj-portnum uriobj) (or (uriserver-portnum (uriobj-uriserver uriobj)) (let ((urischeme (uriobj-scheme uriobj))) (and urischeme (urischeme-default-portnum urischeme))))) ;;; @defproc make-uriserver userinfo host portnum @result{} uriserver ;;; @defprocx make-uriserver/default-portnum userinfo host portnum default-portnum @result{} uriserver ;; ;; if uriserver is to have the default port number for a uri scheme, then ;; portnum should be #f. (consider sharing the same uriserver for http and ;; https uri, for example. also consider normal http uri do not show default ;; port number.) (define (make-uriserver/default-portnum userinfo host portnum default-portnum) (let ((portnum (if (equal? portnum default-portnum) #f portnum))) (cond ((or userinfo portnum) (list-immutable (uri-internal:string-or-f->string-i-or-f userinfo) (uri-internal:string-or-f->string-i-or-f host) portnum)) (host (uri-internal:string-or-f->string-i-or-f host)) (else #f)))) (define (make-uriserver userinfo host portnum) (make-uriserver/default-portnum userinfo host portnum #f)) ;;; @defproc make-or-reuse-uriserver userinfo host portnum base-uriserver @result{} uriserver ;;; @defprocx make-or-reuse-uriserver/default-portnum userinfo host portnum base-uriserver default-portnum @result{} uriserver ;; ;; base-uriserver is of same scheme (define (make-or-reuse-uriserver userinfo host portnum base-uriserver) (if (or userinfo host portnum) (if base-uriserver (receive (base-u base-h base-p) (uriserver-userinfo+host+portnum base-uriserver) (if (and (equal? userinfo base-u) (equal? host base-h) (equal? portnum base-p)) base-uriserver (make-uriserver (or userinfo base-u) (or host base-h) (or portnum base-p)))) (make-uriserver userinfo host portnum)) base-uriserver)) (define (make-or-reuse-uriserver/default-portnum userinfo host portnum base-uriserver default-portnum) (make-or-reuse-uriserver userinfo host (if (equal? portnum default-portnum) #f portnum) base-uriserver)) ;;; @defproc string->uriserver str @result{} uriserver ;;; @defprocx string/base->uriserver str base-uriserver @result{} uriserver ;;; @defprocx string/default-portnum->uriserver str default-portnum @result{} uriserver ;;; @defprocx string/base/default-portnum->uriserver str base-uriserver default-portnum @result{} uriserver ;;; @defprocx substring->uriserver str start end @result{} uriserver ;;; @defprocx substring/base->uriserver str start end base-uriserver @result{} uriserver ;;; @defprocx substring/default-portnum->uriserver str start end default-portnum @result{} uriserver ;;; @defprocx substring/base/default-portnum->uriserver str start end base-uriserver default-portnum @result{} uriserver ;; TODO: We sure do have a lot of these procedures... (define (string->uriserver str) (substring/base/default-portnum->uriserver str 0 #f #f #f)) (define (string/base->uriserver str base-uriserver) (substring/base/default-portnum->uriserver str 0 #f base-uriserver #f)) (define (string/base/default-portnum->uriserver str base-uriserver default-portnum) (substring/base/default-portnum->uriserver str 0 #f base-uriserver default-portnum)) (define (string/default-portnum->uriserver str default-portnum) (substring/base/default-portnum->uriserver str 0 #f #f default-portnum)) (define (substring->uriserver str start end) (substring/base/default-portnum->uriserver str start end #f #f)) (define (substring/base->uriserver str start end base-uriserver) (substring/base/default-portnum->uriserver str start end base-uriserver #f)) (define (substring/default-portnum->uriserver str start end default-portnum) (substring/base/default-portnum->uriserver str start end #f default-portnum)) (define substring/base/default-portnum->uriserver (let ((rx (uri-internal:make-rx "^(?:([^@:]+)@)?([^@:]+)?(?::([0-9]+))?"))) (lambda (str start end base-uriserver default-portnum) (uri-internal:with-rx-match-positions (rx str start (or end (string-length str))) ((whole userinfo host portnum) (if (or userinfo host portnum) (make-or-reuse-uriserver/default-portnum (uri-internal:rxpos-str-i userinfo str) (uri-internal:rxpos-lowstr-i host str) (uri-internal:rxpos-num portnum str) base-uriserver default-portnum) #f)))))) ;;; @defproc uriserver-userinfo uriserver @result{} string-or-f ;;; @defprocx uriserver-host uriserver @result{} string-or-f ;;; @defprocx uriserver-portnum uriserver @result{} integer-or-f ;;; @defprocx uriserver-userinfo+host+portnum uriserver @result{} (string-or-f, string-or-f, integer-or-f) (define (uriserver-userinfo uriserver) (if (pair? uriserver) (list-ref uriserver 0) #f)) (define (uriserver-host uriserver) (cond ((not uriserver) #f) ((string? uriserver) uriserver) (else (list-ref uriserver 1)))) (define (uriserver-portnum uriserver) (if (pair? uriserver) (list-ref uriserver 2) #f)) (define (uriserver-userinfo+host+portnum uriserver) (cond ((not uriserver) (values #f #f #f)) ((string? uriserver) (values #f uriserver #f)) (else (apply values uriserver)))) ;;; @defproc write-uriserver uriserver port (define (write-uriserver uriserver port) (receive (userinfo host portnum) (uriserver-userinfo+host+portnum uriserver) (if userinfo (begin (display userinfo port) (write-char #\@ port))) (if host (display host port)) (if portnum (begin (write-char #\: port) (display portnum port))))) ;;; @defproc uriserver-with-default-portnum uriserver default-portnum @result{} uriserver (define (uriserver-with-default-portnum uriserver default-portnum) (if (and default-portnum (equal? (uriserver-portnum uriserver) default-portnum)) (receive (userinfo host portnum) (uriserver-userinfo+host+portnum uriserver) (make-uriserver userinfo host #f)) uriserver)) ;;; @defproc resolved-uriserver uriserver base-uriserver @result{} uriserver ;;; @defprocx resolved-uriserver/default-portnum uriserver base-uriserver default-portnum @result{} uriserver ;; ;; base-uriserver must be from uri of the same scheme as uriserver ;; (define (resolved-uriserver uriserver base-uriserver) ;; (or uriserver base-uriserver)) (define (resolved-uriserver/default-portnum uriserver base-uriserver default-portnum) (uriserver-with-default-portnum (or uriserver base-uriserver) default-portnum)) ;;; @subsection Hierarchical Paths ;;; @defproc uri-path uri @result{} uripath-or-f ;;; @defprocx uri-path/noparams uri @result{} uripath-or-f ;;; @defprocx uriobj-uripath uriobj @result{} uripath-or-f ;;; @defprocx uriobj-uripath/noparams uriobj @result{} uripath-or-f ;; TODO: "path/noparams" might be be an unusual case. Maybe better to have the ;; "/noparams" be on "uripath-segments/noparams" and ;; "uripath-upcount+segments/noparams". Or maybe we should add ;; "uri-path-upcount", "uri-path-segments", "uri-path-segments/noparams", etc. (define uriobj-uripath (uri-internal:uriobj-rxpos-field-proc uri-internal:uriobj-path substring->uripath uri-internal:set-uriobj-path!)) (define (uriobj-uripath/noparams uriobj) (let ((uripath (uriobj-uripath uriobj))) (receive (ups segs) (uripath-upcount+segments/reverse uripath) (let loop ((rest segs)) (cond ((null? rest) uripath) ((urisegment-has-params? (car rest)) (make-uripath/reverse/shareok ups (uri-internal:map-i urisegment-name segs))) (else (loop (cdr rest)))))))) (define (uri-path uri) (uriobj-uripath (uri->uriobj uri))) (define (uri-path/noparams uri) (uriobj-uripath/noparams (uri->uriobj uri))) ;;; @defproc make-uripath upcount segments @result{} uripath ;;; @defprocx make-uripath/reverse upcount segments @result{} uripath ;;; @defprocx make-uripath/reverse/shareok upcount segments @result{} uripath (define (make-uripath/reverse/shareok upcount segments) (if upcount (uri-internal:cons-i upcount segments) segments)) (define (make-uripath/reverse upcount segments) (make-uripath/reverse/shareok upcount (uri-internal:list->list-i segments))) (define (make-uripath upcount segments) (make-uripath/reverse/shareok upcount (uri-internal:reverse-i segments))) ;;; @defproc uripath-with-upcount uripath upcount @result{} uripath ;; ;; possibly new, possibly original (define (uripath-with-upcount uripath upcount) (receive (old-uc old-segs) (uripath-upcount+segments/reverse uripath) (if (equal? upcount old-uc) uripath (make-uripath/reverse/shareok upcount old-segs)))) ;;; @defproc string->uripath str @result{} uripath ;;; @defprocx string/base->uripath str base-uripath @result{} uripath ;;; @defprocx substring->uripath str start end @result{} uripath ;;; @defprocx substring/base->uripath str start end base-uripath @result{} uripath ;; ;; Note: Contrary to [RFC2396], we don't require base to be absolute. (define (string->uripath str) (substring/base->uripath str 0 #f #f)) (define (string/base->uripath str base-uripath) (substring/base->uripath str 0 #f base-uripath)) (define (substring->uripath str start end) (substring/base->uripath str start end #f)) ;; TODO: URI parsing code should ignore whitespace when parsing a ;; "<([Uu][Rr][Ll]:)?[^>]*>" one. Distinguish "extract-uri" or "parse-uri" ;; from "string->uri". (define substring/base->uripath ;; TODO: We can drop the second match-position from "path-rx". (let ((path-rx (uri-internal:make-rx "^(/)?(.+)?$")) (segment-rx (uri-internal:make-rx "^([^/;]+)?(;[^/]*)?(/)?"))) (lambda (str start end base-uripath) (let ((end (or end (string-length str))) (finish (lambda (uc segs) (let ((new-uripath (make-uripath/reverse/shareok uc segs))) (if base-uripath (resolved-uripath new-uripath base-uripath) new-uripath))))) (uri-internal:with-rx-match-positions (path-rx str start end) ((whole leading-slash path-pos) (if path-pos (let loop ((start (car path-pos)) (uc (if leading-slash #f 0)) (segs '())) (uri-internal:with-rx-match-positions (segment-rx str start end) ((whole name-pos params slash) (receive (new-uc new-segs) ;; TODO: Is this right? Escaped "." and ".." count? (let ((name (if name-pos (uri-unescape-i str (car name-pos) (cdr name-pos)) #f))) (cond ((and name (string=? name "..")) (if (null? segs) (values (if uc (+ 1 uc) 1) segs) (values uc (cdr segs)))) ((and name (string=? name ".")) (values uc segs)) (else (values uc (uri-internal:cons-i (if params (uri-internal:cons-i name (uri-internal:parse-uri-path-params str (+ 1 (car params)) (cdr params))) name) segs))))) (if slash (loop (cdr whole) new-uc new-segs) (finish new-uc new-segs)))))) (finish (if leading-slash #f 0) (uri-internal:cons-i #f '()))))))))) ;; TODO: maybe add /noparams variants of uripath accessors, and *maybe* remove ;; the uri-path/noparams ;;; @defproc uripath-upcount uripath @result{} integer-or-f ;;; @defprocx uripath-segments uripath @result{} list ;;; @defprocx uripath-segments/reverse uripath @result{} list ;;; @defprocx uripath-upcount+segments uripath @result{} (integer-or-f, list) ;;; @defprocx uripath-upcount+segments/reverse uripath @result{} (integer-or-f, list) (define (uripath-upcount+segments/reverse path) (cond ((null? path) (values #f '())) ((integer? (car path)) (values (car path) (cdr path))) (else (values #f path)))) (define (uripath-upcount+segments uripath) (receive (uc segs) (uripath-upcount+segments/reverse uripath) (values uc (uri-internal:reverse-i segs)))) (define (uripath-upcount uripath) (and (not (null? uripath)) (integer? (car uripath)) (car uripath))) (define (uripath-segments/reverse path) (cond ((null? path) '()) ((integer? (car path)) (cdr path)) (else path))) (define (uripath-segments uripath) (uri-internal:reverse-i (uripath-segments/reverse uripath))) ;;; @defproc uripath-has-params? uripath @result{} boolean (define (uripath-has-params? uripath) (let ((segs (uripath-segments/reverse uripath))) (let loop ((segs segs)) (cond ((null? segs) #f) ((urisegment-has-params? (car segs)) #t) (else (loop (cdr segs))))))) ;;; @defproc write-uripath uripath port @result{} undef ;;; @defprocx write-uripath/leading-slash uripath port @result{} undef (define (uri-internal:write-uripath/leading-slash-arg uripath port leading-slash?) (receive (uc segs) (uripath-upcount+segments uripath) (cond (uc (if leading-slash? (if (zero? uc) ;; TODO: This isn't right. Look at what the parsing ;; procedure produces, and special-case if need be. (display "/." port) (write-char #\/ port))) (let loop ((i uc)) (if (> i 0) (begin (display "../" port) (loop (- i 1)))))) ((or (not leading-slash?) (null? segs) (car segs) (null? (cdr segs))) (write-char #\/ port)) (else (display "/./" port))) ;; TODO: Make this use segments/reverse, use non-tail recursion on ;; segments, and eliminate use of for-each/between. (if segs (for-each/between (lambda (seg) (let ((name (urisegment-name seg))) (if name (display (uri-escape/shareok (urisegment-name seg)) port)) (for-each (lambda (param) (write-char #\; port) (display (uri-escape/shareok param) port)) (urisegment-params seg)))) (lambda () (write-char #\/ port)) segs)))) (define (write-uripath uripath port) (uri-internal:write-uripath/leading-slash-arg uripath port #f)) (define (write-uripath/leading-slash uripath port) (uri-internal:write-uripath/leading-slash-arg uripath port #t)) ;;; @defproc uripath->string uripath @result{} string ;;; @defprocx uripath->string/leading-slash uripath @result{} string ;;; ;;; ;;; ;;; @lisp ;;; (uri-path-segments "//a/b") @result{} ("b") ;;; (uri-path-segments "/.//a/b") @result{} (#f "a" "b") ;;; @end lisp ;;; ;;; ;;; ;;; @lisp ;;; (uripath->string (string->uripath "//b")) ;;; @result{} "//b" ;;; (uripath->string/leading-slash (string->uripath "//b")) ;;; @result{} "/.//b" ;;; (uripath->string/leading-slash (string->uripath "/a/b")) ;;; @result{} "/a/b" ;;; (uripath->string/leading-slash (string->uripath "/;p1/b")) ;;; @result{} "/;p1/b" ;;; @end lisp (define (uripath->string uripath) (let ((os (open-output-string))) (write-uripath uripath os) (uri-internal:gosc os))) (define (uripath->string/leading-slash uripath) (let ((os (open-output-string))) (write-uripath/leading-slash uripath os) (uri-internal:gosc os))) ;;; @defproc resolved-uripath uripath base-uripath @result{} uripath (define (resolved-uripath uripath base-uripath) (receive (old-upcount old-segs) (uripath-upcount+segments/reverse uripath) (cond ((and (not old-upcount) (null? old-segs)) (receive (base-upcount base-segs) (uripath-upcount+segments/reverse base-uripath) ;; TODO: unify this with below. ;; ;; TODO: check for base-upcount #f below. (make-uripath/reverse/shareok (if old-upcount (max old-upcount (or base-upcount 0)) base-upcount) base-segs))) (old-upcount (receive (base-upcount base-segs) (uripath-upcount+segments/reverse base-uripath) (cond ((not (null? base-segs)) ;; "base-segs" is not null, so... ;; ;; TODO: We can simplify these "usable-base-segs" things once we've ;; tested all the oddball cases. (receive (usable-base-segs usable-base-segs-len) (let ((base-segs-len (length base-segs))) ;; (if (urisegment-name (car base-segs)) (if (null? old-segs) (values base-segs base-segs-len) (values (cdr base-segs) (- base-segs-len 1)))) (let ((base-segs-to-skip (min old-upcount usable-base-segs-len))) (if (< base-segs-to-skip 0) (make-uripath/reverse/shareok (if base-upcount (max (+ old-upcount base-segs-to-skip) base-upcount) (+ old-upcount base-segs-to-skip)) (uri-internal:append-i old-segs usable-base-segs)) (make-uripath/reverse/shareok base-upcount (uri-internal:append-i old-segs (list-tail usable-base-segs base-segs-to-skip))))))) (base-upcount ;; "base-segs" is null, but "base-upcount" is not #f, so... ;; ;; TODO: do we take #f in base-segs into account? (if (> base-upcount old-upcount) (uripath-with-upcount uripath base-upcount) uripath)) (else uripath)))) (else uripath)))) ;;; @defproc absolute-uripath uripath @result{} uripath (define (absolute-uripath uripath) (receive (upc segs) (uripath-upcount+segments/reverse uripath) (cond ((null? segs) (make-uripath/reverse/shareok #f (uri-internal:list-i #f))) (upc (make-uripath/reverse/shareok #f segs)) (else uripath)))) ;;; @subsection Attribute-Value Queries ;;; @defproc uriobj-uriquery uriobj @result{} uriquery (define uriobj-uriquery (uri-internal:uriobj-rxpos-field-proc uri-internal:uriobj-query substring->uriquery uri-internal:set-uriobj-query!)) ;;; @defproc string->uriquery str ;;; @defprocx substring->uriquery str start end (define substring->uriquery (let ((rx (uri-internal:make-rx "^([^&=]+)?(?:=([^&]*))?(&)?"))) (lambda (str start end) (let loop ((start start) (end (or end (string-length str)))) (uri-internal:with-rx-match-positions (rx str start end) ((whole name-pos val-pos amp-pos) (if name-pos (uri-internal:cons-i (uri-internal:cons-i (uri-unescape-i str (car name-pos) (cdr name-pos)) (if val-pos (uri-unplusescape-i str (car val-pos) (cdr val-pos)) #t)) (if amp-pos (loop (cdr amp-pos) end) '())) (if amp-pos (loop (cdr amp-pos) end) '())))))))) (define (string->uriquery str) (substring->uriquery str 0 #f)) ;;; @defproc write-uriquery uriquery port @result{} undef (define (write-uriquery uriquery port) (if (not (null? uriquery)) (let loop ((head (car uriquery)) (rest (cdr uriquery))) (let ((attr (car head)) (val (cdr head))) (if attr (display (uri-escape/shareok attr) port)) (if (string? val) (begin (write-char #\= port) (display (uri-escape/shareok val) port))) (if (null? rest) (if #f #f) (begin (write-char #\& port) (loop (car rest) (cdr rest)))))))) ;;; @subsection Resolving Relative URI ;;; @defproc absolute-uriobj? uriobj @result{} boolean (define (absolute-uriobj? uriobj) (let ((urischeme (uriobj-scheme uriobj))) (if urischeme (if (urischeme-hierarchical? urischeme) (receive (server path query) (uriobj-uriserver+path+query uriobj) (and server path (not (uripath-upcount path)))) #f) #f))) ;;; @defproc resolved-uriobj uriobj base-uri @result{} uriobj ;;; @defprocx resolved-uriobj/base-uriobj uriobj base-uriobj @result{} uriobj (define (resolved-uriobj/base-uriobj uriobj base-uriobj) (cond ((equal? (uriobj->string uriobj) "") base-uriobj) ((absolute-uriobj? uriobj) uriobj) (else (let ((old-urischeme (uriobj-scheme uriobj)) (base-urischeme (uriobj-scheme base-uriobj))) (if (or (and old-urischeme (eq? old-urischeme base-urischeme)) (and (not old-urischeme) base-urischeme)) (let ((new-urischeme base-urischeme)) (if (urischeme-hierarchical? new-urischeme) ;; TODO: Potentially reuse uriobj. (uri-internal:make-hierarchical-uriobj new-urischeme (resolved-uriserver/default-portnum (uriobj-uriserver uriobj) (uriobj-uriserver base-uriobj) (urischeme-default-portnum new-urischeme)) (resolved-uripath (uriobj-uripath uriobj) (uriobj-uripath base-uriobj)) ;; TODO: is this right, that query not inherited? (uriobj-uriquery uriobj) (uriobj-fragment uriobj)) ;; Note: We don't yet know how to resolve non-hierarchical ;; URI, so just yield the original URI but possibly with ;; the scheme of the base. (uriobj-with-scheme uriobj new-urischeme))) uriobj))))) (define (resolved-uriobj uriobj base-uri) (resolved-uriobj/base-uriobj uriobj (uri->uriobj base-uri))) ;;; @defproc absolute-uriobj uriobj @result{} uriobj (define (absolute-uriobj uriobj) (if (uriobj-hierarchical? uriobj) (let* ((old-uripath (uriobj-uripath uriobj)) (new-uripath (absolute-uripath old-uripath))) (if (eq? new-uripath old-uripath) uriobj ;; TODO: Constructing a new uriobj like this is potentially a ;; problem, if uriobj is later made extensible and other fields can ;; be added to it. (uri-internal:make-hierarchical-uriobj (uriobj-scheme uriobj) (uriobj-uriserver uriobj) new-uripath (uriobj-uriquery uriobj) (uriobj-fragment uriobj)))) uriobj)) ;; TODO: normalized-uriobj uriobj @result{} uriobj ;;; @section Tests ;;; The @code{uri.scm} source code file defines a regression test suite for the ;;; library itself, in procedure @code{uri-internal:test}. This test suite can ;;; be disabled in the source code. ;; TODO: Make this test suite use forthcoming portable test harness. (define-syntax uri-internal:tests-body (syntax-rules () ((_ ?x ...) ;; Note: Exactly one of the following two lines must be commented-out. ;;(begin (display "Test suite must be enabled in source code.") (newline)) (let () ?x ...) ))) (define (uri-internal:test) (uri-internal:tests-body (let ((total-count 0) (pass-count 0) (fail-count 0)) (let ((test-proc (lambda (desc expr-quoted expr pred expected) (set! total-count (+ 1 total-count)) (newline) (display ";; ") (display total-count) (display ". ") (display desc) (newline) (write expr-quoted) (newline) (let ((result expr)) (display ";; ==> ") (write result) (newline) (if (pred result expected) (begin (set! pass-count (+ 1 pass-count)) (display ";; PASSED.") (newline)) (begin (set! fail-count (+ 1 fail-count)) (display ";; FAILED; expected:") (newline) (display ";; ") (write expected) (newline)))))) (test-eval-proc (lambda (desc expr-quoted) (newline) (display ";; EVAL: ") (display desc) (newline) (write expr-quoted) (newline)))) (let-syntax ((test/equal (syntax-rules () ((_ desc expr expected) (test-proc desc (quote expr) expr equal? expected)))) (test-eval (syntax-rules () ((_ desc expr) (test-eval-proc desc (quote expr) expr)))) (test-define (syntax-rules () ((_ desc name value) (define name (begin (test-eval-proc desc (list 'define (quote name) (quote value))) value)))))) (newline) (display ";;; BEGIN uri.scm TESTS") (newline) (test/equal "" (uri-escape "") "") (test/equal "" (uri-escape "a b") "a%20b") (test/equal "" (uri-escape "a b c") "a%20b%20c") (test/equal "" (uri-escape "aaa bbb ccc") "aaa%20bbb%20ccc") (test/equal "" (uri-escape " a ") "%20a%20") (test/equal "" (uri-escape "a/b") "a%2Fb") (test/equal "" (uri-escape "a%b") "a%25b") (test/equal "" (uri-plusescape "") "") (test/equal "" (uri-plusescape "a b") "a+b") (test/equal "" (uri-plusescape "a b c") "a+b+c") (test/equal "" (uri-plusescape "aaa bbb ccc") "aaa+bbb+ccc") (test/equal "" (uri-plusescape " a ") "+a+") (test/equal "" (uri-plusescape "a/b") "a%2Fb") (test/equal "" (uri-plusescape "a%b") "a%25b") (test/equal "" (uri-unescape "") "") (test/equal "" (uri-unescape "%") "%") (test/equal "" (uri-unescape "%0") "%0") (test/equal "" (uri-unescape "%1") "%1") (test/equal "" (uri-unescape "a%20b") "a b") (test/equal "" (uri-unescape "a%20b%20c") "a b c") (test/equal "" (uri-unescape "aaa%20bbb%20ccc") "aaa bbb ccc") (test/equal "" (uri-unescape "%20a%20") " a ") (test/equal "" (uri-unescape "a%2Fb") "a/b") (test/equal "" (uri-unescape "a%2fb") "a/b") (test/equal "" (uri-unescape "a%25b") "a%b") (test/equal "" (string->uripath "") '(0 #f)) (test/equal "" (string->uripath ".") '(0)) (test/equal "" (string->uripath "..") '(1)) (test/equal "" (string->uripath "./") '(0 #f)) (test/equal "" (string->uripath "../") '(1 #f)) (test/equal "" (string->uripath "/") '(#f)) (test/equal "" (string->uripath "/.") '()) (test/equal "" (string->uripath "/..") '(1)) (test/equal "" (string->uripath "a/b") '(0 "b" "a")) (test/equal "" (string->uripath "/a/b") '("b" "a")) (test/equal "" (string->uripath "./a/b") '(0 "b" "a")) (test/equal "" (string->uripath "/./a/b") '("b" "a")) (test/equal "" (string->uripath "/../a/b") '(1 "b" "a")) (test/equal "" (string->uripath "/../../a/b") '(2 "b" "a")) (test/equal "" (string->uripath "/../../../a/b") '(3 "b" "a")) (test/equal "" (string->uripath "/a/../b") '("b")) (test/equal "" (string->uripath "/a/../../b") '(1 "b")) (test/equal "" (string->uripath "/a/../../../b") '(2 "b")) (test/equal "" (string->uripath "a/../b") '(0 "b")) (test/equal "" (string->uripath "a/../../b") '(1 "b")) (test/equal "" (string->uripath "a/../../../b") '(2 "b")) (test/equal "" (string->uripath "/") '(#f)) (test/equal "" (string->uripath "//") '(#f #f)) (test/equal "" (string->uripath "///") '(#f #f #f)) (test/equal "" (string->uripath "/a/") '(#f "a")) (test/equal "" (string->uripath "//a") '("a" #f)) (test/equal "" (string->uripath "/;p/a") '("a" (#f "p"))) (test/equal "" (string->uripath "///a") '("a" #f #f)) (test/equal "empty path segment at start of absolute path" (string->uripath "//a/b") '("b" "a" #f)) (test/equal "empty path segment at start of relative path" (string->uripath ".//a/b") '(0 "b" "a" #f)) (test/equal "empty path segment with param at start of absolute path" (string->uripath "/;p/a/b") '("b" "a" (#f "p"))) (test/equal "empty path segment with param at start of relative path" (string->uripath ";p/a/b") '(0 "b" "a" (#f "p"))) (test/equal "empty path segment in middle of path" (string->uripath "/a//b/") '(#f "b" #f "a")) (test/equal "empty path segment with parameter in middle of path" (string->uripath "/a/;p/b/") '(#f "b" (#f "p") "a")) (test/equal "empty path segment with parameter at end of path" (string->uripath "/a/b/;p") '((#f "p") "b" "a")) (test/equal "empty path parameter" (string->uripath "/a/;/b") '("b" (#f #f) "a")) (test/equal "multiple empty path parameters" (string->uripath "/a/;;;/b") '("b" (#f #f #f #f) "a")) (test/equal "path segment beginning with dot" (string->uripath "/a/.b/c") '("c" ".b" "a")) (test/equal "path segment beginning with double-dot" (string->uripath "/a/..b/c") '("c" "..b" "a")) (test/equal "" (uri-path "../../a/b;p1/c/d;p2;p3/;p5") '(2 (#f "p5") ("d" "p2" "p3") "c" ("b" "p1") "a")) (test/equal "" (uri-path/noparams "../../a/b;p1/c/d;p2;p3/e/;p5") '(2 #f "e" "d" "c" "b" "a")) (test/equal "" (string->uriquery "q=fiendish+scheme&case&foo=&x=1%2B2") '(("q" . "fiendish scheme") ("case" . #t) ("foo" . "") ("x" . "1+2"))) (test/equal "" (string->uriquery "") '()) (test/equal "" (string->uriquery "&") '()) (test/equal "" (string->uriquery "&&") '()) (test/equal "" (string->uriquery "x&&") '(("x" . #t))) (test/equal "" (string->uriquery "&&x") '(("x" . #t))) (test/equal "" (uriquery-value (string->uriquery "x=a%20b") "x") "a b") (test/equal "" (uriquery-value (string->uriquery "x=a%20b") "y") #f) (test/equal "" (uriquery-value (string->uriquery "x=") "x") "") (test/equal "" (uriquery-value (string->uriquery "x=&") "x") "") (test/equal "" (uriquery-value (string->uriquery "x") "x") #t) (test/equal "" (uriquery-value (string->uriquery "x&") "x") #t) (test/equal "" (resolved-uripath '(2 "c" "b" "a") '(#f "z" "y" "x")) '("c" "b" "a" "x")) (test/equal "" (uripath->string (resolved-uripath (string->uripath "../../a/b/c") (string->uripath "/x/y/z/"))) "/x/a/b/c") (test/equal "" (uri-with-fragment "http://www/foo#bar" "x y z") "http://www/foo#x%20y%20z") (test/equal "" (uri-with-fragment/escaped "http://www/foo#bar" "x y z") "http://www/foo#x y z") (test/equal "" (string->uriserver "") #f) (test/equal "" (string->uriserver "myhost") "myhost") (test/equal "" (string->uriserver "myhost:80") '(#f "myhost" 80)) (test/equal "" (string->uriserver "myuser@myhost") '("myuser" "myhost" #f)) (test/equal "" (string->uriserver "myuser@myhost:80") '("myuser" "myhost" 80)) (test/equal "" (string/default-portnum->uriserver "www.foo:80" 80) "www.foo") (test/equal "" (string/default-portnum->uriserver "www.foo:8080" 80) '(#f "www.foo" 8080)) (test/equal "" (string/default-portnum->uriserver "u@www.foo:80" 80) '("u" "www.foo" #f)) (test/equal "" (string/default-portnum->uriserver "u@www.foo:8080" 80) '("u" "www.foo" 8080)) (test/equal "" (string/default-portnum->uriserver ":80" 80) #f) (test/equal "" (string/default-portnum->uriserver ":8080" 80) '(#f #f 8080)) (test/equal "" (uri-uriserver "//www:80/") '(#f "www" 80)) (test/equal "" (uri-uriserver (resolved-uri "//www:80/" "http:")) "www") (test/equal "" (uri-server-portnum "http:") 80) (test/equal "" (uri-server-portnum "http://www/") 80) (test/equal "" (uri-server-portnum "http://www:80/") 80) (test/equal "" (uri-server-portnum "http://www:8080/") 8080) (test/equal "" (resolved-uri ".././.././././foo.html" "http://www/aaa/bbb/ccc/index.html") "http://www/aaa/foo.html") (test/equal "" (resolved-uri "." "http://www") ;; TODO: There might not be any good answer to this one. ;; Maybe just leave off the "/."? "http://www/.") (test/equal "" (resolved-uri "x" "http://www/a/b/c/") "http://www/a/b/c/x") (test/equal "" (resolved-uri "../x" "http://www/a/b/c/") "http://www/a/b/x") (test/equal "" (resolved-uri "../../x" "http://www/a/b/c/") "http://www/a/x") (test/equal "" (resolved-uri "../../" "http://www/a/b/c/") "http://www/a/") (test/equal "" (resolved-uri "../.." "http://www/a/b/c/") ;; TODO: This seems like it probably could be right, but ;; should check with an authoritative source. "http://www/a/b") (test/equal "" (resolved-uri "mailto:foo@bar" "http://www/a/index.html") "mailto:foo@bar") (test/equal "" (resolved-uri "www/a/index.html" "mailto:foo@bar") "mailto:www/a/index.html") (test/equal "" (resolved-uri "//www:80/" "http:") "http://www/") (test/equal "" (resolved-uri "/foo?x=1&y=a%20b&z" "http:") "http:/foo?x=1&y=a%20b&z") (test/equal "" (absolute-uri "http:foo") "http:/foo") (test/equal "" (absolute-uri "http:?xxx") "http:/?xxx") (test/equal "" (absolute-uri "http:../foo") "http:/foo") (test/equal "" (absolute-uri "http:") "http:/") (test/equal "" (absolute-uri "mailto:foo") "mailto:foo") (test/equal "" (uripath->string '(#f)) "/") (test/equal "" (uripath->string/leading-slash '(#f)) "/") (test/equal "" (uripath->string (string->uripath "//a")) "//a") (test/equal "" (uripath->string/leading-slash (string->uripath "//a")) "/.//a") ;; (test/equal "" ;; (uri-with-scheme "http://w/" https-uri-scheme) ;; "https://w/") ;; ;; (test/equal "" ;; (uri-with-scheme "http://w:80/" https-uri-scheme) ;; "https://w/") ;; ;; (test/equal "" ;; (uri-with-scheme "http://w:8080/" https-uri-scheme) ;; "https://w:8080/") ;; "imap://minbari.org/gray-council;UIDVALIDITY=385759045/;UID=20" ;; "imap://michael@minbari.org/users.*;type=list" ;; "imap://psicorp.org/~peter/%E6%97%A5%E6%9C%AC%E8%AA%9E/%E5%8F%B0%E5%8C%97" ;; "imap://;AUTH=KERBEROS_V4@minbari.org/gray-council/;uid=20/;section=1.2" ;; "imap://;AUTH=*@minbari.org/gray%20council?SUBJECT%20shadows" (newline) (display ";;; END uri.scm TESTS: ") (display (cond ((zero? fail-count) "all PASSED") ((zero? pass-count) "ALL FAILED") (else "some FAILED"))) (display " (Total: ") (display total-count) (display " Passed: ") (display pass-count) (display " Failed: ") (display fail-count) (display ")") (newline)))))) ;;; @unnumberedsec History ;;; @table @asis ;;; ;;; @item Version 0.1 --- 18-Aug-2004 ;;; Initial release. Incorporates some code from UriFrame. ;;; ;;; @end table ;;; @unnumberedsec References ;;; @table @asis ;;; ;;; @item [LGPL] ;;; Free Software Foundation, ``GNU Lesser General Public License,'' Version ;;; 2.1, February 1999, 59 Temple Place, Suite 330, Boston, MA 02111-1307 ;;; USA.@* ;;; @uref{http://www.gnu.org/copyleft/lesser.html} ;;; ;;; @item [Pregexp] ;;; Dorai Sitaram, ``pregexp: Portable Regular Expressions for Scheme and ;;; Common Lisp,'' version 1e9.@* ;;; @uref{http://www.ccs.neu.edu/home/dorai/pregexp/pregexp.html} ;;; ;;; @item [RFC2192] ;;; C. Newman, ``IMAP URL Scheme,'' IETF RFC 2192, September 1997.@* ;;; @uref{http://www.ietf.org/rfc/rfc2192.txt} ;;; ;;; @item [RFC2224] ;;; B. Callaghan, ``NFS URL Scheme,'' IETF RFC 2224, October 1997.@* ;;; @uref{http://www.ietf.org/rfc/rfc2224.txt} ;;; ;;; @item [RFC2368] ;;; P. Hoffman, L. Masinter, J. Zawinski, ``The mailto URL scheme,'' IETF RFC ;;; 2368, July 1998.@* ;;; @uref{http://www.ietf.org/rfc/rfc2368.txt} ;;; ;;; @item [RFC2396] ;;; T. Berners-Lee, R. Fielding, L. Masinter, ``Uniform Resource Identifiers ;;; (URI): Generic Syntax,'' IETF RFC 2396 , August 1998.@* ;;; @uref{http://www.ietf.org/rfc/rfc2396.txt} ;;; ;;; @item [RFC2732] ;;; R. Hinden, B. Carpenter, L. Masinter, ``Format for Literal IPv6 Addresses ;;; in URL's,'' IETF RFC 2732 , December 1999.@* ;;; @uref{http://www.ietf.org/rfc/rfc2732.txt} ;;; ;;; @item [RFC3305] ;;; M. Mealling, R. Denenberg, (eds.), ``Report from the Joint W3C/IETF URI ;;; Planning Interest Group: Uniform Resource Identifiers (URIs), URLs, and ;;; Uniform Resource Names (URNs): Clarifications and Recommendations,'' IETF ;;; RFC 3305, August 2002.@* ;;; @uref{http://www.ietf.org/rfc/rfc3305.txt} ;;; ;;; @item [SLIB] ;;; Aubrey Jaffer, ``SLIB,'' version 3a1, 30 November 2003.@* ;;; @uref{http://swiss.csail.mit.edu/~jaffer/SLIB} ;;; ;;; @item [SRFI-6] ;;; William D. Clinger, ``Basic String Ports,'' SRFI 6, 1 July 1999.@* ;;; @uref{http://srfi.schemers.org/srfi-6/srfi-6.html} ;;; ;;; @item [SRFI-8] ;;; John David Stone, ``receive: Binding to multiple values,'' 30-Aug-1999.@* ;;; @uref{http://srfi.schemers.org/srfi-8/srfi-8.html} ;;; ;;; @item [SRFI-9] ;;; Richard Kelsey, ``Defining Record Types,'' SRFI 9, 9 September 1999.@* ;;; @uref{http://srfi.schemers.org/srfi-9/srfi-9.html} ;;; ;;; @item [SRFI-13] ;;; Olin Shivers, ``String Libraries,'' SRFI 13, 23 December 2000.@* ;;; @uref{http://srfi.schemers.org/srfi-13/srfi-13.html} ;;; ;;; @item [SRFI-16] ;;; Lars T Hansen, ``Syntax for procedures of variable arity,'' SRFI 16, 10 ;;; March 2000.@* ;;; @uref{http://srfi.schemers.org/srfi-16/srfi-16.html} ;;; ;;; @item [SRFI-23] ;;; Stephan Houben, ``Error reporting mechanism,'' SRFI 23, 26 April 2001.@* ;;; @uref{http://srfi.schemers.org/srfi-23/srfi-23.html} ;;; ;;; @item [SRFI-39] ;;; Marc Feeley, ``Parameter objects,'' SRFI 39, 30 June 2003.@* ;;; @uref{http://srfi.schemers.org/srfi-39/srfi-39.html} ;;; ;;; @item [UriFrame] ;;; Neil W. Van Dyke, ``UriFrame: Web Uniform Resource Identifier ;;; Framework.''@* ;;; @uref{http://www.neilvandyke.org/uriframe/} ;;; ;;; @end table ;; TODO: Check these out: ;; ;; Last Call: URI and IRI Internet-Drafts ;; ;; The Internet Engineering Task Force (IETF) has announced two Last Call ;; Internet-Drafts important for Web addressing. The documents are ;; coordinated IETF-W3C efforts. ;; ;; * "Uniform Resource Identifier (URI): Generic Syntax" is written ;; by Tim Berners-Lee (W3C), Roy Fielding (Day Software) and Larry ;; Masinter (Adobe) with involvement of the W3C Technical ;; Architecture Group (TAG). Last Call ends 13 September. Simple ;; text strings that refer to Internet resources, URIs may refer to ;; documents, resources, to people, and indirectly to anything. URIs ;; are a fundamental component of the Web. Read about the W3C URI ;; Activity and visit the TAG home page. ;; ;; http://www.ietf.org/internet-drafts/draft-fielding-uri-rfc2396bis-06.txt ;; http://www1.ietf.org/mail-archive/web/ietf-announce/current/msg00395.html ;; http://www.w3.org/Addressing/ ;; http://www.w3.org/2001/tag/ ;; ;; * "Internationalized Resource Identifiers (IRIs)" is written by ;; Martin D?rst (W3C) and Michel Suignard (Microsoft) with ;; involvement of the W3C Internationalization Working Group. Lifting ;; the limitation to a subset of US-ASCII previously allowed in Web ;; addresses, IRIs allow characters in the Universal Character Set ;; (Unicode/ISO 10646). Last Call ends 8 September. Visit the W3C ;; Internationalization home page. ;; ;; http://www.ietf.org/internet-drafts/draft-duerst-iri-09.txt ;; http://www1.ietf.org/mail-archive/web/ietf-announce/current/msg00383.html ;; http://www.w3.org/International/