;;; @Package bencode.scm ;;; @Subtitle BitTorrent Bencode Decoding in Scheme ;;; @HomePage http://www.neilvandyke.org/bencode-scm/ ;;; @Author Neil W. Van Dyke ;;; @AuthorEmail neil@@neilvandyke.org ;;; @Version 0.1 ;;; @Date 2005-04-17 ;; $Id: bencode.scm,v 1.26 2005/04/18 11:15:38 neil Exp $ ;;; @legal ;;; Copyright @copyright{} 2005 Neil W. Van Dyke. This program is Free ;;; Software; you can redistribute it and/or modify it under the terms of the ;;; GNU Lesser General Public License as published by the Free Software ;;; Foundation; either version 2.1 of the License, or (at your option) any ;;; later version. This program is distributed in the hope that it will be ;;; useful, but without any warranty; without even the implied warranty of ;;; merchantability or fitness for a particular purpose. See ;;; @url{http://www.gnu.org/copyleft/lesser.html} for details. For other ;;; license options and consulting, contact the author. ;;; @end legal ;; (load "../testeez/testeez.scm") (define-syntax %bencode:testeez (syntax-rules () ((_ x ...) ;; (testeez x ...) (error "Tests disabled.") ))) ;;; @section Introduction ;;; @code{bencode.scm} parses the @dfn{bencoding} format of the BitTorrent ;;; network protocol into basic Scheme data types (and currently PLT-specific ;;; byte strings). This is useful for inspecting @code{.torrent} files, and ;;; might later be used in the implementation of a BitTorrent client or ;;; protocol analyzer. ;;; ;;; The format interpretation is based on the undated ;;; @uref{http://www.bittorrent.com/protocol.html, BitTorrent protocol ;;; documentation Web page} as viewed on 2005-04-17. The mapping from ;;; those bencoding types to Scheme types is: ;;; ;;; @table @dfn ;;; ;;; @item String ;;; PLT byte string. ;;; ;;; @item Integer ;;; Scheme integer. ;;; ;;; @item List ;;; Scheme list. ;;; ;;; @item Dictionary ;;; Scheme list with the symbol @code{dictionary} as its head, and an ;;; association list as its tail. ;;; ;;; @end table ;;; ;;; @noindent ;;; For example, a parse of a certain real-world @code{.torrent} file: ;;; ;;; @lisp ;;; (unbencode (open-input-file "debian.torrent"))) ;;; @result{} ;;; ((dictionary ;;; (#"announce" . #"http://cdimage.debian.org:6969/announce") ;;; (#"comment" . #"Debian CD from cdimage.debian.org") ;;; (#"creation date" . 1105009474) ;;; (#"info" ;;; dictionary ;;; (#"length" . 600158208) ;;; (#"name" . #"debian-30r4-i386-binary-1.iso") ;;; (#"piece length" . 524288) ;;; (#"pieces" . @r{@i{[...large byte string...]}})))) ;;; @end lisp ;;; ;;; @code{bencode.scm} is currently specific to PLT 299/3xx, due to the need ;;; for byte I/O and some representation for byte sequences. Otherwise, the ;;; code has been written to require only R5RS, SRFI-6, and SRFI-23. ;; Byte Operations Portability: (define-syntax %bencode:peek-byte (syntax-rules () ((_ PORT) (peek-byte PORT)))) (define-syntax %bencode:read-byte (syntax-rules () ((_ PORT) (read-byte PORT)))) (define-syntax %bencode:write-byte (syntax-rules () ((_ BYTE PORT) (write-byte BYTE PORT)))) (define-syntax %bencode:eat-byte (syntax-rules () ((_ PORT) (%bencode:read-byte PORT)))) (define-syntax %bencode:open-output-bytes (syntax-rules () ((_) (open-output-bytes)))) (define-syntax %bencode:get-output-bytes (syntax-rules () ((_ PORT) (get-output-bytes PORT)))) ;; Errors: (define-syntax %bencode:premature-eof-error (syntax-rules () ((_) (error "bencoding premature eof")))) (define-syntax %bencode:invalid-char-error (syntax-rules () ((_ CHAR) (error "bencode invalid char:" CHAR)))) ;;; @section API ;;; @defproc unbencode-single port ;;; ;;; Parses a single bencoding object (and any child objects, in the case of a ;;; list or dictionary) from input port @var{port} and yields the Scheme ;;; representation. (define unbencode-single (letrec ((do-digits (lambda (port term num) (let ((c (%bencode:read-byte port))) ;; TODO: Maybe shift the "case" into the "cond", with range ;; test for the digits. Profile. (cond ((eof-object? c) (%bencode:premature-eof-error)) ((eqv? term c) num) (else (case c ((48) (do-digits port term (* 10 num) )) ((49) (do-digits port term (+ (* 10 num) 1))) ((50) (do-digits port term (+ (* 10 num) 2))) ((51) (do-digits port term (+ (* 10 num) 3))) ((52) (do-digits port term (+ (* 10 num) 4))) ((53) (do-digits port term (+ (* 10 num) 5))) ((54) (do-digits port term (+ (* 10 num) 6))) ((55) (do-digits port term (+ (* 10 num) 7))) ((56) (do-digits port term (+ (* 10 num) 8))) ((57) (do-digits port term (+ (* 10 num) 9))) (else (%bencode:invalid-char-error c)))))))) (do-string (lambda (port num) (let ((os (%bencode:open-output-bytes))) (let loop ((len (do-digits port 58 num))) (if (zero? len) (let ((bytes (%bencode:get-output-bytes os))) (close-output-port os) bytes) (let ((b (%bencode:read-byte port))) (if (eof-object? b) (%bencode:premature-eof-error) (begin (%bencode:write-byte b os) (loop (- len 1))))))))))) (lambda (port) (let ((c (%bencode:read-byte port))) ;; TODO: Maybe turn this if-case into a "cond", with range test for the ;; digits. Profile. (if (eof-object? c) #f (case c ((105) ;; "i" (let ((c (%bencode:peek-byte port))) (if (eqv? 45 c) ;; "-" (begin (%bencode:eat-byte port) (- (do-digits port 101 0))) (do-digits port 101 0)))) ((108) ;; "l" (let loop () (let ((c (%bencode:peek-byte port))) (cond ((eof-object? c) (%bencode:premature-eof-error)) ((eqv? 101 c) (%bencode:eat-byte port) '()) (else (cons (or (unbencode-single port) (%bencode:premature-eof-error)) (loop))))))) ((100) ;; "d" (cons 'dictionary (let loop () (let ((c (%bencode:peek-byte port))) (cond ((eof-object? c) (%bencode:premature-eof-error)) ((eqv? 101 c) (%bencode:eat-byte port) '()) (else (cons (cons (or (unbencode-single port) (%bencode:premature-eof-error)) (or (unbencode-single port) (%bencode:premature-eof-error))) (loop)))))))) ((48) (do-string port 0)) ((49) (do-string port 1)) ((50) (do-string port 2)) ((51) (do-string port 3)) ((52) (do-string port 4)) ((53) (do-string port 5)) ((54) (do-string port 6)) ((55) (do-string port 7)) ((56) (do-string port 8)) ((57) (do-string port 9)) (else (%bencode:invalid-char-error c)))))))) ;;; @defproc unbencode port ;;; ;;; Yields a list of the Scheme representations of all bencoding objects parsed ;;; from input port @var{port}. (define (unbencode port) (let ((obj (unbencode-single port))) (if obj (cons obj (unbencode port)) '()))) ;;; @section Tests ;;; The @code{bencode.scm} test suite can be enabled by editing the source code ;;; file and loading @uref{http://www.neilvandyke.org/testeez/, Testeez}. (define (%bencode:unbencode-string str) ;; TODO: This is at least internal-use-only because reading bytes from a ;; string is even messier, portability-wise. (let* ((port (open-input-string str)) (result (unbencode port))) (close-input-port port) result)) (define (%bencode:test) (%bencode:testeez "bencode.scm" (test/equal "" (%bencode:unbencode-string "4:spam") '(#"spam")) (test/equal "" (%bencode:unbencode-string "i3e") '(3)) (test/equal "" (%bencode:unbencode-string "i-3e") '(-3)) (test/equal "" (%bencode:unbencode-string "i0e") '(0)) (test/equal "" (%bencode:unbencode-string "i123e") '(123)) (test/equal "" (%bencode:unbencode-string "i-123e") '(-123)) (test/equal "" (%bencode:unbencode-string "l4:spam4:eggse") '((#"spam" #"eggs"))) (test/equal "" (%bencode:unbencode-string "d3:cow3:moo4:spam4:eggse") '((dictionary (#"cow" . #"moo") (#"spam" . #"eggs")))) (test/equal "" (%bencode:unbencode-string "d4:spaml1:a1:bee") '((dictionary (#"spam" . (#"a" #"b"))))) (test/equal "" (%bencode:unbencode-string (string-append "4:spami3ei-3ei0ei123ei-123el4:spam4:eggsed3:co" "w3:moo4:spam4:eggsed4:spaml1:a1:bee")) '(#"spam" 3 -3 0 123 -123 (#"spam" #"eggs") (dictionary (#"cow". #"moo") (#"spam" . #"eggs")) (dictionary (#"spam". (#"a" #"b"))))) )) ;;; @unnumberedsec History ;;; @table @asis ;;; ;;; @item Version 0.1 --- 2005-04-17 ;;; Initial release. ;;; ;;; @end table