! Copyright (C) 2009 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. USING: accessors assocs io io.encodings.binary io.streams.byte-array kernel sequences prettyprint vectors sets math streams.object bitstreams combinators byte-arrays ; IN: lzw CONSTANT: clear-code 256 CONSTANT: end-of-information 257 TUPLE: lzw table count omega omega-k output #bits code old-code ; ERROR: index-too-big n ; : lzw-bit-width ( n -- n' ) { { [ dup 510 <= ] [ drop 9 ] } { [ dup 1022 <= ] [ drop 10 ] } { [ dup 2046 <= ] [ drop 11 ] } { [ dup 4094 <= ] [ drop 12 ] } [ index-too-big ] } cond ; : lzw-bit-width-compress ( lzw -- n ) count>> lzw-bit-width ; : lzw-bit-width-uncompress ( lzw -- n ) table>> length lzw-bit-width ; : initial-compress-table ( -- assoc ) 258 iota [ [ 1vector ] keep ] H{ } map>assoc ; : initial-uncompress-table ( -- seq ) 258 iota [ 1vector ] V{ } map-as ; : reset-lzw ( lzw -- lzw ) 257 >>count V{ } clone >>omega V{ } clone >>omega-k 9 >>#bits ; : reset-lzw-compress ( lzw -- lzw ) initial-compress-table >>table reset-lzw ; : reset-lzw-uncompress ( lzw -- lzw ) initial-uncompress-table >>table reset-lzw ; : ( -- obj ) lzw new V{ } clone >>output reset-lzw-compress ; : ( -- obj ) lzw new V{ } clone >>output reset-lzw-uncompress ; : push-k ( lzw ch -- lzw ) over omega>> clone [ push ] keep >>omega-k ; : omega-k-in-table? ( lzw -- ? ) [ omega-k>> ] [ table>> ] bi key? ; ERROR: not-in-table ; : write-output ( lzw -- ) [ [ omega>> ] [ table>> ] bi at* [ not-in-table ] unless ] [ output>> push ] bi ; : omega-k>omega ( lzw -- lzw ) dup omega-k>> clone >>omega ; : new-omega ( lzw -- lzw ) dup omega-k>> 1 tail* >>omega ; : add-omega-k ( lzw -- ) [ [ 1+ ] change-count count>> ] [ omega-k>> ] [ table>> ] tri set-at ; : lzw-compress-char ( lzw -- ) read1 [ push-k [ dup omega-k-in-table? [ omega-k>omega drop ] [ [ write-output ] [ add-omega-k ] [ new-omega drop ] tri ] if ] [ lzw-compress-char ] bi ] [ drop ] if* ; : lzw-compress ( byte-array -- seq ) binary [ [ lzw-compress-char ] keep output>> ] with-input-stream ; : lookup-old-code ( lzw -- vector ) [ old-code>> ] [ table>> ] bi nth ; : lookup-code ( lzw -- vector ) [ code>> ] [ table>> ] bi nth ; : code-in-table? ( lzw -- ? ) [ code>> ] [ table>> length ] bi < ; : code>old-code ( lzw -- lzw ) dup code>> >>old-code ; : write-code ( lzw -- ) [ lookup-code ] [ output>> ] bi push ; : add-to-table ( seq lzw -- ) table>> push ; : lzw-read ( lzw -- lzw n ) dup lzw-bit-width-uncompress read ; DEFER: lzw-uncompress-char : handle-clear-code ( lzw -- ) reset-lzw-uncompress lzw-read dup end-of-information = [ 2drop ! return ] [ >>code [ write-code ] [ code>old-code ] bi lzw-uncompress-char ] if ; : handle-uncompress-code ( lzw -- lzw ) dup code-in-table? [ [ write-code ] [ [ [ lookup-old-code ] [ lookup-code first ] bi suffix ] [ add-to-table ] bi ] [ code>old-code ] tri ] [ [ [ lookup-old-code dup first suffix ] keep [ output>> push ] [ add-to-table ] 2bi ] [ code>old-code ] bi ] if ; : lzw-uncompress-char ( lzw -- ) lzw-read [ >>code dup code>> end-of-information = [ drop ] [ dup code>> clear-code = [ handle-clear-code ] [ handle-uncompress-code lzw-uncompress-char ] if ] if ] [ drop ! return ] if* ; : lzw-uncompress ( seq -- byte-array ) binary [ [ lzw-uncompress-char ] [ output>> concat >byte-array ] bi ] with-input-stream ;