! Copyright (C) 2009 Yun, Jonhyouk. ! See http://factorcode.org/license.txt for BSD license. USING: arrays assocs byte-arrays io io.encodings io.encodings.ascii io.encodings.iana io.files io.streams.string kernel locals math math.order math.parser memoize multiline namespaces sequences splitting ; IN: io.encodings.korean SINGLETON: cp949 ALIAS: ms949 cp949 ALIAS: euc-kr cp949 cp949 "EUC-KR" register-encoding table : cp949.txt-lines ( -- seq ) ! "cp949.txt" from ... ! "resource:work/io/encodings/korean/data/cp949.txt" ascii file-lines ; : (PCL-drop-comments) ( seq -- newseq ) [ "#" split1 drop ] map harvest ; : (PCL-split-column) ( line -- columns ) "\t" split 2 head ; : (PCL-parse-hex) ( s -- n ) 2 short tail hex> ; : (PCL-parse-line) ( line -- code-unicode ) (PCL-split-column) [ (PCL-parse-hex) ] map ; : process-codetable-lines ( lines -- assoc ) (PCL-drop-comments) [ (PCL-parse-line) ] map ; ! convert cp949 <-> unicode MEMO: cp949.txt>alist ( -- alist ) cp949.txt-lines process-codetable-lines ; cp949.txt>alist drop ! parse time hack MEMO: cp949.txt>alist* ( -- alist ) cp949.txt>alist [ reverse ] map ; cp949.txt>alist* drop ! parse time hack (2) MEMO: cp949->unicode ( b -- u ) cp949.txt>alist at ; MEMO: unicode->cp949 ( u -- b ) cp949.txt>alist* at ; :: (2b->1mb) ( c1 c2 -- mb ) c1 8 shift c2 + ; :: (1mb->1st) ( mb -- c1 ) mb HEX: ff00 bitand -8 shift ; :: (1mb->2nd) ( mb -- c2 ) mb HEX: ff bitand ; :: (1mb->2b) ( mb -- c1 c2 ) mb (1mb->1st) mb (1mb->2nd) ; : cp949-1st? ( n -- ? ) dup f = not [ HEX: 81 HEX: fe between? ] when ; : (1byte-unicode?) ( n -- ? ) 0 HEX: ff between? ; M: cp949 encode-char ( char stream encoding -- ) [let | encoding [ ] stream [ ] char [ ] | char unicode->cp949 (1byte-unicode?) [ char 1byte-array stream stream-write ] [ char unicode->cp949 (1mb->2b) 2byte-array stream stream-write ] if ] ; : (replace-eof) ( -- n ) HEX: fffd ; :: (decode-char) ( stream -- char/f ) stream stream-read1 dup cp949-1st? [ ] [ stream stream-read1 (2b->1mb) cp949->unicode ] if ; M: cp949 decode-char ( stream encoding -- char/f ) drop (decode-char) dup f = [ drop (replace-eof) ] [ ] if ; ! TODO: ! TODO: ! EOF