! Copyright (C) 2009 Yun, Jonhyouk. ! See http://factorcode.org/license.txt for BSD license. USING: assocs io io.encodings io.encodings.ascii io.encodings.iana io.files kernel locals math math.parser memoize sequences splitting byte-arrays ; IN: io.encodings.korean SINGLETON: cp949 ALIAS: ms949 cp949 ALIAS: euc-kr cp949 cp949 "EUC-KR" register-encoding table : cp949.txt-lines ( -- seq ) ! "cp949.txt" from ... ! "resource:work/io/encodings/korean/data/cp949.txt" ascii file-lines ; : (PCL-drop-comments) ( seq -- newseq ) [ "#" split1 drop ] map harvest ; : (PCL-split-column) ( line -- columns ) "\t" split 2 head ; : (PCL-parse-hex) ( s -- n ) 2 short tail hex> ; : (PCL-parse-line) ( line -- code-unicode ) (PCL-split-column) [ (PCL-parse-hex) ] map ; : process-codetable-lines ( lines -- assoc ) (PCL-drop-comments) [ (PCL-parse-line) ] map ; ! convert cp949 <-> unicode MEMO: cp949.txt>alist ( -- alist ) cp949.txt-lines process-codetable-lines ; MEMO: cp949.txt>alist* ( -- alist ) cp949.txt>alist [ reverse ] map ; MEMO: cp949->unicode ( b -- u ) cp949.txt>alist at ; MEMO: unicode->cp949 ( u -- b ) cp949.txt>alist* at ; :: (2b->1mb) ( c1 c2 -- mb ) c1 8 shift c2 + ; :: (1mb->1st) ( mb -- c1 ) mb "ff00" hex> bitand -8 shift ; :: (1mb->2nd) ( mb -- c2 ) mb "ff" hex> bitand ; :: (1mb->2b) ( mb -- c1 c2 ) mb (1mb->1st) mb (1mb->2nd) ; M: cp949 encode-char ( char stream encoding -- ) [let | encoding [ ] stream [ ] char [ ] | char unicode->cp949 (1mb->2b) 2byte-array stream stream-write ] ; M: cp949 decode-char ( stream encoding -- char/f ) [let | encoding [ ] stream [ ] c! [ f ] | stream stream-read1 c! c ascii? [ c ] [ c stream stream-read1 (2b->1mb) cp949->unicode ] if ] ; ! TODO: ! TODO: