Paste: io.encodings.korean -- cp949 working...

Author: ageldama
Mode: factor
Date: Mon, 9 Feb 2009 14:25:14
Plain Text |
! Copyright (C) 2009 Yun, Jonhyouk.
! See http://factorcode.org/license.txt for BSD license.
USING: assocs io io.encodings io.encodings.ascii io.encodings.iana
io.files kernel locals math math.parser memoize sequences splitting byte-arrays ;
IN: io.encodings.korean


SINGLETON: cp949

ALIAS: ms949 cp949
ALIAS: euc-kr cp949

cp949 "EUC-KR" register-encoding



<PRIVATE

! parse cp949.txt -> table

: cp949.txt-lines ( -- seq )
    ! "cp949.txt" from ...
    ! <http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT>
    "resource:work/io/encodings/korean/data/cp949.txt"
    ascii file-lines ;

: (PCL-drop-comments) ( seq -- newseq )
    [ "#" split1 drop ] map harvest ;

: (PCL-split-column) ( line -- columns )
    "\t" split 2 head ;

: (PCL-parse-hex) ( s -- n )
    2 short tail hex> ;

: (PCL-parse-line) ( line -- code-unicode )
    (PCL-split-column)
    [ (PCL-parse-hex) ] map ;

: process-codetable-lines ( lines -- assoc )
    (PCL-drop-comments)
    [ (PCL-parse-line) ] map ;

! convert cp949 <-> unicode

MEMO: cp949.txt>alist ( -- alist )
    cp949.txt-lines process-codetable-lines ;

MEMO: cp949.txt>alist* ( -- alist )
    cp949.txt>alist [ reverse ] map ;

MEMO: cp949->unicode ( b -- u )
    cp949.txt>alist at ;

MEMO: unicode->cp949 ( u -- b )
    cp949.txt>alist* at ;

:: (2b->1mb) ( c1 c2 -- mb )
    c1 8 shift c2 + ;

:: (1mb->1st) ( mb -- c1 )
    mb "ff00" hex> bitand -8 shift ;

:: (1mb->2nd) ( mb -- c2 )
    mb "ff" hex> bitand ;

:: (1mb->2b) ( mb -- c1 c2 )
    mb (1mb->1st)
    mb (1mb->2nd) ;


M: cp949 encode-char ( char stream encoding -- )
    [let | encoding [ ]
           stream [ ]
           char [ ] |
        char unicode->cp949 (1mb->2b) 2byte-array
        stream stream-write ] ;

M: cp949 decode-char ( stream encoding -- char/f )
    [let | encoding [ ]
           stream [ ]
           c! [ f ] |
        stream stream-read1 c!
        c ascii?
        [ c ]
        [ c
          stream stream-read1
          (2b->1mb)
          cp949->unicode ]
        if ] ;



! TODO: <encoder>

! TODO: <decoder>

New Annotation

Summary:
Author:
Mode:
Body: