Paste: start of gedcom parser

Author: doublec
Mode: factor
Date: Fri, 9 Oct 2009 08:19:45
Plain Text |
! Copyright (C) 2009 Chris Double. All Rights Reserved.
! See http://factorcode.org/license.txt for BSD license.
USING:
    accessors
    arrays
    assocs
    combinators
    io
    kernel
    math
    math.parser
    namespaces
    peg.ebnf
    sequences
    splitting
    strings
    vectors
;
IN: gene.gedcom

TUPLE: gedcom source version date note level stack current individuals families notes ;
TUPLE: individual name gender note events reference child-of spouse-of ;
TUPLE: family husband wife children events ;
TUPLE: note text ;
TUPLE: event type date place note ;

: <family> ( -- family )
    family new 
        V{ } clone >>events
        V{ } clone >>children ;

: <event> ( type -- event )
    event new 
        swap >>type ;

: <individual> ( -- individual )
    individual new 
        V{ } clone >>events
        V{ } clone >>child-of
        V{ } clone >>spouse-of ;

: <note> ( -- note )
    note new ;

TUPLE: line level link type text ;

: <line> ( type -- line ) line new swap >>type ;

EBNF: parse-line 
    _      = (" ")*
    number = [0-9]+ => [[ >string string>number ]]
    link   = "@" (!("@") .)+ "@" => [[ second >string "@" "@" surround ]]
    type   = [A-Z_]+ => [[ >string <line> ]]
    text   = .* => [[ >string ]]
    rule   = number:level _ link?:link _ type:type _ text:text => [[ type level >>level link >>link text >>text ]]
;EBNF

: <gedcom> ( -- gedcom )
    gedcom new
        dup >>current
        V{ } clone >>stack 
        H{ } clone >>individuals
        H{ } clone >>families
        H{ } clone >>notes
        -1 >>level ;

: with-gedcom ( gedcom quot -- )
    gedcom swap with-variable ; inline

USE: prettyprint

: gedcom-last ( -- object )
    gedcom get stack>> last ;

: gedcom-stack ( -- object ) 
    gedcom get stack>> ;

: gedcom-level ( -- n )
    gedcom get level>> ;

: gedcom-current ( -- n )
    gedcom get current>> ;

: families ( -- assoc )
    gedcom get families>> ;

: individuals ( -- assoc )
    gedcom get individuals>> ;

: notes ( -- assoc )
    gedcom get notes>> ;

: set-current ( object -- )
    gedcom get swap >>current drop ;

: get-current ( -- object )
    gedcom get current>> ;

: get-last ( -- object )
    gedcom get stack>> last ;

: add-event ( event individual -- event )
    dupd events>> push ;

: add-note ( note individual -- note )
    over >>note drop ;

: handle-linked-line ( line -- )
    dup type>> {
        { "FAM"  [ link>> families [ drop <family> ] cache set-current ] }
        { "INDI" [ link>> individuals [ drop <individual> ] cache set-current ] }
        { "NOTE" [ dup text>> swap link>> notes [ drop <note> ] cache swap >>text get-last add-note set-current ] }
        [ . drop ] 
    } case ;

: handle-unlinked-line ( line -- )
    dup type>> {
        { "BIRT" [ drop "birth" <event> get-last add-event set-current ] }
        { "CHIL" [ text>> individuals [ drop <individual> ] cache dup get-last children>> push drop ] }
        { "CONC" [ text>> get-last text>> swap append get-last swap >>text drop ] }
        { "CONT" [ text>> get-last text>> swap "\n" glue get-last swap >>text drop ] }
        { "DATE" [ gedcom-last swap text>> >>date drop ] }
        { "DEAT" [ drop "death" <event> get-last add-event set-current ] }
        { "FAMC" [ text>> families [ drop <family> ] cache dup get-last child-of>> push drop ] }
        { "FAMS" [ text>> families [ drop <family> ] cache dup get-last spouse-of>> push drop ] }
        { "HEAD" [ drop gedcom get dup >>current drop ] }
        { "HUSB" [ text>> individuals [ drop <individual> ] cache get-last swap >>husband drop ] }
        { "MARR" [ drop "marriage" <event> get-last add-event set-current ] }
        { "NAME" [ gedcom-last swap text>> >>name drop ] }
        { "NOTE" [ text>> <note> swap >>text get-last add-note set-current ] }
        { "OCCU" [ drop "occupation" <event> get-last add-event set-current ] }
        { "PLAC" [ gedcom-last swap text>> >>place drop ] }
        { "RESI" [ drop "residence" <event> get-last add-event set-current ] }
        { "REFN" [ gedcom-last swap text>> >>reference drop ] }
        { "SEX"  [ gedcom-last swap text>> >>gender drop ] }
        { "SOUR" [ gedcom-last swap text>> >>source drop ] }
        { "TRLR" [ drop ] }
        { "WIFE" [ text>> individuals [ drop <individual> ] cache get-last swap >>wife drop ] }
        { "VERS" [ gedcom-last swap text>> >>version drop ] }
        [ . drop ] 
    } case ;

: handle-line ( line -- )
    dup link>> [ handle-linked-line ] [ handle-unlinked-line ] if ;

: parse-gedcom-line ( string -- )
    dup empty? [ 
        drop 
    ] [ 
        parse-line ! "Line " write gedcom get stack>> length gedcom-level 2array . dup . 
        dup level>> gedcom-level 1 - <= [
            gedcom-level over level>> - [ gedcom-stack pop drop ] times
        ] when
        dup level>> gedcom-level 1 + = [
            gedcom-current gedcom-stack push
        ] when
        gedcom get over level>> >>level drop
        handle-line 
    ] if ;

: parse-gedcom-lines ( lines -- gedcom )
    <gedcom> tuck [
         [ parse-gedcom-line ] each
    ] with-gedcom ;

: parse-gedcom-string ( string -- gedcom )
    string-lines parse-gedcom-lines ;

New Annotation

Summary:
Author:
Mode:
Body: