Paste: start of gedcom parser
Author: | doublec |
Mode: | factor |
Date: | Fri, 9 Oct 2009 08:19:45 |
Plain Text |
USING:
accessors
arrays
assocs
combinators
io
kernel
math
math.parser
namespaces
peg.ebnf
sequences
splitting
strings
vectors
;
IN: gene.gedcom
TUPLE: gedcom source version date note level stack current individuals families notes ;
TUPLE: individual name gender note events reference child-of spouse-of ;
TUPLE: family husband wife children events ;
TUPLE: note text ;
TUPLE: event type date place note ;
: <family> ( -- family )
family new
V{ } clone >>events
V{ } clone >>children ;
: <event> ( type -- event )
event new
swap >>type ;
: <individual> ( -- individual )
individual new
V{ } clone >>events
V{ } clone >>child-of
V{ } clone >>spouse-of ;
: <note> ( -- note )
note new ;
TUPLE: line level link type text ;
: <line> ( type -- line ) line new swap >>type ;
EBNF: parse-line
_ = (" ")*
number = [0-9]+ => [[ >string string>number ]]
link = "@" (!("@") .)+ "@" => [[ second >string "@" "@" surround ]]
type = [A-Z_]+ => [[ >string <line> ]]
text = .* => [[ >string ]]
rule = number:level _ link?:link _ type:type _ text:text => [[ type level >>level link >>link text >>text ]]
;EBNF
: <gedcom> ( -- gedcom )
gedcom new
dup >>current
V{ } clone >>stack
H{ } clone >>individuals
H{ } clone >>families
H{ } clone >>notes
-1 >>level ;
: with-gedcom ( gedcom quot -- )
gedcom swap with-variable ; inline
USE: prettyprint
: gedcom-last ( -- object )
gedcom get stack>> last ;
: gedcom-stack ( -- object )
gedcom get stack>> ;
: gedcom-level ( -- n )
gedcom get level>> ;
: gedcom-current ( -- n )
gedcom get current>> ;
: families ( -- assoc )
gedcom get families>> ;
: individuals ( -- assoc )
gedcom get individuals>> ;
: notes ( -- assoc )
gedcom get notes>> ;
: set-current ( object -- )
gedcom get swap >>current drop ;
: get-current ( -- object )
gedcom get current>> ;
: get-last ( -- object )
gedcom get stack>> last ;
: add-event ( event individual -- event )
dupd events>> push ;
: add-note ( note individual -- note )
over >>note drop ;
: handle-linked-line ( line -- )
dup type>> {
{ "FAM" [ link>> families [ drop <family> ] cache set-current ] }
{ "INDI" [ link>> individuals [ drop <individual> ] cache set-current ] }
{ "NOTE" [ dup text>> swap link>> notes [ drop <note> ] cache swap >>text get-last add-note set-current ] }
[ . drop ]
} case ;
: handle-unlinked-line ( line -- )
dup type>> {
{ "BIRT" [ drop "birth" <event> get-last add-event set-current ] }
{ "CHIL" [ text>> individuals [ drop <individual> ] cache dup get-last children>> push drop ] }
{ "CONC" [ text>> get-last text>> swap append get-last swap >>text drop ] }
{ "CONT" [ text>> get-last text>> swap "\n" glue get-last swap >>text drop ] }
{ "DATE" [ gedcom-last swap text>> >>date drop ] }
{ "DEAT" [ drop "death" <event> get-last add-event set-current ] }
{ "FAMC" [ text>> families [ drop <family> ] cache dup get-last child-of>> push drop ] }
{ "FAMS" [ text>> families [ drop <family> ] cache dup get-last spouse-of>> push drop ] }
{ "HEAD" [ drop gedcom get dup >>current drop ] }
{ "HUSB" [ text>> individuals [ drop <individual> ] cache get-last swap >>husband drop ] }
{ "MARR" [ drop "marriage" <event> get-last add-event set-current ] }
{ "NAME" [ gedcom-last swap text>> >>name drop ] }
{ "NOTE" [ text>> <note> swap >>text get-last add-note set-current ] }
{ "OCCU" [ drop "occupation" <event> get-last add-event set-current ] }
{ "PLAC" [ gedcom-last swap text>> >>place drop ] }
{ "RESI" [ drop "residence" <event> get-last add-event set-current ] }
{ "REFN" [ gedcom-last swap text>> >>reference drop ] }
{ "SEX" [ gedcom-last swap text>> >>gender drop ] }
{ "SOUR" [ gedcom-last swap text>> >>source drop ] }
{ "TRLR" [ drop ] }
{ "WIFE" [ text>> individuals [ drop <individual> ] cache get-last swap >>wife drop ] }
{ "VERS" [ gedcom-last swap text>> >>version drop ] }
[ . drop ]
} case ;
: handle-line ( line -- )
dup link>> [ handle-linked-line ] [ handle-unlinked-line ] if ;
: parse-gedcom-line ( string -- )
dup empty? [
drop
] [
parse-line
dup level>> gedcom-level 1 - <= [
gedcom-level over level>> - [ gedcom-stack pop drop ] times
] when
dup level>> gedcom-level 1 + = [
gedcom-current gedcom-stack push
] when
gedcom get over level>> >>level drop
handle-line
] if ;
: parse-gedcom-lines ( lines -- gedcom )
<gedcom> tuck [
[ parse-gedcom-line ] each
] with-gedcom ;
: parse-gedcom-string ( string -- gedcom )
string-lines parse-gedcom-lines ;
New Annotation