! (c)2010 Joe Groff bsd license USING: accessors arrays assocs calendar calendar.format combinators fry io io.directories io.encodings.utf8 io.files io.files.info io.files.types io.pathnames kernel locals math math.parser sequences sorting strings unicode.categories xml.syntax xml.writer xmode.catalog xmode.marker xmode.tokens ; IN: codebook ! Usage: "my/source/tree" utf8 "dest" codebook ! Writes tree.opf, tree.ncx, and tree.html to the "dest" directory ! Use kindlegen -unicode dest/tree.opf to compile tree.mobi file for Kindle CONSTANT: codebook-style { { COMMENT1 [ [XML <-> XML] ] } { COMMENT2 [ [XML <-> XML] ] } { COMMENT3 [ [XML <-> XML] ] } { COMMENT4 [ [XML <-> XML] ] } { DIGIT [ [XML <-> XML] ] } { FUNCTION [ [XML <-> XML] ] } { KEYWORD1 [ [XML <-> XML] ] } { KEYWORD2 [ [XML <-> XML] ] } { KEYWORD3 [ [XML <-> XML] ] } { KEYWORD4 [ [XML <-> XML] ] } { LABEL [ [XML <-> XML] ] } { LITERAL1 [ [XML <-> XML] ] } { LITERAL2 [ [XML <-> XML] ] } { LITERAL3 [ [XML <-> XML] ] } { LITERAL4 [ [XML <-> XML] ] } { MARKUP [ [XML <-> XML] ] } { OPERATOR [ [XML <-> XML] ] } [ drop ] } : first-line ( filename encoding -- line ) [ readln ] with-file-reader ; : code-files ( dir encoding -- files ) '[ [ link-info type>> +regular-file+ = ] filter [ dup [ ] [ _ first-line ] bi ?find-mode ] { } map>assoc [ nip ] assoc-filter [ first ] sort-with ] with-directory-tree-files ; : anchor-char ( char -- str ) { { [ dup alpha? ] [ 1string ] } { [ dup digit? ] [ 1string ] } [ >hex 6 CHAR: 0 pad-head "_" "_" surround ] } cond ; : file-anchor-name ( file -- name ) [ anchor-char ] { } map-as concat ; : toc-list ( files -- list ) natural-sort [ [ file-anchor-name "#" prepend ] [ ] bi [XML
  • ><->
  • XML] ] map ; ! insert zero-width non-joiner between all characters so words can wrap anywhere : zwnj ( string -- s|t|r|i|n|g ) [ CHAR: \u00200c "" 2sequence ] { } map-as concat ; ! We wrap every line in because Kindle tends to forget the font when ! moving back pages : htmlize-tokens ( tokens line# -- html-tokens ) swap [ [ str>> zwnj ] [ id>> ] bi codebook-style case ] map [XML <-> <-> XML] "\n" 2array ; : line#>string ( i line#len -- i-string ) [ number>string ] [ CHAR: \s pad-head ] bi* ; ! tags have to go outside header tags or header formatting will be lost ! when using NCX navigation :: file-page ( file mode encoding -- page ) "Generating HTML for " write file write "..." print flush file encoding file-lines :> lines lines length 1 + number>string length :> line#len file file-anchor-name :> anchor mode load-mode :> rules f lines [| l i | l rules tokenize-line i 1 + line#len line#>string htmlize-tokens ] map-index concat nip :> html-lines [XML >

    <-file->

    <-html-lines->
    XML] ; :: code>html ( dir name files encoding -- html ) "Generating HTML" print flush now timestamp>rfc822 :> timestamp dir absolute-path :> source dir [ files keys toc-list :> toc files [ encoding file-page ] { } assoc>map :> pages <-name->

    <-name->

    Generated from
    <-source->
    at <-timestamp->


      <-toc->
    <-pages-> XML> ] with-directory ; :: code>ncx ( dir name files -- xml ) "Generating NCX table of contents" print flush name ".html" append :> html-name html-name "#toc" append :> toc-url files keys [| file i | file file-anchor-name :> anchor i 2 + number>string :> istr html-name "#" anchor 3append :> url [XML playOrder=<-istr->> <-file-> /> XML] ] map-index :> file-nav-points Table of Contents /> <-file-nav-points-> XML> ; :: code>opf ( dir name -- xml ) "Generating OPF manifest" print flush name ".html" append :> html-name html-name "#toc" append :> toc-url name ".ncx" append :> ncx-name > <-name-> en href=<-html-name-> media-type="text/html" /> media-type="application/x-dtbncx+xml" /> /> /> XML> ; : write-dest-file ( xml dest-dir name ext -- ) append append-path utf8 [ write-xml ] with-file-writer ; :: codebook ( src-dir encoding dest-dir -- ) "Generating ebook for " write src-dir write " in " write dest-dir print flush src-dir file-name :> name src-dir encoding code-files :> files src-dir name files encoding code>html dest-dir name ".html" write-dest-file src-dir name files code>ncx dest-dir name ".ncx" write-dest-file src-dir name code>opf dest-dir name ".opf" write-dest-file "Job's finished" print flush ;