Paste: smalltalk parser

Author: slava
Mode: text
Date: Wed, 1 Apr 2009 02:51:38
Plain Text |
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: peg peg.ebnf smalltalk.ast sequences sequences.deep strings
math.parser kernel arrays byte-arrays math assocs accessors ;
IN: smalltalk.parser

! :mode=text:noTabs=true:

! Based on http://chronos-st.blogspot.com/2007/12/smalltalk-in-one-page.html

ERROR: bad-number str ;

: check-number ( str -- n )
    >string dup string>number [ ] [ bad-number ] ?if ;

EBNF: parse-smalltalk

Character = .
WhitespaceCharacter = (" " | "\t" | "\n" | "\r" )
DecimalDigit = [0-9]
Letter = [A-Za-z]

CommentCharacter = [^"] | '""' => [[ CHAR: " ]]
Comment = '"' (CommentCharacter)*:s '"' => [[ s >string ast-comment boa ]]

OptionalWhiteSpace = (WhitespaceCharacter | Comment)*
Whitespace = (WhitespaceCharacter | Comment)+

LetterOrDigit = DecimalDigit | Letter
Identifier = (Letter | "_"):h (LetterOrDigit | "_")*:t => [[ { h t } flatten >string ]]
Reference = Identifier => [[ ast-name boa ]]

ConstantReference =   "nil" => [[ nil ]]
                    | "false" => [[ f ]]
                    | "true" => [[ t ]]
PseudoVariableReference =   "self" => [[ self ]]
                          | "super" => [[ super ]]
ReservedIdentifier = PseudoVariableReference | ConstantReference

BindableIdentifier = Identifier

UnaryMessageSelector = Identifier

Keyword = Identifier:i ":" => [[ i ":" append ]]

KeywordMessageSelector = Keyword+ => [[ concat ]]
BinarySelectorChar =   "~" | "!" | "@" | "%" | "&" | "*" | "-" | "+"
                     | "=" | "|" | "\" | "<" | ">" | "," | "?" | "/"
BinaryMessageSelector = BinarySelectorChar+ => [[ concat ]]

OptionalMinus = ("-" => [[ CHAR: - ]])?
IntegerLiteral = (OptionalMinus:m UnsignedIntegerLiteral:i) => [[ i m [ neg ] when ]]
UnsignedIntegerLiteral =   Radix:r "r" BaseNIntegerLiteral:b => [[ b >string r base> ]]
                         | DecimalIntegerLiteral => [[ check-number ]]
DecimalIntegerLiteral = DecimalDigit+
Radix = DecimalIntegerLiteral => [[ check-number ]]
BaseNIntegerLiteral = LetterOrDigit+
FloatingPointLiteral = (OptionalMinus
                        DecimalIntegerLiteral
                        ("." => [[ CHAR: . ]] DecimalIntegerLiteral Exponent? | Exponent))
                        => [[ flatten check-number ]]
Exponent = "e" => [[ CHAR: e ]] (OptionalMinus DecimalIntegerLiteral)?

CharacterLiteral = "$" Character:c => [[ c ]]

StringLiteral = "'" (StringLiteralCharacter | "''" => [[ CHAR: ' ]])*:s "'"
                => [[ s >string ]]
StringLiteralCharacter = [^']

SymbolInArrayLiteral =   KeywordMessageSelector
                       | UnaryMessageSelector
                       | BinaryMessageSelector
SymbolLiteral = "#" (SymbolInArrayLiteral | StringLiteral):s => [[ s intern ]]

ArrayLiteral = (ObjectArrayLiteral | ByteArrayLiteral)
ObjectArrayLiteral = "#" NestedObjectArrayLiteral:elts => [[ elts ]]
NestedObjectArrayLiteral = "(" OptionalWhiteSpace
                           (LiteralArrayElement:h
                            (Whitespace LiteralArrayElement:e => [[ e ]])*:t
                            => [[ t h prefix ]]
                           )?:elts OptionalWhiteSpace ")" => [[ elts >array ]]

LiteralArrayElement =   Literal
                      | NestedObjectArrayLiteral
                      | SymbolInArrayLiteral
                      | ConstantReference

ByteArrayLiteral = "#[" OptionalWhiteSpace
                        (UnsignedIntegerLiteral:h
                         (Whitespace UnsignedIntegerLiteral:i => [[ i ]])*:t
                         => [[ t h prefix ]]
                        )?:elts OptionalWhiteSpace "]" => [[ elts >byte-array ]]

FormalBlockArgumentDeclaration = ":" BindableIdentifier:i => [[ i ]]
FormalBlockArgumentDeclarationList =
                FormalBlockArgumentDeclaration:h
                (Whitespace FormalBlockArgumentDeclaration:v => [[ v ]])*:t
                => [[ t h prefix ]]

BlockLiteral = "["
                (OptionalWhiteSpace
                 FormalBlockArgumentDeclarationList:args
                 OptionalWhiteSpace
                 "|"
                 => [[ args ]]
                )?:args
                ExecutableCode:body
                "]" => [[ args >array body ast-block boa ]]

Literal = (ConstantReference
                | FloatingPointLiteral
                | IntegerLiteral
                | CharacterLiteral
                | StringLiteral
                | ArrayLiteral
                | SymbolLiteral
                | BlockLiteral)

NestedExpression = "(" Statement:s OptionalWhiteSpace ")" => [[ s ]]
Operand =       Literal
                | PseudoVariableReference
                | Reference
                | NestedExpression

UnaryMessage = OptionalWhiteSpace
               UnaryMessageSelector:s !(":")
               => [[ s { } ast-message boa ]]
UnaryMessageOperand = UnaryMessageSend | Operand
UnaryMessageSend = UnaryMessageOperand:receiver
                   UnaryMessage:h
                   (OptionalWhiteSpace ";" UnaryMessage:m => [[ m ]])*:t
                   => [[ receiver t h prefix >array <ast-cascade> ]]

BinaryMessage = OptionalWhiteSpace
                BinaryMessageSelector:selector
                OptionalWhiteSpace
                BinaryMessageOperand:rhs
                => [[ selector { rhs } ast-message boa ]]
                                   
BinaryMessageOperand = BinaryMessageSend | UnaryMessageSend | Operand
BinaryMessageSend = (BinaryMessageSend | UnaryMessageSend | Operand):lhs
                    BinaryMessage:h
                   (OptionalWhiteSpace ";" BinaryMessage:m => [[ m ]])*:t
                   => [[ lhs t h prefix >array <ast-cascade> ]]

KeywordMessageSegment = Keyword:k OptionalWhiteSpace BinaryMessageOperand:arg => [[ { k arg } ]]
KeywordMessage = OptionalWhiteSpace
                 KeywordMessageSegment:h
                 (OptionalWhiteSpace KeywordMessageSegment:s => [[ s ]])*:t
                 => [[ t h prefix unzip [ concat ] dip ast-message boa ]]
KeywordMessageSend = (BinaryMessageSend | UnaryMessageSend | Operand):receiver
                     OptionalWhiteSpace
                     KeywordMessage:m
                     => [[ receiver m 1array <ast-cascade> ]]

Message = BinaryMessage | UnaryMessage | KeywordMessage

MessageSend = (MessageSend | Operand):lhs
              Message:h
              (OptionalWhiteSpace ";" Message:m => [[ m ]])*:t
              => [[ lhs t h prefix >array <ast-cascade> ]]

Expression = OptionalWhiteSpace
             (MessageSend | Operand):e
             => [[ e ]]

AssignmentOperation = OptionalWhiteSpace BindableIdentifier:i
                      OptionalWhiteSpace ":=" OptionalWhiteSpace => [[ i ast-name boa ]]
AssignmentStatement = AssignmentOperation:a Statement:s => [[ a s ast-assignment boa ]]
Statement = ClassDeclaration | ForeignClassDeclaration | AssignmentStatement | Expression

MethodReturnOperator = OptionalWhiteSpace "^"
FinalStatement = (MethodReturnOperator Statement:s => [[ s ast-return boa ]])
                 | Statement

LocalVariableDeclarationList = OptionalWhiteSpace "|" OptionalWhiteSpace
                (BindableIdentifier:h
                 (Whitespace BindableIdentifier:b => [[ b ]])*:t
                 => [[ t h prefix ]]
                )?:b OptionalWhiteSpace "|" => [[ b >array ast-local-variables boa ]]

ExecutableCode = (LocalVariableDeclarationList)?:locals
                 ((Statement:s OptionalWhiteSpace "." => [[ s ]])*:h
                 FinalStatement:t (".")? => [[ h t suffix ]])?:body
                 OptionalWhiteSpace
                 => [[ body locals [ suffix ] when* >array ]]

TopLevelForm = ExecutableCode => [[ ast-sequence boa ]]

UnaryMethodHeader = UnaryMessageSelector:selector
                  => [[ { selector { } } ]]
BinaryMethodHeader = BinaryMessageSelector:selector OptionalWhiteSpace BindableIdentifier:identifier
                   => [[ { selector { identifier } } ]]
KeywordMethodHeaderSegment = Keyword:keyword
                             OptionalWhiteSpace
                             BindableIdentifier:identifier => [[ { keyword identifier } ]]
KeywordMethodHeader = KeywordMethodHeaderSegment:h (Whitespace KeywordMethodHeaderSegment:s => [[ s ]])*:t
                    => [[ t h prefix unzip [ concat ] dip 2array ]]
MethodHeader =   KeywordMethodHeader
               | BinaryMethodHeader
               | UnaryMethodHeader
MethodDeclaration = OptionalWhiteSpace "method" OptionalWhiteSpace MethodHeader:header
        OptionalWhiteSpace "["
        ExecutableCode:code
        "]"
        => [[ header first2 code ast-block boa ast-method boa ]]

ClassDeclaration = OptionalWhiteSpace "class" OptionalWhiteSpace Identifier:name
        OptionalWhiteSpace
        ("extends" OptionalWhiteSpace Identifier:superclass OptionalWhiteSpace => [[ superclass ]])?:superclass
        OptionalWhiteSpace "["
        (OptionalWhiteSpace LocalVariableDeclarationList:l => [[ l names>> ]])?:ivars
        (MethodDeclaration:h
         (OptionalWhiteSpace
          "."
          OptionalWhiteSpace
          MethodDeclaration:m => [[ m ]])*:t (".")?
          => [[ t h prefix ]]
         )?:methods
        OptionalWhiteSpace "]"
        => [[ name superclass "Object" or ivars >array methods >array ast-class boa ]]

ForeignClassDeclaration = OptionalWhiteSpace "foreign"
                          OptionalWhiteSpace Identifier:name
                          OptionalWhiteSpace Literal:class
                          => [[ class name ast-foreign boa ]]
End = !(.)

Program = TopLevelForm End

;EBNF

Annotation: Test

Author: ad
Mode: smalltalk
Date: Tue, 29 Oct 2013 09:08:57
Plain Text |
cat cat cat: cat + cat cat: cat / cat

New Annotation

Summary:
Author:
Mode:
Body: