! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: peg peg.ebnf smalltalk.ast sequences sequences.deep strings math.parser kernel arrays byte-arrays math assocs accessors ; IN: smalltalk.parser ! :mode=text:noTabs=true: ! Based on http://chronos-st.blogspot.com/2007/12/smalltalk-in-one-page.html ERROR: bad-number str ; : check-number ( str -- n ) >string dup string>number [ ] [ bad-number ] ?if ; EBNF: parse-smalltalk Character = . WhitespaceCharacter = (" " | "\t" | "\n" | "\r" ) DecimalDigit = [0-9] Letter = [A-Za-z] CommentCharacter = [^"] | '""' => [[ CHAR: " ]] Comment = '"' (CommentCharacter)*:s '"' => [[ s >string ast-comment boa ]] OptionalWhiteSpace = (WhitespaceCharacter | Comment)* Whitespace = (WhitespaceCharacter | Comment)+ LetterOrDigit = DecimalDigit | Letter Identifier = (Letter | "_"):h (LetterOrDigit | "_")*:t => [[ { h t } flatten >string ]] Reference = Identifier => [[ ast-name boa ]] ConstantReference = "nil" => [[ nil ]] | "false" => [[ f ]] | "true" => [[ t ]] PseudoVariableReference = "self" => [[ self ]] | "super" => [[ super ]] ReservedIdentifier = PseudoVariableReference | ConstantReference BindableIdentifier = Identifier UnaryMessageSelector = Identifier Keyword = Identifier:i ":" => [[ i ":" append ]] KeywordMessageSelector = Keyword+ => [[ concat ]] BinarySelectorChar = "~" | "!" | "@" | "%" | "&" | "*" | "-" | "+" | "=" | "|" | "\" | "<" | ">" | "," | "?" | "/" BinaryMessageSelector = BinarySelectorChar+ => [[ concat ]] OptionalMinus = ("-" => [[ CHAR: - ]])? IntegerLiteral = (OptionalMinus:m UnsignedIntegerLiteral:i) => [[ i m [ neg ] when ]] UnsignedIntegerLiteral = Radix:r "r" BaseNIntegerLiteral:b => [[ b >string r base> ]] | DecimalIntegerLiteral => [[ check-number ]] DecimalIntegerLiteral = DecimalDigit+ Radix = DecimalIntegerLiteral => [[ check-number ]] BaseNIntegerLiteral = LetterOrDigit+ FloatingPointLiteral = (OptionalMinus DecimalIntegerLiteral ("." => [[ CHAR: . ]] DecimalIntegerLiteral Exponent? | Exponent)) => [[ flatten check-number ]] Exponent = "e" => [[ CHAR: e ]] (OptionalMinus DecimalIntegerLiteral)? CharacterLiteral = "$" Character:c => [[ c ]] StringLiteral = "'" (StringLiteralCharacter | "''" => [[ CHAR: ' ]])*:s "'" => [[ s >string ]] StringLiteralCharacter = [^'] SymbolInArrayLiteral = KeywordMessageSelector | UnaryMessageSelector | BinaryMessageSelector SymbolLiteral = "#" (SymbolInArrayLiteral | StringLiteral):s => [[ s intern ]] ArrayLiteral = (ObjectArrayLiteral | ByteArrayLiteral) ObjectArrayLiteral = "#" NestedObjectArrayLiteral:elts => [[ elts ]] NestedObjectArrayLiteral = "(" OptionalWhiteSpace (LiteralArrayElement:h (Whitespace LiteralArrayElement:e => [[ e ]])*:t => [[ t h prefix ]] )?:elts OptionalWhiteSpace ")" => [[ elts >array ]] LiteralArrayElement = Literal | NestedObjectArrayLiteral | SymbolInArrayLiteral | ConstantReference ByteArrayLiteral = "#[" OptionalWhiteSpace (UnsignedIntegerLiteral:h (Whitespace UnsignedIntegerLiteral:i => [[ i ]])*:t => [[ t h prefix ]] )?:elts OptionalWhiteSpace "]" => [[ elts >byte-array ]] FormalBlockArgumentDeclaration = ":" BindableIdentifier:i => [[ i ]] FormalBlockArgumentDeclarationList = FormalBlockArgumentDeclaration:h (Whitespace FormalBlockArgumentDeclaration:v => [[ v ]])*:t => [[ t h prefix ]] BlockLiteral = "[" (OptionalWhiteSpace FormalBlockArgumentDeclarationList:args OptionalWhiteSpace "|" => [[ args ]] )?:args ExecutableCode:body "]" => [[ args >array body ast-block boa ]] Literal = (ConstantReference | FloatingPointLiteral | IntegerLiteral | CharacterLiteral | StringLiteral | ArrayLiteral | SymbolLiteral | BlockLiteral) NestedExpression = "(" Statement:s OptionalWhiteSpace ")" => [[ s ]] Operand = Literal | PseudoVariableReference | Reference | NestedExpression UnaryMessage = OptionalWhiteSpace UnaryMessageSelector:s !(":") => [[ s { } ast-message boa ]] UnaryMessageOperand = UnaryMessageSend | Operand UnaryMessageSend = UnaryMessageOperand:receiver UnaryMessage:h (OptionalWhiteSpace ";" UnaryMessage:m => [[ m ]])*:t => [[ receiver t h prefix >array ]] BinaryMessage = OptionalWhiteSpace BinaryMessageSelector:selector OptionalWhiteSpace BinaryMessageOperand:rhs => [[ selector { rhs } ast-message boa ]] BinaryMessageOperand = BinaryMessageSend | UnaryMessageSend | Operand BinaryMessageSend = (BinaryMessageSend | UnaryMessageSend | Operand):lhs BinaryMessage:h (OptionalWhiteSpace ";" BinaryMessage:m => [[ m ]])*:t => [[ lhs t h prefix >array ]] KeywordMessageSegment = Keyword:k OptionalWhiteSpace BinaryMessageOperand:arg => [[ { k arg } ]] KeywordMessage = OptionalWhiteSpace KeywordMessageSegment:h (OptionalWhiteSpace KeywordMessageSegment:s => [[ s ]])*:t => [[ t h prefix unzip [ concat ] dip ast-message boa ]] KeywordMessageSend = (BinaryMessageSend | UnaryMessageSend | Operand):receiver OptionalWhiteSpace KeywordMessage:m => [[ receiver m 1array ]] Message = BinaryMessage | UnaryMessage | KeywordMessage MessageSend = (MessageSend | Operand):lhs Message:h (OptionalWhiteSpace ";" Message:m => [[ m ]])*:t => [[ lhs t h prefix >array ]] Expression = OptionalWhiteSpace (MessageSend | Operand):e => [[ e ]] AssignmentOperation = OptionalWhiteSpace BindableIdentifier:i OptionalWhiteSpace ":=" OptionalWhiteSpace => [[ i ast-name boa ]] AssignmentStatement = AssignmentOperation:a Statement:s => [[ a s ast-assignment boa ]] Statement = ClassDeclaration | ForeignClassDeclaration | AssignmentStatement | Expression MethodReturnOperator = OptionalWhiteSpace "^" FinalStatement = (MethodReturnOperator Statement:s => [[ s ast-return boa ]]) | Statement LocalVariableDeclarationList = OptionalWhiteSpace "|" OptionalWhiteSpace (BindableIdentifier:h (Whitespace BindableIdentifier:b => [[ b ]])*:t => [[ t h prefix ]] )?:b OptionalWhiteSpace "|" => [[ b >array ast-local-variables boa ]] ExecutableCode = (LocalVariableDeclarationList)?:locals ((Statement:s OptionalWhiteSpace "." => [[ s ]])*:h FinalStatement:t (".")? => [[ h t suffix ]])?:body OptionalWhiteSpace => [[ body locals [ suffix ] when* >array ]] TopLevelForm = ExecutableCode => [[ ast-sequence boa ]] UnaryMethodHeader = UnaryMessageSelector:selector => [[ { selector { } } ]] BinaryMethodHeader = BinaryMessageSelector:selector OptionalWhiteSpace BindableIdentifier:identifier => [[ { selector { identifier } } ]] KeywordMethodHeaderSegment = Keyword:keyword OptionalWhiteSpace BindableIdentifier:identifier => [[ { keyword identifier } ]] KeywordMethodHeader = KeywordMethodHeaderSegment:h (Whitespace KeywordMethodHeaderSegment:s => [[ s ]])*:t => [[ t h prefix unzip [ concat ] dip 2array ]] MethodHeader = KeywordMethodHeader | BinaryMethodHeader | UnaryMethodHeader MethodDeclaration = OptionalWhiteSpace "method" OptionalWhiteSpace MethodHeader:header OptionalWhiteSpace "[" ExecutableCode:code "]" => [[ header first2 code ast-block boa ast-method boa ]] ClassDeclaration = OptionalWhiteSpace "class" OptionalWhiteSpace Identifier:name OptionalWhiteSpace ("extends" OptionalWhiteSpace Identifier:superclass OptionalWhiteSpace => [[ superclass ]])?:superclass OptionalWhiteSpace "[" (OptionalWhiteSpace LocalVariableDeclarationList:l => [[ l names>> ]])?:ivars (MethodDeclaration:h (OptionalWhiteSpace "." OptionalWhiteSpace MethodDeclaration:m => [[ m ]])*:t (".")? => [[ t h prefix ]] )?:methods OptionalWhiteSpace "]" => [[ name superclass "Object" or ivars >array methods >array ast-class boa ]] ForeignClassDeclaration = OptionalWhiteSpace "foreign" OptionalWhiteSpace Identifier:name OptionalWhiteSpace Literal:class => [[ class name ast-foreign boa ]] End = !(.) Program = TopLevelForm End ;EBNF