diff --git a/Monicelli.lpp b/Monicelli.lpp index 7d725a1..bb67002 100644 --- a/Monicelli.lpp +++ b/Monicelli.lpp @@ -22,7 +22,6 @@ #include "Parser.hpp" #include -#include static void meta(const char *); @@ -30,6 +29,12 @@ using namespace monicelli; typedef Parser::token token; #define YY_USER_ACTION location->begin.columns(yyleng); + +static inline +bool in(const char *sub, const std::string &str) { + return str.find(sub) != std::string::npos; +} + %} %option ecs stack warn c++ @@ -88,7 +93,7 @@ CHAR [a-zA-Z_] ("il"|"lo"|"la"|"l'"|"i"|"gli"|"le"|"un"|"un'"|"una"|"dei"|"delle") { return token::ARTICLE; } -"più" { +"pi"("ù"|"u`") { return token::OP_PLUS; } "meno" { @@ -142,7 +147,7 @@ CHAR [a-zA-Z_] "e "("b"|"p")"rematura anche, se" { return token::LOOP_CONDITION; } -"che cos'è" { +"che cos'"("è"|"e`") { return token::BRANCH_CONDITION; } "?" { @@ -151,14 +156,14 @@ CHAR [a-zA-Z_] "o tarapia tapioco" { return token::BRANCH_ELSE; } -"e velocità di esecuzione" { +"e velocit"("à"|"a`")" di esecuzione" { return token::BRANCH_END; } ":" { return token::COLON; } -"blinda la supercazzola" { - return token::FUNDECL; +"blinda la supercazzo"("r"|"l")"a" { + return token::FUN_DECL; } "con" { return token::PARAMS; @@ -166,8 +171,8 @@ CHAR [a-zA-Z_] "," { return token::COMMA; } -("b"|"p")"rematurata la supercazzola" { - return token::FUNCALL; +("b"|"p")"rematurata la supercazzo"("r"|"l")"a" { + return token::FUN_CALL; } "o scherziamo"("?")? { return token::FUN_END; @@ -191,13 +196,20 @@ CHAR [a-zA-Z_] return token::ID; } -{DIGIT}+ { - lval->intval = strtol(yytext, NULL, 10); - return token::NUMBER; +[-+]?(({DIGIT}*".")?{DIGIT}+|{DIGIT}+".")([eE][-+]?{DIGIT}+)? { + std::string value(yytext); + + if (in(".", value) || in("e", value) || in("E", value)) { + lval->floatval = std::stod(value); + return token::FLOAT; + } else { + lval->intval = std::stol(value); + return token::NUMBER; + } } . 
{ - return -1; + return token::ERROR; } %% diff --git a/Monicelli.ypp b/Monicelli.ypp index 993b05c..23e2ba0 100644 --- a/Monicelli.ypp +++ b/Monicelli.ypp @@ -1,6 +1,6 @@ /* * Monicelli: an esoteric language compiler - * + * * Copyright (C) 2014 Stefano Sanfilippo * * This program is free software: you can redistribute it and/or modify @@ -50,7 +50,7 @@ static int yylex(Parser::semantic_type*, Parser::location_type*, Scanner&); } -%token MAIN +%token MAIN ERROR %token RETURN %token ARTICLE TYPENAME STAR %token VARDECL ASSIGN @@ -59,7 +59,7 @@ %token LOOP_BEGIN LOOP_CONDITION %token BRANCH_CONDITION BRANCH_BEGIN BRANCH_ELSE BRANCH_END CASE_END %token COLON COMMA -%token FUNDECL PARAMS FUNCALL FUN_END +%token FUN_DECL PARAMS FUN_CALL FUN_END %token ABORT %token ID NUMBER FLOAT @@ -71,15 +71,6 @@ %nonassoc LOWER_THAN_ELSE %nonassoc BRANCH_ELSE -%code { -#include - -static std::stack stmtStack; -static std::stack argsStack; -static std::stack paramsStack; -static std::stack branchCaseStack; -} - %union { int intval; double floatval; @@ -94,39 +85,48 @@ static std::stack branchCaseStack; Input* inputval; Abort* abortval; Branch* branchval; + Branch::Body* branchbodyval; VarDeclaration* declval; Assignment* assignval; Loop* loopval; BranchCase *caseval; + BranchCaseList *caselistval; Return* returnval; Expression* expressionval; + ExpressionList* exprlistval; SemiExpression *semiexpval; Id* idval; Number* numericval; Function* funval; + FunArg *argval; + FunArgList *arglistval; Main* mainval; } %type NUMBER %type FLOAT %type ID -%type TYPENAME - +%type TYPENAME fun_return %type statement -%type branch_body +%type statements +%type branch_body %type assert_stmt %type fun_call +%type arg_decl +%type args_decl args %type fun_decl %type print_stmt %type input_stmt %type abort_stmt %type branch_stmt %type case_stmt +%type cases %type var_decl %type assign_stmt %type loop_stmt %type return_stmt %type expression maybe_expression simple_expression var_init +%type call_arglist 
call_args %type semi_expression %type variable %type numeric @@ -151,47 +151,45 @@ fun_decls: fun_decls ; fun_decl: - FUNDECL ID { - paramsStack.push(new IdList()); - } args FUN_END { - stmtStack.push(new StatementList()); - } - statements { - $$ = new Function(new Id($2), paramsStack.top(), stmtStack.top()); - paramsStack.pop(); - stmtStack.pop(); + FUN_DECL fun_return ID args FUN_END statements { + $$ = new Function(new Id($3), $2, $4, $6); } ; +fun_return: + /* epsilon */ { $$ = Type::VOID; } | TYPENAME { $$ = $1; } +; args: - /* epsilon */ | PARAMS arglist + /* epsilon */ { $$ = new FunArgList(); }| PARAMS args_decl { $$ = $2; } ; -arglist: - variable { - paramsStack.top()->push_back($1); +args_decl: + arg_decl { + $$ = new FunArgList(); + $$->push_back($1); } - | variable { - paramsStack.top()->push_back($1); + | args_decl COMMA arg_decl { + $1->push_back($3); + } +; +arg_decl: + variable pointer TYPENAME { + $$ = new FunArg($1, $3, $2); } - arglist ; main: - MAIN { - stmtStack.push(new StatementList()); - } - statements { - $$ = new Main(stmtStack.top()); - stmtStack.pop(); + MAIN statements { + $$ = new Main($2); } ; statements: /* epsilon */ { + $$ = new StatementList(); } - | statement { - if ($1 != nullptr) { - stmtStack.top()->push_back($1); + | statements statement { + if ($2 != nullptr) { + $1->push_back($2); } + $$ = $1; } - statements ; statement: assert_stmt { $$ = $1; } @@ -252,61 +250,59 @@ maybe_expression: expression { $$ = $1; } | /* epsilon */ { $$ = nullptr; } ; loop_stmt: - LOOP_BEGIN { - stmtStack.push(new StatementList()); - } - statements LOOP_CONDITION expression { - $$ = new Loop(stmtStack.top(), $5); - stmtStack.pop(); + LOOP_BEGIN statements LOOP_CONDITION expression { + $$ = new Loop($2, $4); } ; branch_stmt: - BRANCH_CONDITION variable BRANCH_BEGIN { - branchCaseStack.push(new BranchCaseList()); - } - branch_body BRANCH_END { - $$ = new Branch($2, branchCaseStack.top(), $5); - branchCaseStack.pop(); + BRANCH_CONDITION variable 
BRANCH_BEGIN branch_body BRANCH_END { + $$ = new Branch($2, $4); } ; branch_body: cases %prec LOWER_THAN_ELSE { - $$ = nullptr; + $$ = new Branch::Body($1); } - | cases BRANCH_ELSE COLON { - stmtStack.push(new StatementList()); - } - statements { - $$ = stmtStack.top(); - stmtStack.pop(); + | cases BRANCH_ELSE COLON statements { + $$ = new Branch::Body($1, $4); } ; cases: - case_stmt | case_stmt CASE_END cases + case_stmt { + $$ = new BranchCaseList(); + $$->push_back($1); + } + | cases CASE_END case_stmt { + $1->push_back($3); + $$ = $1; + } ; case_stmt: - semi_expression COLON { - stmtStack.push(new StatementList()); - } statements { - branchCaseStack.top()->push_back(new BranchCase($1, stmtStack.top())); - stmtStack.pop(); + semi_expression COLON statements { + $$ = new BranchCase($1, $3); } ; fun_call: - FUNCALL { - argsStack.push(new ExpressionList()); - } - ID call_args FUN_END { - $$ = new FunctionCall(new Id($3), argsStack.top()); - argsStack.pop(); + FUN_CALL ID call_args FUN_END { + $$ = new FunctionCall(new Id($2), $3); } ; call_args: - /* epsilon */ | PARAMS call_arglist + /* epsilon */ { + $$ = new ExpressionList(); + } + | PARAMS call_arglist { + $$ = $2; + } ; call_arglist: - expression { argsStack.top()->push_back($1); } - | expression { argsStack.top()->push_back($1); } COMMA call_arglist + expression { + $$ = new ExpressionList(); + $$->push_back($1); + } + | call_arglist COMMA expression { + $$->push_back($3); + } ; abort_stmt: ABORT { @@ -400,6 +396,7 @@ simple_expression: void Parser::error(const location_type& loc, const std::string &message) { std::cerr << "line " << loc.begin.line << ", col " << loc.begin.column; std::cerr << ": " << message << std::endl; + std::exit(1); } int yylex(Parser::semantic_type *lval, Parser::location_type *loc, Scanner &scanner) { diff --git a/Nodes.cpp b/Nodes.cpp index 92853f6..3dbc097 100644 --- a/Nodes.cpp +++ b/Nodes.cpp @@ -40,6 +40,9 @@ std::ostream& monicelli::operator<<(std::ostream &stream, const Type 
&type) { case Type::DOUBLE: stream << "double"; break; + case Type::VOID: + stream << "void"; + break; } return stream; @@ -119,9 +122,9 @@ void Branch::emit(std::ostream &stream, int indent) { stream << "if ("; var->emit(stream); - if (cases->size() > 0) { - BranchCase *last = cases->back(); - for (BranchCase *c: *cases) { + if (body->cases->size() > 0) { + BranchCase *last = body->cases->back(); + for (BranchCase *c: *body->cases) { c->emit(stream, indent + 1); if (c != last) { stream << " else if ("; @@ -130,12 +133,12 @@ void Branch::emit(std::ostream &stream, int indent) { } } - if (els == nullptr) { + if (body->els == nullptr) { return; } stream << " else {\n"; - els->emit(stream, indent + 1); + body->els->emit(stream, indent + 1); emitIndent(stream, indent); stream << "}"; } @@ -158,12 +161,16 @@ void Assignment::emit(std::ostream &stream, int indent) { } void Print::emit(std::ostream &stream, int indent) { - stream << "std::cout << "; + stream << "std::cout << ("; expression->emit(stream); - stream << " << std::endl"; + stream << ") << std::endl"; } void Input::emit(std::ostream &stream, int indent) { + stream << "std::cout << \""; + variable->emit(stream); + stream << "? \";\n"; + emitIndent(stream, indent); stream << "std::cin >> "; variable->emit(stream); } @@ -185,16 +192,26 @@ void FunctionCall::emit(std::ostream &stream, int indent) { stream << ")"; } +void FunArg::emit(std::ostream &stream, int indent) { + stream << type << (pointer? 
"* ": " "); + name->emit(stream); +} + void Function::emit(std::ostream &stream, int indent) { + emitSignature(stream, indent); + stream << " {\n"; + body->emit(stream, indent + 1); + stream << "}\n\n"; +} + +void Function::emitSignature(std::ostream &stream, int indent) { emitIndent(stream, indent); - stream << "void "; + stream << type << ' '; name->emit(stream); stream << "("; args->emit(stream); - stream << ") {\n"; - body->emit(stream, indent + 1); - stream << "}\n\n"; + stream << ")"; } void Main::emit(std::ostream &stream, int indent) { @@ -209,6 +226,15 @@ void Program::emit(std::ostream &stream, int indent) { stream << "#include \n"; stream << "#include \n\n"; + for (Function *f: functions) { + f->emitSignature(stream); + stream << ";\n"; + } + + if (!functions.empty()) { + stream << "\n"; + } + for (Function *f: functions) { f->emit(stream); } diff --git a/Nodes.hpp b/Nodes.hpp index e635bb3..572fa54 100644 --- a/Nodes.hpp +++ b/Nodes.hpp @@ -31,14 +31,18 @@ enum class Type { CHAR, FLOAT, BOOL, - DOUBLE + DOUBLE, + VOID }; std::ostream& operator<<(std::ostream &stream, const Type &type); template -using Pointer = std::unique_ptr; +class Pointer: public std::unique_ptr { +public: + Pointer(T *p = nullptr): std::unique_ptr(p) {} +}; class Emittable { @@ -264,14 +268,23 @@ typedef PointerList BranchCaseList; class Branch: public Statement { public: - Branch(Id *v, BranchCaseList *c, StatementList *e): - var(v), cases(c), els(e) {} + struct Body { + public: + Body(BranchCaseList *c, StatementList *e = nullptr): cases(c), els(e) {} + + private: + Pointer cases; + Pointer els; + + friend class Branch; + }; + + Branch(Id *v, Branch::Body *b): var(v), body(b) {} virtual void emit(std::ostream &stream, int indent = 0); private: Pointer var; - Pointer cases; - Pointer els; + Pointer body; }; @@ -285,20 +298,40 @@ private: }; -class Function: public Emittable { +class FunArg: public Emittable { public: - Function(Id *n, IdList *a, StatementList *b): - name(n), 
args(a), body(b) {} - virtual ~Function() {} + FunArg(Id *n, Type t, bool p): name(n), type(t), pointer(p) {} + virtual ~FunArg() {} virtual void emit(std::ostream &stream, int indent = 0); private: Pointer name; - Pointer args; + Type type; + bool pointer; +}; + + +typedef ListEmittable FunArgList; + + +class Function: public Emittable { +public: + Function(Id *n, Type r, FunArgList *a, StatementList *b): + name(n), type(r), args(a), body(b) {} + virtual ~Function() {} + + virtual void emit(std::ostream &stream, int indent = 0); + void emitSignature(std::ostream &stream, int indent = 0); + +private: + Pointer name; + Type type; + Pointer args; Pointer body; }; + class Program: public Emittable { public: virtual void emit(std::ostream &stream, int indent = 0); diff --git a/README.md b/README.md index 681bbb7..f3929c8 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ Usage subset of C++. For those of you who want to get to the code ASAP, the `examples/` folder contains a set of programs covering most of the features of the language. -A good wat to learn on the field is comparing the resulting C++ with the +A good way to learn on the field is comparing the resulting C++ with the input. Well, mostly with the beautified version of the input, `*.beauty.mc`. The compiler reads from standard input and print result to standard output. @@ -46,10 +46,14 @@ The compiler reads from standard input and print result to standard output. Language overview ================= -The original specification can be found in `Specification.txt`, which -unfortunately is not complete. This project is an ongoing effort to implement -it, which means filling gaps and ambiguities. This file only documents -usable features of the language. +The original specification can be found in `Specification.txt`, and was +initially conceived by my colleagues and dear friends Alessandro Barenghi, +Michele Tartara and Nicola Vitucci, to whom goes my gratitude. 
+ +Unfortunately, their proposal was meant to be a joke and is not complete. +This project is an ongoing effort to produce a rigorous specification for the +language and implement a compiler, which implies filling gaps and ambiguities +with sensible choices. Statements have no terminator, i.e. no semicolon `;` or the like. A single statement can be split across multiple lines and multiple statements can be @@ -58,6 +62,10 @@ words **cannot** be split on multiple lines. A comma might be inserted after each statement, if it fits the sentence ;) +Accented letters can be replaced by the non-accented letter followed by a +backtick `` ` ``, although the use of the correct Italian spelling is strongly +encouraged for maximizing the antani effect. + Main ---- @@ -194,7 +202,7 @@ defined as follows: For example: voglio antani, Necchi come se fosse 10 - stuzzica: + stuzzica antani come fosse antani meno 1 e brematura anche, se antani maggiore di 0 @@ -285,10 +293,62 @@ where the `o tarapia tapioco` part is like the `default` block. Functions --------- -_Coming in a few releases..._. +**Note**: the alternate spelling `supercazzora` might be used in place + of `supercazzola` wherever the latter appears. -We can already parse and emit them, although there are some ambiguities -yet to be solved in the specification. +### Declaration + +A function is declared with the `blinda la supercazzola` statement: + + blinda la supercazzola [<type>] <name> [con <arg> <type>[, <arg> <type>...]] o scherziamo? + + +Where `<type>` can be omitted for a void function. For instance: + + blinda la supercazzola Necchi antanizzata con alfio Mascetti o scherziamo? + vaffanzum alfio meno 2! + +is a function of type `Necchi`, taking one argument of type `Mascetti`. +Multiple arguments must be comma-separated, like in: + + blinda la supercazzola Necchi antanizzata con alfio Mascetti, barilotto Necchi o scherziamo? + vaffanzum alfio meno 2! + +which is a function of type `Necchi`, taking two arguments of type `Mascetti` +and `Necchi`. 
It maps to: + + int antanizzata(char alfio, int barilotto) { + return alfio - 2; + } + +Finally, this: + + blinda la supercazzola antanizzata o scherziamo? + vaffanzum! + +is a `void` function taking no arguments and becomes: + + void antanizzata() { + return; + } + +Functions cannot be nested and can be declared before or after the main in any +order. `mcc` will not check that a return statement is always reachable inside + a non-void function. Failing to return a value leads to undefined behaviour. + +### Invocation + +A function is called with the `brematurata la supercazzola` statement: + + brematurata la supercazzola <name> [con <expression>[, <expression>...]] o scherziamo? + +Functions might be called inside expressions. For instance, this: + + antani come se fosse brematurata la supercazzola alfio con barilotto diviso 3 o scherziamo? per 2 + +maps to: + + antani = alfio(barilotto / 3) * 2; Exceptions ---------- @@ -311,9 +371,11 @@ Comments -------- Any character after `bituma` is ignored until a line break is encountered. For -instance: +instance, in: - antani come se fossee 4 bituma lorem ipsum + antani come se fosse 4 bituma, scusi, noi siamo in quattro + +`, scusi, noi siamo in quattro` is ignored. Comments are useful to fill the "supercazzola" and make it more readable, since any word (including reserved words) can be inserted into it. diff --git a/Specification.txt b/Specification.txt index 676563a..f0ef8ed 100644 --- a/Specification.txt +++ b/Specification.txt @@ -1,3 +1,14 @@ +------------------------------------------------------------------------- + +NOTE: this document was left here only for historical purposes, + refer to README.md for the current spec. + +NOTA: questo documento è obsoleto ed è qui solo per memoria storica, in + quanto rappresenta la prima specifica proposta per il linguaggio. + README.md contiene l'attuale versione. 
+ +------------------------------------------------------------------------- + Proposte per la definizione di un linguaggio di programmazione esoterico "Monicelli" Il "main" del programma inizia in corrispondenza di: diff --git a/examples/float.mc b/examples/float.mc new file mode 100644 index 0000000..12389c9 --- /dev/null +++ b/examples/float.mc @@ -0,0 +1,12 @@ +# Test all possible numerical forms (integer and float) + +Lei ha clacsonato + +2.0 a posterdati +-2.0 a posterdati +2. a posterdati +.232 a posterdati +-2 a posterdati +-.2233 a posterdati ++2 a posterdati ++2.233e-23 a posterdati diff --git a/examples/hello-world.mc b/examples/hello-world.mc new file mode 100644 index 0000000..1a8a4d7 --- /dev/null +++ b/examples/hello-world.mc @@ -0,0 +1,16 @@ +# Author: Alessandro Pellegrini +# Released under GPLv3 + +Lei ha clacsonato +voglio una bucaiola, Necchi come se fosse 0 voglio prematurata, Mascetti come se fosse 72 +prematurata a posterdati voglio antifurto, Mascetti come se fosse 87. +voglio una cofandina, Mascetti come se fosse prematurata con scappellamento a sinistra per 1. +cofandina come fosse cofandina meno 33 brematurata la supercazzola antanizzata con antifurto, +cofandina o scherziamo? vaffanzum bucaiola! bituma scusi, noi siamo in quattro. +blinda la supercazzola antanizzata con Alfio Mascetti, tarapia Mascetti o scherziamo? +voglio vicesindaco, Mascetti come se fosse 101 vicesindaco a posterdati voglio pastene, +Mascetti come se fosse vicesindaco più 7 pastene a posterdati bituma in un certo senso. +pastene a posterdati tarapia a posterdati Alfio a posterdati tarapia a posterdati +voglio scappellamento, Mascetti come se fosse 114 scappellamento a posterdati +pastene a posterdati voglio Antani, Mascetti come se fosse pastene meno 8 Antani a posterdati +vaffanzum! 
diff --git a/examples/syntax.beauty.mc b/examples/syntax.beauty.mc index 59f123a..4d180bc 100644 --- a/examples/syntax.beauty.mc +++ b/examples/syntax.beauty.mc @@ -40,7 +40,7 @@ Lei ha clacsonato vicesindaco come se fosse brematurata la supercazzola avanti con il vicesindaco o scherziamo, vaffanzum 0! -blinda la supercazzola antanizzata con alfio o scherziamo? +blinda la supercazzola Necchi antanizzata con alfio Mascetti, barilotto Necchi o scherziamo? vaffanzum alfio meno 2! bituma al finale? diff --git a/examples/syntax.mc b/examples/syntax.mc index 2c21ac9..911d84a 100644 --- a/examples/syntax.mc +++ b/examples/syntax.mc @@ -10,5 +10,5 @@ o tarapia tapioco: mi porga il cappello e velocità di esecuzione, vicesindaco a posterdati, mi porga il vicesindaco, brematurata la supercazzola tombale con alfio, serio o scherziamo? avvertite don ulrico, ho visto la signora! vicesindaco come se fosse brematurata la supercazzola avanti con il vicesindaco -o scherziamo, vaffanzum 0! blinda la supercazzola antanizzata con alfio o scherziamo? -vaffanzum alfio meno 2! bituma al finale? +o scherziamo, vaffanzum 0! blinda la supercazzola Necchi antanizzata con alfio +Mascetti o scherziamo? vaffanzum alfio meno 2! bituma al finale?