From 7f2a9a34a8c3e3c757066b1bbc6306f8ac4e6f29 Mon Sep 17 00:00:00 2001 From: Stefano Sanfilippo Date: Thu, 12 Oct 2017 22:36:56 +0200 Subject: [PATCH] Push Monicelli 2.0 "Cofandina" --- .clang-format | 11 + .gitignore | 4 - CMakeLists.txt | 97 +---- Doxyfile.in | 305 ++++++++++++++ README.md | 219 +++++----- cmake/FindLLVM.cmake | 54 +++ cmake/FindRagel.cmake | 10 + cmake/bison2.patch | 28 -- cmake/features.cpp | 89 ---- cmake/macosx_homebrew.cmake | 75 ---- cmake/package.cmake | 42 -- examples/Makefile | 10 + examples/turtle/.gitignore | 3 - examples/turtle/Makefile | 20 +- examples/turtle/turtle.cpp | 131 +++--- examples/turtle/turtle.mc | 5 + examples/turtle/turtle.mm | 26 -- src/BitcodeEmitter.cpp | 796 ------------------------------------ src/BitcodeEmitter.hpp | 81 ---- src/CLineParser.cpp | 82 ---- src/CLineParser.hpp | 43 -- src/CMakeLists.txt | 120 +++--- src/CppEmitter.cpp | 340 --------------- src/CppEmitter.hpp | 73 ---- src/Emitter.hpp | 73 ---- src/ModuleLoader.cpp | 55 --- src/ModuleLoader.hpp | 10 - src/ModuleRegistry.cpp | 92 ----- src/ModuleRegistry.hpp | 48 --- src/Monicelli.lpp | 210 ---------- src/Monicelli.ypp | 434 -------------------- src/Nodes.cpp | 102 ----- src/Nodes.hpp | 712 -------------------------------- src/Pointers.hpp | 47 --- src/Runtime.c | 89 ---- src/Runtime.h | 55 --- src/Scanner.hpp | 49 --- src/Scope.hpp | 70 ---- src/asmgen.cpp | 124 ++++++ src/asmgen.h | 30 ++ src/ast-printer.cpp | 267 ++++++++++++ src/ast-printer.h | 17 + src/ast-visitor.h | 48 +++ src/ast.cpp | 22 + src/ast.def | 27 ++ src/ast.h | 448 ++++++++++++++++++++ src/codegen.cpp | 754 ++++++++++++++++++++++++++++++++++ src/codegen.def | 37 ++ src/codegen.h | 23 ++ src/errors.cpp | 52 +++ src/errors.h | 59 +++ src/iterators.h | 48 +++ src/lexer.cpp | 102 +++++ src/lexer.def | 43 ++ src/lexer.h | 186 +++++++++ src/lexer.rl | 182 +++++++++ src/location.h | 59 +++ src/main.cpp | 198 ++++----- src/operators.def | 39 ++ src/options.cpp | 96 +++++ src/options.h | 58 
+++ src/parser.cpp | 542 ++++++++++++++++++++++++ src/parser.h | 73 ++++ src/support.cpp | 17 + src/support.h | 28 ++ src/types.def | 16 + 66 files changed, 4131 insertions(+), 4174 deletions(-) create mode 100644 .clang-format create mode 100644 Doxyfile.in create mode 100644 cmake/FindLLVM.cmake create mode 100644 cmake/FindRagel.cmake delete mode 100644 cmake/bison2.patch delete mode 100644 cmake/features.cpp delete mode 100644 cmake/macosx_homebrew.cmake delete mode 100644 cmake/package.cmake create mode 100644 examples/Makefile delete mode 100644 examples/turtle/.gitignore delete mode 100644 examples/turtle/turtle.mm delete mode 100644 src/BitcodeEmitter.cpp delete mode 100644 src/BitcodeEmitter.hpp delete mode 100644 src/CLineParser.cpp delete mode 100644 src/CLineParser.hpp delete mode 100644 src/CppEmitter.cpp delete mode 100644 src/CppEmitter.hpp delete mode 100644 src/Emitter.hpp delete mode 100644 src/ModuleLoader.cpp delete mode 100644 src/ModuleLoader.hpp delete mode 100644 src/ModuleRegistry.cpp delete mode 100644 src/ModuleRegistry.hpp delete mode 100644 src/Monicelli.lpp delete mode 100644 src/Monicelli.ypp delete mode 100644 src/Nodes.cpp delete mode 100644 src/Nodes.hpp delete mode 100644 src/Pointers.hpp delete mode 100644 src/Runtime.c delete mode 100644 src/Runtime.h delete mode 100644 src/Scanner.hpp delete mode 100644 src/Scope.hpp create mode 100644 src/asmgen.cpp create mode 100644 src/asmgen.h create mode 100644 src/ast-printer.cpp create mode 100644 src/ast-printer.h create mode 100644 src/ast-visitor.h create mode 100644 src/ast.cpp create mode 100644 src/ast.def create mode 100644 src/ast.h create mode 100644 src/codegen.cpp create mode 100644 src/codegen.def create mode 100644 src/codegen.h create mode 100644 src/errors.cpp create mode 100644 src/errors.h create mode 100644 src/iterators.h create mode 100644 src/lexer.cpp create mode 100644 src/lexer.def create mode 100644 src/lexer.h create mode 100644 src/lexer.rl create mode 100644 
src/location.h create mode 100644 src/operators.def create mode 100644 src/options.cpp create mode 100644 src/options.h create mode 100644 src/parser.cpp create mode 100644 src/parser.h create mode 100644 src/support.cpp create mode 100644 src/support.h create mode 100644 src/types.def diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..d47f207 --- /dev/null +++ b/.clang-format @@ -0,0 +1,11 @@ +--- +BasedOnStyle: LLVM +IndentWidth: 2 +--- +Language: Cpp +PointerAlignment: Left +AllowShortIfStatementsOnASingleLine: true +ColumnLimit: 100 +AlignEscapedNewlines: DontAlign +BreakStringLiterals: false +SpaceAfterTemplateKeyword: false diff --git a/.gitignore b/.gitignore index 098b43f..796b96d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1 @@ -/Monicelli.output -/*.user -/mcc /build -/nbproject diff --git a/CMakeLists.txt b/CMakeLists.txt index 203e57d..75063b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,84 +1,31 @@ -# -# Monicelli: an esoteric language compiler -# -# Copyright (C) 2014 Stefano Sanfilippo -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# +# Copyright 2017 the Monicelli project authors. All rights reserved. +# Use of this source code is governed by a GPLv3 license, see LICENSE.txt. 
+cmake_minimum_required(VERSION 3.0) project(Monicelli) -cmake_minimum_required(VERSION 2.8) set(CMAKE_BUILD_TYPE Release) -set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") -message("== Only a limited set of platforms was tested. We need your help!") -message("== Report build errors to https://github.com/esseks/monicelli/issues") - -## 1. Compiler sanity check - -try_run( - execution_results - supported_compiler - ${CMAKE_CURRENT_BINARY_DIR} - ${CMAKE_CURRENT_SOURCE_DIR}/cmake/features.cpp - COMPILE_DEFINITIONS -std=c++0x - OUTPUT_VARIABLE features_build_log -) - -if (checkfeat) - message(${features_build_log}) -endif() - -if (NOT supported_compiler) - message(FATAL_ERROR - "Some C++11 features we need are not implemented by your compiler.\n" - "Run cmake with -Dcheckfeat=1 to see the exact cause." - ) -endif() - -if (execution_results MATCHES FAILED_TO_RUN) - message(FATAL_ERROR - "Your compiler supports the set of C++11 features we need, " - "but something failed.\n" - "Run cmake with -Dcheckfeat=1 to see the exact cause." - ) -endif() - -## 2. Find Flex and Bison - -include(macosx_homebrew) - -if (CMAKE_HOST_APPLE) - find_package_prefer_brew(BISON REQUIRED) - find_package_prefer_brew(FLEX 2.5 REQUIRED) -else() - find_package(BISON REQUIRED) - find_package(FLEX 2.5 REQUIRED) -endif() - -if (BISON_VERSION VERSION_LESS 2.5) - message(FATAL_ERROR "At least Bison 2.5 is required.") -elseif(BISON_VERSION VERSION_LESS 3.0) - message("== Bison 2.5 was found. You have to apply cmake/bison2.patch...") -endif() - -## 2. 
Build Monicelli - -include(package) +find_package(Doxygen) add_subdirectory(src) -install(FILES README.md LICENSE.txt DESTINATION doc/) +if (DOXYGEN_FOUND) + set(DOXYGEN_CONFIG ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile) + configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in + ${DOXYGEN_CONFIG} + @ONLY + ) + + add_custom_target(doc + DEPENDS ${DOXYGEN_CONFIG} + COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_CONFIG} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + VERBATIM + ) +endif() + +install(FILES README.md LICENSE.txt DESTINATION doc/monicelli) diff --git a/Doxyfile.in b/Doxyfile.in new file mode 100644 index 0000000..99b1117 --- /dev/null +++ b/Doxyfile.in @@ -0,0 +1,305 @@ +# Doxyfile 1.8.11 + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- +DOXYFILE_ENCODING = UTF-8 +PROJECT_NAME = "Monicelli" +PROJECT_NUMBER = +PROJECT_BRIEF = +PROJECT_LOGO = +OUTPUT_DIRECTORY = @CMAKE_CURRENT_BINARY_DIR@/doxy +CREATE_SUBDIRS = NO +ALLOW_UNICODE_NAMES = NO +OUTPUT_LANGUAGE = English +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ABBREVIATE_BRIEF = +ALWAYS_DETAILED_SEC = NO +INLINE_INHERITED_MEMB = NO +FULL_PATH_NAMES = YES +STRIP_FROM_PATH = +STRIP_FROM_INC_PATH = +SHORT_NAMES = NO +JAVADOC_AUTOBRIEF = NO +QT_AUTOBRIEF = NO +MULTILINE_CPP_IS_BRIEF = NO +INHERIT_DOCS = YES +SEPARATE_MEMBER_PAGES = NO +TAB_SIZE = 4 +ALIASES = +TCL_SUBST = +OPTIMIZE_OUTPUT_FOR_C = NO +OPTIMIZE_OUTPUT_JAVA = NO +OPTIMIZE_FOR_FORTRAN = NO +OPTIMIZE_OUTPUT_VHDL = NO +EXTENSION_MAPPING = +MARKDOWN_SUPPORT = YES +AUTOLINK_SUPPORT = YES +BUILTIN_STL_SUPPORT = NO +CPP_CLI_SUPPORT = NO +SIP_SUPPORT = NO +IDL_PROPERTY_SUPPORT = YES +DISTRIBUTE_GROUP_DOC = NO +GROUP_NESTED_COMPOUNDS = NO +SUBGROUPING = YES +INLINE_GROUPED_CLASSES = NO +INLINE_SIMPLE_STRUCTS = NO +TYPEDEF_HIDES_STRUCT = NO +LOOKUP_CACHE_SIZE = 0 
+#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- +EXTRACT_ALL = NO +EXTRACT_PRIVATE = NO +EXTRACT_PACKAGE = NO +EXTRACT_STATIC = NO +EXTRACT_LOCAL_CLASSES = YES +EXTRACT_LOCAL_METHODS = NO +EXTRACT_ANON_NSPACES = NO +HIDE_UNDOC_MEMBERS = NO +HIDE_UNDOC_CLASSES = NO +HIDE_FRIEND_COMPOUNDS = NO +HIDE_IN_BODY_DOCS = NO +INTERNAL_DOCS = NO +CASE_SENSE_NAMES = YES +HIDE_SCOPE_NAMES = NO +HIDE_COMPOUND_REFERENCE= NO +SHOW_INCLUDE_FILES = YES +SHOW_GROUPED_MEMB_INC = NO +FORCE_LOCAL_INCLUDES = NO +INLINE_INFO = YES +SORT_MEMBER_DOCS = YES +SORT_BRIEF_DOCS = NO +SORT_MEMBERS_CTORS_1ST = NO +SORT_GROUP_NAMES = NO +SORT_BY_SCOPE_NAME = NO +STRICT_PROTO_MATCHING = NO +GENERATE_TODOLIST = YES +GENERATE_TESTLIST = YES +GENERATE_BUGLIST = YES +GENERATE_DEPRECATEDLIST= YES +ENABLED_SECTIONS = +MAX_INITIALIZER_LINES = 30 +SHOW_USED_FILES = YES +SHOW_FILES = YES +SHOW_NAMESPACES = YES +FILE_VERSION_FILTER = +LAYOUT_FILE = +CITE_BIB_FILES = +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- +QUIET = NO +WARNINGS = YES +WARN_IF_UNDOCUMENTED = YES +WARN_IF_DOC_ERROR = YES +WARN_NO_PARAMDOC = NO +WARN_AS_ERROR = NO +WARN_FORMAT = "$file:$line: $text" +WARN_LOGFILE = +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- +INPUT = @CMAKE_CURRENT_SOURCE_DIR@/src +INPUT_ENCODING = UTF-8 +FILE_PATTERNS = +RECURSIVE = YES +EXCLUDE = @CMAKE_CURRENT_BINARY_DIR@ +EXCLUDE_SYMLINKS = NO +EXCLUDE_PATTERNS = +EXCLUDE_SYMBOLS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = +INPUT_FILTER = +FILTER_PATTERNS = 
+FILTER_SOURCE_FILES = NO +FILTER_SOURCE_PATTERNS = +USE_MDFILE_AS_MAINPAGE = +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- +SOURCE_BROWSER = NO +INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = NO +REFERENCES_RELATION = NO +REFERENCES_LINK_SOURCE = YES +SOURCE_TOOLTIPS = YES +USE_HTAGS = NO +VERBATIM_HEADERS = YES +CLANG_ASSISTED_PARSING = NO +CLANG_OPTIONS = +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- +ALPHABETICAL_INDEX = YES +COLS_IN_ALPHA_INDEX = 5 +IGNORE_PREFIX = +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- +GENERATE_HTML = YES +HTML_OUTPUT = html +HTML_FILE_EXTENSION = .html +HTML_HEADER = +HTML_FOOTER = +HTML_STYLESHEET = +HTML_EXTRA_STYLESHEET = +HTML_EXTRA_FILES = +HTML_COLORSTYLE_HUE = 220 +HTML_COLORSTYLE_SAT = 100 +HTML_COLORSTYLE_GAMMA = 80 +HTML_TIMESTAMP = NO +HTML_DYNAMIC_SECTIONS = NO +HTML_INDEX_NUM_ENTRIES = 100 +GENERATE_DOCSET = NO +GENERATE_HTMLHELP = NO +CHM_FILE = +HHC_LOCATION = +GENERATE_CHI = NO +CHM_INDEX_ENCODING = +BINARY_TOC = NO +TOC_EXPAND = NO +DISABLE_INDEX = NO +GENERATE_TREEVIEW = NO +ENUM_VALUES_PER_LINE = 4 +TREEVIEW_WIDTH = 250 +EXT_LINKS_IN_WINDOW = NO +FORMULA_FONTSIZE = 10 +FORMULA_TRANSPARENT = YES +USE_MATHJAX = NO +MATHJAX_FORMAT = HTML-CSS +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest +MATHJAX_EXTENSIONS = +MATHJAX_CODEFILE = +SEARCHENGINE = YES +SERVER_BASED_SEARCH = NO +EXTERNAL_SEARCH = NO +SEARCHENGINE_URL = +SEARCHDATA_FILE = searchdata.xml +EXTERNAL_SEARCH_ID = +EXTRA_SEARCH_MAPPINGS 
= +#--------------------------------------------------------------------------- +# Configuration options related to the LaTeX output +#--------------------------------------------------------------------------- +GENERATE_LATEX = YES +LATEX_OUTPUT = latex +LATEX_CMD_NAME = latex +MAKEINDEX_CMD_NAME = makeindex +COMPACT_LATEX = NO +PAPER_TYPE = a4 +EXTRA_PACKAGES = +LATEX_HEADER = +LATEX_FOOTER = +LATEX_EXTRA_STYLESHEET = +LATEX_EXTRA_FILES = +PDF_HYPERLINKS = YES +USE_PDFLATEX = YES +LATEX_BATCHMODE = NO +LATEX_HIDE_INDICES = NO +LATEX_SOURCE_CODE = NO +LATEX_BIB_STYLE = plain +LATEX_TIMESTAMP = NO +#--------------------------------------------------------------------------- +# Configuration options related to the RTF output +#--------------------------------------------------------------------------- +GENERATE_RTF = NO +RTF_OUTPUT = rtf +COMPACT_RTF = NO +RTF_HYPERLINKS = NO +RTF_STYLESHEET_FILE = +RTF_EXTENSIONS_FILE = +RTF_SOURCE_CODE = NO +#--------------------------------------------------------------------------- +# Configuration options related to the man page output +#--------------------------------------------------------------------------- +GENERATE_MAN = NO +MAN_OUTPUT = man +MAN_EXTENSION = .3 +MAN_SUBDIR = +MAN_LINKS = NO +#--------------------------------------------------------------------------- +# Configuration options related to the XML output +#--------------------------------------------------------------------------- +GENERATE_XML = NO +XML_OUTPUT = xml +XML_PROGRAMLISTING = YES +#--------------------------------------------------------------------------- +# Configuration options related to the DOCBOOK output +#--------------------------------------------------------------------------- +GENERATE_DOCBOOK = NO +DOCBOOK_OUTPUT = docbook +DOCBOOK_PROGRAMLISTING = NO +#--------------------------------------------------------------------------- +# Configuration options for the AutoGen Definitions output 
+#--------------------------------------------------------------------------- +GENERATE_AUTOGEN_DEF = NO +#--------------------------------------------------------------------------- +# Configuration options related to the Perl module output +#--------------------------------------------------------------------------- +GENERATE_PERLMOD = NO +PERLMOD_LATEX = NO +PERLMOD_PRETTY = YES +PERLMOD_MAKEVAR_PREFIX = +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = NO +EXPAND_ONLY_PREDEF = NO +SEARCH_INCLUDES = YES +INCLUDE_PATH = +INCLUDE_FILE_PATTERNS = +PREDEFINED = +EXPAND_AS_DEFINED = +SKIP_FUNCTION_MACROS = YES +#--------------------------------------------------------------------------- +# Configuration options related to external references +#--------------------------------------------------------------------------- +TAGFILES = +GENERATE_TAGFILE = +ALLEXTERNALS = NO +EXTERNAL_GROUPS = YES +EXTERNAL_PAGES = YES +PERL_PATH = /usr/bin/perl +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- +CLASS_DIAGRAMS = YES +MSCGEN_PATH = +DIA_PATH = +HIDE_UNDOC_RELATIONS = YES +HAVE_DOT = YES +DOT_NUM_THREADS = 0 +DOT_FONTNAME = Helvetica +DOT_FONTSIZE = 10 +DOT_FONTPATH = +CLASS_GRAPH = YES +COLLABORATION_GRAPH = YES +GROUP_GRAPHS = YES +UML_LOOK = NO +UML_LIMIT_NUM_FIELDS = 10 +TEMPLATE_RELATIONS = NO +INCLUDE_GRAPH = YES +INCLUDED_BY_GRAPH = YES +CALL_GRAPH = NO +CALLER_GRAPH = NO +GRAPHICAL_HIERARCHY = YES +DIRECTORY_GRAPH = YES +DOT_IMAGE_FORMAT = png +INTERACTIVE_SVG = NO +DOT_PATH = +DOTFILE_DIRS = +MSCFILE_DIRS = +DIAFILE_DIRS = +PLANTUML_JAR_PATH = +PLANTUML_INCLUDE_PATH = +DOT_GRAPH_MAX_NODES = 50 
+MAX_DOT_GRAPH_DEPTH = 0 +DOT_TRANSPARENT = NO +DOT_MULTI_TARGETS = NO +GENERATE_LEGEND = YES +DOT_CLEANUP = YES diff --git a/README.md b/README.md index dcef032..c976890 100644 --- a/README.md +++ b/README.md @@ -1,111 +1,119 @@ -Monicelli -========= +# Monicelli 2.0 "Cofandina" + +This all-new release mainly brings several improvements to the code that make +it easier to hack and build new features. In addition to that: + +* `mcc` now produces an executable by default, no need to use (or install) + `llc`, as it was previously the case. `mcc cofandina.mc -o cofandina` and + that's it! This feature currently requires a POSIX system (like Linux or + Mac OS X) with a C compiler installed (anything reasonably recent will do). + +* `mcc` does not depend on Boost anymore. + +* `mcc` has a new hand-written parser that should provide better error + messages. Now it's easier to stuzzicate your prematurated supercazzole. + Error messages are in plain English and not very antani. Apologies for that. + +* `mcc` now generates code that directly calls the C standard library. This + allows you to seamlessly link Monicelli object files with C/C++ code, without + any extra dependency on a Monicelli standard library. + +* The code generator in `mcc` has been ported to LLVM 3.8 and will continue + to be updated with new releases. + +* Modules are gone. This was a rather obscure feature that allowed to expose + functions implemented in C/C++ to Monicelli code using a YAML-based language. + Instead, it's now possible to declare a function with an empty body to signal + that it will be implemented in another file, be it in Monicelli or C/C++. + See the updated Turtle example. + +* The C++ transpiler is gone. It might come back again, though. + +# What's Monicelli anyway? Monicelli is an esoterical programming language based on the so-called "supercazzole" from the movie Amici Miei, a masterpiece of the Italian comedy. 
-There is no way to translate a "supercazzola" to English, so if you don't speak -Italian, I'm afraid you won't understand. I'm really sorry for you :) +Over the past few years I have tried to render the idea of "supercazzola" to +non-Italian speakers, with little success. The closest I got was by describing +it as "comically deceptive gibberish", which sadly does not capture the true +essence of what a "supercazzola" (spelled "supercazzora" according to some) is. +I'm still open to suggestions on how to better present Monicelli (the language) +to the international public. -Compilation -=========== +# Compilation -You will need `bison` version >= 3.0 (Bison 2.5 works but requires manual intervention), -`flex` >= 2.5, `LLVM` >= 3.5, `Boost` >= 1.48, `YAML-cpp` >= 0.5 and any C++11 compiler. -The build scripts are generated using CMake, version >= 2.8. +A part of the Monicelli compiler (the lexer) is generated using `ragel`, which +you will need to have installed. If this is not the case, the configuration +script will warn you. Monicelli is developed with version 6.8, but any +sufficiently recent release should do just fine. + +You will also need to have LLVM development libraries installed, version 3.8. +Other versions might or might not work. + +Finally, you will need CMake, version 3.0 or higher. A typical Makefile-based build workflow would be: - mkdir build/ - cd build/ - cmake .. - make - -During the Makefile generation, the build script will test the compiler for all -the required features. + $ cd monicelli/ + $ mkdir build/ + $ cd build/ + $ cmake .. -DCMAKE_INSTALL_PREFIX="$HOME/mcc" + $ make all install If your tools are installed in non-standard locations (e.g. Bison Brew on Mac OS X), you can alter the search path with: - PATH=/path/to/bison cmake .. + $ PATH=/path/to/ragel cmake .. -If you can't really upgrade to Bison 3.0, a patch for Bison 2.5 -is provided in `cmake/bison2.patch`. 
You will have to manually apply it with: +`mcc` statically links LLVM, once compiled it will only depend on the C++ +runtime and on `libz`. - patch -p 1 < cmake/bison2.patch +## Note for non-POSIX platforms (like Windows) -However note that compilation with Bison 2.5 is not supported and the patch might be -removed in the future. +The external linker is called using fork+exec for simplicity. This means that +this part of the workflow will **not** work on non-POSIX systems, such as +Windows. There, you will need to disable this feature at build time. You will +only get object files (.o) that you will have to link, including a C runtime +library, by yourself. -###Building with LLVM on Debian/Ubuntu -Debian Testing and Ubuntu >= 14.04 distribute a LLVM 3.5 development package -**which is broken** (see [1](https://bugs.launchpad.net/ubuntu/+source/llvm/+bug/1365432) -and [2](https://bugs.launchpad.net/ubuntu/+source/llvm/+bug/1387011)). +You can disable the invocation of an external linker and make `mcc` compilable +on Windows during CMake configuration by forcing the appropriate flag to OFF: -Luckly, LLVM.org directly provides an APT repo which works fine. -http://llvm.org/apt/ have all the relevant info for installing the repo. -After that, the package we need is `llvm-3.5-dev`. + $ cmake .. -DENABLE_LINKER=OFF -**This is only necessary for compilation, Debian/Ubuntu LLVM runtime libs -and utilities work just fine.** +## Tested platforms -Usage -===== +The reference OS for building and testing Monicelli is Ubuntu 16.04 LTS. If the +build is broken there, then it's a bug. Unfortunately I don't have many other +platforms at hand to test, but it _should_ compile on many more POSIX systems, +including Mac OS X. If you managed to compile Monicelli on your favourite +platform and you needed a patch, it would be great if you could send a PR. -###LLVM frontend -Monicelli emits LLVM bitcode in its default configuration. 
-A typical compilation workflow would be: +# Usage - $ ./mcc example.mc - $ llc example.bc - $ cc example.s libmcrt.a -o example +Monicelli builds an executable by default on POSIX systems +(such as Linux, Mac OS X). Linking requires an external C compiler, anything +decently modern and standard-conformant should do. -In particular, note that the Monicelli runtime library must be compiled in or linked to use -all of the I/O functions. Also note the use of the `llc` utility, which is -provided by LLVM, to produce native assembler from LLVM bitcode. +A typical invocation is very similar to what you would expect from your C +compiler: -Please be aware that the Monicelli standard library depends on the C stdlib, -although this dependency is available on virtually any platform you might -dream of compiling Monicelli on. + $ mcc example.mc -o example + $ ./example -As such, `llvm` utilities are needed for compiling. Only the "low level" -utilities (`opt` and `llc`) are needed, not the whole Clang/Clang++ suite. -Usually, the relevant package goes under the name `llvm`. +Please be aware that the Monicelli compiler depends on the availability of a C +compiler and stdlib, although this dependency should be available on virtually +all platforms where you might think to run `mcc`. -A C compiler is used to simplify the assembling and linking step, but it could -be skipped altogether with a small effort. If you want to try ;) - -`mcc` only performs minimal optimizations in order to ensure readibility when -disassembling with `llvm-dis`. However, you might want to optimize the code -using `opt` LLVM utility: - - $ opt example.bc | llc -o example.s - -in place of the simple `llc` compilation step. See `opt` documentation for a -comprehensive list of optimizations available. - -###C++ transpiler -`mcc` also works as a source to source compiler, which reads Monicelli -and outputs a subset of C++. Use the option `--c++` or `-+` for that. 
- -A good way to learn on the field is comparing the resulting C++ with the -input. Well, mostly with the beautified version of the input, `*.beauty.mc`. - -The typical command line would be: - - $ ./mcc --c++ examples/primes.mc - $ c++ primes.cpp -o primes - $ ./primes - -Language overview -================= +# Language overview The original specification can be found in `Specification.txt`, and was initially conceived by my colleagues and dear friends Alessandro Barenghi, Michele Tartara and Nicola Vitucci, to whom goes my gratitude. -Unfortunately, their proposal was meant to be a joke and is not complete. +Their proposal was meant to be an elaborate joke and is not complete. This project is an ongoing effort to produce a rigorous specification for the language and implement a compiler, which implies filling gaps and ambiguities with sensible choices. @@ -121,13 +129,13 @@ Accented letters can be replaced by the non-accented letter followed by a backtick `` ` ``, although the use of the correct Italian spelling is strongly encouraged for maximizing the antani effect. -###Get started! +## Getting started real quick + For those of you who want to get to the code ASAP, the `examples/` folder contains a set of programs covering most of the features of the language. -Main ----- +## Main The entry point of the program (the "main") is identified by the phrase: @@ -143,8 +151,8 @@ optionally, no value might be returned with: vaffanzum! -Expressions ------------ +## Expressions + The usual operators are given, but spelled as words to best fit in sentences. They are directly mapped on usual operators as follows: @@ -165,7 +173,7 @@ When evaluating binary expressions whose operands have different types, the type of the result will be the less restrictive between the two. This ensures that no loss takes place when evaluating an expression. -###Binary shift +## Binary shift Binary shift operators have a slighly different syntax: @@ -190,10 +198,9 @@ maps to `antani << 2`. 
It goes without saying, other expression can be used instead of numbers. Also, the usual precedence rules apply. -**Braces are not implemented**. +**There is no syntax for braces in Monicelli**. -Variables ---------- +## Variables A variable name can contain numbers, upper and lower case character and must not start with a number (the usual rules, that's it). @@ -206,7 +213,7 @@ to the same variable. Consequently, the articles above cannot be used as variable names. -###Assignment +## Assignment A value can be assigned to a variable with the following statement: @@ -218,7 +225,7 @@ The `` initializer is casted to the declared type of the variable, even if the cast will cause some loss. This feature can be (ab)used to introduce C-style casts too. -###Declaration +## Declaration Variables can be declared in any scope. There are 5 variable types, which are directly mapped on C++/C99 types as follows: @@ -246,8 +253,7 @@ for instance: declares a variables called `antani` of type `Necchi` (`int`) and initializes it to 4. -Input/Output ------------- +## Input/Output Variables and expressions can be printed with the statement: @@ -257,8 +263,7 @@ Conversely, a variable might be read from input using: mi porga -Loop ----- +## Loop There is only one loop construct, equivalent to a C `do {} while();`, which is defined as follows: @@ -283,8 +288,7 @@ maps to: `brematura` might be replaced by its alternate form `prematura` -Branch ------- +## Branch The branch construct encompasses both the features of an `if` and a `switch`. The best way to explain it is by comparing its various forms to the corresponding @@ -358,13 +362,12 @@ Finally, here is the equivalent of a `switch () {}`: where the `o tarapia tapioco` part is like the `default` block. -Functions ---------- +## Functions **Note**: the alternate spelling `supercazzora` might be used in place of `supercazzola` wherever the latter appears. 
-###Declaration +## Declaration A function is declared with the `blinda la supercazzola` statement: @@ -404,7 +407,11 @@ Functions cannot be nested and can be declared before or after the main in any order. `mcc` will not check that a return statement is always reachable inside a non-void function. Failing to return a value leads to undefined behaviour. -###Invocation +A function might be declared with no body, in which case it's treated as a +prototype. A prototype makes the function signature known to the compiler, and +it signals that the function is implemented in another file. + +## Invocation A function is called with the `brematurata la supercazzola` statement: @@ -418,8 +425,7 @@ maps to: antani = alfio(barilotto / 3) * 2; -Exceptions ----------- +## Exceptions The program might be aborted immediately with the statement: @@ -427,16 +433,14 @@ The program might be aborted immediately with the statement: there are no arguments. -Assertions ----------- +## Assertions An assertion block will evaluate its expression and trigger an error message if it is found to be 0 (logical false). An assertion is stated as: ho visto ! -Comments --------- +## Comments Any character after `bituma` is ignored until a line break is encountered. For instance, in: @@ -448,7 +452,7 @@ instance, in: Comments are useful to fill the "supercazzola" and make it more readable, since any word (including reserved words) can be inserted into it. -###Meta comments +## Meta comments In addition to line comments, there are meta comments. A meta comment starts with an hash sign `#` and continues until a line break is encountered, as an @@ -459,10 +463,11 @@ a long "supercazzola". Also, ordinary comments can and should be used in an improper way to fill the sentence, meta comments provide a mechanism for distiguishing "real" comments. -Reserved words and phrases ------------------------- +## Reserved words and phrases -The following phrases are currently reserved with no assigned usage. 
They cannot be used as variable identifiers, even if they do not serve any other purpose in the current language revision. +The following phrases are currently reserved with no assigned usage. They cannot +be used as variable identifiers, even if they do not serve any other purpose in +the current language revision. * `conte` * `scusi noi siamo in` diff --git a/cmake/FindLLVM.cmake b/cmake/FindLLVM.cmake new file mode 100644 index 0000000..73e497d --- /dev/null +++ b/cmake/FindLLVM.cmake @@ -0,0 +1,54 @@ +# Copyright 2017 the Monicelli project authors. All rights reserved. +# Use of this source code is governed by a GPLv3 license, see LICENSE.txt. + +find_program(LLVM_CONFIG llvm-config) + +if (LLVM_CONFIG STREQUAL "LLVM_CONFIG-NOTFOUND") + message(FATAL_ERROR "Please install the LLVM dev package to compile Monicelli.") +else() + message(STATUS "Found llvm-config: ${LLVM_CONFIG}") +endif() + +execute_process( + COMMAND ${LLVM_CONFIG} --version + OUTPUT_VARIABLE LLVM_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE +) + +set(TARGET_LLVM_VERSION "3.8.0") + +if (NOT LLVM_VERSION STREQUAL ${TARGET_LLVM_VERSION}) + message(WARNING "Expected LLVM ${TARGET_LLVM_VERSION}, found ${LLVM_VERSION}, build may fail.") +endif() + +execute_process( + COMMAND ${LLVM_CONFIG} --includedir + OUTPUT_VARIABLE LLVM_INCLUDE_DIR + OUTPUT_STRIP_TRAILING_WHITESPACE +) + +execute_process( + COMMAND ${LLVM_CONFIG} --libdir + OUTPUT_VARIABLE LLVM_LIBRARY_DIR + OUTPUT_STRIP_TRAILING_WHITESPACE +) + +execute_process( + COMMAND ${LLVM_CONFIG} --cxxflags + OUTPUT_VARIABLE LLVM_CXXFLAGS + OUTPUT_STRIP_TRAILING_WHITESPACE +) + +execute_process( + COMMAND ${LLVM_CONFIG} --libs all + OUTPUT_VARIABLE LLVM_MODULE_LIBS + OUTPUT_STRIP_TRAILING_WHITESPACE +) + +execute_process( + COMMAND ${LLVM_CONFIG} --system-libs + OUTPUT_VARIABLE LLVM_SYSTEM_LIBS + OUTPUT_STRIP_TRAILING_WHITESPACE +) + +set(LLVM_LIBS ${LLVM_MODULE_LIBS} ${LLVM_SYSTEM_LIBS}) diff --git a/cmake/FindRagel.cmake b/cmake/FindRagel.cmake new file 
mode 100644 index 0000000..d50216e --- /dev/null +++ b/cmake/FindRagel.cmake @@ -0,0 +1,10 @@ +# Copyright 2017 the Monicelli project authors. All rights reserved. +# Use of this source code is governed by a GPLv3 license, see LICENSE.txt. + +find_program(RAGEL ragel) + +if (${RAGEL} STREQUAL "RAGEL-NOTFOUND") + message(FATAL_ERROR "Please install ragel to compile Monicelli.") +else() + message(STATUS "Found ragel: ${RAGEL}") +endif() diff --git a/cmake/bison2.patch b/cmake/bison2.patch deleted file mode 100644 index 523e805..0000000 --- a/cmake/bison2.patch +++ /dev/null @@ -1,28 +0,0 @@ -diff --git a/Monicelli.ypp b/Monicelli.ypp -index 028506d..9817f1e 100644 ---- a/Monicelli.ypp -+++ b/Monicelli.ypp -@@ -23,7 +23,7 @@ - } - - %skeleton "lalr1.cc" --%require "3.0" -+%require "2.5" - %language "c++" - - %defines -@@ -31,9 +31,11 @@ - %locations - %token-table - --%define parse.error verbose --%define api.namespace {monicelli} --%define parser_class_name {Parser} -+%{ -+#define YYERROR_VERBOSE -+%} -+%define namespace monicelli -+%define parser_class_name Parser - - %lex-param {Scanner &scanner} - %parse-param {Scanner &scanner} diff --git a/cmake/features.cpp b/cmake/features.cpp deleted file mode 100644 index e28e8e6..0000000 --- a/cmake/features.cpp +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -/** - * Minimum program containing all required C++11 features. - * If a compiler cannot compile this, then it won't compile Monicelli. - */ - -#include -#include -#include - -#include -#include -#include - - -const int global_i = 0; - -template -using Foo = std::vector; - -struct TestingReferenceBinding { - TestingReferenceBinding(int const& ii) { - assert(&ii == &global_i); - } - - void operator=(int const& ii) { - assert(&ii == &global_i); - } - - void operator=(int&&) { - assert(false); - } -}; - -enum class Dummy { - FOO, BAR, BAZ -}; - -class Banana { - int yep() const noexcept { - return 0; - } - - virtual void something() {} -}; - -class Phone: public Banana { - virtual void something() override {} -}; - -int main() { - std::unique_ptr foo(new int{0}); - std::vector bar = {1, 2, 3}; - for (int baz: bar) { - baz += 1; - } - char *str = nullptr; - Banana a; - Banana b = std::move(a); - long c = std::stol("100"); - - // Boost::Optional sanity check for old compilers - int const& iref = global_i; - assert(&iref == &global_i); - - TestingReferenceBinding ttt = global_i; - ttt = global_i; - - TestingReferenceBinding ttt2 = iref; - ttt2 = iref; -} diff --git a/cmake/macosx_homebrew.cmake b/cmake/macosx_homebrew.cmake deleted file mode 100644 index 947d335..0000000 --- a/cmake/macosx_homebrew.cmake +++ /dev/null @@ -1,75 +0,0 @@ -# -# Monicelli: an esoteric language compiler -# -# Copyright (C) 2014 Stefano Sanfilippo -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# - -# The following macro has been adapted from -# https://gist.github.com/steakknife/c36c99b51703fc6f6c1b - -# If is running under Mac OS X and the Homebrew package manager is installed, -# then Homebrew-specific directories for the given package are automatically -# added to the appropriate CMake search paths. - -macro(find_package_prefer_brew _package) - unset(_brew_path) - unset(_brew_pkg_lower) - unset(_has_brew) - find_program(_has_brew NAMES brew DOC "path to Homebrew executable") - if(_has_brew) - string(TOLOWER ${_package} _brew_pkg_lower) - execute_process(COMMAND brew --prefix ${_brew_pkg_lower} ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE _brew_path) - if(EXISTS ${_brew_path}) - set(_brew_cmake_module_path ${CMAKE_MODULE_PATH}) - set(_brew_cmake_program_path ${CMAKE_PROGRAM_PATH}) - set(_brew_cmake_include_path ${CMAKE_INCLUDE_PATH}) - set(_brew_cmake_library_path ${CMAKE_LIBRARY_PATH}) - - if(NOT CMAKE_MODULE_PATH) - list(INSERT CMAKE_MODULE_PATH 0 "${_brew_path}") - endif() - - if(NOT CMAKE_PROGRAM_PATH AND EXISTS "${_brew_path}/bin") - list(INSERT CMAKE_PROGRAM_PATH 0 "${_brew_path}/bin") - endif() - - if(NOT CMAKE_INCLUDE_PATH AND EXISTS "${_brew_path}/include") - list(INSERT CMAKE_INCLUDE_PATH 0 "${_brew_path}/include") - endif() - - if(NOT CMAKE_LIBRARY_PATH AND EXISTS "${_brew_path}/lib") - list(INSERT CMAKE_LIBRARY_PATH 0 "${_brew_path}/lib") - endif() - - find_package(${_package} ${ARGN}) - - set(CMAKE_MODULE_PATH ${_brew_cmake_module_path}) - set(CMAKE_PROGRAM_PATH ${_brew_cmake_program_path}) - set(CMAKE_INCLUDE_PATH ${_brew_cmake_include_path}) - 
set(CMAKE_LIBRARY_PATH ${_brew_cmake_library_path}) - - unset(_brew_cmake_module_path) - unset(_brew_cmake_program_path) - unset(_brew_cmake_include_path) - unset(_brew_cmake_library_path) - else() - find_package(${_package} ${ARGN}) - endif() - else() - find_package(${_package} ${ARGN}) - endif() -endmacro(find_package_prefer_brew) diff --git a/cmake/package.cmake b/cmake/package.cmake deleted file mode 100644 index 747141f..0000000 --- a/cmake/package.cmake +++ /dev/null @@ -1,42 +0,0 @@ -# -# Monicelli: an esoteric language compiler -# -# Copyright (C) 2014 Stefano Sanfilippo -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# - -include(InstallRequiredSystemLibraries) - -set(CPACK_GENERATOR "TGZ;DEB;RPM;ZIP") - -set(CPACK_PACKAGE_DESCRIPTION_SUMMARY - "Monicelli: an esoteric programming language, come se fosse antani." 
-) -set(CPACK_PACKAGE_VENDOR "Stefano Sanfilippo") -set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Stefano Sanfilippo") - -set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.md") -set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt") - -set(CPACK_PACKAGE_VERSION_MAJOR "1") -set(CPACK_PACKAGE_VERSION_MINOR "0") -set(CPACK_PACKAGE_VERSION_PATCH "0") - -set(CPACK_PACKAGE_INSTALL_DIRECTORY - "monicelli-${CMake_VERSION_MAJOR}.${CMake_VERSION_MINOR}" -) - -include(CPack) - diff --git a/examples/Makefile b/examples/Makefile new file mode 100644 index 0000000..6b9ecee --- /dev/null +++ b/examples/Makefile @@ -0,0 +1,10 @@ +MCC=mcc +EXAMPLES=factorial hello-world primes return fibonacci mandelbrot float + +all: $(EXAMPLES) + +clean: + $(RM) $(EXAMPLES) + +%: %.mc + $(MCC) $< -o $@ diff --git a/examples/turtle/.gitignore b/examples/turtle/.gitignore deleted file mode 100644 index 959feb0..0000000 --- a/examples/turtle/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/tartaruga -/turtle.bc -/barilotto.png diff --git a/examples/turtle/Makefile b/examples/turtle/Makefile index fa8e0f3..a37875b 100644 --- a/examples/turtle/Makefile +++ b/examples/turtle/Makefile @@ -1,6 +1,14 @@ -compile: - # Make sure to have mcc somewhere in path - mcc turtle.mm turtle.mc - llc turtle.bc - c++ turtle.s turtle.cpp -I../.. 
-lcairo -o tartaruga - rm -f turtle.s +MCC=mcc + +all: turtle + +clean: + $(RM) *.o turtle + +turtle.mc:; + +%.mc.o: %.mc + $(MCC) -c $< -o $@ + +turtle: turtle.mc.o turtle.cpp + $(CXX) -std=c++11 $^ -lcairo -o $@ diff --git a/examples/turtle/turtle.cpp b/examples/turtle/turtle.cpp index bf7d619..1dbd454 100644 --- a/examples/turtle/turtle.cpp +++ b/examples/turtle/turtle.cpp @@ -1,103 +1,102 @@ -#include "Runtime.h" - #include + +#include +#include #include - -class Turtle { +class Turtle final { public: - Turtle(int sizeX, int sizeY) { - surface = cairo_image_surface_create(CAIRO_FORMAT_RGB24, sizeX, sizeY); - context = cairo_create(surface); - reset(); - } + Turtle(int sizeX, int sizeY) { + surface = cairo_image_surface_create(CAIRO_FORMAT_RGB24, sizeX, sizeY); + context = cairo_create(surface); + reset(); + } - virtual ~Turtle() { - cairo_destroy(context); - cairo_surface_destroy(surface); - } + ~Turtle() { + cairo_destroy(context); + cairo_surface_destroy(surface); + } - void reset() { - setColor(1, 1, 1); - paint(); - setLineWidth(3); - setColor(1, 0, 0); - moveTo(0, 0); - } + void reset() { + setColor(1, 1, 1); + paint(); + setLineWidth(3); + setColor(1, 0, 0); + moveTo(0, 0); + } - void setColor(double r, double g, double b) { - cairo_set_source_rgb(context, r, g, b); - } + void setColor(double r, double g, double b) { + cairo_set_source_rgb(context, r, g, b); + } - void setLineWidth(double size) { - cairo_set_line_width(context, size); - } + void setLineWidth(double size) { + cairo_set_line_width(context, size); + } - void paint() { - cairo_paint(context); - } + void paint() { + cairo_paint(context); + } - void moveTo(double x, double y) { - cairo_move_to(context, x, y); - } + void moveTo(double x, double y) { + cairo_move_to(context, x, y); + } - void rectangleTo(double width, double height) { - double baseX; - double baseY; + void rectangleTo(double width, double height) { + double baseX; + double baseY; - cairo_get_current_point(context, &baseX, 
&baseY); - cairo_rectangle(context, baseX, baseY, width, height); - cairo_fill(context); + cairo_get_current_point(context, &baseX, &baseY); + cairo_rectangle(context, baseX, baseY, width, height); + cairo_fill(context); - moveTo(baseX, baseY); - } + moveTo(baseX, baseY); + } - void lineTo(double x, double y) { - cairo_line_to(context, x, y); - cairo_stroke(context); - moveTo(x, y); - } + void lineTo(double x, double y) { + cairo_line_to(context, x, y); + cairo_stroke(context); + moveTo(x, y); + } - void save(char const* filename) { - cairo_surface_write_to_png(surface, filename); - } + void save(char const* filename) { + cairo_surface_write_to_png(surface, filename); + } - void save(std::string const& filename) { - save(filename.c_str()); - } + void save(std::string const& filename) { + save(filename.c_str()); + } private: - cairo_surface_t *surface; - cairo_t *context; + cairo_surface_t *surface; + cairo_t *context; }; -static Turtle *turtle = 0; +static std::unique_ptr turtle; extern "C" { -void cofandina(Monicelli_Int x, Monicelli_Int y) { - if (turtle != 0) delete turtle; - turtle = new Turtle(x, y); +void cofandina(int32_t x, int32_t y) { + turtle.reset(new Turtle(x, y)); } -void pulitina(Monicelli_Double r, Monicelli_Double g, Monicelli_Double b) { - turtle->setColor(r, g, b); +void pulitina(double r, double g, double b) { + turtle->setColor(r, g, b); } -void pastene(Monicelli_Double size) { - turtle->setLineWidth(size); +void pastene(double size) { + turtle->setLineWidth(size); } -void muovi(Monicelli_Double x, Monicelli_Double y) { - turtle->moveTo(x, y); +void muovi(double x, double y) { + turtle->moveTo(x, y); } -void ispettore(Monicelli_Double x, Monicelli_Double y) { - turtle->lineTo(x, y); +void ispettore(double x, double y) { + turtle->lineTo(x, y); } void barilotto() { - turtle->save("barilotto.png"); + turtle->save("barilotto.png"); } } // extern diff --git a/examples/turtle/turtle.mc b/examples/turtle/turtle.mc index 5d450bf..d8c04f9 100644 --- 
a/examples/turtle/turtle.mc +++ b/examples/turtle/turtle.mc @@ -1,3 +1,8 @@ +blinda la supercazzola cofandina con x Necchi, y Necchi o scherziamo? +blinda la supercazzola pastene con w Sassaroli o scherziamo? +blinda la supercazzola ispettore con x Sassaroli, y Sassaroli o scherziamo? +blinda la supercazzola barilotto o scherziamo? + Lei ha clacsonato prematurata la supercazzola cofandina con 200, 200 o scherziamo? prematurata la supercazzola pastene con 3 o scherziamo? diff --git a/examples/turtle/turtle.mm b/examples/turtle/turtle.mm deleted file mode 100644 index 6ebdffd..0000000 --- a/examples/turtle/turtle.mm +++ /dev/null @@ -1,26 +0,0 @@ -help: - Turtle graphics in Monicelli! - -source: - - Turtle.cpp - -functions: - cofandina: - args: {x: int, y: int} - type: void - help: Crea una superficie di x per y pixel. - pulitina: - args: {r: double, g: double, b: double} - help: Imposta il colore dell'output. - pastene: - args: {size: double} - help: Imposta la dimensione in pixel della penna. - muovi: - args: {x: double, y: double} - help: Porta la penna nel punto specificato. - ispettore: - args: {x: double, y: double} - help: Traccia una linea dal punto corrente al punto specificato. - barilotto: - docs: Salva l'immagine come barilotto.png - diff --git a/src/BitcodeEmitter.cpp b/src/BitcodeEmitter.cpp deleted file mode 100644 index 5a4f695..0000000 --- a/src/BitcodeEmitter.cpp +++ /dev/null @@ -1,796 +0,0 @@ -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "BitcodeEmitter.hpp" -#include "Scope.hpp" -#include "Nodes.hpp" -#include "ModuleRegistry.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -// Yes, that's right, no ending ; -#define GUARDED(call) if (!(call)) return false - - -using namespace monicelli; -using llvm::getGlobalContext; - - -struct BitcodeEmitter::Private { - llvm::Value *retval = nullptr; - llvm::AllocaInst *funcRetval = nullptr; - llvm::BasicBlock *funcExit = nullptr; - - llvm::IRBuilder<> builder = llvm::IRBuilder<>(getGlobalContext()); - Scope scope; - Pointer optimizer; -}; - -static -llvm::AllocaInst* allocateVar(llvm::Function *func, Id const& name, llvm::Type *type) { - llvm::IRBuilder<> builder(&func->getEntryBlock(), func->getEntryBlock().begin()); - return builder.CreateAlloca(type, 0, name.getValue().c_str()); -} - -static -llvm::AllocaInst* allocateReturnVariable(llvm::Function *func) { - llvm::IRBuilder<> builder(&func->getEntryBlock(), func->getEntryBlock().begin()); - return builder.CreateAlloca(func->getReturnType(), 0, "result"); -} - -static -bool reportError(Localizable const& node, std::initializer_list const& what) { - std::cerr << "line " << node.getLocation().begin.line << ", "; - std::cerr << "col " << node.getLocation().begin.column << ": "; - - for (std::string const& chunk: what) { - std::cerr << chunk << ' '; - } - std::cerr << std::endl; - - return false; -} - -#define I64 llvm::Type::getInt64Ty(getGlobalContext()) -#define I8 llvm::Type::getInt8Ty(getGlobalContext()) -#define I1 llvm::Type::getInt1Ty(getGlobalContext()) -#define F llvm::Type::getFloatTy(getGlobalContext()) -#define D llvm::Type::getDoubleTy(getGlobalContext()) -#define V 
llvm::Type::getVoidTy(getGlobalContext()) - -static const std::unordered_map> TYPECAST_MAP = { - {I64, { {I8, I64}, {I1, I64}, { F, D}, {D, D}}}, - { I8, {{I64, I64}, {I1, I8}, { F, F}, {D, D}}}, - { I1, {{I64, I64}, {I8, I8}, { F, F}, {D, D}}}, - { F, {{I64, D}, {I8, F}, {I1, F}, {D, D}}}, - { D, {{I64, D}, {I8, D}, {I1, D}, { F, D} }} -}; - -static -Type MonicelliType(llvm::Type const* type) { - if (type == I64) { - return Type::INT; - } else if (type == I8) { - return Type::CHAR; - } else if (type == I1) { - return Type::BOOL; - } else if (type == D) { - return Type::DOUBLE; - } else if (type == F) { - return Type::FLOAT; - } else if (type == V) { - return Type::VOID; - } - - return Type::UNKNOWN; -} - -static -llvm::Type *LLVMType(Type const& type) { - switch (type) { - case Type::INT: - return I64; - case Type::CHAR: - return I8; - case Type::FLOAT: - return F; - case Type::BOOL: - return I1; - case Type::DOUBLE: - return D; - case Type::VOID: - return V; - case Type::UNKNOWN: - return nullptr; // FIXME - } - - return nullptr; -} - -static -llvm::Type* deduceResultType(llvm::Value *left, llvm::Value *right) { - llvm::Type *lt = left->getType(); - llvm::Type *rt = right->getType(); - - if (lt == rt) return rt; - - auto subTable = TYPECAST_MAP.find(lt); - if (subTable != TYPECAST_MAP.end()) { - auto resultType = subTable->second.find(rt); - if (resultType != subTable->second.end()) return resultType->second; - } - - return nullptr; -} - -#undef I64 -#undef I8 -#undef I1 -#undef F -#undef D -#undef V - -static inline -bool isFP(llvm::Type *type) { - return type->isFloatTy() || type->isDoubleTy(); -} - -static inline -bool isInt(llvm::Type *type) { - return type->isIntegerTy(); -} - -static -llvm::Value* coerce(BitcodeEmitter::Private *d, llvm::Value *val, llvm::Type *toType) { - llvm::Type *fromType = val->getType(); - - if (fromType == toType) return val; - - if (isInt(toType)) { - if (isFP(fromType)) { - return d->builder.CreateFPToSI(val, toType); - } else if 
(isInt(fromType)) { - return d->builder.CreateSExtOrTrunc(val, toType); - } - } - else if (isFP(toType) && isInt(fromType)) { - return d->builder.CreateSIToFP(val, toType); - } - else if (fromType->isFloatTy() && toType->isDoubleTy()) { - return d->builder.CreateFPExt(val, toType); - } - else if (fromType->isDoubleTy() && toType->isFloatTy()) { - return d->builder.CreateFPTrunc(val, toType); - } - - return nullptr; -} - -static -llvm::Value* isTrue(BitcodeEmitter::Private *d, llvm::Value* test, llvm::Twine const& label="") { - llvm::Value *one = llvm::ConstantInt::get(getGlobalContext(), llvm::APInt(1, 0)); - return d->builder.CreateICmpNE( - coerce(d, test, one->getType()), one, label - ); -} - -static const std::map PUT_NAMES = {{ - {Type::BOOL, "__Monicelli_putBool"}, - {Type::CHAR, "__Monicelli_putChar"}, - {Type::FLOAT, "__Monicelli_putFloat"}, - {Type::DOUBLE, "__Monicelli_putDouble"}, - {Type::INT, "__Monicelli_putInt"} -}}; - -static const std::map GET_NAMES = { - {Type::BOOL, "__Monicelli_getBool"}, - {Type::CHAR, "__Monicelli_getChar"}, - {Type::FLOAT, "__Monicelli_getFloat"}, - {Type::DOUBLE, "__Monicelli_getDouble"}, - {Type::INT, "__Monicelli_getInt"} -}; - -static const std::string ABORT_NAME = "__Monicelli_abort"; -static const std::string ASSERT_NAME = "__Monicelli_assert"; - -static -bool convertAndStore(BitcodeEmitter::Private *d, llvm::AllocaInst *dest, llvm::Value *expression) { - llvm::Type *varType = dest->getAllocatedType(); - expression = coerce(d, expression, varType); - if (expression == nullptr) return false; - d->builder.CreateStore(expression, dest); - return true; -} - -BitcodeEmitter::BitcodeEmitter() { - module = std::unique_ptr( - new llvm::Module("monicelli", getGlobalContext()) - ); - d = new Private; - - d->optimizer = Pointer( - new llvm::legacy::FunctionPassManager(module.get()) - ); - - d->optimizer->add(llvm::createBasicAliasAnalysisPass()); - d->optimizer->add(llvm::createInstructionCombiningPass()); - 
d->optimizer->add(llvm::createReassociatePass()); - d->optimizer->add(llvm::createGVNPass()); - d->optimizer->add(llvm::createCFGSimplificationPass()); - d->optimizer->doInitialization(); -} - -BitcodeEmitter::~BitcodeEmitter() { - delete d; -} - -bool BitcodeEmitter::emit(Return const& node) { - if (node.getExpression()) { - GUARDED(node.getExpression()->emit(this)); - llvm::Type *type = d->builder.GetInsertBlock()->getParent()->getReturnType(); - assert(d->funcRetval != nullptr); - d->builder.CreateStore(coerce(d, d->retval, type), d->funcRetval); - } - - d->builder.CreateBr(d->funcExit); - - return true; -} - -bool BitcodeEmitter::emit(Loop const& node) { - llvm::Function *father = d->builder.GetInsertBlock()->getParent(); - - llvm::BasicBlock *body = llvm::BasicBlock::Create( - getGlobalContext(), "loop", father - ); - - d->builder.CreateBr(body); - d->builder.SetInsertPoint(body); - - llvm::BasicBlock *condition = llvm::BasicBlock::Create( - getGlobalContext(), "loopcondition" - ); - - GUARDED(ensureBasicBlock(node.getBody(), condition)); - - father->getBasicBlockList().push_back(condition); - d->builder.SetInsertPoint(condition); - - GUARDED(node.getCondition().emit(this)); - - llvm::Value *loopTest = isTrue(d, d->retval, "looptest"); - - llvm::BasicBlock *after = llvm::BasicBlock::Create( - getGlobalContext(), "afterloop", father - ); - - d->builder.CreateCondBr(loopTest, body, after); - d->builder.SetInsertPoint(after); - - return true; -} - -bool BitcodeEmitter::emit(VarDeclaration const& node) { - llvm::Function *father = d->builder.GetInsertBlock()->getParent(); - llvm::Type *varType = LLVMType(node.getType()); - llvm::AllocaInst *alloc = allocateVar(father, node.getId(), varType); - - if (node.getInitializer()) { - GUARDED(node.getInitializer()->emit(this)); - if (!convertAndStore(d, alloc, d->retval)) { - return reportError(node, { - "Invalid inizializer for variable", node.getId().getValue() - }); - } - } - - // TODO pointers - - 
d->scope.push(node.getId().getValue(), alloc); - - return true; -} - -bool BitcodeEmitter::emit(Assignment const& node) { - auto var = d->scope.lookup(node.getName().getValue()); - - if (!var) { - return reportError(node, { - "Attempting assignment to undefined variable", - node.getName().getValue() - }); - } - - GUARDED(node.getValue().emit(this)); - if (!convertAndStore(d, *var, d->retval)) { - return reportError(node, { - "Invalid assignment to variable", node.getName().getValue() - }); - } - - return true; -} - -bool BitcodeEmitter::emit(Print const& node) { - std::vector callargs; - GUARDED(node.getExpression().emit(this)); - callargs.push_back(d->retval); - - Type printType = MonicelliType(d->retval->getType()); - - if (printType == Type::UNKNOWN) { - return reportError(node, {"Attempting to print unknown type"}); - } - - auto toCall = PUT_NAMES.find(printType); - - if (toCall == PUT_NAMES.end()) { - return reportError(node, {"Unknown print function for type"}); - } - - llvm::Function *callee = module->getFunction(toCall->second); - - if (callee == nullptr) { - return reportError(node, {"Print function was not registered"}); - } - - d->builder.CreateCall(callee, callargs); - - return true; -} - -bool BitcodeEmitter::emit(Input const& node) { - auto lookupResult = d->scope.lookup(node.getVariable().getValue()); - - if (!lookupResult) { - return reportError(node, { - "Attempting to read undefined variable", - node.getVariable().getValue() - }); - } - - llvm::AllocaInst *variable = *lookupResult; - Type inputType = MonicelliType(variable->getAllocatedType()); - - if (inputType == Type::UNKNOWN) { - return reportError(node, {"Attempting to read unknown type"}); - } - - auto toCall = GET_NAMES.find(inputType); - - if (toCall == GET_NAMES.end()) { - return reportError(node, { - "Unknown input function for type" - }); - } - - llvm::Function *callee = module->getFunction(toCall->second); - - if (callee == nullptr) { - return reportError(node, { - "Input function was 
not registered for type" - }); - } - - llvm::Value *readval = d->builder.CreateCall(callee); - d->builder.CreateStore(readval, variable); - - return true; -} - -bool BitcodeEmitter::emit(Abort const& node) { - llvm::Function *callee = module->getFunction(ABORT_NAME); - - if (callee == nullptr) { - return reportError(node, {"Abort function was not registered"}); - } - - d->builder.CreateCall(callee); - - return true; -} - -bool BitcodeEmitter::emit(Assert const& node) { - llvm::Function *callee = module->getFunction(ASSERT_NAME); - - if (callee == nullptr) { - return reportError(node, {"Assert function was not registered"}); - } - - node.getExpression().emit(this); - d->builder.CreateCall(callee, {coerce(d, d->retval, LLVMType(Type::BOOL))}); - - return true; -} - -bool BitcodeEmitter::emit(FunctionCall const& node) { - llvm::Function *callee = module->getFunction(node.getName().getValue()); - - if (callee == 0) { - return reportError(node, { - "Attempting to call undefined function", - node.getName().getValue() + "()" - }); - } - - if (callee->arg_size() != node.getArgs().size()) { - return reportError(node, { - "Argument number mismatch in call of", - node.getName().getValue() + "()", - "expected", std::to_string(callee->arg_size()), - "given", std::to_string(node.getArgs().size()) - }); - } - - auto param = callee->getArgumentList().begin(); - std::vector callargs; - for (Expression const& arg: node.getArgs()) { - GUARDED(arg.emit(this)); - callargs.push_back(coerce(d, d->retval, param->getType())); - ++param; - } - - d->retval = d->builder.CreateCall(callee, callargs); - - return true; -} - -bool BitcodeEmitter::emit(Branch const& node) { - Branch::Body const& body = node.getBody(); - llvm::Function *func = d->builder.GetInsertBlock()->getParent(); - - llvm::BasicBlock *thenbb = llvm::BasicBlock::Create( - getGlobalContext(), "then", func - ); - llvm::BasicBlock *elsebb = llvm::BasicBlock::Create( - getGlobalContext(), "else" - ); - llvm::BasicBlock *mergebb = 
llvm::BasicBlock::Create( - getGlobalContext(), "endif" - ); - - assert(!body.getCases().empty()); - BranchCase const& last = body.getCases().back(); - - for (BranchCase const& cas: body.getCases()) { - emitSemiExpression(node.getVar(), cas.getCondition()); - d->builder.CreateCondBr( - isTrue(d, d->retval, "condition"), thenbb, elsebb - ); - d->builder.SetInsertPoint(thenbb); - - GUARDED(ensureBasicBlock(cas.getBody(), mergebb)); - - func->getBasicBlockList().push_back(elsebb); - d->builder.SetInsertPoint(elsebb); - - if (&cas != &last) { - thenbb = llvm::BasicBlock::Create(getGlobalContext(), "then", func); - elsebb = llvm::BasicBlock::Create(getGlobalContext(), "else"); - } - } - - if (body.getElse()) { - GUARDED(ensureBasicBlock(*body.getElse(), mergebb)); - } else { - d->builder.CreateBr(mergebb); - } - - func->getBasicBlockList().push_back(mergebb); - d->builder.SetInsertPoint(mergebb); - - return true; -} - -bool BitcodeEmitter::emit(FunctionPrototype const& node) { - std::vector argTypes; - - for (FunArg const& arg: node.getArgs()) { - argTypes.emplace_back(LLVMType(arg.getType())); - } - - std::unordered_set argsSet; - for (FunArg const& arg: node.getArgs()) { - std::string const& name = arg.getName().getValue(); - if (argsSet.find(name) != argsSet.end()) { - return reportError(node, { - "Two arguments with same name to function", - node.getName().getValue() + "():", name - }); - } - argsSet.insert(name); - } - - llvm::FunctionType *ftype = llvm::FunctionType::get( - LLVMType(node.getType()), argTypes, false - ); - - llvm::Function *func = llvm::Function::Create( - ftype, llvm::Function::ExternalLinkage, node.getName().getValue(), module.get() - ); - - if (func->getName() != node.getName().getValue()) { - func->eraseFromParent(); - func = module->getFunction(node.getName().getValue()); - - if (!func->empty()) { - return reportError(node, { - "Redefining function", node.getName().getValue() - }); - } - - if (func->arg_size() != node.getArgs().size()) { - 
return reportError(node, { - "Argument number mismatch in definition vs declaration of", - node.getName().getValue() + "()", - "expected", std::to_string(func->arg_size()), - "given", std::to_string(node.getArgs().size()) - }); - } - } - - auto argToEmit = func->arg_begin(); - for (FunArg const& arg: node.getArgs()) { - argToEmit->setName(arg.getName().getValue()); - ++argToEmit; - } - - d->retval = func; - - return true; -} - -bool BitcodeEmitter::emit(Function const& node) { - GUARDED(node.getPrototype().emit(this)); - llvm::Function *func = dynamic_cast(d->retval); - - assert(func != nullptr); - - llvm::BasicBlock *bb = llvm::BasicBlock::Create( - getGlobalContext(), "entry", func - ); - d->builder.SetInsertPoint(bb); - - bool isNotVoid = node.getPrototype().getType() != Type::VOID; - - d->funcRetval = isNotVoid? allocateReturnVariable(func): nullptr; - d->funcExit = llvm::BasicBlock::Create(getGlobalContext(), "return"); - - d->scope.enter(); - - auto argToAlloc = func->arg_begin(); - for (FunArg const& arg: node.getPrototype().getArgs()) { - llvm::AllocaInst *alloc = allocateVar( - func, arg.getName(), LLVMType(arg.getType()) - ); - d->builder.CreateStore(argToAlloc, alloc); - d->scope.push(arg.getName().getValue(), alloc); - ++argToAlloc; - } - - for (Statement const& stat: node.getBody()) { - GUARDED(stat.emit(this)); - } - - d->scope.leave(); - - if (!d->builder.GetInsertBlock()->getTerminator()) { - d->builder.CreateBr(d->funcExit); - } - - func->getBasicBlockList().push_back(d->funcExit); - d->builder.SetInsertPoint(d->funcExit); - - if (isNotVoid) { - d->builder.CreateRet(d->builder.CreateLoad(d->funcRetval)); - } else { - d->builder.CreateRetVoid(); - } - - verifyFunction(*func); - - d->optimizer->run(*func); - - return true; -} - -bool BitcodeEmitter::emit(Module const& node) { - return true; -} - -bool BitcodeEmitter::emit(Program const& program) { - auto const& externals = getModuleRegistry().getRegisteredFunctions(); - for (FunctionPrototype const& 
proto: externals) { - GUARDED(proto.emit(this)); - } - - for (Function const& function: program.getFunctions()) { - GUARDED(function.getPrototype().emit(this)); - } - - for (Function const& function: program.getFunctions()) { - GUARDED(function.emit(this)); - } - - if (program.getMain()) { - GUARDED(program.getMain()->emit(this)); - } - - verifyModule(*module); - - return true; -} - -bool BitcodeEmitter::emit(Id const& node) { - auto value = d->scope.lookup(node.getValue()); - - if (!value) { - return reportError(node, { - "Undefined variable", node.getValue() - }); - } - - d->retval = d->builder.CreateLoad(*value, node.getValue().c_str()); - - return true; -} - -bool BitcodeEmitter::emit(Integer const& node) { - d->retval = llvm::ConstantInt::get( - getGlobalContext(), llvm::APInt(64, node.getValue(), true) - ); - - return true; -} - -bool BitcodeEmitter::emit(Float const& node) { - d->retval = llvm::ConstantFP::get( - getGlobalContext(), llvm::APFloat(node.getValue()) - ); - - return true; -} - -#define HANDLE(intop, fpop) \ - if (fp) { \ - d->retval = d->builder.Create##fpop(left, right); \ - } else { \ - d->retval = d->builder.Create##intop(left, right); \ - } - -#define HANDLE_INT_ONLY(op, symbol) \ - if (fp) { \ - return reportError(node, {"Operator " #symbol " cannot be applied to float values!"}); \ - } else { \ - d->retval = d->builder.Create##op(left, right); \ - } - -static -bool createOp(BitcodeEmitter::Private *d, Localizable const& node, llvm::Value *left, Operator op, llvm::Value *right) { - llvm::Type *retType = deduceResultType(left, right); - - if (retType == nullptr) { - return reportError(node, {"Cannot combine operators."}); - } - - bool fp = isFP(retType); - - left = coerce(d, left, retType); - right = coerce(d, right, retType); - - if (left == nullptr || right == nullptr) { - return reportError(node, {"Cannot convert operators to result type."}); - } - - switch (op) { - case Operator::PLUS: - HANDLE(Add, FAdd) - break; - case Operator::MINUS: 
- HANDLE(Sub, FSub) - break; - case Operator::TIMES: - HANDLE(Mul, FMul) - break; - case Operator::DIV: - HANDLE(SDiv, FDiv) - break; - case Operator::SHL: - HANDLE_INT_ONLY(Shl, <<); - break; - case Operator::SHR: - HANDLE_INT_ONLY(LShr, >>); - break; - case Operator::LT: - HANDLE(ICmpULT, FCmpULT) - break; - case Operator::GT: - HANDLE(ICmpUGT, FCmpUGT) - break; - case Operator::GTE: - HANDLE(ICmpUGE, FCmpUGE) - break; - case Operator::LTE: - HANDLE(ICmpULE, FCmpULE) - break; - case Operator::EQ: - HANDLE(ICmpEQ, FCmpOEQ) - break; - } - - return true; -} - -#undef HANDLE -#undef HANDLE_INT_ONLY - -bool BitcodeEmitter::emit(BinaryExpression const& expression) { - GUARDED(expression.getLeft().emit(this)); - llvm::Value *left = d->retval; - - GUARDED(expression.getRight().emit(this)); - llvm::Value *right = d->retval; - - GUARDED(createOp(d, expression, left, expression.getOperator(), right)); - - return true; -} - -bool BitcodeEmitter::emitSemiExpression(Id const& left, SemiExpression const& right) { - GUARDED(left.emit(this)); - llvm::Value *lhs = d->retval; - - GUARDED(right.getLeft().emit(this)); - llvm::Value *rhs = d->retval; - - GUARDED(createOp(d, right, lhs, right.getOperator(), rhs)); - - return true; -} - -bool BitcodeEmitter::ensureBasicBlock(PointerList const& statements, llvm::BasicBlock *after) { - d->scope.enter(); - for (Statement const& statement: statements) { - GUARDED(statement.emit(this)); - } - d->scope.leave(); - - if (!d->builder.GetInsertBlock()->getTerminator()) { - d->builder.CreateBr(after); - } - - return true; -} - diff --git a/src/BitcodeEmitter.hpp b/src/BitcodeEmitter.hpp deleted file mode 100644 index 2a716a2..0000000 --- a/src/BitcodeEmitter.hpp +++ /dev/null @@ -1,81 +0,0 @@ -#ifndef BITCODE_HPP -#define BITCODE_HPP - -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General 
Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "Emitter.hpp" -#include "Pointers.hpp" - - -namespace llvm { - class Module; - class Function; - class BasicBlock; -} - -namespace monicelli { - -class SemiExpression; -class Statement; - -class BitcodeEmitter: public Emitter { -public: - BitcodeEmitter(); - BitcodeEmitter(BitcodeEmitter &) = delete; - virtual ~BitcodeEmitter(); - - virtual bool emit(Return const&) override; - virtual bool emit(Loop const&) override; - virtual bool emit(VarDeclaration const&) override; - virtual bool emit(Assignment const&) override; - virtual bool emit(Print const&) override; - virtual bool emit(Input const&) override; - virtual bool emit(Abort const&) override; - virtual bool emit(Assert const&) override; - virtual bool emit(Branch const&) override; - virtual bool emit(FunctionPrototype const&) override; - virtual bool emit(Function const&) override; - virtual bool emit(Module const&) override; - virtual bool emit(Program const&) override; - - virtual bool emit(Id const&) override; - virtual bool emit(Integer const&) override; - virtual bool emit(Float const&) override; - virtual bool emit(FunctionCall const&) override; - virtual bool emit(BinaryExpression const&) override; - - llvm::Module const& getModule() const { - return *module; - } - - struct Private; - -private: - bool emitSemiExpression(Id const& left, SemiExpression const& right); - bool ensureBasicBlock(PointerList const& statements, llvm::BasicBlock *after); - - Pointer module; - Private *d; -}; - -} 
- -#endif - diff --git a/src/CLineParser.cpp b/src/CLineParser.cpp deleted file mode 100644 index 0f7b210..0000000 --- a/src/CLineParser.cpp +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "CLineParser.hpp" - -#include -#include -#include -#include -#include - -namespace po = boost::program_options; -using namespace monicelli; - -static po::variables_map CONFIG; -static const std::string VERSION_STRING = - "mcc version 1.0.0\n" - "\n" - "Monicelli compiler \n" - "\n" - "Copyright © 2014,2015 Stefano Sanfilippo\n" - "\n" - "This program comes with ABSOLUTELY NO WARRANTY;\n" - "This is free software, and you are welcome to redistribute it\n" - "under certain conditions; See LICENSE.txt for all details" -; -static const std::string USAGE_STRING = "mcc, Monicelli compiler.\n\nUsage: "; - -po::variables_map const& monicelli::getConfig() { - return CONFIG; -} - -void monicelli::parseCommandLine(int argc, char **argv) { - po::options_description desc( - USAGE_STRING + argv[0] + " [options] file.mc ..." 
- ); - desc.add_options() - ("help,h", "display this help message") - ("version,v", "display version") - ("c++,+", "emit C++ source code instead of LLVM bitcode") - ("input,i", po::value>(), "input files to process") - ; - - po::positional_options_description positional; - positional.add("input", -1); - - po::store( - po::command_line_parser(argc, argv) - .options(desc) - .positional(positional) - .run(), - CONFIG - ); - - po::notify(CONFIG); - - if (configHas("help")) { - std::cout << desc; - exit(0); - } - - if (configHas("version")) { - std::cout << VERSION_STRING << std::endl; - exit(0); - } -} diff --git a/src/CLineParser.hpp b/src/CLineParser.hpp deleted file mode 100644 index 46dae91..0000000 --- a/src/CLineParser.hpp +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef CLINE_PARSER_HPP -#define CLINE_PARSER_HPP - -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include - -namespace monicelli { - -boost::program_options::variables_map const& getConfig(); - -void parseCommandLine(int argc, char **argv); - -template inline -T config(std::string const& name) { - return getConfig()[name].as(); -} - -static inline -bool configHas(std::string const& name) { - return getConfig().count(name); -} - -} - -#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 76ea90e..90875a5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,92 +1,64 @@ -# -# Monicelli: an esoteric language compiler -# -# Copyright (C) 2014 Stefano Sanfilippo -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# +# Copyright 2017 the Monicelli project authors. All rights reserved. +# Use of this source code is governed by a GPLv3 license, see LICENSE.txt. -## 1. Find Flex and Bison +find_package(LLVM REQUIRED) +find_package(Ragel REQUIRED) -find_package(BISON REQUIRED) -find_package(FLEX 2.5 REQUIRED) +option(ENABLE_LINKER "Enable the Monicelli linker. Requires POSIX." ON) -if (BISON_VERSION VERSION_LESS 2.5) - message(FATAL_ERROR "At least Bison 2.5 is required.") -elseif(BISON_VERSION VERSION_LESS 3.0) - message("== Bison 2.5 was found. You have to apply cmake/bison2.patch...") +if (ENABLE_LINKER) + add_definitions(-DMONICELLI_ENABLE_LINKER) endif() -## 2. 
External components - -find_package(Boost 1.48 REQUIRED regex system filesystem program_options) -find_package(LLVM REQUIRED CONFIG) - -find_library(YAML_LIBRARIES yaml-cpp) -find_path(YAML_INCLUDE_DIRS yaml.h /usr/include/yaml-cpp/) - add_definitions( - ${Boost_DEFINITIONS} - ${LLVM_DEFINITIONS} + -std=c++11 + -Wall -Wextra -Werror + # The lexer uses implicit fallthroughs all over, but it's OK. + -Wno-implicit-fallthrough + ${LLVM_CXXFLAGS} + -g -O2 -UNDEBUG ) include_directories( - ${Boost_INCLUDE_DIRS} - ${LLVM_INCLUDE_DIRS} - ${YAML_INCLUDE_DIRS} + ${CMAKE_CURRENT_SOURCE_DIR} + ${LLVM_INCLUDE_DIR} ) -## 3. Build - -include_directories( - ${CMAKE_CURRENT_BINARY_DIR} - ${CMAKE_CURRENT_SOURCE_DIR} +link_directories( + ${LLVM_LIBRARY_DIR} ) -bison_target(Parser Monicelli.ypp ${CMAKE_CURRENT_BINARY_DIR}/Parser.cpp) -flex_target(Scanner Monicelli.lpp ${CMAKE_CURRENT_BINARY_DIR}/Lexer.cpp) -add_flex_bison_dependency(Scanner Parser) +add_custom_command( + OUTPUT lexer.rl.cpp + MAIN_DEPENDENCY lexer.rl + DEPENDS lexer.h + COMMAND ${RAGEL} -G2 "${CMAKE_CURRENT_SOURCE_DIR}/lexer.rl" -o lexer.rl.cpp + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + VERBATIM +) add_executable(mcc - main.cpp Nodes.cpp CLineParser.cpp - ModuleRegistry.cpp ModuleLoader.cpp - ${BISON_Parser_OUTPUTS} ${FLEX_Scanner_OUTPUTS} - CppEmitter.cpp BitcodeEmitter.cpp + main.cpp + asmgen.cpp + codegen.cpp + codegen.def + ast.cpp + ast.def + ast-visitor.h + ast-printer.cpp + parser.cpp + lexer.cpp + lexer.def + "${CMAKE_CURRENT_BINARY_DIR}/lexer.rl.cpp" + options.cpp + errors.cpp + support.cpp + location.h + iterators.h + types.def + operators.def ) -target_compile_options(mcc PRIVATE - ${LLVM_CXXFLAGS} ${Boost_CXXFLAGS} - -Wall -Wextra -Werror -Wno-unused-parameter -Wno-deprecated-register - -std=c++0x -DYYDEBUG=0 -) - -llvm_map_components_to_libnames(LLVM_LIBRARIES - support core native bitwriter -) - -target_link_libraries(mcc - ${Boost_LIBRARIES} - ${LLVM_LIBRARIES} - ${YAML_LIBRARIES} -) - -## 5. 
Build the runtime library too - -add_library(mcrt STATIC Runtime.c) - -## 6. Install targets - -install(TARGETS mcc DESTINATION bin/) -install(TARGETS mcrt DESTINATION lib/) +target_link_libraries(mcc ${LLVM_LIBS}) +install(TARGETS mcc RUNTIME DESTINATION bin) diff --git a/src/CppEmitter.cpp b/src/CppEmitter.cpp deleted file mode 100644 index 55acef8..0000000 --- a/src/CppEmitter.cpp +++ /dev/null @@ -1,340 +0,0 @@ -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include - -#include "Nodes.hpp" -#include "CppEmitter.hpp" -#include "Pointers.hpp" - -using namespace monicelli; - -// Yes, that's right, no ending ; -#define GUARDED(call) if (!(call)) return false - -static const std::string STATEMENT_TERMINATOR = ";\n"; -static const std::string BLOCK = " "; - - -void CppEmitter::indent() { - indent_chars += 1; -} - -void CppEmitter::dedent() { - indent_chars -= 1; -} - -bool CppEmitter::emitIndent() { - for (int i = 0; i < indent_chars; ++i) { - stream << BLOCK; - } - - return stream; -} - -bool CppEmitter::emit(Program const& program) { - for (Module m: program.getModules()) { - GUARDED(m.emit(this)); - stream << "\n"; - } - - if (!program.getModules().empty()) { - stream << "\n"; - } - - for (Function const& function: program.getFunctions()) { - emit(function.getPrototype()); - stream << ";\n"; - } - - if (!program.getFunctions().empty()) { - stream << "\n"; - } - - for (Function const& function: program.getFunctions()) { - GUARDED(function.emit(this)); - } - - if (program.getMain()) { - GUARDED(program.getMain()->emit(this)); - } - - return stream; -} - -bool CppEmitter::emitStatements(PointerList const& node) { - for (Statement const& s: node) { - emitIndent(); - GUARDED(s.emit(this)); - stream << STATEMENT_TERMINATOR; - } - return stream; -} - -bool CppEmitter::emitMain(Function const& main) { - stream << "int main() {\n"; - indent(); - emitStatements(main.getBody()); - dedent(); - stream << "}\n"; - return stream; -} - -bool CppEmitter::emit(Id const& id) { - stream << id.getValue(); - return stream; -} - -bool CppEmitter::emit(Integer const& num) { - stream << num.getValue(); - return stream; -} - -bool CppEmitter::emit(Float const& num) { - stream << num.getValue(); - return stream; -} - -bool CppEmitter::emit(Return const& node) { - stream << "return"; - - if (node.getExpression()) { - stream << ' '; - GUARDED(node.getExpression()->emit(this)); - } - - return stream; -} - -bool CppEmitter::emit(Print const& 
node) { - bool needsBraces = - (dynamic_cast(&node.getExpression()) == nullptr) - && - (dynamic_cast(&node.getExpression()) == nullptr) - ; - - stream << "std::cout << "; - if (needsBraces) { - stream << '('; - } - - GUARDED(node.getExpression().emit(this)); - - if (needsBraces) { - stream << ')'; - } - stream << " << std::endl"; - - return stream; -} - -bool CppEmitter::emit(Input const& node) { - stream << "std::cout << \""; - GUARDED(node.getVariable().emit(this)); - stream << "? \";\n"; - emitIndent(); - stream << "std::cin >> "; - GUARDED(node.getVariable().emit(this)); - - return stream; -} - -bool CppEmitter::emit(Abort const&) { - stream << "std::exit(1)"; - - return stream; -} - -bool CppEmitter::emit(Assert const& node) { - stream << "assert("; - GUARDED(node.getExpression().emit(this)); - stream << ")"; - - return stream; -} - -bool CppEmitter::emit(Loop const& loop) { - stream << "do {\n"; - indent(); - emitStatements(loop.getBody()); - dedent(); - emitIndent(); - stream << "} while ("; - GUARDED(loop.getCondition().emit(this)); - stream << ")"; - - return stream; -} - -bool CppEmitter::emitBranchCase(BranchCase const& node) { - emitBranchCondition(node.getCondition()); - stream << ") {\n"; - indent(); - emitStatements(node.getBody()); - dedent(); - emitIndent(); - stream << "}"; - - return stream; -} - -bool CppEmitter::emit(Branch const& branch) { - auto &body = branch.getBody(); - auto &var = branch.getVar(); - - stream << "if ("; - GUARDED(var.emit(this)); - - if (!body.getCases().empty()) { - BranchCase const& last = body.getCases().back(); - for (BranchCase const& cas: body.getCases()) { - emitBranchCase(cas); - if (&cas != &last) { - stream << " else if ("; - GUARDED(var.emit(this)); - } - } - } - - if (!body.getElse()) { - return stream; - } - - stream << " else {\n"; - indent(); - emitStatements(*body.getElse()); - dedent(); - emitIndent(); - stream << "}"; - - return stream; -} - -bool CppEmitter::emit(Assignment const& assignment) { - 
GUARDED(assignment.getName().emit(this)); - stream << " = "; - GUARDED(assignment.getValue().emit(this)); - - return stream; -} - - -bool CppEmitter::emitFunctionArglist(PointerList const& args) { - if (args.empty()) return stream; - - Expression const& last = args.back(); - for (Expression const& arg: args) { - GUARDED(arg.emit(this)); - if (&arg != &last) { - stream << ", "; - } - } - - return stream; -} - - -bool CppEmitter::emit(FunctionCall const& funcall) { - GUARDED(funcall.getName().emit(this)); - stream << "("; - emitFunctionArglist(funcall.getArgs()); - stream << ")"; - - return stream; -} - -bool CppEmitter::emit(Function const& function) { - emit(function.getPrototype()); - stream << " {\n"; - indent(); - emitStatements(function.getBody()); - dedent(); - stream << "}\n\n"; - - return stream; -} - -bool CppEmitter::emitFunctionParams(PointerList const& funargs) { - if (funargs.empty()) return stream; - - FunArg const& last = funargs.back(); - for (FunArg const& funarg: funargs) { - stream << funarg.getType() << (funarg.isPointer()? "* ": " "); - GUARDED(funarg.getName().emit(this)); - if (&funarg != &last) { - stream << ", "; - } - } - - return stream; -} - -bool CppEmitter::emit(Module const& module) { - bool system = (module.getType() == Module::SYSTEM); - stream << "#include " << (system? '<': '"') << module.getName() << (system? 
'>': '"'); - - return stream; -} - -bool CppEmitter::emit(FunctionPrototype const& proto) { - if (proto.getName().getValue() == "main") { - stream << "int "; - } else { - stream << proto.getType() << ' '; - } - GUARDED(proto.getName().emit(this)); - stream << "("; - emitFunctionParams(proto.getArgs()); - stream << ")"; - - return stream; -} - -bool CppEmitter::emit(VarDeclaration const& decl) { - stream << decl.getType() << ' '; - if (decl.isPointer()) stream << '*'; - GUARDED(decl.getId().emit(this)); - - if (decl.getInitializer()) { - stream << " = "; - GUARDED(decl.getInitializer()->emit(this)); - } - - return stream; -} - -bool CppEmitter::emit(BinaryExpression const& node) { - GUARDED(node.getLeft().emit(this)); - stream << ' ' << node.getOperator() << ' '; - GUARDED(node.getRight().emit(this)); - - return stream; -} - -bool CppEmitter::emitBranchCondition(SemiExpression const& node) { - bool braces = (dynamic_cast(&node.getLeft()) == nullptr); - - stream << ' ' << node.getOperator() << ' '; - if (braces) stream << "("; - GUARDED(node.getLeft().emit(this)); - if (braces) stream << ")"; - - return stream; -} - diff --git a/src/CppEmitter.hpp b/src/CppEmitter.hpp deleted file mode 100644 index 85a760a..0000000 --- a/src/CppEmitter.hpp +++ /dev/null @@ -1,73 +0,0 @@ -#ifndef CPPEMITTER_HPP -#define CPPEMITTER_HPP - -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "Emitter.hpp" - -#include - - -namespace monicelli { - -class CppEmitter: public Emitter { -public: - CppEmitter(std::ostream *stream): stream(*stream), indent_chars(0) {} - - virtual bool emit(Return const&) override; - virtual bool emit(Loop const&) override; - virtual bool emit(VarDeclaration const&) override; - virtual bool emit(Assignment const&) override; - virtual bool emit(Print const&) override; - virtual bool emit(Input const&) override; - virtual bool emit(Abort const&) override; - virtual bool emit(Assert const&) override; - virtual bool emit(Branch const&) override; - virtual bool emit(FunctionPrototype const&) override; - virtual bool emit(Function const&) override; - virtual bool emit(Module const&) override; - virtual bool emit(Program const&) override; - - virtual bool emit(FunctionCall const&) override; - virtual bool emit(Id const&) override; - virtual bool emit(Integer const&) override; - virtual bool emit(Float const&) override; - virtual bool emit(BinaryExpression const&) override; - -private: - bool emitIndent(); - bool emitFunctionParams(PointerList const& funargs); - bool emitFunctionArglist(PointerList const& args); - bool emitStatements(PointerList const& node); - bool emitBranchCondition(SemiExpression const& node); - bool emitBranchCase(BranchCase const& node); - bool emitMain(Function const& main); - - void indent(); - void dedent(); - - std::ostream &stream; - int indent_chars; -}; - -} - -#endif - diff --git a/src/Emitter.hpp b/src/Emitter.hpp deleted file mode 100644 index 53d7cc3..0000000 --- a/src/Emitter.hpp +++ /dev/null @@ -1,73 +0,0 @@ -#ifndef EMITTER_HPP -#define EMITTER_HPP - -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public 
License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -namespace monicelli { - -class Id; -class Number; -class Integer; -class Float; -class Return; -class Loop; -class VarDeclaration; -class Assignment; -class Print; -class Input; -class Abort; -class Assert; -class FunctionCall; -class Branch; -class Main; -class FunctionPrototype; -class Function; -class Module; -class Program; -class BinaryExpression; - - -class Emitter { -public: - virtual bool emit(Return const&) = 0; - virtual bool emit(Loop const&) = 0; - virtual bool emit(VarDeclaration const&) = 0; - virtual bool emit(Assignment const&) = 0; - virtual bool emit(Print const&) = 0; - virtual bool emit(Input const&) = 0; - virtual bool emit(Abort const&) = 0; - virtual bool emit(Assert const&) = 0; - virtual bool emit(FunctionPrototype const&) = 0; - virtual bool emit(Branch const&) = 0; - virtual bool emit(Function const&) = 0; - virtual bool emit(Module const&) = 0; - virtual bool emit(Program const&) = 0; - - virtual bool emit(Id const&) = 0; - virtual bool emit(Integer const&) = 0; - virtual bool emit(Float const&) = 0; - virtual bool emit(FunctionCall const&) = 0; - virtual bool emit(BinaryExpression const&) = 0; -}; - -} - -#endif - diff --git a/src/ModuleLoader.cpp b/src/ModuleLoader.cpp deleted file mode 100644 index 75d157d..0000000 --- a/src/ModuleLoader.cpp +++ /dev/null @@ -1,55 +0,0 @@ -#include "Nodes.hpp" -#include "ModuleRegistry.hpp" -#include "ModuleLoader.hpp" - -#include -#include - -using namespace monicelli; - - -static -Type 
toType(std::string const& value) { - if (value == "int") { - return Type::INT; - } else if (value == "float") { - return Type::FLOAT; - } else if (value == "double") { - return Type::DOUBLE; - } else if (value == "char") { - return Type::CHAR; - } else if (value == "bool") { - return Type::BOOL; - } else { - return Type::VOID; - } -} - -void monicelli::loadModule(std::string const& from, ModuleRegistry &to) { - YAML::Node module = YAML::LoadFile(from); - - if (!module["functions"]) return; - - for (auto const& proto: module["functions"]) { - PointerList *args = new PointerList(); - for (auto const& arg: proto.second["args"]) { - args->push_back(new FunArg( - new Id(arg.first.as()), - toType(arg.second.as()), - false - )); - } - - Type type; - if (proto.second["type"]) { - type = toType(proto.second["type"].as()); - } else { - type = Type::VOID; - } - - to.registerFunction(new FunctionPrototype( - new Id(proto.first.as()), type, args - )); - } -} - diff --git a/src/ModuleLoader.hpp b/src/ModuleLoader.hpp deleted file mode 100644 index 74f21da..0000000 --- a/src/ModuleLoader.hpp +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef MODULE_LOADER_HPP -#define MODULE_LOADER_HPP - -namespace monicelli { - -void loadModule(std::string const& from, monicelli::ModuleRegistry &to); - -} - -#endif diff --git a/src/ModuleRegistry.cpp b/src/ModuleRegistry.cpp deleted file mode 100644 index 13444c1..0000000 --- a/src/ModuleRegistry.cpp +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "ModuleRegistry.hpp" -#include "Pointers.hpp" -#include "Nodes.hpp" - - -using namespace monicelli; - -static ModuleRegistry globalRegistry; - -ModuleRegistry& monicelli::getModuleRegistry() { - return globalRegistry; -} - -struct ModuleRegistry::Private { - boost::ptr_unordered_set prototypes; -}; - -ModuleRegistry::ModuleRegistry() { - d = new Private; -} - -ModuleRegistry::~ModuleRegistry() { - delete d; -} - -PointerSet const& ModuleRegistry::getRegisteredFunctions() const { - return d->prototypes; -} - -void ModuleRegistry::registerFunction(FunctionPrototype *proto) { - d->prototypes.insert(proto); -} - -#define PUT(type, funcname) \ - new FunctionPrototype { \ - new Id {#funcname}, Type::VOID, \ - plist_of({ \ - new FunArg {new Id {"value"}, type, false} \ - }), \ - } - -#define GET(type, funcname) \ - new FunctionPrototype { \ - new Id {#funcname}, type, \ - new PointerList {}, \ - } - -void monicelli::registerStdLib(ModuleRegistry &r) { - r.registerFunction(PUT(Type::CHAR, __Monicelli_putBool)); - r.registerFunction(PUT(Type::CHAR, __Monicelli_putChar)); - r.registerFunction(PUT(Type::FLOAT, __Monicelli_putFloat)); - r.registerFunction(PUT(Type::DOUBLE, __Monicelli_putDouble)); - r.registerFunction(PUT(Type::INT, __Monicelli_putInt)); - r.registerFunction(GET(Type::CHAR, __Monicelli_getBool)); - r.registerFunction(GET(Type::CHAR, __Monicelli_getChar)); - r.registerFunction(GET(Type::FLOAT, __Monicelli_getFloat)); - r.registerFunction(GET(Type::DOUBLE, __Monicelli_getDouble)); - r.registerFunction(GET(Type::INT, __Monicelli_getInt)); - r.registerFunction(new FunctionPrototype { 
- new Id("__Monicelli_assert"), Type::VOID, - plist_of({ - new FunArg {new Id("condition"), Type::CHAR, false} - }) - }); - r.registerFunction(new FunctionPrototype { - new Id("__Monicelli_abort"), Type::VOID, - new PointerList {} - }); - -} - -#undef PUT -#undef GET diff --git a/src/ModuleRegistry.hpp b/src/ModuleRegistry.hpp deleted file mode 100644 index 8c7c565..0000000 --- a/src/ModuleRegistry.hpp +++ /dev/null @@ -1,48 +0,0 @@ -#ifndef MODULE_REGISTRY_HPP -#define MODULE_REGISTRY_HPP - -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include "Pointers.hpp" - -namespace monicelli { - -class FunctionPrototype; - -class ModuleRegistry { -public: - ModuleRegistry(); - ModuleRegistry(ModuleRegistry&) = delete; - virtual ~ModuleRegistry(); - - PointerSet const& getRegisteredFunctions() const; - void registerFunction(FunctionPrototype *proto); - -private: - struct Private; - Private *d; -}; - -ModuleRegistry& getModuleRegistry(); -void registerStdLib(ModuleRegistry &); - -} - -#endif diff --git a/src/Monicelli.lpp b/src/Monicelli.lpp deleted file mode 100644 index e29a6bf..0000000 --- a/src/Monicelli.lpp +++ /dev/null @@ -1,210 +0,0 @@ -%{ -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include "Scanner.hpp" -#include "Parser.hpp" - -#include - -using namespace monicelli; -typedef Parser::token token; - -#define YY_USER_ACTION location->begin.columns(yyleng); - -static inline -bool in(const char *sub, const std::string &str) { - return str.find(sub) != std::string::npos; -} - -%} - -%option ecs stack warn c++ -%option nodefault noyywrap nounput yylineno -%option yyclass="Scanner" - -DIGIT [0-9] -HEXDIGIT [0-9a-fA-F] -CHAR [a-zA-Z_] - -%x shift - -%% - -("#"|"bituma")[^\n]* {} - -"Lei ha clacsonato" { - return token::MAIN; -} -"vaffanzum" { - return token::RETURN; -} -"Necchi" { - lval->typeval = Type::INT; - return token::TYPENAME; -} -"Mascetti" { - lval->typeval = Type::CHAR; - return token::TYPENAME; -} -"Perozzi" { - lval->typeval = Type::FLOAT; - return token::TYPENAME; -} -"Melandri" { - lval->typeval = Type::BOOL; - return token::TYPENAME; -} -"Sassaroli" { - lval->typeval = Type::DOUBLE; - return token::TYPENAME; -} -"conte" { - return token::STAR; -} -"voglio" { - return token::VARDECL; -} -"come "("se ")?"fosse" { - return token::ASSIGN; -} -("il"|"lo"|"la"|"l'"|"i"|"gli"|"le"|"un"|"un'"|"una"|"dei"|"delle") { - return token::ARTICLE; -} -"pi"("ù"|"u`") { - return token::OP_PLUS; -} -"meno" { - return token::OP_MINUS; -} -"per" { - return token::OP_TIMES; -} -"diviso" { - return token::OP_DIV; -} -"con scappellamento a" { - BEGIN(shift); -} -"per" { - BEGIN(INITIAL); -} -"sinistra" { - return token::OP_SHL; -} -"destra" { - return token::OP_SHR; -} -"minore "("di"|"del") { - return token::OP_LT; -} -"maggiore "("di"|"del") { - return token::OP_GT; -} -"minore o uguale "("a"|"di") { - return token::OP_LTE; -} -"maggiore o uguale "("a"|"di") { - return token::OP_GTE; -} -"a posterdati" { - return token::PRINT; -} -"mi porga" { - return token::INPUT; -} -"ho visto" { - return token::ASSERT; -} -"!" 
{ - return token::BANG; -} -"stuzzica" { - return token::LOOP_BEGIN; -} -"e "("b"|"p")"rematura anche, se" { - return token::LOOP_CONDITION; -} -"che cos'"("è"|"e`") { - return token::BRANCH_CONDITION; -} -"?" { - return token::BRANCH_BEGIN; -} -"o tarapia tapioco" { - return token::BRANCH_ELSE; -} -"e velocit"("à"|"a`")" di esecuzione" { - return token::BRANCH_END; -} -":" { - return token::COLON; -} -"blinda la supercazzo"("r"|"l")"a" { - return token::FUN_DECL; -} -"con" { - return token::PARAMS; -} -"," { - return token::COMMA; -} -("b"|"p")"rematurata la supercazzo"("r"|"l")"a" { - return token::FUN_CALL; -} -"o scherziamo"("?")? { - return token::FUN_END; -} -"avvertite don ulrico" { - return token::ABORT; -} -"o magari" { - return token::CASE_END; -} - -"\n" { - location->begin.lines(); -} - -[ \t\f\v\r] { -} - -{CHAR}({DIGIT}|{CHAR})* { - lval->strval = new std::string(yytext); - return token::ID; -} - -[-+]?(({DIGIT}*".")?{DIGIT}+|{DIGIT}+".")([eE][-+]?{DIGIT}+)? { - std::string value(yytext); - - if (in(".", value) || in("e", value) || in("E", value)) { - lval->floatval = std::stod(value); - return token::FLOAT; - } else { - lval->intval = std::stol(value); - return token::NUMBER; - } -} - -. { - return token::ERROR; -} - -%% - diff --git a/src/Monicelli.ypp b/src/Monicelli.ypp deleted file mode 100644 index 9068d07..0000000 --- a/src/Monicelli.ypp +++ /dev/null @@ -1,434 +0,0 @@ -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -%code top { - #include "Nodes.hpp" - using namespace monicelli; -} - -%skeleton "lalr1.cc" -%require "3.0" -%language "c++" - -%defines -%locations -%token-table - -%define parse.error verbose -%define api.namespace {monicelli} -%define parser_class_name {Parser} - -%lex-param {Scanner &scanner} -%parse-param {Scanner &scanner} -%parse-param {Program &program} - -%code requires { - #include "Nodes.hpp" - - namespace monicelli { - class Scanner; - } -} - -%code { - static int yylex(Parser::semantic_type*, Parser::location_type*, Scanner&); -} - -%token MAIN ERROR -%token RETURN -%token ARTICLE TYPENAME STAR -%token VARDECL ASSIGN -%token PRINT INPUT -%token ASSERT BANG -%token LOOP_BEGIN LOOP_CONDITION -%token BRANCH_CONDITION BRANCH_BEGIN BRANCH_ELSE BRANCH_END CASE_END -%token COLON COMMA -%token FUN_DECL PARAMS FUN_CALL FUN_END -%token ABORT -%token ID NUMBER FLOAT - -%left OP_LT OP_GT OP_LTE OP_GTE -%left OP_PLUS OP_MINUS -%left OP_TIMES OP_DIV -%left OP_SHL OP_SHR - -%nonassoc LOWER_THAN_ELSE -%nonassoc BRANCH_ELSE - -%union { - int intval; - double floatval; - std::string* strval; - bool boolval; - Type typeval; - Statement* statementval; - PointerList* statlistval; - Assert* assertval; - FunctionCall* callval; - Print* printval; - Input* inputval; - Abort* abortval; - Branch* branchval; - Branch::Body* branchbodyval; - VarDeclaration* declval; - Assignment* assignval; - Loop* loopval; - BranchCase *caseval; - PointerList *caselistval; - Return* returnval; - Expression* expressionval; - PointerList* exprlistval; - SemiExpression *semiexpval; - Id* idval; - Number* numericval; - Function* funval; - FunctionPrototype* protoval; - FunArg *argval; - PointerList *arglistval; -} - -%type NUMBER -%type FLOAT -%type ID -%type TYPENAME fun_return -%type statement -%type statements -%type 
branch_body -%type assert_stmt -%type fun_call -%type arg_decl -%type args_decl args -%type fun_decl main -%type fun_proto -%type print_stmt -%type input_stmt -%type abort_stmt -%type branch_stmt -%type case_stmt -%type cases -%type var_decl -%type assign_stmt -%type loop_stmt -%type return_stmt -%type expression maybe_expression simple_expression expression_inner -%type var_init -%type call_arglist call_args -%type semi_expression semi_expression_inner -%type variable -%type numeric -%type pointer - -%start program - -%% - -program: - /* epsilon */ - | fun_decls main fun_decls { - program.setMain($2); - } -; -fun_decls: - /* epsilon */ - | fun_decl { - program.addFunction($1); - } - fun_decls -; -fun_decl: - fun_proto statements { - $$ = new Function($1, $2); - } -; -fun_proto: - FUN_DECL fun_return ID args FUN_END { - $$ = new FunctionPrototype(new Id($3), $2, $4); - } -; -fun_return: - /* epsilon */ { $$ = Type::VOID; } | TYPENAME { $$ = $1; } -; -args: - /* epsilon */ { - $$ = new PointerList(); - } - | PARAMS args_decl { - $$ = $2; - } -; -args_decl: - arg_decl { - $$ = new PointerList(); - $$->push_back($1); - } - | args_decl COMMA arg_decl { - $1->push_back($3); - } -; -arg_decl: - variable pointer TYPENAME { - $$ = new FunArg($1, $3, $2); - } -; -main: - MAIN statements { - $$ = makeMain($2); - } -; -statements: - /* epsilon */ { - $$ = new PointerList(); - } - | statements statement { - if ($2 != nullptr) { - $2->setLocation(@2); - $1->push_back($2); - } - $$ = $1; - } -; -statement: - assert_stmt { $$ = $1; } - | fun_call { $$ = $1; } - | print_stmt { $$ = $1; } - | input_stmt { $$ = $1; } - | abort_stmt { $$ = $1; } - | branch_stmt { $$ = $1; } - | var_decl { $$ = $1; } - | assign_stmt { $$ = $1; } - | loop_stmt { $$ = $1; } - | return_stmt { $$ = $1; } - | COMMA { $$ = nullptr; } -; -var_decl: - VARDECL variable COMMA pointer TYPENAME var_init { - $$ = new VarDeclaration($2, $5, $4, $6); - } -; -pointer: - /* epsilon */ { $$ = false; } | STAR { $$ = 
true; } -; -var_init: - /* epsilon */ { $$ = nullptr; } | ASSIGN expression { $$ = $2; } -; -numeric: - NUMBER { $$ = new Integer($1); } | FLOAT { $$ = new Float($1); } -; -variable: - ID { - $$ = new Id($1); - } - | ARTICLE ID { - $$ = new Id($2); - } -; -assign_stmt: - variable ASSIGN expression { - $$ = new Assignment($1, $3); - } -; -print_stmt: - expression PRINT { - $$ = new Print($1); - program.addModule(new Module("iostream", Module::SYSTEM)); - } -; -input_stmt: - INPUT variable { - $$ = new Input($2); - program.addModule(new Module("iostream", Module::SYSTEM)); - } -; -return_stmt: - RETURN maybe_expression BANG { - $$ = new Return($2); - } -; -maybe_expression: - expression { $$ = $1; } | /* epsilon */ { $$ = nullptr; } -; -loop_stmt: - LOOP_BEGIN statements LOOP_CONDITION expression { - $$ = new Loop($2, $4); - } -; -branch_stmt: - BRANCH_CONDITION variable BRANCH_BEGIN branch_body BRANCH_END { - $2->setLocation(@2); - $$ = new Branch($2, $4); - } -; -branch_body: - cases %prec LOWER_THAN_ELSE { - $$ = new Branch::Body($1); - } - | cases BRANCH_ELSE COLON statements { - $$ = new Branch::Body($1, $4); - } -; -cases: - case_stmt { - $$ = new PointerList(); - $$->push_back($1); - } - | cases CASE_END case_stmt { - $1->push_back($3); - $$ = $1; - } -; -case_stmt: - semi_expression COLON statements { - $$ = new BranchCase($1, $3); - } -; -fun_call: - FUN_CALL ID call_args FUN_END { - $$ = new FunctionCall(new Id($2), $3); - } -; -call_args: - /* epsilon */ { - $$ = new PointerList(); - } - | PARAMS call_arglist { - $$ = $2; - } -; -call_arglist: - expression { - $$ = new PointerList(); - $$->push_back($1); - } - | call_arglist COMMA expression { - $$->push_back($3); - } -; -abort_stmt: - ABORT { - $$ = new Abort(); - program.addModule(new Module("cstdlib", Module::SYSTEM)); - } -; -assert_stmt: - ASSERT expression BANG { - $$ = new Assert($2); - program.addModule(new Module("cassert", Module::SYSTEM)); - } -; -expression: - expression_inner { - 
$1->setLocation(@1); - $$ = $1; - } -; -expression_inner: - simple_expression { - $$ = $1; - } - | expression OP_LT expression { - $$ = new ExpLt($1, $3); - } - | expression OP_GT expression { - $$ = new ExpGt($1, $3); - } - | expression OP_LTE expression { - $$ = new ExpLte($1, $3); - } - | expression OP_GTE expression { - $$ = new ExpGte($1, $3); - } - | expression OP_PLUS expression { - $$ = new ExpPlus($1, $3); - } - | expression OP_MINUS expression { - $$ = new ExpMinus($1, $3); - } - | expression OP_TIMES expression { - $$ = new ExpTimes($1, $3); - } - | expression OP_DIV expression { - $$ = new ExpDiv($1, $3); - } - | expression OP_SHL expression { - $$ = new ExpShl($1, $3); - } - | expression OP_SHR expression { - $$ = new ExpShr($1, $3); - } -; -semi_expression: - semi_expression_inner { - $1->setLocation(@1); - $$ = $1; - } -; -semi_expression_inner: - expression { - $$ = new SemiExpEq($1); - } - | OP_LT expression { - $$ = new SemiExpLt($2); - } - | OP_GT expression { - $$ = new SemiExpGt($2); - } - | OP_LTE expression { - $$ = new SemiExpLte($2); - } - | OP_GTE expression { - $$ = new SemiExpGte($2); - } - | OP_PLUS expression { - $$ = new SemiExpPlus($2); - } - | OP_MINUS expression { - $$ = new SemiExpMinus($2); - } - | OP_TIMES expression { - $$ = new SemiExpTimes($2); - } - | OP_DIV expression { - $$ = new SemiExpDiv($2); - } - | OP_SHL expression { - $$ = new SemiExpShl($2); - } - | OP_SHR expression { - $$ = new SemiExpShr($2); - } -; -simple_expression: - fun_call { $$ = $1; } - | numeric { $$ = $1; } - | variable { $$ = $1; } -; -%% - -#include "Scanner.hpp" - -void Parser::error(const location_type& loc, const std::string &message) { - std::cerr << "line " << loc.begin.line << ", col " << loc.begin.column; - std::cerr << ": " << message << std::endl; - std::exit(1); -} - -int yylex(Parser::semantic_type *lval, Parser::location_type *loc, Scanner &scanner) { - return scanner.yylex(lval, loc); -} - diff --git a/src/Nodes.cpp b/src/Nodes.cpp 
deleted file mode 100644 index 55504eb..0000000 --- a/src/Nodes.cpp +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "Nodes.hpp" -#include - -using namespace monicelli; - -Function *monicelli::makeMain(PointerList *body) { - PointerList *noargs = new PointerList(); - - FunctionPrototype *proto = new FunctionPrototype( - new Id("main"), Type::VOID, noargs - ); - - return new Function(proto, body); -} - -std::ostream& monicelli::operator<<(std::ostream &stream, Type const& type) { - switch (type) { - case Type::INT: - stream << "int"; - break; - case Type::CHAR: - stream << "char"; - break; - case Type::FLOAT: - stream << "float"; - break; - case Type::BOOL: - stream << "bool"; - break; - case Type::DOUBLE: - stream << "double"; - break; - case Type::VOID: - stream << "void"; - break; - case Type::UNKNOWN: - stream << "???????????"; - break; - } - - return stream; -} - -std::ostream& monicelli::operator<<(std::ostream &stream, Operator const& op) { - switch (op) { - case Operator::PLUS: - stream << '+'; - break; - case Operator::MINUS: - stream << '-'; - break; - case Operator::TIMES: - stream << '*'; - break; - case Operator::DIV: - stream << '/'; - break; - case Operator::SHL: - stream << "<<"; - break; - case Operator::SHR: - stream << ">>"; - break; - 
case Operator::LT: - stream << '<'; - break; - case Operator::GT: - stream << '>'; - break; - case Operator::GTE: - stream << ">="; - break; - case Operator::LTE: - stream << "<="; - break; - case Operator::EQ: - stream << "=="; - break; - } - - return stream; -} - diff --git a/src/Nodes.hpp b/src/Nodes.hpp deleted file mode 100644 index 997852f..0000000 --- a/src/Nodes.hpp +++ /dev/null @@ -1,712 +0,0 @@ -#ifndef NODES_HPP -#define NODES_HPP - -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include "Emitter.hpp" -#include "Pointers.hpp" - -#include "location.hh" - -#include -#include -#include - -#define maybe_return(val) \ - if ((val) != nullptr) return *(val); else return boost::none; - -namespace monicelli { - -enum class Type { - INT, - CHAR, - FLOAT, - BOOL, - DOUBLE, - VOID, - UNKNOWN -}; - -std::ostream& operator<<(std::ostream&, Type const&); - -enum class Operator { - PLUS, MINUS, TIMES, DIV, - SHL, SHR, - LT, GT, GTE, LTE, EQ -}; - -std::ostream& operator<<(std::ostream&, Operator const&); - -class Localizable { -public: - void setLocation(location const& l) { - loc = l; - } - - location const& getLocation() const { - return loc; - } - -private: - location loc; -}; - -class Emittable: public Localizable { -public: - virtual ~Emittable() {} - virtual bool emit(Emitter *emitter) const = 0; -}; - - -class Statement: virtual public Emittable {}; - -class Expression: virtual public Emittable {}; - -class SimpleExpression: public Expression {}; - -class SemiExpression: public Localizable { -public: - SemiExpression(Operator op, Expression *l): op(op), left(l) {} - - Expression const& getLeft() const { - return *left; - } - - Operator getOperator() const { - return op; - } - -private: - Operator op; - Pointer left; -}; - - -class Id: public SimpleExpression { -public: - explicit Id(std::string *c): value(c) {} - explicit Id(char const* c) { - value = Pointer(new std::string(c)); - } - explicit Id(std::string const& c) { - value = Pointer(new std::string(c)); - } - - virtual bool emit(Emitter *emitter) const { - return emitter->emit(*this); - } - - std::string const& getValue() const { - return *value; - } - -private: - Pointer value; -}; - -static inline -bool operator==(Id const& a, Id const& b) { - return a.getValue() == b.getValue(); -} - - -class Number: public SimpleExpression {}; - -class Integer: public Number { -public: - Integer(long i): value(i) {} - - virtual bool emit(Emitter *emitter) const { - return emitter->emit(*this); - } 
- - long getValue() const { - return value; - } - -private: - long value; -}; - - -class Float: public Number { -public: - Float(double f): value(f) {} - - virtual bool emit(Emitter *emitter) const { - return emitter->emit(*this); - } - - double getValue() const { - return value; - } - -private: - double value; -}; - - -class Return: public Statement { -public: - explicit Return(Expression *e): expression(e) {} - - virtual bool emit(Emitter *emitter) const { - return emitter->emit(*this); - } - - boost::optional getExpression() const { - maybe_return(expression); - } - -private: - Pointer expression; -}; - - -class Loop: public Statement { -public: - Loop(PointerList *b, Expression *c): body(b), condition(c) {} - - virtual bool emit(Emitter *emitter) const { - return emitter->emit(*this); - } - - PointerList const& getBody() const { - return *body; - } - - Expression const& getCondition() const { - return *condition; - } - -private: - Pointer> body; - Pointer condition; -}; - - -class VarDeclaration: public Statement { -public: - VarDeclaration(Id *n, Type t, bool p, Expression *i): - name(n), point(p), init(i), type(t) {} - - virtual bool emit(Emitter *emitter) const { - return emitter->emit(*this); - } - - Id const& getId() const { - return *name; - } - - bool isPointer() const { - return point; - } - - boost::optional getInitializer() const { - maybe_return(init); - } - - Type getType() const { - return type; - } - -private: - Pointer name; - bool point; - Pointer init; - Type type; -}; - - -class Assignment: public Statement { -public: - Assignment(Id *n, Expression *v): name(n), value(v) {} - - virtual bool emit(Emitter *emitter) const { - return emitter->emit(*this); - } - - Id const& getName() const { - return *name; - } - - Expression const& getValue() const { - return *value; - } - -private: - Pointer name; - Pointer value; -}; - - -class Print: public Statement { -public: - explicit Print(Expression *e): expression(e) {} - - virtual bool emit(Emitter 
*emitter) const { - return emitter->emit(*this); - } - - Expression const& getExpression() const { - return *expression; - } - -private: - Pointer expression; -}; - - -class Input: public Statement { -public: - explicit Input(Id *v): variable(v) {} - - virtual bool emit(Emitter *emitter) const { - return emitter->emit(*this); - } - - Id const& getVariable() const { - return *variable; - } - -private: - Pointer variable; -}; - - -class Abort: public Statement { -public: - virtual bool emit(Emitter *emitter) const { - return emitter->emit(*this); - } -}; - - -class Assert: public Statement { -public: - explicit Assert(Expression *e): expression(e) {} - - virtual bool emit(Emitter *emitter) const { - return emitter->emit(*this); - } - - Expression const& getExpression() const { - return *expression; - } - -private: - Pointer expression; -}; - - -class FunctionCall: public Statement, public Expression { -public: - FunctionCall(Id *n, PointerList *a): name(n), args(a) {} - - virtual bool emit(Emitter *emitter) const { - return emitter->emit(*this); - } - - Id const& getName() const { - return *name; - } - - PointerList const& getArgs() const { - return *args; - } - -private: - Pointer name; - Pointer> args; -}; - -class BranchCase: public Localizable { -public: - BranchCase(SemiExpression *c, PointerList *b): condition(c), body(b) {} - - SemiExpression const& getCondition() const { - return *condition; - } - - PointerList const& getBody() const { - return *body; - } - -private: - Pointer condition; - Pointer> body; -}; - - -class Branch: public Statement { -public: - class Body: public Localizable { - public: - Body(PointerList *c, PointerList *e = nullptr): cases(c), els(e) {} - - PointerList const& getCases() const { - return *cases; - } - - boost::optional const&> getElse() const { - maybe_return(els); - } - - private: - Pointer> cases; - Pointer> els; - }; - - Branch(Id *v, Branch::Body *b): var(v), body(b) {} - - virtual bool emit(Emitter *emitter) const { - return 
emitter->emit(*this); - } - - Id const& getVar() const { - return *var; - } - - Branch::Body const& getBody() const { - return *body; - } - -private: - Pointer var; - Pointer body; -}; - - -Function *makeMain(PointerList *body); - -class FunArg: public Localizable { -public: - FunArg(Id *n, Type t, bool p): name(n), type(t), pointer(p) {} - - Id const& getName() const { - return *name; - } - - Type getType() const { - return type; - } - - bool isPointer() const { - return pointer; - } - -private: - Pointer name; - Type type; - bool pointer; -}; - - -class FunctionPrototype: public Emittable { -public: - FunctionPrototype(Id *n, Type r, PointerList *a): - name(n), type(r), args(a) {} - - virtual bool emit(Emitter *emitter) const { - return emitter->emit(*this); - } - - Id const& getName() const { - return *name; - } - - Type getType() const { - return type; - } - - PointerList const& getArgs() const { - return *args; - } - -private: - Pointer name; - Type type; - Pointer> args; -}; - -static inline -bool operator==(const FunctionPrototype &a, const FunctionPrototype &b) { - return a.getName() == b.getName(); -} - -static inline -size_t hash_value(const monicelli::FunctionPrototype &e) { - return std::hash()(e.getName().getValue()); -} - -class Function: public Emittable { -public: - Function(FunctionPrototype *p, PointerList *b): - prototype(p), body(b) {} - - virtual bool emit(Emitter *emitter) const { - return emitter->emit(*this); - } - - FunctionPrototype const& getPrototype() const { - return *prototype; - } - - PointerList const& getBody() const { - return *body; - } - -private: - Pointer prototype; - Pointer> body; -}; - - -class Module: public Emittable { -public: - enum ModuleType { - SYSTEM, USER - }; - - Module(const std::string &n, ModuleType s): name(n), type(s) {} - - virtual bool emit(Emitter *emitter) const { - return emitter->emit(*this); - } - - std::string const& getName() const { - return name; - } - - ModuleType getType() const { - return type; 
- } - -private: - std::string name; - ModuleType type; -}; - -static inline -bool operator==(const Module &a, const Module &b) { - return (a.getName() == b.getName()) && (a.getType() == b.getType()); -} - -static inline -size_t hash_value(const monicelli::Module &e) { - return std::hash()(e.getName()) ^ std::hash()(e.getType()); -} - -class Program: public Emittable { -public: - virtual bool emit(Emitter *emitter) const { - return emitter->emit(*this); - } - - void setMain(Function *m) { - main = Pointer(m); - } - - void addFunction(Function *f) { - functions.push_back(f); - } - - void addModule(Module *m) { - modules.insert(m); - } - - boost::optional getMain() const { - maybe_return(main); - } - - PointerList const& getFunctions() const { - return functions; - } - - PointerSet const& getModules() const { - return modules; - } - -private: - Pointer main; - PointerList functions; - PointerSet modules; -}; - - -class BinaryExpression: public Expression { -public: - BinaryExpression(Expression *l, Operator op, Expression *r): - left(l), op(op), right(r) {} - - virtual bool emit(Emitter *emitter) const { - return emitter->emit(*this); - } - - Expression const& getLeft() const { - return *left; - } - - Expression const& getRight() const { - return *right; - } - - Operator getOperator() const { - return op; - } - -private: - Pointer left; - Operator op; - Pointer right; -}; - - -class ExpLt: public BinaryExpression { -public: - ExpLt(Expression *l, Expression *r): BinaryExpression(l, Operator::LT, r) {} -}; - - -class ExpGt: public BinaryExpression { -public: - ExpGt(Expression *l, Expression *r): BinaryExpression(l, Operator::GT, r) {} -}; - - -class ExpLte: public BinaryExpression { -public: - ExpLte(Expression *l, Expression *r): BinaryExpression(l, Operator::LTE, r) {} -}; - - -class ExpGte: public BinaryExpression { -public: - ExpGte(Expression *l, Expression *r): BinaryExpression(l, Operator::GTE, r) {} -}; - - -class ExpPlus: public BinaryExpression { -public: - 
ExpPlus(Expression *l, Expression *r): BinaryExpression(l, Operator::PLUS, r) {} - -}; - - -class ExpMinus: public BinaryExpression { -public: - ExpMinus(Expression *l, Expression *r): BinaryExpression(l, Operator::MINUS, r) {} -}; - - -class ExpTimes: public BinaryExpression { -public: - ExpTimes(Expression *l, Expression *r): BinaryExpression(l, Operator::TIMES, r) {} -}; - - -class ExpDiv: public BinaryExpression { -public: - ExpDiv(Expression *l, Expression *r): BinaryExpression(l, Operator::DIV, r) {} -}; - - -class ExpShl: public BinaryExpression { -public: - ExpShl(Expression *l, Expression *r): BinaryExpression(l, Operator::SHL, r) {} -}; - - -class ExpShr: public BinaryExpression { -public: - ExpShr(Expression *l, Expression *r): BinaryExpression(l, Operator::SHR, r) {} -}; - -class SemiExpEq: public SemiExpression { -public: - SemiExpEq(Expression *l): SemiExpression(Operator::EQ, l) {} -}; - - -class SemiExpLt: public SemiExpression { -public: - SemiExpLt(Expression *l): SemiExpression(Operator::LT, l) {} -}; - - -class SemiExpGt: public SemiExpression { -public: - SemiExpGt(Expression *l): SemiExpression(Operator::GT, l) {} -}; - - -class SemiExpLte: public SemiExpression { -public: - SemiExpLte(Expression *l): SemiExpression(Operator::LTE, l) {} -}; - - -class SemiExpGte: public SemiExpression { -public: - SemiExpGte(Expression *l): SemiExpression(Operator::GTE, l) {} -}; - - -class SemiExpPlus: public SemiExpression { -public: - SemiExpPlus(Expression *l): SemiExpression(Operator::PLUS, l) {} -}; - - -class SemiExpMinus: public SemiExpression { -public: - SemiExpMinus(Expression *l): SemiExpression(Operator::MINUS, l) {} -}; - - -class SemiExpTimes: public SemiExpression { -public: - SemiExpTimes(Expression *l): SemiExpression(Operator::TIMES, l) {} -}; - - -class SemiExpDiv: public SemiExpression { -public: - SemiExpDiv(Expression *l): SemiExpression(Operator::DIV, l) {} -}; - - -class SemiExpShl: public SemiExpression { -public: - 
SemiExpShl(Expression *l): SemiExpression(Operator::SHR, l) {} -}; - - -class SemiExpShr: public SemiExpression { -public: - SemiExpShr(Expression *l): SemiExpression(Operator::SHL, l) {} -}; - -} // namespace - -#undef maybe_return - -#endif - diff --git a/src/Pointers.hpp b/src/Pointers.hpp deleted file mode 100644 index 5667dee..0000000 --- a/src/Pointers.hpp +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef POINTERS_HPP -#define POINTERS_HPP - -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include -#include - -#include -#include - -namespace monicelli { - -template using Pointer = std::unique_ptr; -template using PointerList = boost::ptr_vector; -template using PointerSet = boost::ptr_unordered_set; - -template -PointerList* plist_of(std::initializer_list elements) { - PointerList *result = new PointerList(elements.size()); - for (T *el: elements) { - result->push_back(el); - } - return result; -} - -} - -#endif - diff --git a/src/Runtime.c b/src/Runtime.c deleted file mode 100644 index b341702..0000000 --- a/src/Runtime.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "Runtime.h" - -#include -#include -#include - - -void __Monicelli_putBool(Monicelli_Bool value) { - puts(value? "vero\n": "falso\n"); -} - -void __Monicelli_putChar(Monicelli_Char value) { - printf("%c", value); -} - -void __Monicelli_putInt(Monicelli_Int value) { - printf("%ld\n", value); -} - -void __Monicelli_putFloat(Monicelli_Float value) { - printf("%g\n", value); -} - -void __Monicelli_putDouble(Monicelli_Double value) { - printf("%lg\n", value); -} - -Monicelli_Bool __Monicelli_getBool() { - Monicelli_Bool tmp; - printf("%s", "? "); - scanf("%c", &tmp); - return tmp != 0? 
1: 0; -} - -Monicelli_Char __Monicelli_getChar() { - Monicelli_Char tmp; - printf("%s", "? "); - scanf("%c", &tmp); - return tmp; -} - -Monicelli_Int __Monicelli_getInt() { - Monicelli_Int tmp; - printf("%s", "? "); - scanf("%ld", &tmp); - return tmp; -} - -Monicelli_Float __Monicelli_getFloat() { - Monicelli_Float tmp; - printf("%s", "? "); - scanf("%f", &tmp); - return tmp; -} - -Monicelli_Double __Monicelli_getDouble() { - Monicelli_Double tmp; - printf("%s", "? "); - scanf("%lf", &tmp); - return tmp; -} - -void __Monicelli_abort() { - abort(); -} - -void __Monicelli_assert(Monicelli_Bool condition) { - assert(condition); -} - diff --git a/src/Runtime.h b/src/Runtime.h deleted file mode 100644 index d6865da..0000000 --- a/src/Runtime.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef RUNTIME_H -#define RUNTIME_H - -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include - -typedef int8_t Monicelli_Bool; -typedef int8_t Monicelli_Char; -typedef int64_t Monicelli_Int; -typedef float Monicelli_Float; -typedef double Monicelli_Double; - -#ifdef __cplusplus -extern "C" { -#endif - -void __Monicelli_putBool(Monicelli_Bool value); -void __Monicelli_putChar(Monicelli_Char value); -void __Monicelli_putInt(Monicelli_Int value); -void __Monicelli_putFloat(Monicelli_Float value); -void __Monicelli_putDouble(Monicelli_Double value); - -Monicelli_Bool __Monicelli_getBool(); -Monicelli_Char __Monicelli_getChar(); -Monicelli_Int __Monicelli_getInt(); -Monicelli_Float __Monicelli_getFloat(); -Monicelli_Double __Monicelli_getDouble(); - -void __Monicelli_abort(); - -void __Monicelli_assert(Monicelli_Bool condition); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/Scanner.hpp b/src/Scanner.hpp deleted file mode 100644 index cda90f6..0000000 --- a/src/Scanner.hpp +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef SCANNER_HPP -#define SCANNER_HPP - -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef yyFlexLexerOnce -#include -#endif - -#include "Parser.hpp" - -namespace monicelli { - -class Scanner: public yyFlexLexer { -public: - Scanner(std::istream *in): yyFlexLexer(in) {} - - int yylex(Parser::semantic_type *lval, Parser::location_type *loc) { - this->lval = lval; - location = loc; - return yylex(); - } - -private: - int yylex(); - Parser::semantic_type *lval; - Parser::location_type *location; -}; - -} // monicelli - -#endif diff --git a/src/Scope.hpp b/src/Scope.hpp deleted file mode 100644 index 562c045..0000000 --- a/src/Scope.hpp +++ /dev/null @@ -1,70 +0,0 @@ -#ifndef SCOPE_HPP -#define SCOPE_HPP - -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include -#include - -#include -#include - - -namespace monicelli { - -template -class Scope { -public: - boost::optional lookup(Key name) { - for (auto const& table: boost::adaptors::reverse(tables)) { - auto result = table.find(name); - if (result != table.end()) { - return result->second; - } - } - - return boost::none; - } - - void push(Key const& key, Value const& value) { - tables.back().insert({key, value}); - } - - void enter() { - tables.emplace_back(); - } - - void leave() { - if (!tables.empty()) { - tables.pop_back(); - } - } - - void drop() { - tables.clear(); - } - -private: - std::vector> tables; -}; - -} - -#endif
diff --git a/src/asmgen.cpp b/src/asmgen.cpp
new file mode 100644
index 0000000..8ec6742
--- /dev/null
+++ b/src/asmgen.cpp
@@ -0,0 +1,151 @@
+// Copyright 2017 the Monicelli project authors. All rights reserved.
+// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
+
+#include "asmgen.h"
+#include "errors.h"
+
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <cassert>
+#include <cstdlib>
+#include <iostream>
+#include <string>
+#include <system_error>
+#include <vector>
+
+#ifdef MONICELLI_ENABLE_LINKER
+#include <cerrno>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#endif
+
+namespace monicelli {
+
+// Initializes the LLVM target infos, targets, MC layers, asm parsers and asm printers.
+void registerTargets() {
+  llvm::InitializeAllTargetInfos();
+  llvm::InitializeAllTargets();
+  llvm::InitializeAllTargetMCs();
+  llvm::InitializeAllAsmParsers();
+  llvm::InitializeAllAsmPrinters();
+}
+
+// Looks up the LLVM target for |triple| and creates a TargetMachine using the
+// static relocation model. Prints a diagnostic and exits if the lookup fails.
+llvm::TargetMachine* getTargetMachine(const std::string& triple, const std::string& cpu,
+                                      const std::string& features) {
+  std::string error;
+  auto target = llvm::TargetRegistry::lookupTarget(triple, error);
+
+  if (!target) {
+    std::cerr << "While determining target: " << error << '\n';
+    exit(1);
+  }
+
+  llvm::TargetOptions opt;
+  auto reloc_model = llvm::Reloc::Model::Static;
+  return target->createTargetMachine(triple, cpu, features, opt, reloc_model);
+}
+
+// Runs the emission passes of |target_machine| over |module| and writes the
+// result to |to_filename|. Despite the name, the requested output type is an
+// object file (CGFT_ObjectFile). Prints a diagnostic and exits if the file
+// cannot be opened or the target cannot emit this file type.
+void writeAssembly(const std::string& to_filename, llvm::Module* module,
+                   llvm::TargetMachine* target_machine) {
+  std::error_code error_code;
+  llvm::raw_fd_ostream output{to_filename, error_code, llvm::sys::fs::F_None};
+
+  if (error_code) {
+    std::cerr << "Could not open '" << to_filename << "' for output: " << error_code.message()
+              << '\n';
+    exit(1);
+  }
+
+  llvm::legacy::PassManager asm_generator;
+  auto file_type = llvm::TargetMachine::CGFT_ObjectFile;
+
+  if (target_machine->addPassesToEmitFile(asm_generator, output, file_type)) {
+    std::cerr << "Cannot emit an object file of this type\n";
+    exit(1);
+  }
+
+  asm_generator.run(*module);
+  output.flush();
+}
+
+#ifdef MONICELLI_ENABLE_LINKER
+
+static const char* const C_COMPILER = "c99";
+
+// Links |object_files| by running the system C compiler (C_COMPILER) as a
+// child process and waiting for it to finish. A non-empty |output_name| is
+// passed with -o. Unless |keep_object_files| is set, the object files are
+// removed afterwards. Exits with status 1 if the compiler cannot be spawned
+// or does not terminate successfully.
+void linkAssembly(const std::string& output_name, const std::vector<std::string>& object_files,
+                  bool keep_object_files) {
+  // Linking a C object file with certain modern libc's is so complicated that
+  // we just let a C compiler do it for us. This function assumes POSIX, and
+  // most recent POSIX-compliant systems will also adopt the recommendation
+  // to have a C compiler installed and called c99. Very old systems will have
+  // c89 instead. cc exists as well, but it's not specified by POSIX.
+
+  std::vector<const char*> cc_args;
+  cc_args.reserve(object_files.size() + 4);
+  cc_args.push_back(C_COMPILER);
+  if (!output_name.empty()) {
+    cc_args.push_back("-o");
+    cc_args.push_back(output_name.c_str());
+  }
+  for (const auto& object_file : object_files) {
+    assert(object_file[0] != '-' && "The option parser allowed a filename starting with -");
+    cc_args.push_back(object_file.c_str());
+  }
+  // execvp() expects a null-terminated argument vector.
+  cc_args.push_back(nullptr);
+
+  pid_t pid = fork();
+
+  if (pid == -1) {
+    std::cerr << "Failed to spawn the linker process.\n";
+    exit(1);
+  }
+
+  if (pid == 0) {
+    // execvp() only returns on failure. Leave with _exit() so that the child
+    // does not run the parent's atexit handlers or flush its stdio buffers.
+    execvp(C_COMPILER, const_cast<char* const*>(cc_args.data()));
+    std::cerr << "Failed to launch the linker. Check that '" << C_COMPILER << "' is installed.\n";
+    _exit(127);
+  }
+
+  // Collect the linker's exit status, retrying if a signal interrupts the wait.
+  int status = 0;
+  pid_t waited;
+  do {
+    waited = waitpid(pid, &status, 0);
+  } while (waited == -1 && errno == EINTR);
+  const bool linked = waited == pid && WIFEXITED(status) && WEXITSTATUS(status) == 0;
+
+  if (!keep_object_files) {
+    for (const auto& object_file : object_files) {
+      unlink(object_file.c_str());
+    }
+  }
+
+  if (!linked) {
+    std::cerr << "Linking failed.\n";
+    exit(1);
+  }
+}
+
+#endif
+
+} // namespace monicelli
diff --git a/src/asmgen.h b/src/asmgen.h
new file mode 100644
index 0000000..7c39123
--- /dev/null
+++ b/src/asmgen.h
@@ -0,0 +1,30 @@
+#ifndef MONICELLI_ASMGEN_H
+#define MONICELLI_ASMGEN_H
+
+// Copyright 2017 the Monicelli project authors. All rights reserved.
+// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
+
+#include "llvm/IR/Module.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <string>
+#include <vector>
+
+namespace monicelli {
+
+void registerTargets();
+
+llvm::TargetMachine* getTargetMachine(const std::string& triple, const std::string& cpu,
+                                      const std::string& features);
+
+void writeAssembly(const std::string& to_filename, llvm::Module* module,
+                   llvm::TargetMachine* target_machine);
+
+#ifdef MONICELLI_ENABLE_LINKER
+void linkAssembly(const std::string& output_name, const std::vector<std::string>& object_files,
+                  bool keep_object_files = false);
+#endif
+
+} // namespace monicelli
+
+#endif
diff --git a/src/ast-printer.cpp b/src/ast-printer.cpp
new file mode 100644
index 0000000..71a590b
--- /dev/null
+++ b/src/ast-printer.cpp
@@ -0,0 +1,267 @@
+// Copyright 2017 the Monicelli project authors. All rights reserved.
+// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
+ +#include "ast-visitor.h" +#include "ast.h" + +#include + +using namespace monicelli; + +namespace { + +static const char* baseTypeToString(const VarType::BaseType type) { + switch (type) { +#define RETURN_BASE_NAME(NAME, _1, _2, _3, _4, STRING) \ + case VarType::NAME: \ + return STRING; + BUILTIN_TYPES(RETURN_BASE_NAME) +#undef RETURN_BASE_NAME + default: + UNREACHABLE("Unhandled VarType::BaseType."); + } +} + +static std::ostream& operator<<(std::ostream& stream, const VarType& type) { + stream << baseTypeToString(type.getBaseType()); + if (type.isPointer()) { + stream << '*'; + } + return stream; +} + +class AstPrinter final : public ConstAstVisitor { +public: + AstPrinter(std::ostream& stream) : expression_level_(0), indent_level_(0), stream_(stream) {} + + void visitModule(const Module* module) { + for (const Function* function : module->functions()) { + visitFunction(function); + } + if (module->hasEntryPoint()) { + visitFunction(module->getEntryPoint()); + } + } + + void visitFunction(const Function* function) { + stream() << function->getReturnType() << ' '; + if (function->isEntryPoint()) { + stream() << "main"; + } else { + stream() << function->getName(); + } + stream() << '('; + + bool first = true; + for (const FunctionParam& param : function->params()) { + if (first) { + first = false; + } else { + stream() << ", "; + } + assert(!param.getType().isVoid()); + stream() << param.getType() << ' ' << param.getArg().getName(); + } + + stream() << ')'; + + if (!function->body_empty()) { + stream(false) << " {\n"; + IndentGuard guard(this); + for (const Statement* statement : function->body()) { + visit(statement); + stream() << '\n'; + } + stream(false) << "}"; + } + + stream(false) << "\n\n"; + } + + void visitAssertStatement(const AssertStatement* s) { + stream() << "assert "; + visit(s->getExpression()); + } + + void visitInputStatement(const InputStatement* s) { + stream() << "read " << s->getVariable().getName(); + } + + void 
visitExpressionStatement(const ExpressionStatement* s) { visit(s->getExpression()); } + + void visitAbortStatement(const AbortStatement*) { stream() << "abort"; } + + void visitVardeclStatement(const VardeclStatement* s) { + assert(!s->getType().isVoid()); + stream() << s->getType() << ' ' << s->getVariable().getName(); + if (s->hasInitializer()) { + stream(false) << " = "; + visit(s->getInitializer()); + } + } + + void visitBranchStatement(const BranchStatement* branch) { + stream() << "branch " << branch->getLeadVariable().getName() << " {\n"; + { + IndentGuard guard(this); + for (const BranchCase& c : branch->cases()) { + stream() << "case"; + visit(c.getExpression()); + stream(false) << " {\n"; + { + IndentGuard guard(this); + for (const Statement* s : c.body()) { + visit(s); + stream(false) << '\n'; + } + } + stream() << "}\n"; + } + if (branch->hasBranchElse()) { + stream() << "else {\n"; + { + IndentGuard guard(this); + for (const Statement* s : branch->getBranchElse()->body()) { + visit(s); + stream(false) << '\n'; + } + } + stream() << "}\n"; + } + } + stream() << "}"; + } + + void visitLoopStatement(const LoopStatement* s) { + stream() << "do {\n"; + { + IndentGuard guard(this); + for (const Statement* is : s->body()) { + visit(is); + stream(false) << '\n'; + } + } + stream() << "} while "; + visit(s->getCondition()); + } + + void visitReturnStatement(const ReturnStatement* s) { + ExpressionNestingGuard guard{this}; + stream() << "return"; + if (s->hasExpression()) { + stream(false) << ' '; + visit(s->getExpression()); + } + } + + void visitPrintStatement(const PrintStatement* s) { + ExpressionNestingGuard guard{this}; + stream() << "print "; + visit(s->getExpression()); + } + + void visitAssignStatement(const AssignStatement* s) { + ExpressionNestingGuard guard{this}; + stream() << s->getVariable().getName() << " = "; + visit(s->getExpression()); + } + + void visitAtomicExpression(const AtomicExpression* s) { + ExpressionNestingGuard guard{this}; + 
switch (s->getType()) { + case AtomicExpression::FLOAT: + stream(false) << s->getFloatValue(); + break; + case AtomicExpression::INTEGER: + stream(false) << s->getIntValue(); + break; + case AtomicExpression::IDENTIFIER: + stream(false) << s->getIdentifierValue().getName(); + break; + default: + UNREACHABLE("Unhanlded AtomicExpression type."); + } + } + + void visitBinaryExpression(const BinaryExpression* s) { + if (isNestedExpression()) stream(false) << '('; + { + ExpressionNestingGuard guard{this}; + if (!s->isSemiExpression()) visit(s->getLeft()); + stream(false) << ' ' << s->getOperatorRepresentation() << ' '; + visit(s->getRight()); + } + if (isNestedExpression()) stream(false) << ')'; + } + + void visitFunctionCallExpression(const FunctionCallExpression* s) { + stream(!isNestedExpression()) << s->getFunctionName() << '('; + ExpressionNestingGuard guard{this}; + bool first = true; + for (const Expression* arg : s->args()) { + if (first) { + first = false; + } else { + stream(false) << ", "; + } + visit(arg); + } + stream(false) << ')'; + } + +private: + class IndentGuard final { + public: + IndentGuard(AstPrinter* printer) : printer_(printer) { printer_->increaseIndent(); } + + ~IndentGuard() { printer_->decreaseIndent(); } + + private: + AstPrinter* printer_; + }; + + class ExpressionNestingGuard final { + public: + ExpressionNestingGuard(AstPrinter* printer) : printer_(printer) { + ++printer->expression_level_; + } + + ~ExpressionNestingGuard() { --printer_->expression_level_; } + + private: + AstPrinter* printer_; + }; + + bool isNestedExpression() const { return expression_level_ > 0; } + + void increaseIndent() { indent_level_ += 2; } + void decreaseIndent() { + if (indent_level_ >= 2) indent_level_ -= 2; + } + + std::ostream& stream(bool indent = true) { + if (indent) { + static const char* spaces = " "; + for (int i = 0; i < indent_level_ / 32; ++i) { + stream_ << spaces; + } + stream_.write(spaces, indent_level_ % 32); + } + return stream_; + } + + int 
expression_level_; + int indent_level_; + std::ostream& stream_; +}; + +} // namespace + +namespace monicelli { + +void printAst(std::ostream& stream, const AstNode* node) { + AstPrinter printer{stream}; + printer.visit(node); +} + +} // namespace monicelli diff --git a/src/ast-printer.h b/src/ast-printer.h new file mode 100644 index 0000000..a8bff9c --- /dev/null +++ b/src/ast-printer.h @@ -0,0 +1,17 @@ +#ifndef MONICELLI_AST_PRINTER_H +#define MONICELLI_AST_PRINTER_H + +// Copyright 2017 the Monicelli project authors. All rights reserved. +// Use of this source code is governed by a GPLv3 license, see LICENSE.txt. + +#include + +namespace monicelli { + +class AstNode; + +void printAst(std::ostream& stream, const AstNode* node); + +} // namespace monicelli + +#endif diff --git a/src/ast-visitor.h b/src/ast-visitor.h new file mode 100644 index 0000000..5e49b6b --- /dev/null +++ b/src/ast-visitor.h @@ -0,0 +1,48 @@ +#ifndef MONICELLI_AST_VISITOR_H +#define MONICELLI_AST_VISITOR_H + +// Copyright 2017 the Monicelli project authors. All rights reserved. +// Use of this source code is governed by a GPLv3 license, see LICENSE.txt. 
+ +#include "ast.def" +#include "ast.h" +#include "errors.h" + +namespace monicelli { + +#define DECLARE_CLASS(NAME) class NAME; + +#define DEFAULT_VISIT(NAME) \ + T visit##NAME(const NAME*) { UNREACHABLE("Unhandled " #NAME "."); } + +#define DISPATCH_CONST_STATEMENT(NAME) \ + case AstNode::TYPE_##NAME: \ + return derived().visit##NAME(static_cast(node)); + +AST_NODES(DECLARE_CLASS) + +template class ConstAstVisitor { +public: + AST_NODES(DEFAULT_VISIT) + + T visit(const AstNode* node) { + switch (node->getClassType()) { + AST_NODES(DISPATCH_CONST_STATEMENT) + default: + UNREACHABLE("Unknown AstNode type class."); + } + } + + T visit(const AstNode& node) { return visit(&node); } + +private: + AstVisitorImpl& derived() { return *static_cast(this); } +}; + +#undef DECLARE_CLASS +#undef DEFAULT_VISIT +#undef DISPATCH_CONST_STATEMENT + +} // namespace monicelli + +#endif diff --git a/src/ast.cpp b/src/ast.cpp new file mode 100644 index 0000000..7ba9659 --- /dev/null +++ b/src/ast.cpp @@ -0,0 +1,22 @@ +// Copyright 2017 the Monicelli project authors. All rights reserved. +// Use of this source code is governed by a GPLv3 license, see LICENSE.txt. + +#include "ast.h" +#include "errors.h" + +namespace monicelli { + +// static +const char* BinaryExpression::getOperatorRepresentation(BinaryExpression::Type type) { + switch (type) { +#define RETURN_OP_STRING(_, NAME, __, STRING) \ + case Type::NAME: \ + return STRING; + AST_BINARY_OPERATORS(RETURN_OP_STRING) +#undef RETURN_OP_STRING + default: + UNREACHABLE("Unhandled BinaryExpression type."); + } +} + +} // namespace monicelli diff --git a/src/ast.def b/src/ast.def new file mode 100644 index 0000000..76a46dd --- /dev/null +++ b/src/ast.def @@ -0,0 +1,27 @@ +#ifndef MONICELLI_AST_DEF +#define MONICELLI_AST_DEF + +// Copyright 2017 the Monicelli project authors. All rights reserved. +// Use of this source code is governed by a GPLv3 license, see LICENSE.txt. 
+ +#include "operators.def" +#include "types.def" + +#define AST_NODES(V) \ + V(AssertStatement) \ + V(ExpressionStatement) \ + V(InputStatement) \ + V(AbortStatement) \ + V(BranchStatement) \ + V(VardeclStatement) \ + V(LoopStatement) \ + V(ReturnStatement) \ + V(PrintStatement) \ + V(AssignStatement) \ + V(Function) \ + V(Module) \ + V(FunctionCallExpression) \ + V(BinaryExpression) \ + V(AtomicExpression) \ + +#endif diff --git a/src/ast.h b/src/ast.h new file mode 100644 index 0000000..cb8e3b3 --- /dev/null +++ b/src/ast.h @@ -0,0 +1,448 @@ +#ifndef MONICELLI_AST_H +#define MONICELLI_AST_H + +// Copyright 2017 the Monicelli project authors. All rights reserved. +// Use of this source code is governed by a GPLv3 license, see LICENSE.txt. + +#include "ast.def" +#include "iterators.h" +#include "location.h" + +#include +#include +#include +#include + +namespace monicelli { + +class Variable final : public LocationMixin { +public: + const std::string& getName() const { return name_; } + +private: + std::string name_; + + friend class Parser; +}; + +class VarType final { +public: + enum BaseType { +#define DECLARE_TYPE(NAME, _1, _2, _3, _4, _5) NAME, + BUILTIN_TYPES(DECLARE_TYPE) +#undef DECLARE_TYPE + }; + + VarType() : base_type_(VarType::VOID), pointer_(false) {} + + bool isVoid() const { return base_type_ == BaseType::VOID && !pointer_; } + BaseType getBaseType() const { return base_type_; } + bool isPointer() const { return pointer_; } + +private: + BaseType base_type_; + bool pointer_; + + friend class Parser; +}; + +class FunctionParam final { +public: + FunctionParam(const Variable& name, const VarType& type) : name_(name), type_(type) {} + + const Variable& getArg() const { return name_; } + VarType getType() const { return type_; } + +private: + Variable name_; + VarType type_; + + friend class Parser; +}; + +class AstNode { +public: + enum ClassType : uint8_t { +#define DECL_EXPR_TYPE(NAME) TYPE_##NAME, + AST_NODES(DECL_EXPR_TYPE) +#undef DECL_EXPR_TYPE + 
}; + + ClassType getClassType() const { return type_tag_; } + +protected: + AstNode(ClassType type_tag) : type_tag_(type_tag) {} + +private: + ClassType type_tag_; +}; + +class Expression : public AstNode, public LocationMixin { +public: + Expression(Expression::ClassType type) : AstNode(type) {} + virtual ~Expression() = default; + + bool isFunctionCall() const { return getClassType() == Expression::TYPE_FunctionCallExpression; } +}; + +class BinaryExpression final : public Expression { +public: + enum Type { +#define DECLARE_OP(_, NAME, __, ___) NAME, + AST_BINARY_OPERATORS(DECLARE_OP) +#undef DECLARE_OP + }; + + Type getType() const { return type_; } + bool isSemiExpression() const { return is_semi_; } + const Expression* getLeft() const { return left_.get(); } + const Expression* getRight() const { return right_.get(); } + + static const char* getOperatorRepresentation(BinaryExpression::Type type); + + const char* getOperatorRepresentation() const { return getOperatorRepresentation(type_); } + +private: + BinaryExpression(Type type, std::shared_ptr left, Expression* right, bool is_semi) + : Expression(Expression::TYPE_BinaryExpression), type_(type), is_semi_(is_semi), left_(left), + right_(right) {} + + Type type_; + bool is_semi_; + std::shared_ptr left_; + std::unique_ptr right_; + + friend class Parser; +}; + +class AtomicExpression final : public Expression { +public: + enum Type { IDENTIFIER, INTEGER, FLOAT }; + + ~AtomicExpression() override { + if (type_ == Type::IDENTIFIER) { + identifier_value_.~Variable(); + } + } + + Type getType() const { return type_; } + + uint64_t getIntValue() const { + assert(type_ == Type::INTEGER); + return int_value_; + } + + double getFloatValue() const { + assert(type_ == Type::FLOAT); + return fp_value_; + } + + const Variable& getIdentifierValue() const { + assert(type_ == Type::IDENTIFIER); + return identifier_value_; + } + +private: + AtomicExpression() : Expression(Expression::TYPE_AtomicExpression) {} + + static 
std::unique_ptr fromInt(uint64_t value) { + std::unique_ptr expression{new AtomicExpression}; + expression->type_ = Type::INTEGER; + expression->int_value_ = value; + return expression; + } + + static std::unique_ptr fromFloat(double value) { + std::unique_ptr expression{new AtomicExpression}; + expression->type_ = Type::FLOAT; + expression->fp_value_ = value; + return expression; + } + + static std::unique_ptr fromIdentifier(const Variable& value) { + std::unique_ptr expression{new AtomicExpression}; + expression->type_ = Type::IDENTIFIER; + new (&expression->identifier_value_) Variable{value}; + return expression; + } + + Type type_; + union { + uint64_t int_value_; + double fp_value_; + Variable identifier_value_; + }; + + friend class Parser; +}; + +class Statement : public AstNode { +public: + Statement(Statement::ClassType type) : AstNode(type) {} + + virtual ~Statement() = default; +}; + +class AssertStatement final : public Statement { +public: + AssertStatement() : Statement(Statement::TYPE_AssertStatement) {} + + const Expression* getExpression() const { return expression_.get(); } + +private: + std::unique_ptr expression_; + + friend class Parser; +}; + +class FunctionCallExpression final : public Expression { +public: + typedef PointerVectorConstIter FunctionArgsConstIter; + + FunctionCallExpression() : Expression(Expression::TYPE_FunctionCallExpression) {} + + const std::string& getFunctionName() const { return function_name_; } + FunctionArgsConstIter args_begin() const { return function_args_.cbegin(); } + FunctionArgsConstIter args_end() const { return function_args_.cend(); } + ConstRangeWrapper args() const { return {args_begin(), args_end()}; } + +private: + std::string function_name_; + std::vector> function_args_; + + friend class Parser; +}; + +class ExpressionStatement final : public Statement { +public: + ExpressionStatement() : Statement(Statement::TYPE_ExpressionStatement) {} + + const Expression* getExpression() const { return 
expression_.get(); } + +private: + std::unique_ptr expression_; + + friend class Parser; +}; + +class InputStatement final : public Statement { +public: + InputStatement() : Statement(Statement::TYPE_InputStatement) {} + + const Variable& getVariable() const { return variable_; } + +private: + Variable variable_; + + friend class Parser; +}; + +class AbortStatement final : public Statement { +public: + AbortStatement() : Statement(Statement::TYPE_AbortStatement) {} + +private: + friend class Parser; +}; + +class BranchCase final { +public: + typedef PointerVectorConstIter BodyConstIter; + + const Expression* getExpression() const { return expression_.get(); } + BodyConstIter begin_body() const { return body_.cbegin(); } + BodyConstIter end_body() const { return body_.cend(); } + ConstRangeWrapper body() const { return {begin_body(), end_body()}; } + +private: + std::unique_ptr expression_; + std::vector> body_; + + friend class Parser; +}; + +class BranchElse final { +public: + typedef PointerVectorConstIter BodyConstIter; + + BodyConstIter begin_body() const { return body_.cbegin(); } + BodyConstIter end_body() const { return body_.cend(); } + ConstRangeWrapper body() const { return {begin_body(), end_body()}; } + +private: + std::vector> body_; + + friend class Parser; +}; + +class BranchStatement final : public Statement { +public: + typedef std::vector::const_iterator BranchCaseConstIter; + + BranchStatement() : Statement(Statement::TYPE_BranchStatement) {} + + const Variable& getLeadVariable() const { return lead_var_; } + + BranchCaseConstIter begin_cases() const { return cases_.cbegin(); } + BranchCaseConstIter end_cases() const { return cases_.cend(); } + ConstRangeWrapper cases() const { return {begin_cases(), end_cases()}; } + + bool hasBranchElse() const { return static_cast(maybe_else_case_); } + const BranchElse* getBranchElse() const { + assert(hasBranchElse()); + return maybe_else_case_.get(); + } + +private: + Variable lead_var_; + std::vector 
cases_; + std::unique_ptr maybe_else_case_; + + friend class Parser; +}; + +class VardeclStatement final : public Statement { +public: + VardeclStatement() : Statement(Statement::TYPE_VardeclStatement) {} + + const Variable& getVariable() const { return variable_; } + const VarType& getType() const { return type_; } + + bool hasInitializer() const { return static_cast(maybe_init_); } + const Expression* getInitializer() const { + assert(hasInitializer()); + return maybe_init_.get(); + } + +private: + Variable variable_; + VarType type_; + std::unique_ptr maybe_init_; + + friend class Parser; +}; + +class LoopStatement final : public Statement { +public: + typedef PointerVectorConstIter BodyConstIter; + + LoopStatement() : Statement(Statement::TYPE_LoopStatement) {} + + BodyConstIter begin_body() const { return body_.cbegin(); } + BodyConstIter end_body() const { return body_.cend(); } + ConstRangeWrapper body() const { return {begin_body(), end_body()}; } + + const Expression* getCondition() const { return condition_.get(); } + +private: + std::vector> body_; + std::unique_ptr condition_; + + friend class Parser; +}; + +class ReturnStatement final : public Statement { +public: + ReturnStatement() : Statement(Statement::TYPE_ReturnStatement) {} + + bool hasExpression() const { return static_cast(maybe_expression_); } + const Expression* getExpression() const { + assert(hasExpression()); + return maybe_expression_.get(); + } + +private: + std::unique_ptr maybe_expression_; + + friend class Parser; +}; + +class PrintStatement final : public Statement { +public: + PrintStatement() : Statement(Statement::TYPE_PrintStatement) {} + + const Expression* getExpression() const { return expression_.get(); } + +private: + std::unique_ptr expression_; + + friend class Parser; +}; + +class AssignStatement final : public Statement { +public: + AssignStatement() : Statement(Statement::TYPE_AssignStatement) {} + + const Variable& getVariable() const { return variable_; } + const 
Expression* getExpression() const { return expression_.get(); } + +private: + std::unique_ptr expression_; + Variable variable_; + + friend class Parser; +}; + +class Function final : public AstNode { +public: + Function() : AstNode(Statement::TYPE_Function) {} + + typedef std::vector::const_iterator FunctionParamConstIter; + typedef PointerVectorConstIter BodyConstIter; + + const std::string& getName() const { return name_; } + const VarType& getReturnType() const { return return_type_; } + bool isEntryPoint() const { return name_.empty(); } + + int params_size() const { return params_.size(); } + FunctionParamConstIter begin_params() const { return params_.cbegin(); } + FunctionParamConstIter end_params() const { return params_.cend(); } + ConstRangeWrapper params() const { + return {begin_params(), end_params()}; + } + + BodyConstIter begin_body() const { return body_.cbegin(); } + BodyConstIter end_body() const { return body_.cend(); } + bool body_empty() const { return body_.empty(); } + ConstRangeWrapper body() const { return {begin_body(), end_body()}; } + +private: + std::string name_; + VarType return_type_; + std::vector params_; + std::vector> body_; + + friend class Parser; +}; + +class Module final : public AstNode { +public: + typedef PointerVectorConstIter FunctionsConstIter; + + Module() : AstNode(AstNode::TYPE_Module) {} + + bool hasEntryPoint() const { return static_cast(maybe_entry_point_); } + const Function* getEntryPoint() const { + assert(hasEntryPoint()); + return maybe_entry_point_.get(); + } + + FunctionsConstIter begin_functions() const { return functions_.cbegin(); } + FunctionsConstIter end_functions() const { return functions_.cend(); } + ConstRangeWrapper functions() const { + return {begin_functions(), end_functions()}; + } + + const std::string& getSourceFilename() const { return source_filename_; } + +private: + std::vector> functions_; + std::unique_ptr maybe_entry_point_; + std::string source_filename_; + + friend class Parser; 
+}; + +} // namespace monicelli + +#endif diff --git a/src/codegen.cpp b/src/codegen.cpp new file mode 100644 index 0000000..2eba666 --- /dev/null +++ b/src/codegen.cpp @@ -0,0 +1,754 @@ +// Copyright 2017 the Monicelli project authors. All rights reserved. +// Use of this source code is governed by a GPLv3 license, see LICENSE.txt. + +#include "codegen.def" +#include "ast-visitor.h" +#include "parser.h" + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/TypeBuilder.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/raw_os_ostream.h" +#include "llvm/Transforms/Scalar.h" + +#include + +using namespace monicelli; + +namespace { + +class NestedScopes final { +public: + class Guard final { + public: + Guard(NestedScopes& context) : context_(context) { context_.enterScope(); } + + ~Guard() { context_.leaveScope(); } + + private: + NestedScopes& context_; + }; + + NestedScopes() {} + + NestedScopes(NestedScopes&) = delete; + NestedScopes& operator=(NestedScopes&) = delete; + + llvm::Value* lookup(const std::string& name); + + bool define(const std::string& name, llvm::Value* def) { + assert(!scopes_.empty() && "Trying to define outside any scope"); + auto result = scopes_.back().insert({name, def}); + return result.second; + } + + void enterScope() { scopes_.emplace_back(); } + + void leaveScope() { + assert(!scopes_.empty() && "Trying to leave a scope, but there is none"); + scopes_.pop_back(); + } + + void reset() { scopes_.clear(); } + bool empty() const { return scopes_.empty(); } + +private: + std::vector> scopes_; +}; + +class IRGenerator; + +class ResultTypeCalculator : public ConstAstVisitor, + public ErrorReportingMixin { +public: + 
ResultTypeCalculator(IRGenerator* codegen, const std::string& source_filename) + : ErrorReportingMixin(source_filename), codegen_(codegen) {} + + llvm::Type* visitBinaryExpression(const BinaryExpression* e); + llvm::Type* visitAtomicExpression(const AtomicExpression* e); + llvm::Type* visitFunctionCallExpression(const FunctionCallExpression* e); + +private: + IRGenerator* codegen_; +}; + +class IRGenerator final : public ConstAstVisitor, + public ErrorReportingMixin { +public: + IRGenerator(llvm::LLVMContext& context, const std::string& source_filename) + : ErrorReportingMixin(source_filename), context_(context), builder_(context), + exit_block_(nullptr), return_var_(nullptr), type_calculator_(this, source_filename) {} + + std::unique_ptr releaseModule() { return std::move(module_); } + llvm::Module* getModule() { return module_.get(); } + + llvm::Value* visitModule(const Module* m); + llvm::Value* visitFunction(const Function* f); + llvm::Value* visitVardeclStatement(const VardeclStatement* s); + llvm::Value* visitReturnStatement(const ReturnStatement* r); + llvm::Value* visitAssignStatement(const AssignStatement* a); + llvm::Value* visitBranchStatement(const BranchStatement* b); + llvm::Value* visitLoopStatement(const LoopStatement* l); + llvm::Value* visitInputStatement(const InputStatement* s); + llvm::Value* visitPrintStatement(const PrintStatement* p); + llvm::Value* visitExpressionStatement(const ExpressionStatement* s) { + visit(s->getExpression()); + return nullptr; + } + llvm::Value* visitBinaryExpression(const BinaryExpression* e); + llvm::Value* visitAtomicExpression(const AtomicExpression* e); + llvm::Value* visitFunctionCallExpression(const FunctionCallExpression* f); + +private: + llvm::Function* declareFunction(const Function* f); + std::string getFunctionName(const Function* f) { + return f->isEntryPoint() ? 
"main" : f->getName(); + } + + void declareBuiltins(); + + template const char* getFormatSpecifier(llvm::Type* type); + template llvm::Value* getFormatString(llvm::Type* type); + template void callIOBuiltin(llvm::Type* type, llvm::Value* value); + + llvm::Type* getIRType(const VarType& type) { + auto base_type = getIRBaseType(type.getBaseType()); + if (type.isPointer()) { + return base_type->getPointerTo(); + } + return base_type; + } + + llvm::Type* getIRBaseType(VarType::BaseType type); + llvm::Value* ensureType(llvm::Value* value, llvm::Type* type); + const char* getSourceBaseType(llvm::Type* type); + std::string getSourceType(llvm::Type* type); + + llvm::Value* evalTruthiness(llvm::Value* val); + + llvm::Function* current_function() { return builder_.GetInsertBlock()->getParent(); } + + llvm::LLVMContext& context_; + + llvm::IRBuilder<> builder_; + std::unique_ptr module_; + + NestedScopes var_scopes_; + llvm::DenseMap input_format_strings_cache_; + llvm::DenseMap output_format_strings_cache_; + llvm::BasicBlock* exit_block_; + llvm::AllocaInst* return_var_; + + ResultTypeCalculator type_calculator_; + + friend class ResultTypeCalculator; +}; + +} // namespace + +llvm::Value* NestedScopes::lookup(const std::string& name) { + for (auto c = scopes_.crbegin(), end = scopes_.crend(); c != end; ++c) { + auto result = c->find(name); + if (result != c->end()) return result->second; + } + return nullptr; +} + +void IRGenerator::declareBuiltins() { + llvm::FunctionType* printf_type = llvm::TypeBuilder::get(context_); + auto no_alias = llvm::AttributeSet().addAttribute(context_, 1, llvm::Attribute::NoAlias); + + module_->getOrInsertFunction("printf", printf_type, no_alias); + module_->getOrInsertFunction("scanf", printf_type, no_alias); +} + +llvm::Value* IRGenerator::visitModule(const Module* m) { + module_ = llvm::make_unique("antani", context_); + + declareBuiltins(); + + for (const Function* f : m->functions()) { + declareFunction(f); + } + if (m->hasEntryPoint()) 
declareFunction(m->getEntryPoint()); + + for (const Function* f : m->functions()) { + visit(f); + } + if (m->hasEntryPoint()) visit(m->getEntryPoint()); + + llvm::verifyModule(*module_); + + return nullptr; +} + +llvm::Function* IRGenerator::declareFunction(const Function* ast_f) { + std::vector param_types; + param_types.reserve(ast_f->params_size()); + for (const FunctionParam& param : ast_f->params()) { + param_types.push_back(getIRType(param.getType())); + } + auto type = llvm::FunctionType::get(getIRType(ast_f->getReturnType()), param_types, false); + + llvm::Function* f = llvm::Function::Create(type, llvm::Function::ExternalLinkage, + getFunctionName(ast_f), module_.get()); + + auto ast_arg = ast_f->begin_params(); + for (auto& arg : f->args()) { + arg.setName(ast_arg->getArg().getName()); + ++ast_arg; + } + assert(ast_arg == ast_f->end_params()); + + return f; +} + +llvm::Value* IRGenerator::visitFunction(const Function* ast_f) { + llvm::Function* f = module_->getFunction(getFunctionName(ast_f)); + assert(f && "This function should have had a prototype defined"); + + if (ast_f->body_empty()) return f; + + NestedScopes::Guard scopes_guard{var_scopes_}; + llvm::BasicBlock* entry = llvm::BasicBlock::Create(context_, "entry", f); + builder_.SetInsertPoint(entry); + + if (!f->getReturnType()->isVoidTy()) { + return_var_ = builder_.CreateAlloca(f->getReturnType(), nullptr, "result"); + if (ast_f->isEntryPoint()) { + assert(f->getReturnType()->isIntegerTy()); + builder_.CreateStore(ensureType(builder_.getInt64(0), f->getReturnType()), return_var_); + } + } else { + return_var_ = nullptr; + } + + for (auto& arg : f->args()) { + auto arg_ptr = builder_.CreateAlloca(arg.getType(), nullptr, arg.getName()); + builder_.CreateStore(&arg, arg_ptr); + var_scopes_.define(arg.getName(), arg_ptr); + } + + exit_block_ = llvm::BasicBlock::Create(context_, "exit"); + + for (const Statement* s : ast_f->body()) { + visit(s); + } + + builder_.CreateBr(exit_block_); + + 
f->getBasicBlockList().push_back(exit_block_); + builder_.SetInsertPoint(exit_block_); + + if (return_var_) { + builder_.CreateRet(builder_.CreateLoad(return_var_)); + } else { + builder_.CreateRetVoid(); + } + + llvm::verifyFunction(*f); + + exit_block_ = nullptr; + return_var_ = nullptr; + + return f; +} + +llvm::Value* IRGenerator::visitVardeclStatement(const VardeclStatement* s) { + const auto& name = s->getVariable().getName(); + llvm::AllocaInst* var = builder_.CreateAlloca(getIRType(s->getType()), nullptr, name); + if (!var_scopes_.define(name, var)) { + error(&s->getVariable(), "redefining an existing variable"); + } + if (s->hasInitializer()) { + llvm::Value* init = visit(s->getInitializer()); + auto original_init_type = init->getType(); + auto target_type = var->getType()->getPointerElementType(); + init = ensureType(init, target_type); + if (!init) { + error(s->getInitializer(), "cannot initialize variable of type", getSourceType(target_type), + "with expression of type", getSourceType(original_init_type)); + } + builder_.CreateStore(init, var); + } + return var; +} + +llvm::Value* IRGenerator::visitReturnStatement(const ReturnStatement* r) { + if (r->hasExpression()) { + auto return_value = visit(r->getExpression()); + auto original_return_type = return_value->getType(); + auto return_type = return_var_->getType()->getPointerElementType(); + return_value = ensureType(return_value, return_type); + if (!return_value) { + error(r->getExpression(), "cannot return expression of type", original_return_type, + "from function of type", return_type); + } + builder_.CreateStore(return_value, return_var_); + } + builder_.CreateBr(exit_block_); + // Code after the return will end up in this unreachable BB and DCE will + // take care of it. + llvm::BasicBlock* after = llvm::BasicBlock::Create(context_, "return.after", current_function()); + builder_.SetInsertPoint(after); + // This one is not necessary, but will help catch codegen errors. 
+ builder_.CreateUnreachable(); + return nullptr; +} + +llvm::Value* IRGenerator::visitAssignStatement(const AssignStatement* a) { + auto val = visit(a->getExpression()); + assert(val && "unhandled error while building expression"); + auto var = var_scopes_.lookup(a->getVariable().getName()); + if (!var) { + error(&a->getVariable(), "assigning to undefined variable", a->getVariable().getName()); + } + auto original_val_type = val->getType(); + auto target_type = var->getType()->getPointerElementType(); + val = ensureType(val, target_type); + if (!val) { + error(a->getExpression(), "cannot assign expression of type", getSourceType(original_val_type), + "to variable of type", getSourceType(target_type)); + } + builder_.CreateStore(val, var); + return nullptr; +} + +llvm::Value* IRGenerator::evalTruthiness(llvm::Value* val) { + if (llvm::isa(val)) return val; + auto val_type = val->getType(); + if (!val_type->isIntegerTy() && !val_type->isFloatingPointTy()) { + return nullptr; + } + auto zero = ensureType(builder_.getInt64(0), val_type); + return builder_.CreateICmpNE(val, zero, "cond"); +} + +llvm::Value* IRGenerator::visitBranchStatement(const BranchStatement* b) { + llvm::BasicBlock* entry_bb = + llvm::BasicBlock::Create(context_, "branch.head", current_function()); + llvm::BasicBlock* exit_bb = llvm::BasicBlock::Create(context_, "branch.after"); + + builder_.CreateBr(entry_bb); + builder_.SetInsertPoint(entry_bb); + + llvm::BasicBlock* case_cond_bb = + llvm::BasicBlock::Create(context_, "branch.case.cond", current_function()); + + builder_.CreateBr(case_cond_bb); + builder_.SetInsertPoint(case_cond_bb); + + for (const BranchCase& branch_case : b->cases()) { + llvm::Value* condition = visit(branch_case.getExpression()); + auto condition_type = condition->getType(); + condition = evalTruthiness(condition); + if (!condition) { + error(branch_case.getExpression(), "cannot convert expression of type", + getSourceType(condition_type), "to boolean."); + } + case_cond_bb 
= llvm::BasicBlock::Create(context_, "branch.case.cond"); + llvm::BasicBlock* case_body_bb = + llvm::BasicBlock::Create(context_, "branch.case.body", current_function()); + + builder_.CreateCondBr(condition, case_body_bb, case_cond_bb); + builder_.SetInsertPoint(case_body_bb); + for (const Statement* s : branch_case.body()) { + visit(s); + } + builder_.CreateBr(exit_bb); + current_function()->getBasicBlockList().push_back(case_cond_bb); + builder_.SetInsertPoint(case_cond_bb); + } + + if (b->hasBranchElse()) { + NestedScopes::Guard scope_guard{var_scopes_}; + llvm::BasicBlock* else_bb = + llvm::BasicBlock::Create(context_, "branch.else", current_function()); + builder_.CreateBr(else_bb); + builder_.SetInsertPoint(else_bb); + for (const Statement* s : b->getBranchElse()->body()) { + visit(s); + } + } + + builder_.CreateBr(exit_bb); + current_function()->getBasicBlockList().push_back(exit_bb); + builder_.SetInsertPoint(exit_bb); + + return nullptr; +} + +llvm::Value* IRGenerator::visitLoopStatement(const LoopStatement* l) { + llvm::BasicBlock* body_bb = llvm::BasicBlock::Create(context_, "loop.body", current_function()); + builder_.CreateBr(body_bb); + builder_.SetInsertPoint(body_bb); + + { + NestedScopes::Guard scope_guard{var_scopes_}; + for (const Statement* s : l->body()) { + visit(s); + } + } + + llvm::BasicBlock* condition_bb = + llvm::BasicBlock::Create(context_, "loop.condition", current_function()); + llvm::BasicBlock* after_bb = llvm::BasicBlock::Create(context_, "loop.after"); + + builder_.CreateBr(condition_bb); + builder_.SetInsertPoint(condition_bb); + auto condition = visit(l->getCondition()); + auto condition_type = condition->getType(); + condition = evalTruthiness(condition); + if (!condition) { + error(l->getCondition(), "cannot convert expression of type", getSourceType(condition_type), + "to boolean"); + } + builder_.CreateCondBr(condition, body_bb, after_bb); + + current_function()->getBasicBlockList().push_back(after_bb); + 
builder_.SetInsertPoint(after_bb); + + return nullptr; +} + +const char* IRGenerator::getSourceBaseType(llvm::Type* type) { + assert(type->isIntegerTy() || type->isFloatingPointTy()); +#define RETURN_BASE_NAME(_1, TYPE, _2, _3, SOURCE_NAME, _4) \ + if (type == builder_.get##TYPE##Ty()) { \ + return SOURCE_NAME; \ + } + BUILTIN_TYPES(RETURN_BASE_NAME) +#undef RETURN_BASE_NAME + + UNREACHABLE("Unhandled base type."); +} + +std::string IRGenerator::getSourceType(llvm::Type* type) { + std::string name; + while (type->isPointerTy()) { + name += "conte "; + type = type->getPointerElementType(); + } + name += getSourceBaseType(type); + return name; +} + +namespace { + +template<> const char* IRGenerator::getFormatSpecifier(llvm::Type* type) { + assert(type->isIntegerTy() || type->isFloatingPointTy()); +#define RETURN_SPECIFIER(_1, TYPE, SPEC, _2, _3, _4) \ + if (type == builder_.get##TYPE##Ty()) { \ + return SPEC; \ + } + BUILTIN_TYPES(RETURN_SPECIFIER) +#undef RETURN_SPECIFIER + + UNREACHABLE("Unhandled input format specifier"); +} + +template<> const char* IRGenerator::getFormatSpecifier(llvm::Type* type) { + assert(type->isIntegerTy() || type->isFloatingPointTy()); +#define RETURN_SPECIFIER(_1, TYPE, _2, SPEC, _3, _4) \ + if (type == builder_.get##TYPE##Ty()) { \ + return SPEC; \ + } + BUILTIN_TYPES(RETURN_SPECIFIER) +#undef RETURN_SPECIFIER + + UNREACHABLE("Unhandled output format specifier"); +} + +} // namespace + +template llvm::Value* IRGenerator::getFormatString(llvm::Type* type) { + auto& cache = output ? output_format_strings_cache_ : input_format_strings_cache_; + auto hit = cache.find(type); + if (hit != cache.end()) return hit->second; + auto specifier = getFormatSpecifier(type); + auto format = builder_.CreateGlobalStringPtr(specifier, "format"); + cache.insert({type, format}); + return format; +} + +template void IRGenerator::callIOBuiltin(llvm::Type* type, llvm::Value* value) { + auto builtin = module_->getFunction(output ? 
"printf" : "scanf"); + assert(builtin && "Builtin was not declared"); + llvm::Value* args[] = {getFormatString(type), value}; + builder_.CreateCall(builtin, args); +} + +llvm::Value* IRGenerator::visitInputStatement(const InputStatement* s) { + auto var = var_scopes_.lookup(s->getVariable().getName()); + if (!var) { + error(&s->getVariable(), "reading an undefined variable"); + } + assert(var->getType()->isPointerTy()); + + auto target = var; + auto target_type = target->getType()->getPointerElementType(); + bool reading_bool = target_type == builder_.getInt1Ty(); + if (!target_type->isIntegerTy() && !target_type->isFloatingPointTy()) { + error(&s->getVariable(), "can only read integers and floating point"); + } + + if (reading_bool) { + target = builder_.CreateAlloca(builder_.getInt32Ty()); + } + + callIOBuiltin(target_type, target); + + if (reading_bool) { + builder_.CreateStore(evalTruthiness(builder_.CreateLoad(target)), var); + } + + return nullptr; +} + +llvm::Value* IRGenerator::visitPrintStatement(const PrintStatement* p) { + auto value = visit(p->getExpression()); + auto type = value->getType(); + if (!type->isIntegerTy() && !type->isFloatingPointTy()) { + error(p->getExpression(), "only integer and float valued expressions may be printed"); + } + // Integer promotion for variadic call. + if (type->isIntegerTy() && type->getIntegerBitWidth() < 32) { + value = builder_.CreateZExt(value, builder_.getInt32Ty()); + } + // Same, for floating point. 
+ if (type == builder_.getFloatTy()) { + value = builder_.CreateFPCast(value, builder_.getDoubleTy()); + } + callIOBuiltin(type, value); + return nullptr; +} + +llvm::Value* IRGenerator::visitBinaryExpression(const BinaryExpression* e) { + auto lhs = visit(e->getLeft()); + auto rhs = visit(e->getRight()); + + if (lhs->getType()->isPointerTy() || rhs->getType()->isPointerTy()) { + error(e, "pointer arithmetic is not supported"); + } + + llvm::Type* result_type = type_calculator_.visit(e); + auto original_lhs_type = lhs->getType(); + auto original_rhs_type = rhs->getType(); + lhs = ensureType(lhs, result_type); + rhs = ensureType(rhs, result_type); + + if (!lhs || !rhs) { + auto expression = !lhs ? e->getLeft() : e->getRight(); + error(expression, "cannot cast expression of type", getSourceType(original_lhs_type), "to type", + getSourceType(original_rhs_type)); + } + + if (result_type->isIntegerTy()) { + switch (e->getType()) { +#define RETURN_INT_BINOP(NAME, OP) \ + case BinaryExpression::NAME: \ + return builder_.CreateBinOp(llvm::Instruction::OP, lhs, rhs); + IR_INT_BINARY_OPS(RETURN_INT_BINOP) +#undef RETURN_INT_BINOP +#define RETURN_INT_CMPOP(NAME, OP) \ + case BinaryExpression::NAME: \ + return builder_.CreateICmp(llvm::CmpInst::ICMP_##OP, lhs, rhs); + IR_INT_CMP_OPS(RETURN_INT_CMPOP) +#undef RETURN_INT_CMPOP + default: + error(e, "this operation cannot be applied to integers"); + } + + return nullptr; + } + + if (result_type->isFloatingPointTy()) { + switch (e->getType()) { +#define RETURN_FLOAT_BINOP(NAME, OP) \ + case BinaryExpression::NAME: \ + return builder_.CreateBinOp(llvm::Instruction::OP, lhs, rhs); + IR_FLOAT_BINARY_OPS(RETURN_FLOAT_BINOP) +#undef RETURN_FLOAT_BINOP +#define RETURN_FLOAT_CMPOP(NAME, OP) \ + case BinaryExpression::NAME: \ + return builder_.CreateFCmp(llvm::CmpInst::FCMP_##OP, lhs, rhs); + IR_FLOAT_CMP_OPS(RETURN_FLOAT_CMPOP) +#undef RETURN_FLOAT_CMPOP + default: + error(e, "this operation cannot be applied to floats"); + } + + return 
nullptr; + } + + UNREACHABLE("Unimplemented operand conversion"); +} + +llvm::Value* IRGenerator::visitAtomicExpression(const AtomicExpression* e) { + switch (e->getType()) { + case AtomicExpression::INTEGER: + return builder_.getInt32(e->getIntValue()); + case AtomicExpression::FLOAT: + return llvm::ConstantFP::get(builder_.getDoubleTy(), e->getFloatValue()); + case AtomicExpression::IDENTIFIER: { + auto var = var_scopes_.lookup(e->getIdentifierValue().getName()); + if (!var) { + error(&e->getIdentifierValue(), "undefined variable", e->getIdentifierValue().getName()); + } + assert(llvm::isa(var)); + return builder_.CreateLoad(var); + } + default: + UNREACHABLE("Unhandled AtomicExpression type"); + } +} + +llvm::Value* IRGenerator::visitFunctionCallExpression(const FunctionCallExpression* ast_f) { + llvm::Function* f = module_->getFunction(ast_f->getFunctionName()); + if (!f) { + error(ast_f, "call to undefined function", ast_f->getFunctionName()); + } + std::vector call_args; + auto ir_arg = f->arg_begin(); + for (const Expression* ast_arg : ast_f->args()) { + auto arg = visit(ast_arg); + auto original_arg_type = arg->getType(); + arg = ensureType(arg, ir_arg->getType()); + if (!arg) { + error(ast_arg, "cannot pass expression of type", getSourceType(original_arg_type), + "as argument of type", getSourceType(ir_arg->getType()), "in call to", + ast_f->getFunctionName()); + } + call_args.push_back(arg); + ++ir_arg; + } + assert(ir_arg == f->arg_end()); + return builder_.CreateCall(f, call_args); +} + +llvm::Type* IRGenerator::getIRBaseType(VarType::BaseType type) { + switch (type) { +#define RETURN_IR_TYPE(NAME, IR_TYPE, _1, _2, _3, _4) \ + case VarType::NAME: \ + return builder_.get##IR_TYPE##Ty(); + BUILTIN_TYPES(RETURN_IR_TYPE) +#undef RETURN_IR_TYPE + default: + UNREACHABLE("Unhandled VarType::BaseType on getIRBaseType"); + } +} + +llvm::Value* IRGenerator::ensureType(llvm::Value* value, llvm::Type* type) { + if (value->getType()->isPointerTy() != 
type->isPointerTy()) { + // Cannot cast pointer to int or viceversa. + return nullptr; + } + if (value->getType()->isPointerTy()) { + // Pointers are not castable. + return value->getType() == type ? value : nullptr; + } + if (value->getType()->isIntegerTy() && type->isIntegerTy()) { + return builder_.CreateSExtOrTrunc(value, type); + } + if (value->getType()->isFloatingPointTy() && type->isIntegerTy()) { + return builder_.CreateFPToSI(value, type); + } + if (value->getType()->isIntegerTy() && type->isFloatingPointTy()) { + return builder_.CreateSIToFP(value, type); + } + if (value->getType()->isFloatingPointTy() && type->isFloatingPointTy()) { + return builder_.CreateFPCast(value, type); + } + UNREACHABLE("Unhandled IR type conversion"); +} + +llvm::Type* ResultTypeCalculator::visitBinaryExpression(const BinaryExpression* e) { + llvm::Type* ltype = visit(e->getLeft()); + llvm::Type* rtype = visit(e->getRight()); + // Void should not be here at all. + if (ltype->isVoidTy() || rtype->isVoidTy()) { + error(e, "cannot operate on void"); + } + // Same type, job done. + if (ltype == rtype) return ltype; + // Pointers are not castable. + if (ltype->isPointerTy() != rtype->isPointerTy()) { + error(e, "cannot cast pointer to int"); + } + if (ltype->isPointerTy() && rtype->isPointerTy()) { + // implied: different pointer types. + error(e, "cannot cast between pointer types"); + } + // Double (floating point) always wins. + if (ltype->isFloatingPointTy() || rtype->isFloatingPointTy()) { + return codegen_->builder_.getDoubleTy(); + } + // Integers always upcast. + if (ltype->isIntegerTy() && rtype->isIntegerTy()) { + int lsize = ltype->getPrimitiveSizeInBits(); + int rsize = rtype->getPrimitiveSizeInBits(); + return lsize > rsize ? 
ltype : rtype; + } + + UNREACHABLE("Unhandled BinaryExpression type"); +} + +llvm::Type* ResultTypeCalculator::visitAtomicExpression(const AtomicExpression* e) { + switch (e->getType()) { + case AtomicExpression::INTEGER: + return codegen_->builder_.getInt32Ty(); + case AtomicExpression::FLOAT: + return codegen_->builder_.getDoubleTy(); + case AtomicExpression::IDENTIFIER: { + auto var = codegen_->var_scopes_.lookup(e->getIdentifierValue().getName()); + assert(var); + return var->getType()->getPointerElementType(); + } + default: + UNREACHABLE("Unhandled AtomicExpression type"); + } +} + +llvm::Type* ResultTypeCalculator::visitFunctionCallExpression(const FunctionCallExpression* e) { + auto f = codegen_->module_->getFunction(e->getFunctionName()); + assert(f); + return f->getReturnType(); +} + +namespace monicelli { + +std::unique_ptr generateIR(llvm::LLVMContext& context, Module* ast) { + IRGenerator codegen{context, ast->getSourceFilename()}; + codegen.visit(ast); + return codegen.releaseModule(); +} + +void runFunctionOptimizer(llvm::Module* module) { + llvm::legacy::FunctionPassManager pass_manager{module}; + pass_manager.add(llvm::createInstructionCombiningPass()); + pass_manager.add(llvm::createReassociatePass()); + pass_manager.add(llvm::createGVNPass()); + pass_manager.add(llvm::createCFGSimplificationPass()); + pass_manager.add(llvm::createDeadCodeEliminationPass()); + pass_manager.add(llvm::createPromoteMemoryToRegisterPass()); + pass_manager.doInitialization(); + for (llvm::Function& f : module->functions()) { + pass_manager.run(f); + } +} + +void printIR(std::ostream& stream, llvm::Module* module) { + llvm::raw_os_ostream llvm_stream{stream}; + module->print(llvm_stream, nullptr); +} + +} // namespace monicelli diff --git a/src/codegen.def b/src/codegen.def new file mode 100644 index 0000000..eba553c --- /dev/null +++ b/src/codegen.def @@ -0,0 +1,37 @@ +#ifndef MONICELLI_CODEGEN_DEF +#define MONICELLI_CODEGEN_DEF + +// Copyright 2017 the Monicelli 
project authors. All rights reserved. +// Use of this source code is governed by a GPLv3 license, see LICENSE.txt. + +#include "types.def" + +#define IR_INT_BINARY_OPS(V) \ + V(SHL, Shl) \ + V(SHR, AShr) \ + V(PLUS, Add) \ + V(MINUS, Sub) \ + V(TIMES, Mul) \ + V(DIV, SDiv) + +#define IR_INT_CMP_OPS(V) \ + V(EQ, EQ) \ + V(GE, SGE) \ + V(GT, SGT) \ + V(LE, SLE) \ + V(LT, SLT) \ + +#define IR_FLOAT_BINARY_OPS(V) \ + V(PLUS, FAdd) \ + V(MINUS, FSub) \ + V(TIMES, FMul) \ + V(DIV, FDiv) + +#define IR_FLOAT_CMP_OPS(V) \ + V(EQ, OEQ) \ + V(GE, OGE) \ + V(GT, OGT) \ + V(LE, OLE) \ + V(LT, OLT) \ + +#endif diff --git a/src/codegen.h b/src/codegen.h new file mode 100644 index 0000000..55705bc --- /dev/null +++ b/src/codegen.h @@ -0,0 +1,23 @@ +#ifndef MONICELLI_CODEGEN_H +#define MONICELLI_CODEGEN_H + +// Copyright 2017 the Monicelli project authors. All rights reserved. +// Use of this source code is governed by a GPLv3 license, see LICENSE.txt. + +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include + +namespace monicelli { + +class Module; + +std::unique_ptr generateIR(llvm::LLVMContext& context, Module* ast); + +void runFunctionOptimizer(llvm::Module* module); + +void printIR(std::ostream& stream, llvm::Module* module); + +} // namespace monicelli + +#endif diff --git a/src/errors.cpp b/src/errors.cpp new file mode 100644 index 0000000..d8aca5a --- /dev/null +++ b/src/errors.cpp @@ -0,0 +1,52 @@ +// Copyright 2017 the Monicelli project authors. All rights reserved. +// Use of this source code is governed by a GPLv3 license, see LICENSE.txt. 
+ +#include "errors.h" + +#include +#include +#include +#include + +namespace monicelli { + +[[noreturn]] void UNREACHABLE(const std::string& message) { + std::cerr << message << '\n'; + abort(); +} + +static std::string getNthLine(std::istream& file, int lineNumber) { + std::string line; + for (int i = 0; i < lineNumber; ++i) { + if (!std::getline(file, line)) { + return ""; + } + } + return line; +} + +void ErrorReportingMixin::printErrorLocation(std::ostream& stream, const Location& from, + const Location& to) { + std::ifstream file{source_filename_}; + auto line = getNthLine(file, from.getLine()); + + if (!line.empty()) { + stream << line << '\n'; + + for (int i = 1; i < from.getColumn(); ++i) { + stream << ' '; + } + stream << '^'; + + int area_limit = from.getLine() == to.getLine() ? to.getColumn() - 1 : line.size(); + + // This one will not get printed if from and to are the same. + for (int i = from.getColumn(); i < area_limit; ++i) { + stream << '~'; + } + } + + stream << '\n' << from << ": error: "; +} + +} // namespace monicelli diff --git a/src/errors.h b/src/errors.h new file mode 100644 index 0000000..67d2cee --- /dev/null +++ b/src/errors.h @@ -0,0 +1,59 @@ +#ifndef MONICELLI_ERRORS_H +#define MONICELLI_ERRORS_H + +// Copyright 2017 the Monicelli project authors. All rights reserved. +// Use of this source code is governed by a GPLv3 license, see LICENSE.txt. 
+ +#include "location.h" +#include "support.h" + +#include +#include + +namespace monicelli { + +[[noreturn]] void UNREACHABLE(const std::string& message); + +class ErrorReportingMixin { +protected: + explicit ErrorReportingMixin(const std::string& source_filename) + : source_filename_(source_filename) {} + + const std::string& getSourceFilename() const { return source_filename_; } + + void printErrorLocation(std::ostream& stream, const Location& from, const Location& to); + + template + [[noreturn]] void error(const Locatable& obj, const First& first) { + printErrorLocation(std::cerr, obj->getFirstLocation(), obj->getLastLocation()); + print(std::cerr, first); + exit(1); + } + + template + [[noreturn]] void error(const Locatable& obj, const First& first, Tail... tail) { + printErrorLocation(std::cerr, obj->getFirstLocation(), obj->getLastLocation()); + print(std::cerr, first, tail...); + exit(1); + } + + template + [[noreturn]] void error(const Location& where, const First& first) { + printErrorLocation(std::cerr, where, where); + print(std::cerr, first); + exit(1); + } + + template + [[noreturn]] void error(const Location& where, const First& first, Tail... tail) { + printErrorLocation(std::cerr, where, where); + print(std::cerr, first, tail...); + exit(1); + } + + private : std::string source_filename_; +}; + +} // namespace monicelli + +#endif diff --git a/src/iterators.h b/src/iterators.h new file mode 100644 index 0000000..f4f79f9 --- /dev/null +++ b/src/iterators.h @@ -0,0 +1,48 @@ +#ifndef MONICELLI_ITERATORS_H +#define MONICELLI_ITERATORS_H + +// Copyright 2017 the Monicelli project authors. All rights reserved. +// Use of this source code is governed by a GPLv3 license, see LICENSE.txt. 
#include <cstddef>
#include <iterator>
#include <memory>
#include <vector>

namespace monicelli {

// Read-only iterator over a std::vector<std::unique_ptr<T>> that hides the
// owning smart pointer: dereferencing yields a plain `const T*`.
//
// Because operator* returns a pointer by value (not a reference to an
// element), this is an input iterator as far as the standard algorithms are
// concerned.
template<typename T> class PointerVectorConstIter final {
public:
  typedef typename std::vector<std::unique_ptr<T>>::const_iterator ConstIter;

  // Traits required by std::iterator_traits and the standard algorithms.
  using iterator_category = std::input_iterator_tag;
  using value_type = const T*;
  using difference_type = std::ptrdiff_t;
  using pointer = const T*;
  using reference = const T*;

  PointerVectorConstIter(ConstIter iter) : internal_iter_(iter) {}

  const T* operator*() const { return internal_iter_->get(); }
  const T* operator->() const { return internal_iter_->get(); }

  // Comparisons do not modify the iterator, so they must be usable on const
  // iterators as well.
  bool operator==(const PointerVectorConstIter& other) const {
    return internal_iter_ == other.internal_iter_;
  }
  bool operator!=(const PointerVectorConstIter& other) const {
    return internal_iter_ != other.internal_iter_;
  }

  PointerVectorConstIter& operator++() {
    ++internal_iter_;
    return *this;
  }
  PointerVectorConstIter operator++(int) { return {internal_iter_++}; }

private:
  ConstIter internal_iter_;
};

// Pairs a begin and an end iterator so that they can be used in a range-based
// for loop.
template<typename IterT> class ConstRangeWrapper final {
public:
  ConstRangeWrapper(IterT begin, IterT end) : begin_(begin), end_(end) {}

  IterT begin() const { return begin_; }
  IterT end() const { return end_; }

private:
  IterT begin_;
  IterT end_;
};

} // namespace monicelli

#endif
// diff --git a/src/lexer.cpp b/src/lexer.cpp
// new file mode 100644
// index 0000000..4180cd1
// --- /dev/null
// +++ b/src/lexer.cpp
// @@ -0,0 +1,102 @@
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
+ +#include "lexer.h" +#include "errors.h" + +#include + +namespace monicelli { + +static const char* builtinTypeToString(Token::BuiltinTypeValue type) { + switch (type) { +#define RETURN_TYPE_NAME(NAME, _1, _2, _3, _4, _5) \ + case Token::BUILTIN_TYPE_##NAME: \ + return #NAME; + BUILTIN_TYPES(RETURN_TYPE_NAME) +#undef RETURN_TYPE_NAME + default: + UNREACHABLE("Unhandled BuiltinType."); + } +} + +void Token::print(std::ostream& stream) { + switch (type_) { +#define PRINT_TOKEN_NAME(TOKEN, _) \ + case Token::TOKEN_##TOKEN: \ + stream << "<" #TOKEN; \ + break; + LEXER_TOKENS(PRINT_TOKEN_NAME) +#undef PRINT_TOKEN_NAME + } + switch (getValueTypeForToken(type_)) { + case ValueType::INTEGER: + stream << '(' << int_value_ << ')'; + break; + case ValueType::FLOAT: + stream << '(' << fp_value_ << ')'; + break; + case ValueType::STRING: + stream << '(' << string_value_ << ')'; + break; + case ValueType::BUILTIN_TYPE: + stream << '(' << builtinTypeToString(builtin_type_value_) << ')'; + break; + case ValueType::VOID: + default: + break; + } + stream << '@' << getFirstLocation() << '-' << getLastLocation() << ">\n"; +} + +bool Token::isOperator() const { + switch (type_) { +#define CASE_NAME(NAME, _) case Token::TOKEN_##NAME: + LEXER_OPERATOR_TOKENS(CASE_NAME) +#undef CASE_NAME + return true; + default: + return false; + } +} + +// static +Token::ValueType Token::getValueTypeForToken(Token::TokenType type) { + switch (type) { +#define RETURN_VALUE_TYPE(TYPE, VALUE_TYPE) \ + case Token::TOKEN_##TYPE: \ + return ValueType::VALUE_TYPE; + LEXER_TOKENS(RETURN_VALUE_TYPE) +#undef RETURN_VALUE_TYPE + default: + UNREACHABLE("Unknown token type."); + } +} + +void Buffer::imbue(std::istream& input) { + int to_read = capacity_ - size_; + + if (to_read <= 0) { + // Grow buffer. + } + + input.read(data_.get() + size_, to_read); + size_ += input.gcount(); + cursor_ = data_.get(); +} + +void Lexer::advanceBuffer() { + // If there is a match in progress, keep it. 
+ if (state_.ts) { + int ts_offset = state_.ts - buffer_.getData(); + buffer_.shift(ts_offset); + state_.ts = buffer_.getData(); + state_.te -= ts_offset; + } else { + buffer_.clear(); + } + + buffer_.imbue(input_); +} + +} // namespace monicelli diff --git a/src/lexer.def b/src/lexer.def new file mode 100644 index 0000000..3b519c6 --- /dev/null +++ b/src/lexer.def @@ -0,0 +1,43 @@ +#ifndef MONICELLI_LEXER_DEF +#define MONICELLI_LEXER_DEF + +// Copyright 2017 the Monicelli project authors. All rights reserved. +// Use of this source code is governed by a GPLv3 license, see LICENSE.txt. + +#include "operators.def" +#include "types.def" + +#define LEXER_TOKENS(V) \ + V(IDENTIFIER, STRING) \ + V(INTEGER, INTEGER) \ + V(FLOAT, FLOAT) \ + V(TYPENAME, BUILTIN_TYPE) \ + LEXER_OPERATOR_TOKENS(V) \ + V(STAR, VOID) \ + V(VARDECL, VOID) \ + V(ARTICLE, VOID) \ + V(BANG, VOID) \ + V(COLON, VOID) \ + V(COMMA, VOID) \ + V(BRANCH_BEGIN, VOID) \ + V(ASSIGN, VOID) \ + V(PRINT, VOID) \ + V(INPUT, VOID) \ + V(ASSERT, VOID) \ + V(ABORT, VOID) \ + V(LOOP_BEGIN, VOID) \ + V(LOOP_CONDITION, VOID) \ + V(BRANCH_CONDITION, VOID) \ + V(BRANCH_ELSE, VOID) \ + V(BRANCH_END, VOID) \ + V(CASE_END, VOID) \ + V(ENTRY_POINT, VOID) \ + V(FUN_DECL, VOID) \ + V(FUN_CALL, VOID) \ + V(FUN_END, VOID) \ + V(FUN_PARAMS, VOID) \ + V(RETURN, VOID) \ + V(END, VOID) \ + V(UNKNOWN, VOID) \ + +#endif diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 0000000..d71fa4a --- /dev/null +++ b/src/lexer.h @@ -0,0 +1,186 @@ +#ifndef MONICELLI_LEXER_H +#define MONICELLI_LEXER_H + +// Copyright 2017 the Monicelli project authors. All rights reserved. +// Use of this source code is governed by a GPLv3 license, see LICENSE.txt. 
+ +#include "lexer.def" +#include "location.h" + +#include +#include +#include +#include +#include + +namespace monicelli { + +class Token final : public LocationMixin { +public: + enum TokenType { +#define DECLARE_TOKEN(NAME, _) TOKEN_##NAME, + LEXER_TOKENS(DECLARE_TOKEN) +#undef DECLARE_TOKEN + }; + + enum BuiltinTypeValue { +#define DECLARE_TYPE(NAME, _1, _2, _3, _4, _5) BUILTIN_TYPE_##NAME, + BUILTIN_TYPES(DECLARE_TYPE) +#undef DECLARE_TYPE + }; + + ~Token() { + if (value_type_ == ValueType::STRING) { + string_value_.std::string::~string(); + } + } + + TokenType getType() const { return type_; } + + operator TokenType() const { return getType(); } + + bool isOperator() const; + + uint64_t getIntValue() const { + assert(getValueTypeForToken(type_) == ValueType::INTEGER); + return int_value_; + } + + double getFloatValue() const { + assert(getValueTypeForToken(type_) == ValueType::FLOAT); + return fp_value_; + } + + BuiltinTypeValue getBuiltinTypeValue() const { + assert(getValueTypeForToken(type_) == ValueType::BUILTIN_TYPE); + return builtin_type_value_; + } + + const std::string& getStringValue() const { + assert(getValueTypeForToken(type_) == ValueType::STRING); + return string_value_; + } + + void print(std::ostream& stream); + +private: + enum class ValueType { VOID, STRING, FLOAT, INTEGER, BUILTIN_TYPE }; + + Token(TokenType type, Location first_location, Location last_location) + : LocationMixin(first_location, last_location), type_(type), value_type_(ValueType::VOID) {} + + Token(TokenType type, Location location) + : LocationMixin(location, location), type_(type), value_type_(ValueType::VOID) {} + + void setIntValue(uint64_t value) { + assert(getValueTypeForToken(type_) == ValueType::INTEGER); + int_value_ = value; + } + + void setFloatValue(double value) { + assert(getValueTypeForToken(type_) == ValueType::FLOAT); + fp_value_ = value; + } + + void setBuiltinTypeValue(BuiltinTypeValue value) { + assert(getValueTypeForToken(type_) == 
ValueType::BUILTIN_TYPE); + builtin_type_value_ = value; + } + + void setStringValue(std::string&& value) { + assert(getValueTypeForToken(type_) == ValueType::STRING); + new (&string_value_) std::string(value); + } + + static ValueType getValueTypeForToken(TokenType type); + + TokenType type_; + + ValueType value_type_; + union { + uint64_t int_value_; + double fp_value_; + BuiltinTypeValue builtin_type_value_; + std::string string_value_; + }; + + friend class Lexer; +}; + +class Buffer final { +public: + static const int DEFAULT_CAPACITY = 1 * 1024 * 1024; + + Buffer(int base_capacity = DEFAULT_CAPACITY) : size_(0), capacity_(base_capacity) { + data_.reset(new char[base_capacity]); + cursor_ = data_.get(); + } + + void shift(int amount) { + assert(amount <= size_ && "Cannot shift buffer more than its size."); + size_ -= amount; + memmove(data_.get(), data_.get() + amount, size_); + } + void imbue(std::istream& input); + void clear() { size_ = 0; } + + bool isExhausted() const { return cursor_ == data_.get() + size_; } + + char* getData() { return data_.get(); } + char* getDataEnd() { return data_.get() + size_; } + int getSize() const { return size_; } + + char* getCursor() { return cursor_; } + void setCursor(char* value) { + assert(data_.get() <= value && value <= data_.get() + size_ && "Cursor out of bounds."); + cursor_ = value; + } + +private: + int size_; + int capacity_; + std::unique_ptr data_; + char* cursor_; +}; + +class Lexer final { +public: + explicit Lexer(std::istream& input) : input_(input), trace_enabled_(false) { resetState(); } + + std::unique_ptr getNextToken(); + + bool isTraceEnabled() const { return trace_enabled_; } + void setTraceEnabled(bool enable) { trace_enabled_ = enable; } + Location getCurrentLocation() const { return current_location_; } + +private: + void advanceColumn() { + assert(state_.ts != nullptr && state_.te >= state_.ts); + current_location_.advanceColumn(state_.te - state_.ts); + } + void newLine() { 
current_location_.newLine(); } + + void resetState(); + void advanceBuffer(); + + std::istream& input_; + Location current_location_; + + struct { + // State of the lexer FSA. DO NOT MODIFY. + int cs; + int act; + + // Start and end of the current token. + char* ts; + char* te; + } state_; + + Buffer buffer_; + + bool trace_enabled_; +}; + +} // namespace monicelli + +#endif diff --git a/src/lexer.rl b/src/lexer.rl new file mode 100644 index 0000000..0263a41 --- /dev/null +++ b/src/lexer.rl @@ -0,0 +1,182 @@ +// Copyright 2017 the Monicelli project authors. All rights reserved. +// Use of this source code is governed by a GPLv3 license, see LICENSE.txt. + +%%{ + +machine Lexer; + +access state_.; + +accent_a = "a`"|"à"; +accent_e = "e`"|"è"; +accent_u = "u`"|"ù"; +accent_vowels = "à"|"è"|"é"|"ì"|"ò"|"ó"|"ù"| [aeiou] "`"; +articles = "il"|"lo"|"la"|"l'"|"i"|"gli"|"le"|"un"|"un'"|"una"|"dei"|"delle"; +di = "di"|"dei"|"del"|"della"|"dell'"; +prematura = [bp] "rematura"; +supercazzola = "supercazzo" [lr] "a"; + +comment = "#"|"bituma"; +identifier = (alpha | accent_vowels) (alnum | accent_vowels)*; +integer = [+\-]? digit+; +float = [+\-]? (digit* "." digit+ | digit+ "."?) [eE] [+\-]? digit+ + | [+\-]? (digit* "." digit+ | digit+ "."); + +shift := |* + "sinistra" => { SET_TOKEN(OP_SHL); fbreak; }; + "destra" => { SET_TOKEN(OP_SHR); fbreak; }; + "per" => { advanceColumn(); fgoto initial; }; + (space - '\n')+ => { advanceColumn(); fbreak; }; +*|; + +initial := |* + "!" => { SET_TOKEN(BANG); fbreak; }; + "?" 
=> { SET_TOKEN(BRANCH_BEGIN); fbreak; }; + ":" => { SET_TOKEN(COLON); fbreak; }; + "," => { SET_TOKEN(COMMA); fbreak; }; + "conte" => { SET_TOKEN(STAR); fbreak; }; + articles => { SET_TOKEN(ARTICLE); fbreak; }; + + "pi" accent_u => { SET_TOKEN(OP_PLUS); fbreak; }; + "meno" => { SET_TOKEN(OP_MINUS); fbreak; }; + "per" => { SET_TOKEN(OP_TIMES); fbreak; }; + "diviso" => { SET_TOKEN(OP_DIV); fbreak; }; + "con scappellamento a" => { advanceColumn(); fgoto shift; }; + "minore " di => { SET_TOKEN(OP_LT); fbreak; }; + "maggiore " di => { SET_TOKEN(OP_GT); fbreak; }; + "minore o uguale " ("a"|di) => { SET_TOKEN(OP_LE); fbreak; }; + "maggiore o uguale " ("a"|di) => { SET_TOKEN(OP_GE); fbreak; }; + + "vaffanzum" => { SET_TOKEN(RETURN); fbreak; }; + "voglio" => { SET_TOKEN(VARDECL); fbreak; }; + "come " ("se "?) "fosse" => { SET_TOKEN(ASSIGN); fbreak; }; + "a posterdati" => { SET_TOKEN(PRINT); fbreak; }; + "mi porga" => { SET_TOKEN(INPUT); fbreak; }; + "ho visto" => { SET_TOKEN(ASSERT); fbreak; }; + "avvertite don ulrico" => { SET_TOKEN(ABORT); fbreak; }; + + "stuzzica" => { SET_TOKEN(LOOP_BEGIN); fbreak; }; + "e " prematura " anche, se" => { SET_TOKEN(LOOP_CONDITION); fbreak; }; + + "che cos'" accent_e => { SET_TOKEN(BRANCH_CONDITION); fbreak; }; + "o tarapia tapioco" => { SET_TOKEN(BRANCH_ELSE); fbreak; }; + "e velocit" accent_a " di esecuzione" => { SET_TOKEN(BRANCH_END); fbreak; }; + "o magari" => { SET_TOKEN(CASE_END); fbreak; }; + + "Lei ha clacsonato" => { SET_TOKEN(ENTRY_POINT); fbreak; }; + "blinda la " supercazzola => { SET_TOKEN(FUN_DECL); fbreak; }; + "con" => { SET_TOKEN(FUN_PARAMS); fbreak; }; + prematura "ta la " supercazzola => { SET_TOKEN(FUN_CALL); fbreak; }; + "o scherziamo" ("?"?) 
=> { SET_TOKEN(FUN_END); fbreak; }; + + "Necchi" => { + SET_TOKEN(TYPENAME); + token->setBuiltinTypeValue(Token::BUILTIN_TYPE_INTEGER); + fbreak; + }; + + "Mascetti" => { + SET_TOKEN(TYPENAME); + token->setBuiltinTypeValue(Token::BUILTIN_TYPE_CHAR); + fbreak; + }; + + "Perozzi" => { + SET_TOKEN(TYPENAME); + token->setBuiltinTypeValue(Token::BUILTIN_TYPE_FLOAT); + fbreak; + }; + + "Melandri" => { + SET_TOKEN(TYPENAME); + token->setBuiltinTypeValue(Token::BUILTIN_TYPE_BOOL); + fbreak; + }; + + "Sassaroli" => { + SET_TOKEN(TYPENAME); + token->setBuiltinTypeValue(Token::BUILTIN_TYPE_DOUBLE); + fbreak; + }; + + identifier => { + SET_TOKEN(IDENTIFIER); + token->setStringValue({state_.ts, state_.te}); + fbreak; + }; + + float => { + SET_TOKEN(FLOAT); + token->setFloatValue(std::stod(std::string{state_.ts, state_.te})); + fbreak; + }; + + integer => { + SET_TOKEN(INTEGER); + token->setIntValue(std::stoll(std::string{state_.ts, state_.te})); + fbreak; + }; + + comment (^"\n")* | (space - '\n')+ => { + advanceColumn(); + starting_location = current_location_; + }; + + '\n' => { + newLine(); + starting_location = current_location_; + }; +*|; + +}%% + +#include "lexer.h" + +#include + +namespace monicelli { + +#define SET_TOKEN(NAME) \ + do { \ + advanceColumn(); \ + auto end_location = current_location_; \ + token.reset(new Token{Token::TOKEN_##NAME,\ + starting_location, end_location}); \ + } while (false) + +%% write data nofinal; + +void Lexer::resetState() { + %% write init; +} + +std::unique_ptr Lexer::getNextToken() { + if (buffer_.isExhausted()) advanceBuffer(); + + char* p = buffer_.getCursor(); + char* pe = buffer_.getDataEnd(); + char* eof = input_? 
nullptr : pe; + + Location starting_location = current_location_; + std::unique_ptr token; + + while (p != pe && !token) { + %% write exec noend; + if (state_.cs == %%{ write error; }%%) { + token.reset(new Token{Token::TOKEN_UNKNOWN, starting_location}); + } + } + + if (p == eof) { + token.reset(new Token{Token::TOKEN_END, starting_location}); + } + + state_.ts = nullptr; + buffer_.setCursor(p); + if (trace_enabled_) token->print(std::cout); + return token; +} + +#undef SET_TOKEN + +} // namespace diff --git a/src/location.h b/src/location.h new file mode 100644 index 0000000..f4d02a9 --- /dev/null +++ b/src/location.h @@ -0,0 +1,59 @@ +#ifndef MONICELLI_LOCATION_H +#define MONICELLI_LOCATION_H + +// Copyright 2017 the Monicelli project authors. All rights reserved. +// Use of this source code is governed by a GPLv3 license, see LICENSE.txt. + +#include + +namespace monicelli { + +class Location final { +public: + Location() : line_(1), column_(1) {} + + int getLine() const { return line_; } + int getColumn() const { return column_; } + +private: + Location(int line, int column) : line_(line), column_(column) {} + + void advanceColumn(int amount) { column_ += amount; } + void newLine() { + column_ = 1; + line_ += 1; + } + + int line_; + int column_; + + friend class Lexer; +}; + +class LocationMixin { +public: + Location getFirstLocation() const { return first_location_; } + Location getLastLocation() const { return last_location_; } + +protected: + LocationMixin() {} + + LocationMixin(Location first, Location last) : first_location_(first), last_location_(last) {} + + Location first_location_; + Location last_location_; + + friend class Parser; +}; + +static inline std::ostream& operator<<(std::ostream& stream, const Location& location) { + return stream << location.getLine() << ':' << location.getColumn(); +} + +static inline bool operator==(const Location& a, const Location& b) { + return a.getLine() == b.getLine() && a.getColumn() == b.getColumn(); +} + +} // 
namespace monicelli + +#endif diff --git a/src/main.cpp b/src/main.cpp index a0f4cd3..7c502ac 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,127 +1,89 @@ -/* - * Monicelli: an esoteric language compiler - * - * Copyright (C) 2014 Stefano Sanfilippo - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ +// Copyright 2017 the Monicelli project authors. All rights reserved. +// Use of this source code is governed by a GPLv3 license, see LICENSE.txt. -#include "Scanner.hpp" -#include "Parser.hpp" -#include "CppEmitter.hpp" -#include "ModuleRegistry.hpp" -#include "ModuleLoader.hpp" -#include "BitcodeEmitter.hpp" -#include "CLineParser.hpp" +#include "asmgen.h" +#include "ast-printer.h" +#include "codegen.h" +#include "options.h" +#include "parser.h" -#include -#include -#include - -#include -#include - -#include #include #include -#include +#include using namespace monicelli; -static const boost::regex NAME_RE("^(.+)\\.mc$"); -static const boost::regex MODULE_RE("^(.+)\\.mm$"); - -int process(std::string const&, std::function); - - -int main(int argc, char **argv) { - parseCommandLine(argc, argv); - registerStdLib(getModuleRegistry()); - - if (!configHas("input")) { - std::cerr << "No input." 
<< std::endl; - return 0; - } - - if (configHas("c++")) { - return process("cpp", [](std::ostream &outstream, Program *program) { - CppEmitter emitter(&outstream); - if (!program->emit(&emitter)) return false; - return true; - }); - } else { - return process("bc", [](std::ostream & outstream, Program *program) { - BitcodeEmitter emitter; - if (!program->emit(&emitter)) return false; - - llvm::raw_os_ostream stream(outstream); - llvm::WriteBitcodeToFile(&emitter.getModule(), stream); - - return true; - }); - } -} - -int process(std::string const& suffix, std::function writer) { - std::vector sources; - std::vector modules; - - for (std::string const& arg: config>("input")) { - if (boost::regex_match(arg, NAME_RE)) { - sources.push_back(arg); - } else if (boost::regex_match(arg, MODULE_RE)) { - modules.push_back(arg); - } else { - std::cerr << arg + ": file format not recognized. Perhaps you forgot the .mc/.mm extension?" << std::endl; - } - } - - for (std::string const& name: modules) { - loadModule(name, getModuleRegistry()); - } - - for (std::string const& name: sources) { - std::ifstream instream(name); - - if (!instream.good()) { - std::cerr << name + ": cannot open file" << std::endl; - continue; - } - - Program program; - Scanner scanner(&instream); - Parser parser(scanner, program); - -# if YYDEBUG - parser.set_debug_level(1); -# endif - - parser.parse(); - - std::string outputname = boost::filesystem::path(name).filename().native(); - - if (boost::regex_match(outputname, NAME_RE)) { - outputname = boost::regex_replace(outputname, NAME_RE, "$1." + suffix); - } else { - outputname = outputname + '.' 
+ suffix; - } - - std::ofstream outstream(outputname); - - if (!writer(outstream, &program)) return 1; - } - +int main(int argc, char** argv) { + ProgramOptions options = ProgramOptions::fromCommandLine(argc, argv); + if (options.input_filenames_empty()) { + std::cerr << "No input files.\n"; return 0; -} + } + if (options.shouldOnlyCompile() && options.input_filenames_size() > 1 && + !options.getOutputFilename().empty()) { + std::cerr << "Output filename in compile mode may be specified only with a " + "single input file.\n"; + return 1; + } + + registerTargets(); + + auto triple = llvm::sys::getDefaultTargetTriple(); + auto target_machine = getTargetMachine(triple, options.getCPU(), options.getCPUFeatures()); + +#ifdef MONICELLI_ENABLE_LINKER + std::vector object_filenames; + object_filenames.reserve(options.input_filenames_size()); +#endif + + for (const auto& input_filename : options.input_filenames()) { + std::ifstream input{input_filename}; + if (!input) { + std::cerr << "Cannot open input file " << input_filename << ".\n"; + return 1; + } + + Parser parser{input, input_filename}; + parser.setLexerTrace(options.shouldTraceLexer()); + auto ast = parser.parse(); + + if (options.shouldPrintAST()) { + printAst(std::cout, ast.get()); + continue; + } + + llvm::LLVMContext context; + auto ir = generateIR(context, ast.get()); + ir->setTargetTriple(triple); + ir->setDataLayout(target_machine->createDataLayout()); + runFunctionOptimizer(ir.get()); + + if (options.shouldPrintIR()) { + printIR(std::cout, ir.get()); + continue; + } + + if (options.shouldSkipCompilation()) continue; + + std::string object_filename; + if (options.shouldOnlyCompile() && !options.getOutputFilename().empty()) { + object_filename = options.getOutputFilename(); + } else { + object_filename = basename(input_filename) + ".o"; + } + + writeAssembly(object_filename, ir.get(), target_machine); + +#ifdef MONICELLI_ENABLE_LINKER + object_filenames.emplace_back(std::move(object_filename)); +#endif + } + 
+#ifdef MONICELLI_ENABLE_LINKER
+  if (!options.shouldSkipCompilation() && !options.shouldOnlyCompile() &&
+      !object_filenames.empty()) {
+    linkAssembly(options.getOutputFilename(), object_filenames);
+  }
+#endif
+}
diff --git a/src/operators.def b/src/operators.def
new file mode 100644
index 0000000..e7f41d4
--- /dev/null
+++ b/src/operators.def
@@ -0,0 +1,39 @@
+#ifndef MONICELLI_OPERATORS_DEF
+#define MONICELLI_OPERATORS_DEF
+
+// Copyright 2017 the Monicelli project authors. All rights reserved.
+// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
+
+// We have two separate tables because it's cleaner this way.
+// Please keep them in sync.
+
+// lexer_token, lexer_value_type
+#define LEXER_OPERATOR_TOKENS(V) \
+  V(OP_EQ, VOID) \
+  V(OP_GE, VOID) \
+  V(OP_GT, VOID) \
+  V(OP_LE, VOID) \
+  V(OP_LT, VOID) \
+  V(OP_SHL, VOID) \
+  V(OP_SHR, VOID) \
+  V(OP_PLUS, VOID) \
+  V(OP_MINUS, VOID) \
+  V(OP_TIMES, VOID) \
+  V(OP_DIV, VOID) \
+
+// lexer_token, ast_operator, priority, representation
+// All priorities must be STRICTLY LARGER than 0.
+#define AST_BINARY_OPERATORS(V) \
+  V(OP_EQ, EQ, 1, "==") \
+  V(OP_GE, GE, 5, ">=") \
+  V(OP_GT, GT, 5, ">") \
+  V(OP_LE, LE, 5, "<=") \
+  V(OP_LT, LT, 5, "<") \
+  V(OP_SHL, SHL, 10, "<<") \
+  V(OP_SHR, SHR, 10, ">>") \
+  V(OP_PLUS, PLUS, 15, "+") \
+  V(OP_MINUS, MINUS, 15, "-") \
+  V(OP_TIMES, TIMES, 20, "*") \
+  V(OP_DIV, DIV, 20, "/")
+
+#endif
diff --git a/src/options.cpp b/src/options.cpp
new file mode 100644
index 0000000..394cd35
--- /dev/null
+++ b/src/options.cpp
@@ -0,0 +1,107 @@
+// Copyright 2017 the Monicelli project authors. All rights reserved.
+// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
+
+#include "options.h"
+
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+
+namespace monicelli {
+
+// Builds the program options from the process arguments (argv[0] is skipped).
+// Flags set the matching option; --output, --cpu and --cpu-features also
+// consume the following argument as their value. Any other argument is an
+// input filename, unless it starts with '-', which is reported as an unknown
+// option.
+// static
+ProgramOptions ProgramOptions::fromCommandLine(int argc, char** argv) {
+  ProgramOptions options;
+  for (int i = 1; i < argc; ++i) {
+    if (strcmp(argv[i], "-o") == 0 || strcmp(argv[i], "--output") == 0) {
+      if (i == argc - 1) {
+        std::cerr << "--output must be followed by a filename.\n";
+        break;
+      }
+      options.output_filename_ = argv[++i];
+      continue;
+    }
+    if (strcmp(argv[i], "-s") == 0 || strcmp(argv[i], "--print-ir") == 0) {
+      options.print_ir_ = true;
+      continue;
+    }
+    if (strcmp(argv[i], "-p") == 0 || strcmp(argv[i], "--print-ast") == 0) {
+      options.print_ast_ = true;
+      continue;
+    }
+    if (strcmp(argv[i], "-t") == 0 || strcmp(argv[i], "--trace-lexer") == 0) {
+      options.trace_lexer_ = true;
+      continue;
+    }
+    if (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "--no-compile") == 0) {
+      options.skip_compile_ = true;
+      continue;
+    }
+#ifdef MONICELLI_ENABLE_LINKER
+    if (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--only-compile") == 0) {
+      options.compile_only_ = true;
+      continue;
+    }
+#endif
+    if (strcmp(argv[i], "-m") == 0 || strcmp(argv[i], "--cpu") == 0) {
+      if (i == argc - 1) {
+        std::cerr << "--cpu must be followed by a CPU name.\n";
+        break;
+      }
+      options.cpu_ = argv[++i];
+      // The value has been consumed: without this it would fall through and
+      // also be recorded as an input filename.
+      continue;
+    }
+    if (strcmp(argv[i], "-f") == 0 || strcmp(argv[i], "--cpu-features") == 0) {
+      if (i == argc - 1) {
+        std::cerr << "--cpu-features must be followed by a set of features.\n";
+        break;
+      }
+      options.cpu_features_ = argv[++i];
+      // As for --cpu, do not let the value reach the filename case below.
+      continue;
+    }
+    if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) {
+      // printHelp() terminates the process, it does not return.
+      printHelp(argv[0]);
+    }
+    if (argv[i][0] == '-') {
+      std::cerr << "Unknown option " << argv[i] << ".\n\n";
+      printHelp(argv[0]);
+      exit(1);
+    }
+    options.input_filenames_.emplace_back(argv[i]);
+  }
+#ifndef MONICELLI_ENABLE_LINKER
+  options.compile_only_ = true;
+#endif
+  return options;
+}
+
+// static
+void ProgramOptions::printHelp(const char* program_name) {
+  std::cout << "Usage: " <<
program_name + << " [options...] [input.mc ...]\n\n" + "Options:\n" +#ifdef MONICELLI_ENABLE_LINKER + " --only-compile, -c : Compile only, do not link.\n" +#endif + " --no-compile, -n : Do not compile, only print (see below).\n" + " --output, -o out.o : Specify the output filename.\n" + " --trace-lexer, -t : Print tokens as seen by the lexer.\n" + " --print-ast, -p : Print the AST as pseudocode.\n" + " --print-ir, -s : Print the IR of the code.\n" + " --cpu, -m model : Set the CPU model to this (default: generic).\n" + " --cpu-features, -f feat : Enable these CPU features (default: none).\n" + " --help, -h : Print this message.\n" + "\n"; + exit(0); +} + +} // namespace monicelli diff --git a/src/options.h b/src/options.h new file mode 100644 index 0000000..add7d92 --- /dev/null +++ b/src/options.h @@ -0,0 +1,58 @@ +// Copyright 2017 the Monicelli project authors. All rights reserved. +// Use of this source code is governed by a GPLv3 license, see LICENSE.txt. + +#ifndef MONICELLI_OPTIONS_H +#define MONICELLI_OPTIONS_H + +#include "iterators.h" + +#include +#include + +namespace monicelli { + +class ProgramOptions final { +public: + typedef std::vector::const_iterator ConstStringIter; + + static ProgramOptions fromCommandLine(int argc, char** argv); + + bool shouldPrintIR() const { return print_ir_; } + bool shouldPrintAST() const { return print_ast_; } + bool shouldTraceLexer() const { return trace_lexer_; } + bool shouldOnlyCompile() const { return compile_only_; } + bool shouldSkipCompilation() const { return skip_compile_; } + const std::string& getOutputFilename() const { return output_filename_; } + + ConstStringIter begin_input_filenames() const { return input_filenames_.cbegin(); } + ConstStringIter end_input_filenames() const { return input_filenames_.cend(); } + ConstRangeWrapper input_filenames() const { + return {begin_input_filenames(), end_input_filenames()}; + } + int input_filenames_size() const { return input_filenames_.size(); } + bool 
input_filenames_empty() const { return input_filenames_.empty(); } + + const std::string& getCPU() const { return cpu_; } + const std::string& getCPUFeatures() const { return cpu_features_; } + +private: + static void printHelp(const char* program_name); + + ProgramOptions() + : print_ir_(false), print_ast_(false), trace_lexer_(false), compile_only_(false), + skip_compile_(false), cpu_("generic") {} + + bool print_ir_; + bool print_ast_; + bool trace_lexer_; + bool compile_only_; + bool skip_compile_; + std::vector input_filenames_; + std::string output_filename_; + std::string cpu_; + std::string cpu_features_; +}; + +} // namespace monicelli + +#endif diff --git a/src/parser.cpp b/src/parser.cpp new file mode 100644 index 0000000..05393ce --- /dev/null +++ b/src/parser.cpp @@ -0,0 +1,542 @@ +// Copyright 2017 the Monicelli project authors. All rights reserved. +// Use of this source code is governed by a GPLv3 license, see LICENSE.txt. + +#include "parser.h" +#include "errors.h" + +namespace monicelli { + +std::unique_ptr Parser::parseModule() { + std::unique_ptr module{new Module}; + + while (peekNextToken()->getType() == Token::TOKEN_FUN_DECL) { + module->functions_.emplace_back(parseFunction()); + } + + if (peekNextToken()->getType() == Token::TOKEN_ENTRY_POINT) { + module->maybe_entry_point_ = parseEntryPoint(); + } + + while (peekNextToken()->getType() == Token::TOKEN_FUN_DECL) { + module->functions_.emplace_back(parseFunction()); + } + + auto token = getNextToken(); + if (token->getType() != Token::TOKEN_END) { + error(token, "expected end of file"); + } + + module->source_filename_ = getSourceFilename(); + + return module; +} + +std::unique_ptr Parser::parseEntryPoint() { + std::unique_ptr function{new Function}; + + auto token = getNextToken(); + if (token->getType() != Token::TOKEN_ENTRY_POINT) { + error(token, "expected entry point declaration"); + } + + function->return_type_.base_type_ = VarType::INTEGER; + function->body_ = parseStatements(); + + 
return function; +} + +std::unique_ptr Parser::parseFunction() { + std::unique_ptr function{new Function}; + + auto token = getNextToken(); + if (token->getType() != Token::TOKEN_FUN_DECL) { + error(token, "expected function declaration"); + } + + switch (peekNextToken()->getType()) { + case Token::TOKEN_STAR: + case Token::TOKEN_TYPENAME: + function->return_type_ = parseType(); + break; + default: + break; + } + + token = getNextToken(); + if (token->getType() != Token::TOKEN_IDENTIFIER) { + error(token, "expected function name"); + } + function->name_ = token->getStringValue(); + + token = getNextToken(); + switch (token->getType()) { + case Token::TOKEN_FUN_PARAMS: + for (bool done = false; !done;) { + auto var = parseVariable(); + auto type = parseType(); + function->params_.emplace_back(var, type); + auto token = getNextToken(); + switch (token->getType()) { + case Token::TOKEN_COMMA: + break; + case Token::TOKEN_FUN_END: + done = true; + break; + default: + error(token, "expected either more parameters or function body begin"); + break; + } + } + // fallthrough + case Token::TOKEN_FUN_END: + break; + default: + error(token, "expected either parameters or function body begin"); + break; + } + + function->body_ = parseStatements(); + + return function; +} + +Variable Parser::parseVariable() { + auto token = getNextToken(); + if (token->getType() == Token::TOKEN_ARTICLE) { + token = getNextToken(); + } + if (token->getType() != Token::TOKEN_IDENTIFIER) { + error(token, "expected variable name"); + } + + Variable var; + var.name_ = token->getStringValue(); + var.first_location_ = token->getFirstLocation(); + var.last_location_ = token->getLastLocation(); + return var; +} + +static VarType::BaseType builtinTypeToASTType(Token::BuiltinTypeValue type) { + switch (type) { +#define RETURN_VAR_TYPE(NAME, _1, _2, _3, _4, _5) \ + case Token::BUILTIN_TYPE_##NAME: \ + return VarType::NAME; + BUILTIN_TYPES(RETURN_VAR_TYPE) + default: + UNREACHABLE("Unhandled BuiltinType."); 
+ } +} + +VarType Parser::parseType() { + VarType type; + auto token = getNextToken(); + if (token->getType() == Token::TOKEN_STAR) { + type.pointer_ = true; + token = getNextToken(); + } + if (token->getType() != Token::TOKEN_TYPENAME) { + error(token, "expected type name"); + } + type.base_type_ = builtinTypeToASTType(token->getBuiltinTypeValue()); + return type; +} + +std::vector> Parser::parseStatements() { + std::vector> statements; + while (true) { + auto statement = maybeParseStatement(); + if (!statement) break; + statements.emplace_back(std::move(statement)); + } + return statements; +} + +std::unique_ptr Parser::parseStatement() { + auto start_location = peekNextToken()->getFirstLocation(); + auto statement = maybeParseStatement(); + if (!statement) { + error(start_location, "expected statement"); + } + return statement; +} + +std::unique_ptr Parser::maybeParseStatement() { + switch (peekNextToken()->getType()) { + case Token::TOKEN_ASSERT: + return parseAssertStatement(); + case Token::TOKEN_INPUT: + return parseInputStatement(); + case Token::TOKEN_ABORT: + return parseAbortStatement(); + case Token::TOKEN_BRANCH_CONDITION: + return parseBranchStatement(); + case Token::TOKEN_VARDECL: + return parseVardeclStatement(); + case Token::TOKEN_LOOP_BEGIN: + return parseLoopStatement(); + case Token::TOKEN_RETURN: + return parseReturnStatement(); + case Token::TOKEN_COMMA: + ignoreNextToken(); + return maybeParseStatement(); + default: + break; + } + + // If we are here, the statement starts with an expression. + auto expression = maybeParseExpression(); + + // If there was not an expression here, then it's not a statement. 
+ if (!expression) return nullptr; + + auto token = peekNextToken(); + switch (token->getType()) { + case Token::TOKEN_PRINT: { + ignoreNextToken(); + std::unique_ptr statement{new PrintStatement}; + statement->expression_ = std::move(expression); + return statement; + } + case Token::TOKEN_ASSIGN: { + AtomicExpression* e = dynamic_cast(expression.get()); + if (!e || e->getType() != AtomicExpression::IDENTIFIER) { + error(token, "assignment target must be an identifier"); + } + ignoreNextToken(); + std::unique_ptr statement{new AssignStatement}; + statement->variable_ = e->getIdentifierValue(); + statement->expression_ = parseExpression(); + return statement; + } + default: + if (expression->isFunctionCall()) { + std::unique_ptr statement{new ExpressionStatement}; + statement->expression_ = std::move(expression); + return statement; + } + error(token, "only a function call can be a statement"); + break; + } + + UNREACHABLE("Unhandled statement type in parser"); +} + +std::unique_ptr Parser::parseAssertStatement() { + auto token = getNextToken(); + if (token->getType() != Token::TOKEN_ASSERT) { + error(token, "expected assert statement"); + } + std::unique_ptr statement{new AssertStatement}; + statement->expression_ = parseExpression(); + token = getNextToken(); + if (token->getType() != Token::TOKEN_BANG) { + error(token, "expected final !"); + } + return statement; +} + +std::unique_ptr Parser::parseFunctionCallExpression() { + auto token = getNextToken(); + if (token->getType() != Token::TOKEN_FUN_CALL) { + error(token, "expected function call"); + } + + std::unique_ptr statement{new FunctionCallExpression}; + statement->first_location_ = token->first_location_; + + token = getNextToken(); + if (token->getType() != Token::TOKEN_IDENTIFIER) { + error(token, "expected name of the function to call"); + } + + statement->function_name_ = token->getStringValue(); + + token = getNextToken(); + switch (token->getType()) { + case Token::TOKEN_FUN_PARAMS: + for (bool done 
= false; !done;) { + statement->function_args_.emplace_back(parseExpression()); + auto token = getNextToken(); + switch (token->getType()) { + case Token::TOKEN_FUN_END: + done = true; + break; + case Token::TOKEN_COMMA: + break; + default: + error(token, "expected either more params or end of call statement"); + break; + } + } + // fallthrough + case Token::TOKEN_FUN_END: + break; + default: + error(token, "expected either call params or end of call statement"); + break; + } + + statement->last_location_ = peekNextToken()->first_location_; + return statement; +} + +std::unique_ptr Parser::parseInputStatement() { + auto token = getNextToken(); + if (token->getType() != Token::TOKEN_INPUT) { + error(token, "expected input statement"); + } + std::unique_ptr statement{new InputStatement}; + statement->variable_ = parseVariable(); + return statement; +} + +std::unique_ptr Parser::parseAbortStatement() { + auto token = getNextToken(); + if (token->getType() != Token::TOKEN_ABORT) { + error(token, "expected abort statement"); + } + return std::unique_ptr{new AbortStatement}; +} + +BranchCase Parser::parseBranchCase(std::shared_ptr condition_lhs) { + BranchCase branch_case; + branch_case.expression_ = parseSemiExpression(condition_lhs); + + if (peekNextToken()->getType() == Token::TOKEN_COLON) { + ignoreNextToken(); + } + + branch_case.body_ = parseStatements(); + + return branch_case; +} + +std::unique_ptr Parser::parseBranchElse() { + std::unique_ptr else_case{new BranchElse}; + else_case->body_ = parseStatements(); + return else_case; +} + +std::unique_ptr Parser::parseBranchStatement() { + auto token = getNextToken(); + if (token->getType() != Token::TOKEN_BRANCH_CONDITION) { + error(token, "expected branch condition"); + } + + std::unique_ptr statement{new BranchStatement}; + statement->lead_var_ = parseVariable(); + + token = getNextToken(); + if (token->getType() != Token::TOKEN_BRANCH_BEGIN) { + error(token, "expected begin of branch"); + } + + std::shared_ptr 
condition_lhs{ + AtomicExpression::fromIdentifier(statement->lead_var_).release()}; + + statement->cases_.emplace_back(parseBranchCase(condition_lhs)); + for (bool done = false; !done;) { + switch (peekNextToken()->getType()) { + case Token::TOKEN_CASE_END: + ignoreNextToken(); + statement->cases_.emplace_back(parseBranchCase(condition_lhs)); + break; + case Token::TOKEN_BRANCH_ELSE: + case Token::TOKEN_BRANCH_END: + done = true; + break; + default: + error(peekNextToken(), "expected other cases, else case or end of branch"); + break; + } + } + + token = getNextToken(); + switch (token->getType()) { + case Token::TOKEN_BRANCH_ELSE: { + if (peekNextToken()->getType() == Token::TOKEN_COLON) { + ignoreNextToken(); + } + statement->maybe_else_case_ = parseBranchElse(); + auto token = getNextToken(); + if (token->getType() != Token::TOKEN_BRANCH_END) { + error(token, "expected end of branch"); + } + // fallthrough + } + case Token::TOKEN_BRANCH_END: + break; + default: + error(token, "expected either else case or end of branch"); + break; + } + + return statement; +} + +std::unique_ptr Parser::parseVardeclStatement() { + auto token = getNextToken(); + if (token->getType() != Token::TOKEN_VARDECL) { + error(token, "expected declaration"); + } + + std::unique_ptr statement{new VardeclStatement}; + statement->variable_ = parseVariable(); + token = getNextToken(); + if (token->getType() != Token::TOKEN_COMMA) { + error(token, "expected ,"); + } + statement->type_ = parseType(); + + if (peekNextToken()->getType() == Token::TOKEN_ASSIGN) { + ignoreNextToken(); + statement->maybe_init_ = parseExpression(); + } + + return statement; +} + +std::unique_ptr Parser::parseLoopStatement() { + auto token = getNextToken(); + if (token->getType() != Token::TOKEN_LOOP_BEGIN) { + error(token, "expected loop statement"); + } + + std::unique_ptr statement{new LoopStatement}; + while (peekNextToken()->getType() != Token::TOKEN_LOOP_CONDITION) { + 
statement->body_.emplace_back(parseStatement()); + } + ignoreNextToken(); // This was a Token::TOKEN_LOOP_CONDITION. + + statement->condition_ = parseExpression(); + + return statement; +} + +std::unique_ptr Parser::parseReturnStatement() { + auto token = getNextToken(); + if (token->getType() != Token::TOKEN_RETURN) { + error(token, "expected return statement"); + } + + std::unique_ptr statement{new ReturnStatement}; + if (peekNextToken()->getType() == Token::TOKEN_BANG) { + ignoreNextToken(); + return statement; + } + statement->maybe_expression_ = parseExpression(); + + token = getNextToken(); + if (token->getType() != Token::TOKEN_BANG) { + error(token, "expected !"); + } + + return statement; +} + +std::unique_ptr Parser::parseExpression() { + auto first_location = peekNextToken()->getFirstLocation(); + auto expression = maybeParseExpression(); + if (!expression) { + error(first_location, "expected expression"); + } + return expression; +} + +static BinaryExpression::Type getOperatorTypeFromToken(const Token* token) { + switch (token->getType()) { +#define TOKEN_OP_TO_EXPR_OP(TOKEN_NAME, EXPR_NAME, _, __) \ + case Token::TOKEN_##TOKEN_NAME: \ + return BinaryExpression::EXPR_NAME; + AST_BINARY_OPERATORS(TOKEN_OP_TO_EXPR_OP) +#undef TOKEN_OP_TO_EXPR_OP + default: + UNREACHABLE("Unhandled token in operator conversion"); + } +} + +std::unique_ptr Parser::parseSemiExpression(std::shared_ptr lhs) { + BinaryExpression::Type op; + if (peekNextToken()->isOperator()) { + op = getOperatorTypeFromToken(getNextToken().get()); + } else { + op = BinaryExpression::EQ; + } + + auto rhs = parseExpression(); + + return std::unique_ptr{new BinaryExpression{op, lhs, rhs.release(), true}}; +} + +static int getOperatorPrecedenceFromToken(Token* token) { + switch (token->getType()) { +#define RETURN_OP_PRIORITY(NAME, _, PRIORITY, __) \ + case Token::TOKEN_##NAME: \ + return PRIORITY; + AST_BINARY_OPERATORS(RETURN_OP_PRIORITY) +#undef RETURN_OP_PRIORITY + default: + 
UNREACHABLE("Undefined operator priority for token"); + } +} + +std::unique_ptr Parser::maybeParseExpressionInternal(int min_precedence) { + Location first_location = peekNextToken()->getFirstLocation(); + + // Precedence climbing. + auto lhs = maybeParseAtomicExpression(); + if (!lhs) return nullptr; + + while (true) { + auto token = peekNextToken(); + if (!token->isOperator()) break; + int precedence = getOperatorPrecedenceFromToken(token); + if (precedence < min_precedence) break; + auto op_type = getOperatorTypeFromToken(token); + auto op_location = token->getFirstLocation(); + ignoreNextToken(); + auto rhs = maybeParseExpressionInternal(precedence + 1); + if (!rhs) { + error(op_location, "binary operation is missing a right side"); + } + lhs.reset(new BinaryExpression{op_type, std::move(lhs), rhs.release(), false}); + } + + lhs->first_location_ = first_location; + lhs->last_location_ = peekNextToken()->getFirstLocation(); + + return lhs; +} + +std::unique_ptr Parser::maybeParseAtomicExpression() { + switch (peekNextToken()->getType()) { + case Token::TOKEN_ARTICLE: + case Token::TOKEN_IDENTIFIER: + return AtomicExpression::fromIdentifier(parseVariable()); + case Token::TOKEN_INTEGER: + return AtomicExpression ::fromInt(getNextToken()->getIntValue()); + case Token::TOKEN_FLOAT: + return AtomicExpression ::fromFloat(getNextToken()->getFloatValue()); + case Token::TOKEN_FUN_CALL: + return parseFunctionCallExpression(); + default: + return nullptr; + } +} + +std::unique_ptr Parser::getNextToken() { + assert(current_token_ && "Cannot get from an empty stream"); + auto token = std::move(current_token_); + switch (token->getType()) { + case Token::TOKEN_END: + case Token::TOKEN_UNKNOWN: + current_token_ = nullptr; + break; + default: + current_token_ = lexer_.getNextToken(); + break; + } + return token; +} + +} // namespace monicelli diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000..0d351c0 --- /dev/null +++ b/src/parser.h @@ -0,0 +1,73 @@ 
+// Copyright 2017 the Monicelli project authors. All rights reserved.
+// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
+
+#ifndef MONICELLI_PARSER_H
+#define MONICELLI_PARSER_H
+
+#include "ast.h"
+#include "errors.h"
+#include "lexer.h"
+#include "support.h"
+
+#include  // NOTE(review): the header name is missing in this copy of the patch, as are template arguments (e.g. on std::unique_ptr below) -- confirm against the repository.
+#include
+#include
+
+namespace monicelli {
+
+class Parser final : public ErrorReportingMixin {  // Owns a Lexer plus a single current token that serves as one-token lookahead.
+public:
+  Parser(std::istream& input, const std::string& source_filename)  // source_filename is forwarded to ErrorReportingMixin; input is handed to the lexer.
+      : ErrorReportingMixin(source_filename), lexer_{input} {}
+
+  std::unique_ptr parse() {  // Loads the first token from the lexer, then returns the result of parseModule().
+    current_token_ = lexer_.getNextToken();
+    return parseModule();
+  }
+
+  void setLexerTrace(bool enabled) { lexer_.setTraceEnabled(enabled); }  // Forwards the flag to the lexer.
+
+private:
+  Variable parseVariable();
+  VarType parseType();
+  std::unique_ptr parseModule();
+  std::unique_ptr parseFunction();
+  std::unique_ptr parseEntryPoint();
+  std::unique_ptr parseStatement();
+  std::vector> parseStatements();
+  std::unique_ptr maybeParseStatement();
+  std::unique_ptr parseAssertStatement();
+  std::unique_ptr parseInputStatement();
+  std::unique_ptr parseAbortStatement();
+  BranchCase parseBranchCase(std::shared_ptr condition_lhs);
+  std::unique_ptr parseBranchElse();
+  std::unique_ptr parseBranchStatement();
+  std::unique_ptr parseVardeclStatement();
+  std::unique_ptr parseLoopStatement();
+  std::unique_ptr parseReturnStatement();
+  std::unique_ptr parseExpression();
+  std::unique_ptr parseSemiExpression(std::shared_ptr lhs);
+  std::unique_ptr maybeParseExpression() { return maybeParseExpressionInternal(0); }  // Entry point: starts with a minimum precedence of 0.
+  std::unique_ptr maybeParseExpressionInternal(int min_precedence);
+  std::unique_ptr maybeParseAtomicExpression();
+  std::unique_ptr parseFunctionCallExpression();
+
+  std::unique_ptr getNextToken();
+
+  Token* peekNextToken() {  // Non-owning view of the current token; nothing is consumed.
+    assert(current_token_ && "Cannot peek into an empty stream.");
+    return current_token_.get();
+  }
+
+  void ignoreNextToken() {  // Consumes the current token and drops it.
+    auto token = getNextToken();
+    USE(token);  // USE casts to void, marking the otherwise unused local as intentionally ignored.
+  }
+
+  Lexer lexer_;
+  std::unique_ptr current_token_;  // Token that the next peek/get will return; null before parse() has loaded one.
+};
+
+} // namespace monicelli
+
+#endif
diff --git a/src/support.cpp b/src/support.cpp
new file mode 100644
index 0000000..35e17fc
--- /dev/null
+++ b/src/support.cpp
@@ -0,0 +1,17 @@
+// Copyright 2017 the Monicelli project authors. All rights reserved.
+// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
+
+#include "support.h"
+
+namespace monicelli {
+
+std::string basename(std::string input_filename) {  // Returns what follows the last slash or backslash in input_filename, or the whole string if it contains neither.
+  auto base_name_start = input_filename.find_last_of("\\/");
+  if (base_name_start != std::string::npos) {
+    input_filename = input_filename.substr(base_name_start + 1);  // A trailing separator therefore yields an empty result.
+  }
+  if (input_filename.empty()) return "";  // NOTE(review): redundant -- the return below would produce the same empty string.
+  return input_filename;
+}
+
+} // namespace monicelli
diff --git a/src/support.h b/src/support.h
new file mode 100644
index 0000000..e496ebc
--- /dev/null
+++ b/src/support.h
@@ -0,0 +1,28 @@
+// Copyright 2017 the Monicelli project authors. All rights reserved.
+// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
+
+#ifndef MONICELLI_SUPPORT_H
+#define MONICELLI_SUPPORT_H
+
+#include  // NOTE(review): the header name is missing in this copy of the patch, as are the template parameter lists below -- confirm against the repository.
+#include
+
+namespace monicelli {
+
+#define USE(x) ((void)(x))  // Evaluates x and discards the result.
+
+template static void print(std::ostream& stream, const First& first) {  // Single-value overload: writes the value followed by ".\n".
+  stream << first << ".\n";
+}
+
+template
+static void print(std::ostream& stream, const First& first, Tail... tail) {  // Writes first and a space, then recurses on the remaining values; note that the tail values are taken by value.
+  stream << first << ' ';
+  print(stream, tail...);
+}
+
+std::string basename(std::string input_filename);  // Returns the part of input_filename after its last slash or backslash.
+
+} // namespace monicelli
+
+#endif
diff --git a/src/types.def b/src/types.def
new file mode 100644
index 0000000..2756610
--- /dev/null
+++ b/src/types.def
@@ -0,0 +1,16 @@
+#ifndef MONICELLI_TYPES_DEF
+#define MONICELLI_TYPES_DEF
+
+// Copyright 2017 the Monicelli project authors. All rights reserved.
+// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
+
+// X-macro: one V(...) row per built-in type, with columns symbol_name, ir_type, input_format, output_format, src_name, ast_name. Every row, including the last, ends in a line continuation, so the blank line after the list is part of the macro.
+#define BUILTIN_TYPES(V) \
+  V(VOID, Void, nullptr, nullptr, "", "void") \
+  V(INTEGER, Int32, "%d", "%d\n", "Necchi", "int") \
+  V(CHAR, Int8, "%c", "%c", "Mascetti", "char") \
+  V(FLOAT, Float, "%f", "%f\n", "Perozzi", "float") \
+  V(BOOL, Int1, "%d", "%d\n", "Melandri", "bool") \
+  V(DOUBLE, Double, "%lf", "%f\n", "Sassaroli", "double") \
+
+#endif  // MONICELLI_TYPES_DEF