Push Monicelli 2.0 "Cofandina"

This commit is contained in:
Stefano Sanfilippo 2017-10-12 22:36:56 +02:00
parent 8aa5c1a575
commit 7f2a9a34a8
66 changed files with 4131 additions and 4174 deletions

11
.clang-format Normal file
View File

@ -0,0 +1,11 @@
---
BasedOnStyle: LLVM
IndentWidth: 2
---
Language: Cpp
PointerAlignment: Left
AllowShortIfStatementsOnASingleLine: true
ColumnLimit: 100
AlignEscapedNewlines: DontAlign
BreakStringLiterals: false
SpaceAfterTemplateKeyword: false

4
.gitignore vendored
View File

@ -1,5 +1 @@
/Monicelli.output
/*.user
/mcc
/build
/nbproject

View File

@ -1,84 +1,31 @@
#
# Monicelli: an esoteric language compiler
#
# Copyright (C) 2014 Stefano Sanfilippo
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright 2017 the Monicelli project authors. All rights reserved.
# Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
cmake_minimum_required(VERSION 3.0)
project(Monicelli)
cmake_minimum_required(VERSION 2.8)
set(CMAKE_BUILD_TYPE Release)
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
message("== Only a limited set of platforms was tested. We need your help!")
message("== Report build errors to https://github.com/esseks/monicelli/issues")
## 1. Compiler sanity check
# Build and run cmake/features.cpp, a small program exercising the C++11
# features the code base relies on.
#   execution_results  - run result of the probe (FAILED_TO_RUN on failure)
#   supported_compiler - TRUE when the probe compiled and linked
#   features_build_log - captured build output, shown with -Dcheckfeat=1
try_run(
  execution_results
  supported_compiler
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/cmake/features.cpp
  COMPILE_DEFINITIONS -std=c++0x
  OUTPUT_VARIABLE features_build_log
)

if (checkfeat)
  # Quoted on purpose: an unquoted expansion makes message() fail when the log
  # is empty and drops every ';' that appears in the compiler output.
  message("${features_build_log}")
endif()

# The probe did not even compile: the compiler lacks a required feature.
if (NOT supported_compiler)
  message(FATAL_ERROR
    "Some C++11 features we need are not implemented by your compiler.\n"
    "Run cmake with -Dcheckfeat=1 to see the exact cause."
  )
endif()

# The probe compiled but could not be executed (or crashed).
if (execution_results MATCHES FAILED_TO_RUN)
  message(FATAL_ERROR
    "Your compiler supports the set of C++11 features we need, "
    "but something failed.\n"
    "Run cmake with -Dcheckfeat=1 to see the exact cause."
  )
endif()
## 2. Find Flex and Bison
# The Homebrew-aware lookup macro is always loaded, but only used on Apple hosts.
include(macosx_homebrew)

if (NOT CMAKE_HOST_APPLE)
  find_package(BISON REQUIRED)
  find_package(FLEX 2.5 REQUIRED)
else()
  find_package_prefer_brew(BISON REQUIRED)
  find_package_prefer_brew(FLEX 2.5 REQUIRED)
endif()

# Bison >= 3.0 needs nothing, 2.5 up to (not including) 3.0 needs the patch,
# anything older is rejected.
if (NOT BISON_VERSION VERSION_LESS 3.0)
  # Supported out of the box.
elseif (NOT BISON_VERSION VERSION_LESS 2.5)
  message("== Bison 2.5 was found. You have to apply cmake/bison2.patch...")
else()
  message(FATAL_ERROR "At least Bison 2.5 is required.")
endif()
## 2. Build Monicelli
include(package)
find_package(Doxygen)
add_subdirectory(src)
install(FILES README.md LICENSE.txt DESTINATION doc/)
# Optional "doc" target, only defined when Doxygen was found.
if (DOXYGEN_FOUND)
  # The Doxyfile is generated in the build tree from the Doxyfile.in template.
  set(DOXYGEN_CONFIG ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile)
  configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in ${DOXYGEN_CONFIG} @ONLY)

  # Runs doxygen on the generated configuration from the build directory.
  add_custom_target(doc
    COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_CONFIG}
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
    DEPENDS ${DOXYGEN_CONFIG}
    VERBATIM
  )
endif()
install(FILES README.md LICENSE.txt DESTINATION doc/monicelli)

305
Doxyfile.in Normal file
View File

@ -0,0 +1,305 @@
# Doxyfile 1.8.11
#---------------------------------------------------------------------------
# Project related configuration options
#---------------------------------------------------------------------------
DOXYFILE_ENCODING = UTF-8
PROJECT_NAME = "Monicelli"
PROJECT_NUMBER =
PROJECT_BRIEF =
PROJECT_LOGO =
OUTPUT_DIRECTORY = @CMAKE_CURRENT_BINARY_DIR@/doxy
CREATE_SUBDIRS = NO
ALLOW_UNICODE_NAMES = NO
OUTPUT_LANGUAGE = English
BRIEF_MEMBER_DESC = YES
REPEAT_BRIEF = YES
ABBREVIATE_BRIEF =
ALWAYS_DETAILED_SEC = NO
INLINE_INHERITED_MEMB = NO
FULL_PATH_NAMES = YES
STRIP_FROM_PATH =
STRIP_FROM_INC_PATH =
SHORT_NAMES = NO
JAVADOC_AUTOBRIEF = NO
QT_AUTOBRIEF = NO
MULTILINE_CPP_IS_BRIEF = NO
INHERIT_DOCS = YES
SEPARATE_MEMBER_PAGES = NO
TAB_SIZE = 4
ALIASES =
TCL_SUBST =
OPTIMIZE_OUTPUT_FOR_C = NO
OPTIMIZE_OUTPUT_JAVA = NO
OPTIMIZE_FOR_FORTRAN = NO
OPTIMIZE_OUTPUT_VHDL = NO
EXTENSION_MAPPING =
MARKDOWN_SUPPORT = YES
AUTOLINK_SUPPORT = YES
BUILTIN_STL_SUPPORT = NO
CPP_CLI_SUPPORT = NO
SIP_SUPPORT = NO
IDL_PROPERTY_SUPPORT = YES
DISTRIBUTE_GROUP_DOC = NO
GROUP_NESTED_COMPOUNDS = NO
SUBGROUPING = YES
INLINE_GROUPED_CLASSES = NO
INLINE_SIMPLE_STRUCTS = NO
TYPEDEF_HIDES_STRUCT = NO
LOOKUP_CACHE_SIZE = 0
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
EXTRACT_ALL = NO
EXTRACT_PRIVATE = NO
EXTRACT_PACKAGE = NO
EXTRACT_STATIC = NO
EXTRACT_LOCAL_CLASSES = YES
EXTRACT_LOCAL_METHODS = NO
EXTRACT_ANON_NSPACES = NO
HIDE_UNDOC_MEMBERS = NO
HIDE_UNDOC_CLASSES = NO
HIDE_FRIEND_COMPOUNDS = NO
HIDE_IN_BODY_DOCS = NO
INTERNAL_DOCS = NO
CASE_SENSE_NAMES = YES
HIDE_SCOPE_NAMES = NO
HIDE_COMPOUND_REFERENCE= NO
SHOW_INCLUDE_FILES = YES
SHOW_GROUPED_MEMB_INC = NO
FORCE_LOCAL_INCLUDES = NO
INLINE_INFO = YES
SORT_MEMBER_DOCS = YES
SORT_BRIEF_DOCS = NO
SORT_MEMBERS_CTORS_1ST = NO
SORT_GROUP_NAMES = NO
SORT_BY_SCOPE_NAME = NO
STRICT_PROTO_MATCHING = NO
GENERATE_TODOLIST = YES
GENERATE_TESTLIST = YES
GENERATE_BUGLIST = YES
GENERATE_DEPRECATEDLIST= YES
ENABLED_SECTIONS =
MAX_INITIALIZER_LINES = 30
SHOW_USED_FILES = YES
SHOW_FILES = YES
SHOW_NAMESPACES = YES
FILE_VERSION_FILTER =
LAYOUT_FILE =
CITE_BIB_FILES =
#---------------------------------------------------------------------------
# Configuration options related to warning and progress messages
#---------------------------------------------------------------------------
QUIET = NO
WARNINGS = YES
WARN_IF_UNDOCUMENTED = YES
WARN_IF_DOC_ERROR = YES
WARN_NO_PARAMDOC = NO
WARN_AS_ERROR = NO
WARN_FORMAT = "$file:$line: $text"
WARN_LOGFILE =
#---------------------------------------------------------------------------
# Configuration options related to the input files
#---------------------------------------------------------------------------
INPUT = @CMAKE_CURRENT_SOURCE_DIR@/src
INPUT_ENCODING = UTF-8
FILE_PATTERNS =
RECURSIVE = YES
EXCLUDE = @CMAKE_CURRENT_BINARY_DIR@
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS =
EXCLUDE_SYMBOLS =
EXAMPLE_PATH =
EXAMPLE_PATTERNS =
EXAMPLE_RECURSIVE = NO
IMAGE_PATH =
INPUT_FILTER =
FILTER_PATTERNS =
FILTER_SOURCE_FILES = NO
FILTER_SOURCE_PATTERNS =
USE_MDFILE_AS_MAINPAGE =
#---------------------------------------------------------------------------
# Configuration options related to source browsing
#---------------------------------------------------------------------------
SOURCE_BROWSER = NO
INLINE_SOURCES = NO
STRIP_CODE_COMMENTS = YES
REFERENCED_BY_RELATION = NO
REFERENCES_RELATION = NO
REFERENCES_LINK_SOURCE = YES
SOURCE_TOOLTIPS = YES
USE_HTAGS = NO
VERBATIM_HEADERS = YES
CLANG_ASSISTED_PARSING = NO
CLANG_OPTIONS =
#---------------------------------------------------------------------------
# Configuration options related to the alphabetical class index
#---------------------------------------------------------------------------
ALPHABETICAL_INDEX = YES
COLS_IN_ALPHA_INDEX = 5
IGNORE_PREFIX =
#---------------------------------------------------------------------------
# Configuration options related to the HTML output
#---------------------------------------------------------------------------
GENERATE_HTML = YES
HTML_OUTPUT = html
HTML_FILE_EXTENSION = .html
HTML_HEADER =
HTML_FOOTER =
HTML_STYLESHEET =
HTML_EXTRA_STYLESHEET =
HTML_EXTRA_FILES =
HTML_COLORSTYLE_HUE = 220
HTML_COLORSTYLE_SAT = 100
HTML_COLORSTYLE_GAMMA = 80
HTML_TIMESTAMP = NO
HTML_DYNAMIC_SECTIONS = NO
HTML_INDEX_NUM_ENTRIES = 100
GENERATE_DOCSET = NO
GENERATE_HTMLHELP = NO
CHM_FILE =
HHC_LOCATION =
GENERATE_CHI = NO
CHM_INDEX_ENCODING =
BINARY_TOC = NO
TOC_EXPAND = NO
DISABLE_INDEX = NO
GENERATE_TREEVIEW = NO
ENUM_VALUES_PER_LINE = 4
TREEVIEW_WIDTH = 250
EXT_LINKS_IN_WINDOW = NO
FORMULA_FONTSIZE = 10
FORMULA_TRANSPARENT = YES
USE_MATHJAX = NO
MATHJAX_FORMAT = HTML-CSS
MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest
MATHJAX_EXTENSIONS =
MATHJAX_CODEFILE =
SEARCHENGINE = YES
SERVER_BASED_SEARCH = NO
EXTERNAL_SEARCH = NO
SEARCHENGINE_URL =
SEARCHDATA_FILE = searchdata.xml
EXTERNAL_SEARCH_ID =
EXTRA_SEARCH_MAPPINGS =
#---------------------------------------------------------------------------
# Configuration options related to the LaTeX output
#---------------------------------------------------------------------------
GENERATE_LATEX = YES
LATEX_OUTPUT = latex
LATEX_CMD_NAME = latex
MAKEINDEX_CMD_NAME = makeindex
COMPACT_LATEX = NO
PAPER_TYPE = a4
EXTRA_PACKAGES =
LATEX_HEADER =
LATEX_FOOTER =
LATEX_EXTRA_STYLESHEET =
LATEX_EXTRA_FILES =
PDF_HYPERLINKS = YES
USE_PDFLATEX = YES
LATEX_BATCHMODE = NO
LATEX_HIDE_INDICES = NO
LATEX_SOURCE_CODE = NO
LATEX_BIB_STYLE = plain
LATEX_TIMESTAMP = NO
#---------------------------------------------------------------------------
# Configuration options related to the RTF output
#---------------------------------------------------------------------------
GENERATE_RTF = NO
RTF_OUTPUT = rtf
COMPACT_RTF = NO
RTF_HYPERLINKS = NO
RTF_STYLESHEET_FILE =
RTF_EXTENSIONS_FILE =
RTF_SOURCE_CODE = NO
#---------------------------------------------------------------------------
# Configuration options related to the man page output
#---------------------------------------------------------------------------
GENERATE_MAN = NO
MAN_OUTPUT = man
MAN_EXTENSION = .3
MAN_SUBDIR =
MAN_LINKS = NO
#---------------------------------------------------------------------------
# Configuration options related to the XML output
#---------------------------------------------------------------------------
GENERATE_XML = NO
XML_OUTPUT = xml
XML_PROGRAMLISTING = YES
#---------------------------------------------------------------------------
# Configuration options related to the DOCBOOK output
#---------------------------------------------------------------------------
GENERATE_DOCBOOK = NO
DOCBOOK_OUTPUT = docbook
DOCBOOK_PROGRAMLISTING = NO
#---------------------------------------------------------------------------
# Configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------
GENERATE_AUTOGEN_DEF = NO
#---------------------------------------------------------------------------
# Configuration options related to the Perl module output
#---------------------------------------------------------------------------
GENERATE_PERLMOD = NO
PERLMOD_LATEX = NO
PERLMOD_PRETTY = YES
PERLMOD_MAKEVAR_PREFIX =
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = NO
EXPAND_ONLY_PREDEF = NO
SEARCH_INCLUDES = YES
INCLUDE_PATH =
INCLUDE_FILE_PATTERNS =
PREDEFINED =
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
#---------------------------------------------------------------------------
# Configuration options related to external references
#---------------------------------------------------------------------------
TAGFILES =
GENERATE_TAGFILE =
ALLEXTERNALS = NO
EXTERNAL_GROUPS = YES
EXTERNAL_PAGES = YES
PERL_PATH = /usr/bin/perl
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
CLASS_DIAGRAMS = YES
MSCGEN_PATH =
DIA_PATH =
HIDE_UNDOC_RELATIONS = YES
HAVE_DOT = YES
DOT_NUM_THREADS = 0
DOT_FONTNAME = Helvetica
DOT_FONTSIZE = 10
DOT_FONTPATH =
CLASS_GRAPH = YES
COLLABORATION_GRAPH = YES
GROUP_GRAPHS = YES
UML_LOOK = NO
UML_LIMIT_NUM_FIELDS = 10
TEMPLATE_RELATIONS = NO
INCLUDE_GRAPH = YES
INCLUDED_BY_GRAPH = YES
CALL_GRAPH = NO
CALLER_GRAPH = NO
GRAPHICAL_HIERARCHY = YES
DIRECTORY_GRAPH = YES
DOT_IMAGE_FORMAT = png
INTERACTIVE_SVG = NO
DOT_PATH =
DOTFILE_DIRS =
MSCFILE_DIRS =
DIAFILE_DIRS =
PLANTUML_JAR_PATH =
PLANTUML_INCLUDE_PATH =
DOT_GRAPH_MAX_NODES = 50
MAX_DOT_GRAPH_DEPTH = 0
DOT_TRANSPARENT = NO
DOT_MULTI_TARGETS = NO
GENERATE_LEGEND = YES
DOT_CLEANUP = YES

219
README.md
View File

@ -1,111 +1,119 @@
Monicelli
=========
# Monicelli 2.0 "Cofandina"
This all-new release mainly brings several improvements to the code that make
it easier to hack and build new features. In addition to that:
* `mcc` now produces an executable by default, no need to use (or install)
`llc`, as was previously the case. `mcc cofandina.mc -o cofandina` and
that's it! This feature currently requires a POSIX system (like Linux or
Mac OS X) with a C compiler installed (anything reasonably recent will do).
* `mcc` does not depend on Boost anymore.
* `mcc` has a new hand-written parser that should provide better error
messages. Now it's easier to stuzzicate your prematurated supercazzole.
Error messages are in plain English and not very antani. Apologies for that.
* `mcc` now generates code that directly calls the C standard library. This
allows you to seamlessly link Monicelli object files with C/C++ code, without
any extra dependency on a Monicelli standard library.
* The code generator in `mcc` has been ported to LLVM 3.8 and will continue
to be updated with new releases.
* Modules are gone. This was a rather obscure feature that made it possible to expose
functions implemented in C/C++ to Monicelli code using a YAML-based language.
Instead, it's now possible to declare a function with an empty body to signal
that it will be implemented in another file, be it in Monicelli or C/C++.
See the updated Turtle example.
* The C++ transpiler is gone. It might come back again, though.
# What's Monicelli anyway?
Monicelli is an esoteric programming language based on the so-called
"supercazzole" from the movie Amici Miei, a masterpiece of the Italian
comedy.
There is no way to translate a "supercazzola" to English, so if you don't speak
Italian, I'm afraid you won't understand. I'm really sorry for you :)
Over the past few years I have tried to render the idea of "supercazzola" to
non-Italian speakers, with little success. The closest I got was by describing
it as "comically deceptive gibberish", which sadly does not capture the true
essence of what a "supercazzola" (spelled "supercazzora" according to some) is.
I'm still open to suggestions on how to better present Monicelli (the language)
to the international public.
Compilation
===========
# Compilation
You will need `bison` version >= 3.0 (Bison 2.5 works but requires manual intervention),
`flex` >= 2.5, `LLVM` >= 3.5, `Boost` >= 1.48, `YAML-cpp` >= 0.5 and any C++11 compiler.
The build scripts are generated using CMake, version >= 2.8.
A part of the Monicelli compiler (the lexer) is generated using `ragel`, which
you will need to have installed. If this is not the case, the configuration
script will warn you. Monicelli is developed with version 6.8, but any
sufficiently recent release should do just fine.
You will also need to have LLVM development libraries installed, version 3.8.
Other versions might or might not work.
Finally, you will need CMake, version 3.0 or higher.
A typical Makefile-based build workflow would be:
mkdir build/
cd build/
cmake ..
make
During the Makefile generation, the build script will test the compiler for all
the required features.
$ cd monicelli/
$ mkdir build/
$ cd build/
$ cmake .. -DCMAKE_INSTALL_PREFIX="$HOME/mcc"
$ make all install
If your tools are installed in non-standard locations
(e.g. Bison Brew on Mac OS X), you can alter the search path with:
PATH=/path/to/bison cmake ..
$ PATH=/path/to/ragel cmake ..
If you can't really upgrade to Bison 3.0, a patch for Bison 2.5
is provided in `cmake/bison2.patch`. You will have to manually apply it with:
`mcc` statically links LLVM, once compiled it will only depend on the C++
runtime and on `libz`.
patch -p 1 < cmake/bison2.patch
## Note for non-POSIX platforms (like Windows)
However note that compilation with Bison 2.5 is not supported and the patch might be
removed in the future.
The external linker is called using fork+exec for simplicity. This means that
this part of the workflow will **not** work on non-POSIX systems, such as
Windows. There, you will need to disable this feature at build time. You will
only get object files (.o) that you will have to link, including a C runtime
library, by yourself.
###Building with LLVM on Debian/Ubuntu
Debian Testing and Ubuntu >= 14.04 distribute a LLVM 3.5 development package
**which is broken** (see [1](https://bugs.launchpad.net/ubuntu/+source/llvm/+bug/1365432)
and [2](https://bugs.launchpad.net/ubuntu/+source/llvm/+bug/1387011)).
You can disable the invocation of an external linker and make `mcc` compilable
on Windows during CMake configuration by forcing the appropriate flag to OFF:
Luckily, LLVM.org directly provides an APT repo which works fine.
http://llvm.org/apt/ has all the relevant info for installing the repo.
After that, the package we need is `llvm-3.5-dev`.
$ cmake .. -DENABLE_LINKER=OFF
**This is only necessary for compilation, Debian/Ubuntu LLVM runtime libs
and utilities work just fine.**
## Tested platforms
Usage
=====
The reference OS for building and testing Monicelli is Ubuntu 16.04 LTS. If the
build is broken there, then it's a bug. Unfortunately I don't have many other
platforms at hand to test, but it _should_ compile on many more POSIX systems,
including Mac OS X. If you managed to compile Monicelli on your favourite
platform and you needed a patch, it would be great if you could send a PR.
###LLVM frontend
Monicelli emits LLVM bitcode in its default configuration.
A typical compilation workflow would be:
# Usage
$ ./mcc example.mc
$ llc example.bc
$ cc example.s libmcrt.a -o example
Monicelli builds an executable by default on POSIX systems
(such as Linux, Mac OS X). Linking requires an external C compiler, anything
decently modern and standard-conformant should do.
In particular, note that the Monicelli runtime library must be compiled in or linked to use
all of the I/O functions. Also note the use of the `llc` utility, which is
provided by LLVM, to produce native assembler from LLVM bitcode.
A typical invocation is very similar to what you would expect from your C
compiler:
Please be aware that the Monicelli standard library depends on the C stdlib,
although this dependency is available on virtually any platform you might
dream of compiling Monicelli on.
$ mcc example.mc -o example
$ ./example
As such, `llvm` utilities are needed for compiling. Only the "low level"
utilities (`opt` and `llc`) are needed, not the whole Clang/Clang++ suite.
Usually, the relevant package goes under the name `llvm`.
Please be aware that the Monicelli compiler depends on the availability of a C
compiler and stdlib, although this dependency should be available on virtually
all platforms where you might think to run `mcc`.
A C compiler is used to simplify the assembling and linking step, but it could
be skipped altogether with a small effort. If you want to try ;)
`mcc` only performs minimal optimizations in order to ensure readability when
disassembling with `llvm-dis`. However, you might want to optimize the code
using `opt` LLVM utility:
$ opt example.bc | llc -o example.s
in place of the simple `llc` compilation step. See `opt` documentation for a
comprehensive list of optimizations available.
###C++ transpiler
`mcc` also works as a source to source compiler, which reads Monicelli
and outputs a subset of C++. Use the option `--c++` or `-+` for that.
A good way to learn on the field is comparing the resulting C++ with the
input. Well, mostly with the beautified version of the input, `*.beauty.mc`.
The typical command line would be:
$ ./mcc --c++ examples/primes.mc
$ c++ primes.cpp -o primes
$ ./primes
Language overview
=================
# Language overview
The original specification can be found in `Specification.txt`, and was
initially conceived by my colleagues and dear friends Alessandro Barenghi,
Michele Tartara and Nicola Vitucci, to whom goes my gratitude.
Unfortunately, their proposal was meant to be a joke and is not complete.
Their proposal was meant to be an elaborate joke and is not complete.
This project is an ongoing effort to produce a rigorous specification for the
language and implement a compiler, which implies filling gaps and ambiguities
with sensible choices.
@ -121,13 +129,13 @@ Accented letters can be replaced by the non-accented letter followed by a
backtick `` ` ``, although the use of the correct Italian spelling is strongly
encouraged for maximizing the antani effect.
###Get started!
## Getting started real quick
For those of you who want to get to the code ASAP, the `examples/`
folder contains a set of programs covering most of the features of the language.
Main
----
## Main
The entry point of the program (the "main") is identified by the phrase:
@ -143,8 +151,8 @@ optionally, no value might be returned with:
vaffanzum!
Expressions
-----------
## Expressions
The usual operators are given, but spelled as words to best fit in sentences.
They are directly mapped on usual operators as follows:
@ -165,7 +173,7 @@ When evaluating binary expressions whose operands have different types,
the type of the result will be the less restrictive between the two.
This ensures that no loss takes place when evaluating an expression.
###Binary shift
## Binary shift
Binary shift operators have a slightly different
syntax:
@ -190,10 +198,9 @@ maps to `antani << 2`.
It goes without saying, other expressions can be used instead of numbers.
Also, the usual precedence rules apply.
**Braces are not implemented**.
**There is no syntax for braces in Monicelli**.
Variables
---------
## Variables
A variable name can contain numbers, upper and lower case characters and must
not start with a number (the usual rules, that's it).
@ -206,7 +213,7 @@ to the same variable.
Consequently, the articles above cannot be used as variable names.
###Assignment
## Assignment
A value can be assigned to a variable with the following statement:
@ -218,7 +225,7 @@ The `<expression>` initializer is cast to the declared type of the variable,
even if the cast will cause some loss. This feature can be (ab)used to introduce
C-style casts too.
###Declaration
## Declaration
Variables can be declared in any scope. There are 5 variable types, which are
directly mapped on C++/C99 types as follows:
@ -246,8 +253,7 @@ for instance:
declares a variable called `antani` of type `Necchi` (`int`) and initializes
it to 4.
Input/Output
------------
## Input/Output
Variables and expressions can be printed with the statement:
@ -257,8 +263,7 @@ Conversely, a variable might be read from input using:
mi porga <varname>
Loop
----
## Loop
There is only one loop construct, equivalent to a C `do {} while();`, which is
defined as follows:
@ -283,8 +288,7 @@ maps to:
`brematura` might be replaced by its alternate form `prematura`
Branch
------
## Branch
The branch construct encompasses both the features of an `if` and a `switch`.
The best way to explain it is by comparing its various forms to the corresponding
@ -358,13 +362,12 @@ Finally, here is the equivalent of a `switch () {}`:
where the `o tarapia tapioco` part is like the `default` block.
Functions
---------
## Functions
**Note**: the alternate spelling `supercazzora` might be used in place
of `supercazzola` wherever the latter appears.
###Declaration
## Declaration
A function is declared with the `blinda la supercazzola` statement:
@ -404,7 +407,11 @@ Functions cannot be nested and can be declared before or after the main in any
order. `mcc` will not check that a return statement is always reachable inside
a non-void function. Failing to return a value leads to undefined behaviour.
###Invocation
A function might be declared with no body, in which case it's treated as a
prototype. A prototype makes the function signature known to the compiler, and
it signals that the function is implemented in another file.
## Invocation
A function is called with the `brematurata la supercazzola` statement:
@ -418,8 +425,7 @@ maps to:
antani = alfio(barilotto / 3) * 2;
Exceptions
----------
## Exceptions
The program might be aborted immediately with the statement:
@ -427,16 +433,14 @@ The program might be aborted immediately with the statement:
there are no arguments.
Assertions
----------
## Assertions
An assertion block will evaluate its expression and trigger an error message
if it is found to be 0 (logical false). An assertion is stated as:
ho visto <expression>!
Comments
--------
## Comments
Any character after `bituma` is ignored until a line break is encountered. For
instance, in:
@ -448,7 +452,7 @@ instance, in:
Comments are useful to fill the "supercazzola" and make it more readable, since
any word (including reserved words) can be inserted into it.
###Meta comments
## Meta comments
In addition to line comments, there are meta comments. A meta comment starts
with a hash sign `#` and continues until a line break is encountered, as an
@ -459,10 +463,11 @@ a long "supercazzola". Also, ordinary comments can and should be used in an
improper way to fill the sentence, meta comments provide a mechanism for
distinguishing "real" comments.
Reserved words and phrases
------------------------
## Reserved words and phrases
The following phrases are currently reserved with no assigned usage. They cannot be used as variable identifiers, even if they do not serve any other purpose in the current language revision.
The following phrases are currently reserved with no assigned usage. They cannot
be used as variable identifiers, even if they do not serve any other purpose in
the current language revision.
* `conte`
* `scusi noi siamo in`

54
cmake/FindLLVM.cmake Normal file
View File

@ -0,0 +1,54 @@
# Copyright 2017 the Monicelli project authors. All rights reserved.
# Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
# Locates LLVM through its `llvm-config` helper and exports:
#   LLVM_CONFIG       - path to the llvm-config executable
#   LLVM_VERSION      - output of `llvm-config --version`
#   LLVM_INCLUDE_DIR  - output of `llvm-config --includedir`
#   LLVM_LIBRARY_DIR  - output of `llvm-config --libdir`
#   LLVM_CXXFLAGS     - output of `llvm-config --cxxflags`
#   LLVM_MODULE_LIBS  - output of `llvm-config --libs all`
#   LLVM_SYSTEM_LIBS  - output of `llvm-config --system-libs`
#   LLVM_LIBS         - LLVM_MODULE_LIBS followed by LLVM_SYSTEM_LIBS
find_program(LLVM_CONFIG llvm-config)

# find_program() leaves a value ending in -NOTFOUND on failure, which if()
# evaluates as false.
if (NOT LLVM_CONFIG)
  message(FATAL_ERROR "Please install the LLVM dev package to compile Monicelli.")
else()
  message(STATUS "Found llvm-config: ${LLVM_CONFIG}")
endif()

# Runs `llvm-config <args...>` and stores its whitespace-stripped output in
# <out_var> in the caller's scope. A non-zero exit status is reported as a
# warning instead of being silently turned into an empty value.
#
# Example: monicelli_query_llvm_config(LLVM_VERSION --version)
function(monicelli_query_llvm_config out_var)
  execute_process(
    COMMAND "${LLVM_CONFIG}" ${ARGN}
    OUTPUT_VARIABLE query_output
    RESULT_VARIABLE query_status
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )
  if (NOT "${query_status}" STREQUAL "0")
    string(REPLACE ";" " " query_args "${ARGN}")
    message(WARNING
      "'${LLVM_CONFIG} ${query_args}' failed (${query_status}), build may fail."
    )
  endif()
  set(${out_var} "${query_output}" PARENT_SCOPE)
endfunction()

monicelli_query_llvm_config(LLVM_VERSION --version)

# Only this exact release is expected; anything else is allowed with a warning.
set(TARGET_LLVM_VERSION "3.8.0")
if (NOT "${LLVM_VERSION}" STREQUAL "${TARGET_LLVM_VERSION}")
  message(WARNING "Expected LLVM ${TARGET_LLVM_VERSION}, found ${LLVM_VERSION}, build may fail.")
endif()

monicelli_query_llvm_config(LLVM_INCLUDE_DIR --includedir)
monicelli_query_llvm_config(LLVM_LIBRARY_DIR --libdir)
monicelli_query_llvm_config(LLVM_CXXFLAGS --cxxflags)
monicelli_query_llvm_config(LLVM_MODULE_LIBS --libs all)
monicelli_query_llvm_config(LLVM_SYSTEM_LIBS --system-libs)

# NOTE(review): each variable holds llvm-config's output as one string, so
# LLVM_LIBS is a two-element list; confirm this is what the consumers expect.
set(LLVM_LIBS ${LLVM_MODULE_LIBS} ${LLVM_SYSTEM_LIBS})

10
cmake/FindRagel.cmake Normal file
View File

@ -0,0 +1,10 @@
# Copyright 2017 the Monicelli project authors. All rights reserved.
# Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
# Locates the ragel executable and stores its path in RAGEL.
# Configuration is aborted when ragel is not available.
find_program(RAGEL ragel)

# find_program() leaves RAGEL-NOTFOUND on failure, which if() evaluates as
# false. Testing the variable by name also avoids expanding ${RAGEL} unquoted,
# which is a syntax error when the value is empty.
if (NOT RAGEL)
  message(FATAL_ERROR "Please install ragel to compile Monicelli.")
else()
  message(STATUS "Found ragel: ${RAGEL}")
endif()

View File

@ -1,28 +0,0 @@
diff --git a/Monicelli.ypp b/Monicelli.ypp
index 028506d..9817f1e 100644
--- a/Monicelli.ypp
+++ b/Monicelli.ypp
@@ -23,7 +23,7 @@
}
%skeleton "lalr1.cc"
-%require "3.0"
+%require "2.5"
%language "c++"
%defines
@@ -31,9 +31,11 @@
%locations
%token-table
-%define parse.error verbose
-%define api.namespace {monicelli}
-%define parser_class_name {Parser}
+%{
+#define YYERROR_VERBOSE
+%}
+%define namespace monicelli
+%define parser_class_name Parser
%lex-param {Scanner &scanner}
%parse-param {Scanner &scanner}

View File

@ -1,89 +0,0 @@
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* Minimum program containing all required C++11 features.
* If a compiler cannot compile this, then it won't compile Monicelli.
*/
#include <memory>
#include <unordered_set>
#include <functional>
#include <vector>
#include <string>
#include <cassert>
const int global_i = 0;
template<typename T>
using Foo = std::vector<T>;
/**
 * Probe for correct reference binding: every const-reference parameter must
 * alias global_i itself, not a temporary copy of it.
 */
struct TestingReferenceBinding {
// Constructing from an int lvalue must bind directly to that object.
TestingReferenceBinding(int const& ii) {
assert(&ii == &global_i);
}
// Assigning from an int lvalue must select this overload and bind directly.
void operator=(int const& ii) {
assert(&ii == &global_i);
}
// The rvalue overload must never be chosen for the lvalues used in main().
void operator=(int&&) {
assert(false);
}
};
/** Probe for scoped enumerations (`enum class`); the values are never used. */
enum class Dummy {
FOO, BAR, BAZ
};
/** Base class probe: a `noexcept` member function and a virtual to override. */
class Banana {
// Exercises the `noexcept` specifier on a const member function.
int yep() const noexcept {
return 0;
}
// Virtual hook overridden by Phone below.
virtual void something() {}
};
/** Derived class probe for the `override` specifier. */
class Phone: public Banana {
virtual void something() override {}
};
/**
 * Exercises the remaining library and language features in one place; the
 * values computed here are not used for anything else.
 */
int main() {
// std::unique_ptr together with brace initialization of a new-expression.
std::unique_ptr<int> foo(new int{0});
// Initializer-list construction of a container.
std::vector<int> bar = {1, 2, 3};
// Range-based for loop; baz is a copy, so bar is left unchanged.
for (int baz: bar) {
baz += 1;
}
// nullptr literal.
char *str = nullptr;
// std::move on a class type.
Banana a;
Banana b = std::move(a);
// String-to-number conversion from <string>.
long c = std::stol("100");
// Boost::Optional sanity check for old compilers
int const& iref = global_i;
assert(&iref == &global_i);
// Construction and assignment from the global itself...
TestingReferenceBinding ttt = global_i;
ttt = global_i;
// ...and through a const reference to it; both must alias global_i.
TestingReferenceBinding ttt2 = iref;
ttt2 = iref;
}

View File

@ -1,75 +0,0 @@
#
# Monicelli: an esoteric language compiler
#
# Copyright (C) 2014 Stefano Sanfilippo
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# The following macro has been adapted from
# https://gist.github.com/steakknife/c36c99b51703fc6f6c1b
# If is running under Mac OS X and the Homebrew package manager is installed,
# then Homebrew-specific directories for the given package are automatically
# added to the appropriate CMake search paths.
# find_package_prefer_brew(<package> [find_package args...])
#
# Drop-in replacement for find_package(). When the `brew` executable is found
# and `brew --prefix <package>` names an existing directory, that directory
# (and its bin/, include/ and lib/ subdirectories, when present) is used as
# search path for the duration of the find_package() call. Search path
# variables that are already set are left alone. In every other case the call
# is forwarded to find_package() unchanged.
#
# Example: find_package_prefer_brew(BISON REQUIRED)
macro(find_package_prefer_brew _package)
# A macro has no scope of its own: drop values left by a previous invocation.
unset(_brew_path)
unset(_brew_pkg_lower)
unset(_has_brew)
find_program(_has_brew NAMES brew DOC "path to Homebrew executable")
if(_has_brew)
# The package name is lowercased before being handed to brew.
string(TOLOWER ${_package} _brew_pkg_lower)
# Ask brew for the package prefix; its error output is discarded.
execute_process(COMMAND brew --prefix ${_brew_pkg_lower} ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE _brew_path)
if(EXISTS ${_brew_path})
# Save the current search paths so they can be restored afterwards.
set(_brew_cmake_module_path ${CMAKE_MODULE_PATH})
set(_brew_cmake_program_path ${CMAKE_PROGRAM_PATH})
set(_brew_cmake_include_path ${CMAKE_INCLUDE_PATH})
set(_brew_cmake_library_path ${CMAKE_LIBRARY_PATH})
# Each search path is only filled in when it is currently empty.
if(NOT CMAKE_MODULE_PATH)
list(INSERT CMAKE_MODULE_PATH 0 "${_brew_path}")
endif()
if(NOT CMAKE_PROGRAM_PATH AND EXISTS "${_brew_path}/bin")
list(INSERT CMAKE_PROGRAM_PATH 0 "${_brew_path}/bin")
endif()
if(NOT CMAKE_INCLUDE_PATH AND EXISTS "${_brew_path}/include")
list(INSERT CMAKE_INCLUDE_PATH 0 "${_brew_path}/include")
endif()
if(NOT CMAKE_LIBRARY_PATH AND EXISTS "${_brew_path}/lib")
list(INSERT CMAKE_LIBRARY_PATH 0 "${_brew_path}/lib")
endif()
# Extra arguments (version, REQUIRED, ...) are forwarded untouched.
find_package(${_package} ${ARGN})
# Restore the search paths saved above and drop the temporaries.
set(CMAKE_MODULE_PATH ${_brew_cmake_module_path})
set(CMAKE_PROGRAM_PATH ${_brew_cmake_program_path})
set(CMAKE_INCLUDE_PATH ${_brew_cmake_include_path})
set(CMAKE_LIBRARY_PATH ${_brew_cmake_library_path})
unset(_brew_cmake_module_path)
unset(_brew_cmake_program_path)
unset(_brew_cmake_include_path)
unset(_brew_cmake_library_path)
else()
# brew reported no usable prefix for this package: plain lookup.
find_package(${_package} ${ARGN})
endif()
else()
# brew itself was not found: plain lookup.
find_package(${_package} ${ARGN})
endif()
endmacro(find_package_prefer_brew)

View File

@ -1,42 +0,0 @@
#
# Monicelli: an esoteric language compiler
#
# Copyright (C) 2014 Stefano Sanfilippo
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# CPack packaging configuration: package metadata, version and generators.
include(InstallRequiredSystemLibraries)

set(CPACK_GENERATOR "TGZ;DEB;RPM;ZIP")
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY
  "Monicelli: an esoteric programming language, come se fosse antani."
)
set(CPACK_PACKAGE_VENDOR "Stefano Sanfilippo")
set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Stefano Sanfilippo")
set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.md")
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt")

set(CPACK_PACKAGE_VERSION_MAJOR "1")
set(CPACK_PACKAGE_VERSION_MINOR "0")
set(CPACK_PACKAGE_VERSION_PATCH "0")

# Derive the directory from the package version set above. The previous
# ${CMake_VERSION_MAJOR}/${CMake_VERSION_MINOR} variables are not defined in
# this project, which produced the directory name "monicelli-.".
set(CPACK_PACKAGE_INSTALL_DIRECTORY
  "monicelli-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}"
)

include(CPack)

10
examples/Makefile Normal file
View File

@ -0,0 +1,10 @@
# Builds each example program from its .mc source with the Monicelli compiler.
# Override the compiler on the command line, e.g. `make MCC=/path/to/mcc`.
MCC=mcc
EXAMPLES=factorial hello-world primes return fibonacci mandelbrot float

# `all` and `clean` are actions, not files: keep them working even if a file
# with one of those names shows up in this directory.
.PHONY: all clean

all: $(EXAMPLES)

clean:
	$(RM) $(EXAMPLES)

# Compile <name>.mc into the executable <name>.
%: %.mc
	$(MCC) $< -o $@

View File

@ -1,3 +0,0 @@
/tartaruga
/turtle.bc
/barilotto.png

View File

@ -1,6 +1,14 @@
compile:
# Make sure to have mcc somewhere in path
mcc turtle.mm turtle.mc
llc turtle.bc
c++ turtle.s turtle.cpp -I../.. -lcairo -o tartaruga
rm -f turtle.s
MCC=mcc
all: turtle
clean:
$(RM) *.o turtle
turtle.mc:;
%.mc.o: %.mc
$(MCC) -c $< -o $@
turtle: turtle.mc.o turtle.cpp
$(CXX) -std=c++11 $^ -lcairo -o $@

View File

@ -1,10 +1,10 @@
#include "Runtime.h"
#include <cairo/cairo.h>
#include <cstdint>
#include <memory>
#include <string>
class Turtle {
class Turtle final {
public:
Turtle(int sizeX, int sizeY) {
surface = cairo_image_surface_create(CAIRO_FORMAT_RGB24, sizeX, sizeY);
@ -12,7 +12,7 @@ public:
reset();
}
virtual ~Turtle() {
~Turtle() {
cairo_destroy(context);
cairo_surface_destroy(surface);
}
@ -71,28 +71,27 @@ private:
cairo_t *context;
};
static Turtle *turtle = 0;
static std::unique_ptr<Turtle> turtle;
extern "C" {
void cofandina(Monicelli_Int x, Monicelli_Int y) {
if (turtle != 0) delete turtle;
turtle = new Turtle(x, y);
void cofandina(int32_t x, int32_t y) {
turtle.reset(new Turtle(x, y));
}
void pulitina(Monicelli_Double r, Monicelli_Double g, Monicelli_Double b) {
void pulitina(double r, double g, double b) {
turtle->setColor(r, g, b);
}
void pastene(Monicelli_Double size) {
void pastene(double size) {
turtle->setLineWidth(size);
}
void muovi(Monicelli_Double x, Monicelli_Double y) {
void muovi(double x, double y) {
turtle->moveTo(x, y);
}
void ispettore(Monicelli_Double x, Monicelli_Double y) {
void ispettore(double x, double y) {
turtle->lineTo(x, y);
}

View File

@ -1,3 +1,8 @@
blinda la supercazzola cofandina con x Necchi, y Necchi o scherziamo?
blinda la supercazzola pastene con w Sassaroli o scherziamo?
blinda la supercazzola ispettore con x Sassaroli, y Sassaroli o scherziamo?
blinda la supercazzola barilotto o scherziamo?
Lei ha clacsonato
prematurata la supercazzola cofandina con 200, 200 o scherziamo?
prematurata la supercazzola pastene con 3 o scherziamo?

View File

@ -1,26 +0,0 @@
help:
Turtle graphics in Monicelli!
source:
- Turtle.cpp
functions:
cofandina:
args: {x: int, y: int}
type: void
help: Crea una superficie di x per y pixel.
pulitina:
args: {r: double, g: double, b: double}
help: Imposta il colore dell'output.
pastene:
args: {size: double}
help: Imposta la dimensione in pixel della penna.
muovi:
args: {x: double, y: double}
help: Porta la penna nel punto specificato.
ispettore:
args: {x: double, y: double}
help: Traccia una linea dal punto corrente al punto specificato.
barilotto:
docs: Salva l'immagine come barilotto.png

View File

@ -1,796 +0,0 @@
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "BitcodeEmitter.hpp"
#include "Scope.hpp"
#include "Nodes.hpp"
#include "ModuleRegistry.hpp"
#include <llvm/IR/Verifier.h>
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Analysis/Passes.h>
#include <cassert>
#include <iostream>
#include <string>
#include <map>
#include <vector>
#include <unordered_set>
#include <initializer_list>
#include <unordered_map>
// Yes, that's right, no ending ;
#define GUARDED(call) if (!(call)) return false
using namespace monicelli;
using llvm::getGlobalContext;
// Mutable state shared by all BitcodeEmitter::emit() overloads.
struct BitcodeEmitter::Private {
// Value left behind by the most recently emitted node (expressions store
// their result here; the prototype emitter stores the llvm::Function).
llvm::Value *retval = nullptr;
// Stack slot holding the return value of the function being emitted;
// nullptr while emitting a void function.
llvm::AllocaInst *funcRetval = nullptr;
// Exit block of the function being emitted; Return statements branch to it.
llvm::BasicBlock *funcExit = nullptr;
llvm::IRBuilder<> builder = llvm::IRBuilder<>(getGlobalContext());
// Maps variable names to their stack slots.
Scope<std::string, llvm::AllocaInst*> scope;
// Per-function optimization pipeline, configured in the constructor.
Pointer<llvm::legacy::FunctionPassManager> optimizer;
};
// Reserves a stack slot of the given type, named after `name`, at the very
// beginning of the entry block of `func`.
static llvm::AllocaInst* allocateVar(llvm::Function *func, Id const& name, llvm::Type *type) {
    llvm::BasicBlock &entry = func->getEntryBlock();
    llvm::IRBuilder<> entryBuilder(&entry, entry.begin());
    return entryBuilder.CreateAlloca(type, nullptr, name.getValue().c_str());
}
// Reserves the "result" stack slot, typed as the return type of `func`, at
// the very beginning of its entry block.
static llvm::AllocaInst* allocateReturnVariable(llvm::Function *func) {
    llvm::BasicBlock &entry = func->getEntryBlock();
    llvm::IRBuilder<> entryBuilder(&entry, entry.begin());
    return entryBuilder.CreateAlloca(func->getReturnType(), nullptr, "result");
}
// Prints "line L, col C: <chunks separated by spaces>" on stderr, using the
// start of the node's location. Always returns false, so that callers can
// write `return reportError(...)`.
static bool reportError(Localizable const& node, std::initializer_list<std::string> const& what) {
    std::cerr << "line " << node.getLocation().begin.line << ", "
              << "col " << node.getLocation().begin.column << ": ";
    for (auto piece = what.begin(); piece != what.end(); ++piece) {
        std::cerr << *piece << ' ';
    }
    std::cerr << std::endl;
    return false;
}
#define I64 llvm::Type::getInt64Ty(getGlobalContext())
#define I8 llvm::Type::getInt8Ty(getGlobalContext())
#define I1 llvm::Type::getInt1Ty(getGlobalContext())
#define F llvm::Type::getFloatTy(getGlobalContext())
#define D llvm::Type::getDoubleTy(getGlobalContext())
#define V llvm::Type::getVoidTy(getGlobalContext())
static const std::unordered_map<llvm::Type*, std::unordered_map<llvm::Type*, llvm::Type*>> TYPECAST_MAP = {
{I64, { {I8, I64}, {I1, I64}, { F, D}, {D, D}}},
{ I8, {{I64, I64}, {I1, I8}, { F, F}, {D, D}}},
{ I1, {{I64, I64}, {I8, I8}, { F, F}, {D, D}}},
{ F, {{I64, D}, {I8, F}, {I1, F}, {D, D}}},
{ D, {{I64, D}, {I8, D}, {I1, D}, { F, D} }}
};
// Maps an LLVM type back to the Monicelli type it represents, or
// Type::UNKNOWN when there is no counterpart.
static Type MonicelliType(llvm::Type const* type) {
    if (type == I64) return Type::INT;
    if (type == I8) return Type::CHAR;
    if (type == I1) return Type::BOOL;
    if (type == D) return Type::DOUBLE;
    if (type == F) return Type::FLOAT;
    if (type == V) return Type::VOID;
    return Type::UNKNOWN;
}
// Maps a Monicelli type to its LLVM representation. Returns nullptr for
// Type::UNKNOWN (and for any value outside the enumeration).
static llvm::Type *LLVMType(Type const& type) {
    switch (type) {
    case Type::VOID:
        return V;
    case Type::BOOL:
        return I1;
    case Type::CHAR:
        return I8;
    case Type::INT:
        return I64;
    case Type::FLOAT:
        return F;
    case Type::DOUBLE:
        return D;
    case Type::UNKNOWN:
        break; // FIXME
    }
    return nullptr;
}
// Picks the type in which a binary operation between `left` and `right` is
// carried out: their common type when they agree, otherwise the entry found
// in TYPECAST_MAP. Returns nullptr when the combination is not listed.
static llvm::Type* deduceResultType(llvm::Value *left, llvm::Value *right) {
    llvm::Type *leftType = left->getType();
    llvm::Type *rightType = right->getType();
    if (leftType == rightType) return rightType;

    auto row = TYPECAST_MAP.find(leftType);
    if (row == TYPECAST_MAP.end()) return nullptr;

    auto cell = row->second.find(rightType);
    if (cell == row->second.end()) return nullptr;

    return cell->second;
}
#undef I64
#undef I8
#undef I1
#undef F
#undef D
#undef V
// True when `type` is one of the two floating point types handled here:
// float or double.
static inline
bool isFP(llvm::Type *type) {
return type->isFloatTy() || type->isDoubleTy();
}
// True when `type` is an integer type of any width.
static inline
bool isInt(llvm::Type *type) {
return type->isIntegerTy();
}
// Emits the instruction converting `val` to `toType` and returns the
// converted value; returns `val` itself when no conversion is needed and
// nullptr when the conversion is not supported.
//
// Booleans (i1) get special treatment so that they behave as 0/1 values:
//  * anything -> bool is a "different from zero" test. A plain truncation
//    would keep the lowest bit only, making e.g. 2 false;
//  * bool -> wider integer or floating point is an unsigned extension. A sign
//    extension would turn `true` into -1.
// All other integers are treated as signed.
static llvm::Value* coerce(BitcodeEmitter::Private *d, llvm::Value *val, llvm::Type *toType) {
    llvm::Type *fromType = val->getType();
    if (fromType == toType) return val;

    bool const fromBool = fromType->isIntegerTy(1);

    if (toType->isIntegerTy(1)) {
        if (isFP(fromType)) {
            // Unordered comparison: NaN is different from zero, hence true.
            return d->builder.CreateFCmpUNE(val, llvm::Constant::getNullValue(fromType));
        }
        if (isInt(fromType)) {
            return d->builder.CreateICmpNE(val, llvm::Constant::getNullValue(fromType));
        }
        return nullptr;
    }

    if (isInt(toType)) {
        if (isFP(fromType)) return d->builder.CreateFPToSI(val, toType);
        if (fromBool) return d->builder.CreateZExt(val, toType);
        if (isInt(fromType)) return d->builder.CreateSExtOrTrunc(val, toType);
        return nullptr;
    }

    if (isFP(toType) && isInt(fromType)) {
        return fromBool ? d->builder.CreateUIToFP(val, toType)
                        : d->builder.CreateSIToFP(val, toType);
    }
    if (fromType->isFloatTy() && toType->isDoubleTy()) {
        return d->builder.CreateFPExt(val, toType);
    }
    if (fromType->isDoubleTy() && toType->isFloatTy()) {
        return d->builder.CreateFPTrunc(val, toType);
    }

    return nullptr;
}
// Emits the truth test for `test` and returns the resulting i1 value, named
// `label`. A value is true when it is different from zero.
//
// The comparison is done in the value's own type: converting to a one-bit
// integer first would only look at the lowest bit (2 would be false) or at
// the truncated integer part (0.5 would be false).
static llvm::Value* isTrue(BitcodeEmitter::Private *d, llvm::Value* test, llvm::Twine const& label="") {
    llvm::Type *type = test->getType();
    llvm::Value *zero = llvm::Constant::getNullValue(type);
    if (isFP(type)) {
        // Unordered comparison: NaN is different from zero, hence true.
        return d->builder.CreateFCmpUNE(test, zero, label);
    }
    return d->builder.CreateICmpNE(test, zero, label);
}
static const std::map<Type, std::string> PUT_NAMES = {{
{Type::BOOL, "__Monicelli_putBool"},
{Type::CHAR, "__Monicelli_putChar"},
{Type::FLOAT, "__Monicelli_putFloat"},
{Type::DOUBLE, "__Monicelli_putDouble"},
{Type::INT, "__Monicelli_putInt"}
}};
static const std::map<Type, std::string> GET_NAMES = {
{Type::BOOL, "__Monicelli_getBool"},
{Type::CHAR, "__Monicelli_getChar"},
{Type::FLOAT, "__Monicelli_getFloat"},
{Type::DOUBLE, "__Monicelli_getDouble"},
{Type::INT, "__Monicelli_getInt"}
};
static const std::string ABORT_NAME = "__Monicelli_abort";
static const std::string ASSERT_NAME = "__Monicelli_assert";
// Converts `expression` to the type of the variable behind `dest` and stores
// it there. Returns false, emitting no store, when the conversion is not
// possible.
static bool convertAndStore(BitcodeEmitter::Private *d, llvm::AllocaInst *dest, llvm::Value *expression) {
    llvm::Value *converted = coerce(d, expression, dest->getAllocatedType());
    if (converted != nullptr) {
        d->builder.CreateStore(converted, dest);
        return true;
    }
    return false;
}
// Creates the "monicelli" LLVM module in the global context, the private
// emitter state and the per-function optimization pipeline.
BitcodeEmitter::BitcodeEmitter() {
module = std::unique_ptr<llvm::Module>(
new llvm::Module("monicelli", getGlobalContext())
);
// Owned by this object, released in the destructor.
d = new Private;
d->optimizer = Pointer<llvm::legacy::FunctionPassManager>(
new llvm::legacy::FunctionPassManager(module.get())
);
// Passes run on every function once its body has been emitted.
d->optimizer->add(llvm::createBasicAliasAnalysisPass());
d->optimizer->add(llvm::createInstructionCombiningPass());
d->optimizer->add(llvm::createReassociatePass());
d->optimizer->add(llvm::createGVNPass());
d->optimizer->add(llvm::createCFGSimplificationPass());
d->optimizer->doInitialization();
}
// Releases the private state allocated by the constructor.
BitcodeEmitter::~BitcodeEmitter() {
delete d;
}
// Emits a return statement: the optional value is converted to the return
// type of the enclosing function and stored in its return slot, then control
// jumps to the function exit block.
//
// Returning a value from a void function, or a value that cannot be
// converted to the return type, is reported as a compile error instead of
// tripping an assertion or storing a null value.
bool BitcodeEmitter::emit(Return const& node) {
    if (node.getExpression()) {
        GUARDED(node.getExpression()->emit(this));

        if (d->funcRetval == nullptr) {
            return reportError(node, {"Cannot return a value from a void function"});
        }

        llvm::Type *type = d->builder.GetInsertBlock()->getParent()->getReturnType();
        llvm::Value *result = coerce(d, d->retval, type);
        if (result == nullptr) {
            return reportError(node, {
                "Cannot convert returned value to the function return type"
            });
        }
        d->builder.CreateStore(result, d->funcRetval);
    }

    d->builder.CreateBr(d->funcExit);
    return true;
}
// Emits a loop whose body runs before the condition is evaluated for the
// first time; the body is repeated for as long as the condition is true.
// Returns false as soon as the body or the condition fail to emit.
bool BitcodeEmitter::emit(Loop const& node) {
llvm::Function *father = d->builder.GetInsertBlock()->getParent();
llvm::BasicBlock *body = llvm::BasicBlock::Create(
getGlobalContext(), "loop", father
);
// Enter the body unconditionally.
d->builder.CreateBr(body);
d->builder.SetInsertPoint(body);
// Created detached: it is appended to the function only after the body, so
// that blocks created while emitting the body come first.
llvm::BasicBlock *condition = llvm::BasicBlock::Create(
getGlobalContext(), "loopcondition"
);
GUARDED(ensureBasicBlock(node.getBody(), condition));
father->getBasicBlockList().push_back(condition);
d->builder.SetInsertPoint(condition);
GUARDED(node.getCondition().emit(this));
llvm::Value *loopTest = isTrue(d, d->retval, "looptest");
llvm::BasicBlock *after = llvm::BasicBlock::Create(
getGlobalContext(), "afterloop", father
);
// Back to the body while the test holds, otherwise fall out of the loop.
d->builder.CreateCondBr(loopTest, body, after);
d->builder.SetInsertPoint(after);
return true;
}
// Emits a variable declaration: reserves a stack slot in the enclosing
// function, stores the converted initializer in it (when there is one) and
// registers the variable in the current scope.
bool BitcodeEmitter::emit(VarDeclaration const& node) {
    llvm::Function *father = d->builder.GetInsertBlock()->getParent();

    llvm::Type *varType = LLVMType(node.getType());
    if (varType == nullptr) {
        // LLVMType() has no representation for this type: allocating with a
        // null type would crash inside LLVM.
        return reportError(node, {
            "Unknown type for variable", node.getId().getValue()
        });
    }
    llvm::AllocaInst *alloc = allocateVar(father, node.getId(), varType);

    if (node.getInitializer()) {
        GUARDED(node.getInitializer()->emit(this));
        if (!convertAndStore(d, alloc, d->retval)) {
            return reportError(node, {
                "Invalid initializer for variable", node.getId().getValue()
            });
        }
    }

    // TODO pointers

    d->scope.push(node.getId().getValue(), alloc);
    return true;
}
// Emits an assignment: the value is evaluated, converted to the type of the
// target variable and stored in it. Fails when the variable is not in scope
// or the value cannot be converted.
bool BitcodeEmitter::emit(Assignment const& node) {
    std::string const target = node.getName().getValue();

    auto slot = d->scope.lookup(target);
    if (!slot) {
        return reportError(node, {
            "Attempting assignment to undefined variable", target
        });
    }

    GUARDED(node.getValue().emit(this));

    if (convertAndStore(d, *slot, d->retval)) {
        return true;
    }
    return reportError(node, {"Invalid assignment to variable", target});
}
// Emits a print statement as a call to the runtime "put" function matching
// the type of the printed expression (see PUT_NAMES).
bool BitcodeEmitter::emit(Print const& node) {
    GUARDED(node.getExpression().emit(this));
    std::vector<llvm::Value*> callargs{d->retval};

    Type const printType = MonicelliType(d->retval->getType());
    if (printType == Type::UNKNOWN) {
        return reportError(node, {"Attempting to print unknown type"});
    }

    auto const putName = PUT_NAMES.find(printType);
    if (putName == PUT_NAMES.end()) {
        return reportError(node, {"Unknown print function for type"});
    }

    llvm::Function *callee = module->getFunction(putName->second);
    if (callee == nullptr) {
        return reportError(node, {"Print function was not registered"});
    }

    d->builder.CreateCall(callee, callargs);
    return true;
}
// Emits an input statement: calls the runtime "get" function matching the
// type of the target variable (see GET_NAMES) and stores the value read in
// the variable.
bool BitcodeEmitter::emit(Input const& node) {
    std::string const target = node.getVariable().getValue();

    auto slot = d->scope.lookup(target);
    if (!slot) {
        return reportError(node, {"Attempting to read undefined variable", target});
    }
    llvm::AllocaInst *variable = *slot;

    Type const inputType = MonicelliType(variable->getAllocatedType());
    if (inputType == Type::UNKNOWN) {
        return reportError(node, {"Attempting to read unknown type"});
    }

    auto const getName = GET_NAMES.find(inputType);
    if (getName == GET_NAMES.end()) {
        return reportError(node, {"Unknown input function for type"});
    }

    llvm::Function *callee = module->getFunction(getName->second);
    if (callee == nullptr) {
        return reportError(node, {"Input function was not registered for type"});
    }

    d->builder.CreateStore(d->builder.CreateCall(callee), variable);
    return true;
}
// Emits an abort statement as a call to the runtime abort function, which
// must already be declared in the module.
bool BitcodeEmitter::emit(Abort const& node) {
    if (llvm::Function *callee = module->getFunction(ABORT_NAME)) {
        d->builder.CreateCall(callee);
        return true;
    }
    return reportError(node, {"Abort function was not registered"});
}
// Emits an assert statement as a call to the runtime assert function, passing
// the asserted expression converted to a boolean.
//
// Fails when the runtime function is not declared in the module, when the
// expression fails to emit or when it cannot be converted to a boolean.
bool BitcodeEmitter::emit(Assert const& node) {
    llvm::Function *callee = module->getFunction(ASSERT_NAME);
    if (callee == nullptr) {
        return reportError(node, {"Assert function was not registered"});
    }

    // A failed emission must stop here: d->retval would otherwise be whatever
    // an unrelated, earlier node left behind.
    GUARDED(node.getExpression().emit(this));

    llvm::Value *condition = coerce(d, d->retval, LLVMType(Type::BOOL));
    if (condition == nullptr) {
        return reportError(node, {"Cannot convert asserted expression to a boolean"});
    }

    d->builder.CreateCall(callee, {condition});
    return true;
}
// Emits a function call. Each argument is evaluated and converted to the type
// of the matching parameter; the value of the call is left in d->retval.
//
// Fails when the function is not declared in the module, when the number of
// arguments differs from the number of parameters, or when an argument cannot
// be converted to the parameter type.
bool BitcodeEmitter::emit(FunctionCall const& node) {
    llvm::Function *callee = module->getFunction(node.getName().getValue());

    if (callee == nullptr) {
        return reportError(node, {
            "Attempting to call undefined function",
            node.getName().getValue() + "()"
        });
    }

    if (callee->arg_size() != node.getArgs().size()) {
        return reportError(node, {
            "Argument number mismatch in call of",
            node.getName().getValue() + "()",
            "expected", std::to_string(callee->arg_size()),
            "given", std::to_string(node.getArgs().size())
        });
    }

    auto param = callee->arg_begin();
    std::vector<llvm::Value*> callargs;
    for (Expression const& arg: node.getArgs()) {
        GUARDED(arg.emit(this));
        llvm::Value *converted = coerce(d, d->retval, param->getType());
        if (converted == nullptr) {
            // A null argument would crash inside CreateCall().
            return reportError(node, {
                "Cannot convert argument to the parameter type in call of",
                node.getName().getValue() + "()"
            });
        }
        callargs.push_back(converted);
        ++param;
    }

    d->retval = d->builder.CreateCall(callee, callargs);
    return true;
}
// Emits a multi-way branch. The branch variable is tested against the
// condition of each case in order: a case body runs when its test is true,
// otherwise the next case is tried. The optional else body runs when no case
// matched. Every path ends up in the "endif" block.
bool BitcodeEmitter::emit(Branch const& node) {
    Branch::Body const& body = node.getBody();
    llvm::Function *func = d->builder.GetInsertBlock()->getParent();

    llvm::BasicBlock *thenbb = llvm::BasicBlock::Create(
        getGlobalContext(), "then", func
    );
    // Created detached and appended once the preceding blocks are complete.
    llvm::BasicBlock *elsebb = llvm::BasicBlock::Create(
        getGlobalContext(), "else"
    );
    llvm::BasicBlock *mergebb = llvm::BasicBlock::Create(
        getGlobalContext(), "endif"
    );

    assert(!body.getCases().empty());
    BranchCase const& last = body.getCases().back();

    for (BranchCase const& cas: body.getCases()) {
        // Stop on failure: the test below would otherwise be built on a stale
        // d->retval.
        GUARDED(emitSemiExpression(node.getVar(), cas.getCondition()));
        d->builder.CreateCondBr(
            isTrue(d, d->retval, "condition"), thenbb, elsebb
        );
        d->builder.SetInsertPoint(thenbb);
        GUARDED(ensureBasicBlock(cas.getBody(), mergebb));
        func->getBasicBlockList().push_back(elsebb);
        d->builder.SetInsertPoint(elsebb);

        // The "else" block of the last case hosts the else body (if any), all
        // the others host the test of the following case.
        if (&cas != &last) {
            thenbb = llvm::BasicBlock::Create(getGlobalContext(), "then", func);
            elsebb = llvm::BasicBlock::Create(getGlobalContext(), "else");
        }
    }

    if (body.getElse()) {
        GUARDED(ensureBasicBlock(*body.getElse(), mergebb));
    } else {
        d->builder.CreateBr(mergebb);
    }

    func->getBasicBlockList().push_back(mergebb);
    d->builder.SetInsertPoint(mergebb);

    return true;
}
// Declares a function in the module, or reuses the existing declaration with
// the same name, and leaves the llvm::Function in d->retval.
// Fails when two arguments share a name, when the function already has a
// body, or when the argument count differs from the existing declaration.
bool BitcodeEmitter::emit(FunctionPrototype const& node) {
std::vector<llvm::Type*> argTypes;
for (FunArg const& arg: node.getArgs()) {
argTypes.emplace_back(LLVMType(arg.getType()));
}
// Reject duplicated argument names.
std::unordered_set<std::string> argsSet;
for (FunArg const& arg: node.getArgs()) {
std::string const& name = arg.getName().getValue();
if (argsSet.find(name) != argsSet.end()) {
return reportError(node, {
"Two arguments with same name to function",
node.getName().getValue() + "():", name
});
}
argsSet.insert(name);
}
llvm::FunctionType *ftype = llvm::FunctionType::get(
LLVMType(node.getType()), argTypes, false
);
llvm::Function *func = llvm::Function::Create(
ftype, llvm::Function::ExternalLinkage, node.getName().getValue(), module.get()
);
// A name different from the requested one is taken as the sign that the
// name was already in use (presumably LLVM renames the newcomer — TODO
// confirm): drop the new function and work on the existing one.
if (func->getName() != node.getName().getValue()) {
func->eraseFromParent();
func = module->getFunction(node.getName().getValue());
// A function with a body has already been defined.
if (!func->empty()) {
return reportError(node, {
"Redefining function", node.getName().getValue()
});
}
if (func->arg_size() != node.getArgs().size()) {
return reportError(node, {
"Argument number mismatch in definition vs declaration of",
node.getName().getValue() + "()",
"expected", std::to_string(func->arg_size()),
"given", std::to_string(node.getArgs().size())
});
}
}
// Name the LLVM arguments after the declared ones.
auto argToEmit = func->arg_begin();
for (FunArg const& arg: node.getArgs()) {
argToEmit->setName(arg.getName().getValue());
++argToEmit;
}
d->retval = func;
return true;
}
// Emits a function definition: prototype, argument slots, body and a single
// exit block returning the content of the return slot (or nothing for void
// functions). The finished function is verified and optimized.
bool BitcodeEmitter::emit(Function const& node) {
    GUARDED(node.getPrototype().emit(this));

    // The prototype emitter leaves the function in d->retval. LLVM's own cast
    // machinery is used instead of dynamic_cast, which depends on C++ RTTI
    // being available for LLVM classes.
    llvm::Function *func = llvm::dyn_cast<llvm::Function>(d->retval);
    assert(func != nullptr);

    llvm::BasicBlock *bb = llvm::BasicBlock::Create(
        getGlobalContext(), "entry", func
    );
    d->builder.SetInsertPoint(bb);

    bool isNotVoid = node.getPrototype().getType() != Type::VOID;
    d->funcRetval = isNotVoid? allocateReturnVariable(func): nullptr;
    // Appended to the function only after the body has been emitted.
    d->funcExit = llvm::BasicBlock::Create(getGlobalContext(), "return");

    d->scope.enter();

    // Copy each argument into its own stack slot, so that arguments can be
    // looked up and assigned like any other variable.
    auto argToAlloc = func->arg_begin();
    for (FunArg const& arg: node.getPrototype().getArgs()) {
        llvm::AllocaInst *alloc = allocateVar(
            func, arg.getName(), LLVMType(arg.getType())
        );
        d->builder.CreateStore(&*argToAlloc, alloc);
        d->scope.push(arg.getName().getValue(), alloc);
        ++argToAlloc;
    }

    for (Statement const& stat: node.getBody()) {
        GUARDED(stat.emit(this));
    }

    d->scope.leave();

    // A body not ending with a return statement falls through to the exit.
    if (!d->builder.GetInsertBlock()->getTerminator()) {
        d->builder.CreateBr(d->funcExit);
    }

    func->getBasicBlockList().push_back(d->funcExit);
    d->builder.SetInsertPoint(d->funcExit);

    if (isNotVoid) {
        d->builder.CreateRet(d->builder.CreateLoad(d->funcRetval));
    } else {
        d->builder.CreateRetVoid();
    }

    verifyFunction(*func);
    d->optimizer->run(*func);

    return true;
}
// Module nodes produce no bitcode: nothing is emitted and the call always
// succeeds.
bool BitcodeEmitter::emit(Module const& node) {
return true;
}
// Emits a whole program. Order matters: all prototypes (registered externals
// first, then the program's own functions) are declared before any function
// body is emitted; the main function, when present, comes last.
bool BitcodeEmitter::emit(Program const& program) {
auto const& externals = getModuleRegistry().getRegisteredFunctions();
for (FunctionPrototype const& proto: externals) {
GUARDED(proto.emit(this));
}
// First pass: declarations only.
for (Function const& function: program.getFunctions()) {
GUARDED(function.getPrototype().emit(this));
}
// Second pass: bodies.
for (Function const& function: program.getFunctions()) {
GUARDED(function.emit(this));
}
if (program.getMain()) {
GUARDED(program.getMain()->emit(this));
}
// NOTE(review): the verification result is not inspected.
verifyModule(*module);
return true;
}
// Emits the read of a variable: loads its stack slot and leaves the loaded
// value in d->retval. Fails when the variable is not in scope.
bool BitcodeEmitter::emit(Id const& node) {
    auto slot = d->scope.lookup(node.getValue());

    if (slot) {
        d->retval = d->builder.CreateLoad(*slot, node.getValue().c_str());
        return true;
    }

    return reportError(node, {"Undefined variable", node.getValue()});
}
// Leaves the integer literal in d->retval as a 64 bit signed constant.
bool BitcodeEmitter::emit(Integer const& node) {
d->retval = llvm::ConstantInt::get(
getGlobalContext(), llvm::APInt(64, node.getValue(), true)
);
return true;
}
// Leaves the floating point literal in d->retval as a constant.
bool BitcodeEmitter::emit(Float const& node) {
d->retval = llvm::ConstantFP::get(
getGlobalContext(), llvm::APFloat(node.getValue())
);
return true;
}
// Emits the floating point or the integer flavour of an instruction,
// depending on the type the operation is carried out in.
#define HANDLE(intop, fpop) \
    if (fp) { \
        d->retval = d->builder.Create##fpop(left, right); \
    } else { \
        d->retval = d->builder.Create##intop(left, right); \
    }

// Emits an instruction only defined for integers, reporting an error when the
// operation would be carried out in a floating point type.
#define HANDLE_INT_ONLY(op, symbol) \
    if (fp) { \
        return reportError(node, {"Operator " #symbol " cannot be applied to float values!"}); \
    } else { \
        d->retval = d->builder.Create##op(left, right); \
    }

// Emits the binary operation `left op right` and leaves its value in
// d->retval. Both operands are first converted to their common type, as
// picked by deduceResultType(). `node` is only used to locate errors.
//
// Integers are signed everywhere else in this emitter (signed division, sign
// extension, signed int <-> float conversions), so relational operators use
// the signed predicates: the unsigned ones would make -1 greater than 0.
// Floating point comparisons are ordered, like the equality test: any
// comparison involving NaN is false.
static
bool createOp(BitcodeEmitter::Private *d, Localizable const& node, llvm::Value *left, Operator op, llvm::Value *right) {
    llvm::Type *retType = deduceResultType(left, right);

    if (retType == nullptr) {
        return reportError(node, {"Cannot combine operators."});
    }

    bool fp = isFP(retType);

    left = coerce(d, left, retType);
    right = coerce(d, right, retType);

    if (left == nullptr || right == nullptr) {
        return reportError(node, {"Cannot convert operators to result type."});
    }

    switch (op) {
    case Operator::PLUS:
        HANDLE(Add, FAdd)
        break;
    case Operator::MINUS:
        HANDLE(Sub, FSub)
        break;
    case Operator::TIMES:
        HANDLE(Mul, FMul)
        break;
    case Operator::DIV:
        HANDLE(SDiv, FDiv)
        break;
    case Operator::SHL:
        HANDLE_INT_ONLY(Shl, <<);
        break;
    case Operator::SHR:
        // NOTE(review): logical shift, zeros enter from the left even for
        // negative values. Confirm whether an arithmetic shift is intended.
        HANDLE_INT_ONLY(LShr, >>);
        break;
    case Operator::LT:
        HANDLE(ICmpSLT, FCmpOLT)
        break;
    case Operator::GT:
        HANDLE(ICmpSGT, FCmpOGT)
        break;
    case Operator::GTE:
        HANDLE(ICmpSGE, FCmpOGE)
        break;
    case Operator::LTE:
        HANDLE(ICmpSLE, FCmpOLE)
        break;
    case Operator::EQ:
        HANDLE(ICmpEQ, FCmpOEQ)
        break;
    }

    return true;
}

#undef HANDLE
#undef HANDLE_INT_ONLY
// Emits a binary expression: left operand, right operand, then the operation
// itself. The value of the expression is left in d->retval.
bool BitcodeEmitter::emit(BinaryExpression const& expression) {
    if (!expression.getLeft().emit(this)) return false;
    llvm::Value *lhs = d->retval;

    if (!expression.getRight().emit(this)) return false;
    llvm::Value *rhs = d->retval;

    return createOp(d, expression, lhs, expression.getOperator(), rhs);
}
// Emits `left op operand`, where the operator and the operand come from the
// semi expression `right`. The value is left in d->retval.
bool BitcodeEmitter::emitSemiExpression(Id const& left, SemiExpression const& right) {
    if (!left.emit(this)) return false;
    llvm::Value *variable = d->retval;

    if (!right.getLeft().emit(this)) return false;
    llvm::Value *operand = d->retval;

    return createOp(d, right, variable, right.getOperator(), operand);
}
// Emits `statements` in a scope of their own, then makes sure the block being
// built is terminated: when the statements did not end it themselves, a
// branch to `after` is appended.
bool BitcodeEmitter::ensureBasicBlock(PointerList<Statement> const& statements, llvm::BasicBlock *after) {
    d->scope.enter();
    for (Statement const& current: statements) {
        if (!current.emit(this)) return false;
    }
    d->scope.leave();

    bool const unterminated = !d->builder.GetInsertBlock()->getTerminator();
    if (unterminated) {
        d->builder.CreateBr(after);
    }

    return true;
}

View File

@ -1,81 +0,0 @@
#ifndef BITCODE_HPP
#define BITCODE_HPP
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "Emitter.hpp"
#include "Pointers.hpp"
namespace llvm {
class Module;
class Function;
class BasicBlock;
}
namespace monicelli {
class SemiExpression;
class Statement;
class BitcodeEmitter: public Emitter {
public:
BitcodeEmitter();
BitcodeEmitter(BitcodeEmitter &) = delete;
virtual ~BitcodeEmitter();
virtual bool emit(Return const&) override;
virtual bool emit(Loop const&) override;
virtual bool emit(VarDeclaration const&) override;
virtual bool emit(Assignment const&) override;
virtual bool emit(Print const&) override;
virtual bool emit(Input const&) override;
virtual bool emit(Abort const&) override;
virtual bool emit(Assert const&) override;
virtual bool emit(Branch const&) override;
virtual bool emit(FunctionPrototype const&) override;
virtual bool emit(Function const&) override;
virtual bool emit(Module const&) override;
virtual bool emit(Program const&) override;
virtual bool emit(Id const&) override;
virtual bool emit(Integer const&) override;
virtual bool emit(Float const&) override;
virtual bool emit(FunctionCall const&) override;
virtual bool emit(BinaryExpression const&) override;
llvm::Module const& getModule() const {
return *module;
}
struct Private;
private:
bool emitSemiExpression(Id const& left, SemiExpression const& right);
bool ensureBasicBlock(PointerList<Statement> const& statements, llvm::BasicBlock *after);
Pointer<llvm::Module> module;
Private *d;
};
}
#endif

View File

@ -1,82 +0,0 @@
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "CLineParser.hpp"
#include <boost/program_options.hpp>
#include <cstdlib>
#include <string>
#include <vector>
#include <iostream>
namespace po = boost::program_options;
using namespace monicelli;
static po::variables_map CONFIG;
static const std::string VERSION_STRING =
"mcc version 1.0.0\n"
"\n"
"Monicelli compiler <https://github.com/esseks/monicelli>\n"
"\n"
"Copyright © 2014,2015 Stefano Sanfilippo\n"
"\n"
"This program comes with ABSOLUTELY NO WARRANTY;\n"
"This is free software, and you are welcome to redistribute it\n"
"under certain conditions; See LICENSE.txt for all details"
;
static const std::string USAGE_STRING = "mcc, Monicelli compiler.\n\nUsage: ";
// Read-only access to the options stored by parseCommandLine().
po::variables_map const& monicelli::getConfig() {
return CONFIG;
}
void monicelli::parseCommandLine(int argc, char **argv) {
po::options_description desc(
USAGE_STRING + argv[0] + " [options] file.mc ..."
);
desc.add_options()
("help,h", "display this help message")
("version,v", "display version")
("c++,+", "emit C++ source code instead of LLVM bitcode")
("input,i", po::value<std::vector<std::string>>(), "input files to process")
;
po::positional_options_description positional;
positional.add("input", -1);
po::store(
po::command_line_parser(argc, argv)
.options(desc)
.positional(positional)
.run(),
CONFIG
);
po::notify(CONFIG);
if (configHas("help")) {
std::cout << desc;
exit(0);
}
if (configHas("version")) {
std::cout << VERSION_STRING << std::endl;
exit(0);
}
}

View File

@ -1,43 +0,0 @@
#ifndef CLINE_PARSER_HPP
#define CLINE_PARSER_HPP
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <boost/program_options.hpp>
namespace monicelli {
boost::program_options::variables_map const& getConfig();
void parseCommandLine(int argc, char **argv);
// Returns the value of option `name` from the parsed configuration, read as
// a T.
// NOTE(review): presumably fails when the option was not given or holds a
// different type; check with configHas() first — confirm against Boost docs.
template<typename T> inline
T config(std::string const& name) {
return getConfig()[name].as<T>();
}
// True when option `name` is present in the parsed configuration.
static inline bool configHas(std::string const& name) {
    return getConfig().count(name) != 0;
}
}
#endif

View File

@ -1,92 +1,64 @@
#
# Monicelli: an esoteric language compiler
#
# Copyright (C) 2014 Stefano Sanfilippo
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Copyright 2017 the Monicelli project authors. All rights reserved.
# Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
## 1. Find Flex and Bison
find_package(LLVM REQUIRED)
find_package(Ragel REQUIRED)
find_package(BISON REQUIRED)
find_package(FLEX 2.5 REQUIRED)
option(ENABLE_LINKER "Enable the Monicelli linker. Requires POSIX." ON)
if (BISON_VERSION VERSION_LESS 2.5)
message(FATAL_ERROR "At least Bison 2.5 is required.")
elseif(BISON_VERSION VERSION_LESS 3.0)
message("== Bison 2.5 was found. You have to apply cmake/bison2.patch...")
if (ENABLE_LINKER)
add_definitions(-DMONICELLI_ENABLE_LINKER)
endif()
## 2. External components
find_package(Boost 1.48 REQUIRED regex system filesystem program_options)
find_package(LLVM REQUIRED CONFIG)
find_library(YAML_LIBRARIES yaml-cpp)
find_path(YAML_INCLUDE_DIRS yaml.h /usr/include/yaml-cpp/)
add_definitions(
${Boost_DEFINITIONS}
${LLVM_DEFINITIONS}
-std=c++11
-Wall -Wextra -Werror
# The lexer uses implicit fallthroughs all over, but it's OK.
-Wno-implicit-fallthrough
${LLVM_CXXFLAGS}
-g -O2 -UNDEBUG
)
include_directories(
${Boost_INCLUDE_DIRS}
${LLVM_INCLUDE_DIRS}
${YAML_INCLUDE_DIRS}
)
## 3. Build
include_directories(
${CMAKE_CURRENT_BINARY_DIR}
${CMAKE_CURRENT_SOURCE_DIR}
${LLVM_INCLUDE_DIR}
)
bison_target(Parser Monicelli.ypp ${CMAKE_CURRENT_BINARY_DIR}/Parser.cpp)
flex_target(Scanner Monicelli.lpp ${CMAKE_CURRENT_BINARY_DIR}/Lexer.cpp)
add_flex_bison_dependency(Scanner Parser)
link_directories(
${LLVM_LIBRARY_DIR}
)
add_custom_command(
OUTPUT lexer.rl.cpp
MAIN_DEPENDENCY lexer.rl
DEPENDS lexer.h
COMMAND ${RAGEL} -G2 "${CMAKE_CURRENT_SOURCE_DIR}/lexer.rl" -o lexer.rl.cpp
WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
VERBATIM
)
add_executable(mcc
main.cpp Nodes.cpp CLineParser.cpp
ModuleRegistry.cpp ModuleLoader.cpp
${BISON_Parser_OUTPUTS} ${FLEX_Scanner_OUTPUTS}
CppEmitter.cpp BitcodeEmitter.cpp
main.cpp
asmgen.cpp
codegen.cpp
codegen.def
ast.cpp
ast.def
ast-visitor.h
ast-printer.cpp
parser.cpp
lexer.cpp
lexer.def
"${CMAKE_CURRENT_BINARY_DIR}/lexer.rl.cpp"
options.cpp
errors.cpp
support.cpp
location.h
iterators.h
types.def
operators.def
)
target_compile_options(mcc PRIVATE
${LLVM_CXXFLAGS} ${Boost_CXXFLAGS}
-Wall -Wextra -Werror -Wno-unused-parameter -Wno-deprecated-register
-std=c++0x -DYYDEBUG=0
)
llvm_map_components_to_libnames(LLVM_LIBRARIES
support core native bitwriter
)
target_link_libraries(mcc
${Boost_LIBRARIES}
${LLVM_LIBRARIES}
${YAML_LIBRARIES}
)
## 5. Build the runtime library too
add_library(mcrt STATIC Runtime.c)
## 6. Install targets
install(TARGETS mcc DESTINATION bin/)
install(TARGETS mcrt DESTINATION lib/)
target_link_libraries(mcc ${LLVM_LIBS})
install(TARGETS mcc RUNTIME DESTINATION bin)

View File

@ -1,340 +0,0 @@
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <string>
#include "Nodes.hpp"
#include "CppEmitter.hpp"
#include "Pointers.hpp"
using namespace monicelli;
// Evaluate `call`; when it yields false, return false from the enclosing
// function. The expansion is wrapped in do { } while (0) so that it is a
// single statement and cannot capture an `else` that follows it.
// As before, the macro has no ending ';' - the caller supplies it.
#define GUARDED(call) do { if (!(call)) return false; } while (0)
static const std::string STATEMENT_TERMINATOR = ";\n";
static const std::string BLOCK = " ";
// Increase the current indentation level by one.
void CppEmitter::indent() {
    ++indent_chars;
}
// Decrease the current indentation level by one.
void CppEmitter::dedent() {
    --indent_chars;
}
// Write BLOCK once per current indentation level.
// Returns true while the output stream is still in a good state.
bool CppEmitter::emitIndent() {
    for (int i = 0; i < indent_chars; ++i) {
        stream << BLOCK;
    }
    // The stream's bool conversion is explicit since C++11, so it is not
    // applied implicitly by a return statement.
    return static_cast<bool>(stream);
}
// Emit a whole program, in this order: every module, a prototype line for
// every function, every function definition, and finally main when present.
// Returns false as soon as a guarded nested emit fails; otherwise returns the
// state of the output stream.
bool CppEmitter::emit(Program const& program) {
    // Modules are only read here, so bind by const reference instead of
    // copying each one.
    for (Module const& m: program.getModules()) {
        GUARDED(m.emit(this));
        stream << "\n";
    }

    if (!program.getModules().empty()) {
        stream << "\n";
    }

    // Forward declarations first, so functions can call each other
    // regardless of definition order.
    for (Function const& function: program.getFunctions()) {
        emit(function.getPrototype());
        stream << ";\n";
    }

    if (!program.getFunctions().empty()) {
        stream << "\n";
    }

    for (Function const& function: program.getFunctions()) {
        GUARDED(function.emit(this));
    }

    if (program.getMain()) {
        GUARDED(program.getMain()->emit(this));
    }

    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit each statement of `node` on its own indented line, followed by
// STATEMENT_TERMINATOR. Returns false as soon as a statement fails to emit;
// otherwise returns the state of the output stream.
bool CppEmitter::emitStatements(PointerList<Statement> const& node) {
    for (Statement const& s: node) {
        emitIndent();
        GUARDED(s.emit(this));
        stream << STATEMENT_TERMINATOR;
    }
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit `main` as an `int main()` definition whose body is the function's
// statements, indented one level. Returns the state of the output stream.
bool CppEmitter::emitMain(Function const& main) {
    stream << "int main() {\n";
    indent();
    emitStatements(main.getBody());
    dedent();
    stream << "}\n";
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit the identifier's name. Returns the state of the output stream.
bool CppEmitter::emit(Id const& id) {
    stream << id.getValue();
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit the integer literal's value. Returns the state of the output stream.
bool CppEmitter::emit(Integer const& num) {
    stream << num.getValue();
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit the floating point literal's value. Returns the state of the output
// stream.
bool CppEmitter::emit(Float const& num) {
    stream << num.getValue();
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit `return`, followed by a space and the returned expression when the
// node carries one. Returns false if the expression fails to emit; otherwise
// returns the state of the output stream.
bool CppEmitter::emit(Return const& node) {
    stream << "return";
    if (node.getExpression()) {
        stream << ' ';
        GUARDED(node.getExpression()->emit(this));
    }
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit `std::cout << <expression> << std::endl`.
// The expression is parenthesised unless it is a SimpleExpression or a
// FunctionCall. Returns false if the expression fails to emit; otherwise
// returns the state of the output stream.
bool CppEmitter::emit(Print const& node) {
    bool needsBraces =
        (dynamic_cast<SimpleExpression const*>(&node.getExpression()) == nullptr)
        &&
        (dynamic_cast<FunctionCall const*>(&node.getExpression()) == nullptr)
    ;

    stream << "std::cout << ";
    if (needsBraces) {
        stream << '(';
    }
    GUARDED(node.getExpression().emit(this));
    if (needsBraces) {
        stream << ')';
    }
    stream << " << std::endl";
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit two statements: a `std::cout` prompt made of the variable name
// followed by "? ", then a `std::cin >>` read into that variable.
// Returns false if the variable fails to emit; otherwise returns the state of
// the output stream.
bool CppEmitter::emit(Input const& node) {
    stream << "std::cout << \"";
    GUARDED(node.getVariable().emit(this));
    stream << "? \";\n";
    // The second statement starts on a new line, so it is indented here.
    emitIndent();
    stream << "std::cin >> ";
    GUARDED(node.getVariable().emit(this));
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit a `std::exit(1)` call. Returns the state of the output stream.
bool CppEmitter::emit(Abort const&) {
    stream << "std::exit(1)";
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit `assert(<expression>)`. Returns false if the expression fails to emit;
// otherwise returns the state of the output stream.
bool CppEmitter::emit(Assert const& node) {
    stream << "assert(";
    GUARDED(node.getExpression().emit(this));
    stream << ")";
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit the loop as `do { <body> } while (<condition>)`, with the body
// indented one level. Returns false if the condition fails to emit; otherwise
// returns the state of the output stream.
bool CppEmitter::emit(Loop const& loop) {
    stream << "do {\n";
    indent();
    emitStatements(loop.getBody());
    dedent();
    emitIndent();
    stream << "} while (";
    GUARDED(loop.getCondition().emit(this));
    stream << ")";
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit the remainder of one branch case: the operator and right-hand side of
// the condition, the closing ") {", the indented body and the closing "}".
// The opening "if (" and the tested variable are written by the caller.
// Returns the state of the output stream.
bool CppEmitter::emitBranchCase(BranchCase const& node) {
    emitBranchCondition(node.getCondition());
    stream << ") {\n";
    indent();
    emitStatements(node.getBody());
    dedent();
    emitIndent();
    stream << "}";
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit a branch as an `if (...) {...} else if (...) {...}` chain with an
// optional trailing `else` block. Every case tests the same variable, so the
// variable is re-emitted in front of each case condition.
// Returns false if the variable fails to emit; otherwise returns the state of
// the output stream.
bool CppEmitter::emit(Branch const& branch) {
    auto &body = branch.getBody();
    auto &var = branch.getVar();

    stream << "if (";
    GUARDED(var.emit(this));

    if (!body.getCases().empty()) {
        // Identify the last case by address so that no dangling
        // " else if (" is written after it.
        BranchCase const& last = body.getCases().back();
        for (BranchCase const& cas: body.getCases()) {
            emitBranchCase(cas);
            if (&cas != &last) {
                stream << " else if (";
                GUARDED(var.emit(this));
            }
        }
    }

    if (!body.getElse()) {
        // The stream's bool conversion is explicit since C++11.
        return static_cast<bool>(stream);
    }

    stream << " else {\n";
    indent();
    emitStatements(*body.getElse());
    dedent();
    emitIndent();
    stream << "}";
    return static_cast<bool>(stream);
}
// Emit `<name> = <value>`. Returns false if either side fails to emit;
// otherwise returns the state of the output stream.
bool CppEmitter::emit(Assignment const& assignment) {
    GUARDED(assignment.getName().emit(this));
    stream << " = ";
    GUARDED(assignment.getValue().emit(this));
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit call arguments separated by ", ". Writes nothing for an empty list.
// Returns false if an argument fails to emit; otherwise returns the state of
// the output stream.
bool CppEmitter::emitFunctionArglist(PointerList<Expression> const& args) {
    // The stream's bool conversion is explicit since C++11.
    if (args.empty()) return static_cast<bool>(stream);

    // The last argument is identified by address to suppress the trailing
    // separator.
    Expression const& last = args.back();
    for (Expression const& arg: args) {
        GUARDED(arg.emit(this));
        if (&arg != &last) {
            stream << ", ";
        }
    }
    return static_cast<bool>(stream);
}
// Emit `<name>(<arguments>)`. Returns false if the name fails to emit;
// otherwise returns the state of the output stream.
bool CppEmitter::emit(FunctionCall const& funcall) {
    GUARDED(funcall.getName().emit(this));
    stream << "(";
    emitFunctionArglist(funcall.getArgs());
    stream << ")";
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit a function definition: its prototype, then the body in braces indented
// one level, followed by a blank line. Returns the state of the output stream.
bool CppEmitter::emit(Function const& function) {
    emit(function.getPrototype());
    stream << " {\n";
    indent();
    emitStatements(function.getBody());
    dedent();
    stream << "}\n\n";
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit formal parameters as `<type> <name>` (or `<type>* <name>` for pointer
// parameters), separated by ", ". Writes nothing for an empty list.
// Returns false if a parameter name fails to emit; otherwise returns the
// state of the output stream.
bool CppEmitter::emitFunctionParams(PointerList<FunArg> const& funargs) {
    // The stream's bool conversion is explicit since C++11.
    if (funargs.empty()) return static_cast<bool>(stream);

    // The last parameter is identified by address to suppress the trailing
    // separator.
    FunArg const& last = funargs.back();
    for (FunArg const& funarg: funargs) {
        stream << funarg.getType() << (funarg.isPointer()? "* ": " ");
        GUARDED(funarg.getName().emit(this));
        if (&funarg != &last) {
            stream << ", ";
        }
    }
    return static_cast<bool>(stream);
}
// Emit an `#include` directive for the module: angle brackets for
// Module::SYSTEM modules, double quotes for every other type.
// Returns the state of the output stream.
bool CppEmitter::emit(Module const& module) {
    bool system = (module.getType() == Module::SYSTEM);
    stream << "#include " << (system? '<': '"') << module.getName() << (system? '>': '"');
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit `<return type> <name>(<parameters>)`.
// A function named "main" is always given the return type `int`, whatever
// type the prototype declares. Returns false if the name fails to emit;
// otherwise returns the state of the output stream.
bool CppEmitter::emit(FunctionPrototype const& proto) {
    if (proto.getName().getValue() == "main") {
        stream << "int ";
    } else {
        stream << proto.getType() << ' ';
    }
    GUARDED(proto.getName().emit(this));
    stream << "(";
    emitFunctionParams(proto.getArgs());
    stream << ")";
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit `<type> [*]<name>`, followed by ` = <initializer>` when the
// declaration has one. Returns false if the name or the initializer fails to
// emit; otherwise returns the state of the output stream.
bool CppEmitter::emit(VarDeclaration const& decl) {
    stream << decl.getType() << ' ';
    if (decl.isPointer()) stream << '*';
    GUARDED(decl.getId().emit(this));

    if (decl.getInitializer()) {
        stream << " = ";
        GUARDED(decl.getInitializer()->emit(this));
    }
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit `<left> <operator> <right>` without surrounding parentheses.
// Returns false if either operand fails to emit; otherwise returns the state
// of the output stream.
bool CppEmitter::emit(BinaryExpression const& node) {
    GUARDED(node.getLeft().emit(this));
    stream << ' ' << node.getOperator() << ' ';
    GUARDED(node.getRight().emit(this));
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}
// Emit the operator of a branch condition followed by its expression.
// The expression is parenthesised unless it is a SimpleExpression.
// Returns false if the expression fails to emit; otherwise returns the state
// of the output stream.
bool CppEmitter::emitBranchCondition(SemiExpression const& node) {
    bool braces = (dynamic_cast<SimpleExpression const*>(&node.getLeft()) == nullptr);

    stream << ' ' << node.getOperator() << ' ';
    if (braces) stream << "(";
    GUARDED(node.getLeft().emit(this));
    if (braces) stream << ")";
    // The stream's bool conversion is explicit since C++11.
    return static_cast<bool>(stream);
}

View File

@ -1,73 +0,0 @@
#ifndef CPPEMITTER_HPP
#define CPPEMITTER_HPP
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "Emitter.hpp"
#include <iostream>
namespace monicelli {
class CppEmitter: public Emitter {
public:
CppEmitter(std::ostream *stream): stream(*stream), indent_chars(0) {}
virtual bool emit(Return const&) override;
virtual bool emit(Loop const&) override;
virtual bool emit(VarDeclaration const&) override;
virtual bool emit(Assignment const&) override;
virtual bool emit(Print const&) override;
virtual bool emit(Input const&) override;
virtual bool emit(Abort const&) override;
virtual bool emit(Assert const&) override;
virtual bool emit(Branch const&) override;
virtual bool emit(FunctionPrototype const&) override;
virtual bool emit(Function const&) override;
virtual bool emit(Module const&) override;
virtual bool emit(Program const&) override;
virtual bool emit(FunctionCall const&) override;
virtual bool emit(Id const&) override;
virtual bool emit(Integer const&) override;
virtual bool emit(Float const&) override;
virtual bool emit(BinaryExpression const&) override;
private:
bool emitIndent();
bool emitFunctionParams(PointerList<FunArg> const& funargs);
bool emitFunctionArglist(PointerList<Expression> const& args);
bool emitStatements(PointerList<Statement> const& node);
bool emitBranchCondition(SemiExpression const& node);
bool emitBranchCase(BranchCase const& node);
bool emitMain(Function const& main);
void indent();
void dedent();
std::ostream &stream;
int indent_chars;
};
}
#endif

View File

@ -1,73 +0,0 @@
#ifndef EMITTER_HPP
#define EMITTER_HPP
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace monicelli {
class Id;
class Number;
class Integer;
class Float;
class Return;
class Loop;
class VarDeclaration;
class Assignment;
class Print;
class Input;
class Abort;
class Assert;
class FunctionCall;
class Branch;
class Main;
class FunctionPrototype;
class Function;
class Module;
class Program;
class BinaryExpression;
// Abstract interface with one emit() overload per emittable node type.
// Each overload returns true on success and false on failure.
class Emitter {
public:
    // Polymorphic base: a virtual destructor makes it safe to destroy a
    // concrete emitter through an Emitter pointer.
    virtual ~Emitter() {}

    virtual bool emit(Return const&) = 0;
    virtual bool emit(Loop const&) = 0;
    virtual bool emit(VarDeclaration const&) = 0;
    virtual bool emit(Assignment const&) = 0;
    virtual bool emit(Print const&) = 0;
    virtual bool emit(Input const&) = 0;
    virtual bool emit(Abort const&) = 0;
    virtual bool emit(Assert const&) = 0;
    virtual bool emit(FunctionPrototype const&) = 0;
    virtual bool emit(Branch const&) = 0;
    virtual bool emit(Function const&) = 0;
    virtual bool emit(Module const&) = 0;
    virtual bool emit(Program const&) = 0;
    virtual bool emit(Id const&) = 0;
    virtual bool emit(Integer const&) = 0;
    virtual bool emit(Float const&) = 0;
    virtual bool emit(FunctionCall const&) = 0;
    virtual bool emit(BinaryExpression const&) = 0;
};
}
#endif

View File

@ -1,55 +0,0 @@
#include "Nodes.hpp"
#include "ModuleRegistry.hpp"
#include "ModuleLoader.hpp"
#include <yaml-cpp/yaml.h>
#include <string>
using namespace monicelli;
// Map a type name read from a module description to the matching Type.
// Any name other than the five listed below maps to Type::VOID.
static
Type toType(std::string const& value) {
    if (value == "int") return Type::INT;
    if (value == "float") return Type::FLOAT;
    if (value == "double") return Type::DOUBLE;
    if (value == "char") return Type::CHAR;
    if (value == "bool") return Type::BOOL;
    return Type::VOID;
}
void monicelli::loadModule(std::string const& from, ModuleRegistry &to) {
YAML::Node module = YAML::LoadFile(from);
if (!module["functions"]) return;
for (auto const& proto: module["functions"]) {
PointerList<FunArg> *args = new PointerList<FunArg>();
for (auto const& arg: proto.second["args"]) {
args->push_back(new FunArg(
new Id(arg.first.as<std::string>()),
toType(arg.second.as<std::string>()),
false
));
}
Type type;
if (proto.second["type"]) {
type = toType(proto.second["type"].as<std::string>());
} else {
type = Type::VOID;
}
to.registerFunction(new FunctionPrototype(
new Id(proto.first.as<std::string>()), type, args
));
}
}

View File

@ -1,10 +0,0 @@
#ifndef MODULE_LOADER_HPP
#define MODULE_LOADER_HPP

// Declare everything the prototype below names, so this header can be
// included on its own and in any order.
#include <string>

namespace monicelli {

class ModuleRegistry;

// Load function prototypes from the module file `from` and register them
// in `to`.
void loadModule(std::string const& from, monicelli::ModuleRegistry &to);

}

#endif

View File

@ -1,92 +0,0 @@
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "ModuleRegistry.hpp"
#include "Pointers.hpp"
#include "Nodes.hpp"
using namespace monicelli;
// The single registry instance handed out by getModuleRegistry().
static ModuleRegistry sharedRegistry;

// Return a reference to the file-level registry instance.
ModuleRegistry& monicelli::getModuleRegistry() {
    ModuleRegistry &registry = sharedRegistry;
    return registry;
}
// Private implementation: the set of registered prototypes.
struct ModuleRegistry::Private {
    boost::ptr_unordered_set<FunctionPrototype> prototypes;
};

// Allocate the private implementation.
ModuleRegistry::ModuleRegistry(): d(new Private) {}

// Release the private implementation.
ModuleRegistry::~ModuleRegistry() {
    delete d;
}
// Read-only access to the set of registered prototypes.
PointerSet<FunctionPrototype> const& ModuleRegistry::getRegisteredFunctions() const {
    Private const& impl = *d;
    return impl.prototypes;
}
// Insert `proto` into the set of registered prototypes.
void ModuleRegistry::registerFunction(FunctionPrototype *proto) {
    Private &impl = *d;
    impl.prototypes.insert(proto);
}
// PUT(type, funcname): builds a prototype named after `funcname` (the macro
// argument is stringified) that returns Type::VOID and takes a single
// non-pointer parameter "value" of the given type.
#define PUT(type, funcname) \
new FunctionPrototype { \
new Id {#funcname}, Type::VOID, \
plist_of({ \
new FunArg {new Id {"value"}, type, false} \
}), \
}
// GET(type, funcname): builds a prototype named after `funcname` that takes
// no parameters and returns the given type.
#define GET(type, funcname) \
new FunctionPrototype { \
new Id {#funcname}, type, \
new PointerList<FunArg> {}, \
}
// Register the built-in __Monicelli_* prototypes in `r`: one put/get pair per
// value type, plus assert and abort.
void monicelli::registerStdLib(ModuleRegistry &r) {
// NOTE(review): the Bool put/get pair is registered with Type::CHAR rather
// than Type::BOOL - presumably deliberate, confirm against the runtime.
r.registerFunction(PUT(Type::CHAR, __Monicelli_putBool));
r.registerFunction(PUT(Type::CHAR, __Monicelli_putChar));
r.registerFunction(PUT(Type::FLOAT, __Monicelli_putFloat));
r.registerFunction(PUT(Type::DOUBLE, __Monicelli_putDouble));
r.registerFunction(PUT(Type::INT, __Monicelli_putInt));
r.registerFunction(GET(Type::CHAR, __Monicelli_getBool));
r.registerFunction(GET(Type::CHAR, __Monicelli_getChar));
r.registerFunction(GET(Type::FLOAT, __Monicelli_getFloat));
r.registerFunction(GET(Type::DOUBLE, __Monicelli_getDouble));
r.registerFunction(GET(Type::INT, __Monicelli_getInt));
// assert takes one non-pointer CHAR parameter named "condition" and returns VOID.
r.registerFunction(new FunctionPrototype {
new Id("__Monicelli_assert"), Type::VOID,
plist_of({
new FunArg {new Id("condition"), Type::CHAR, false}
})
});
// abort takes no parameters and returns VOID.
r.registerFunction(new FunctionPrototype {
new Id("__Monicelli_abort"), Type::VOID,
new PointerList<FunArg> {}
});
}
// The helper macros are local to this function; do not leak them further.
#undef PUT
#undef GET

View File

@ -1,48 +0,0 @@
#ifndef MODULE_REGISTRY_HPP
#define MODULE_REGISTRY_HPP
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "Pointers.hpp"
namespace monicelli {
class FunctionPrototype;
// Registry of function prototypes. The storage lives behind a private
// implementation pointer owned by this object.
class ModuleRegistry {
public:
    ModuleRegistry();
    // The object owns the raw pointer `d`, so neither copy construction nor
    // copy assignment may be allowed: a copy would delete `d` twice.
    ModuleRegistry(ModuleRegistry const&) = delete;
    ModuleRegistry& operator=(ModuleRegistry const&) = delete;
    virtual ~ModuleRegistry();

    // Read-only access to the registered prototypes.
    PointerSet<FunctionPrototype> const& getRegisteredFunctions() const;
    // Add `proto` to the registry.
    void registerFunction(FunctionPrototype *proto);

private:
    struct Private;
    Private *d;
};
ModuleRegistry& getModuleRegistry();
void registerStdLib(ModuleRegistry &);
}
#endif

View File

@ -1,210 +0,0 @@
%{
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "Scanner.hpp"
#include "Parser.hpp"
#include <string>
using namespace monicelli;
typedef Parser::token token;
#define YY_USER_ACTION location->begin.columns(yyleng);
// True when `sub` occurs anywhere inside `str`.
static inline
bool in(const char *sub, const std::string &str) {
    const std::string::size_type where = str.find(sub);
    return where != std::string::npos;
}
%}
%option ecs stack warn c++
%option nodefault noyywrap nounput yylineno
%option yyclass="Scanner"
DIGIT [0-9]
HEXDIGIT [0-9a-fA-F]
CHAR [a-zA-Z_]
%x shift
%%
("#"|"bituma")[^\n]* {}
"Lei ha clacsonato" {
return token::MAIN;
}
"vaffanzum" {
return token::RETURN;
}
"Necchi" {
lval->typeval = Type::INT;
return token::TYPENAME;
}
"Mascetti" {
lval->typeval = Type::CHAR;
return token::TYPENAME;
}
"Perozzi" {
lval->typeval = Type::FLOAT;
return token::TYPENAME;
}
"Melandri" {
lval->typeval = Type::BOOL;
return token::TYPENAME;
}
"Sassaroli" {
lval->typeval = Type::DOUBLE;
return token::TYPENAME;
}
"conte" {
return token::STAR;
}
"voglio" {
return token::VARDECL;
}
"come "("se ")?"fosse" {
return token::ASSIGN;
}
("il"|"lo"|"la"|"l'"|"i"|"gli"|"le"|"un"|"un'"|"una"|"dei"|"delle") {
return token::ARTICLE;
}
"pi"("ù"|"u`") {
return token::OP_PLUS;
}
"meno" {
return token::OP_MINUS;
}
"per" {
return token::OP_TIMES;
}
"diviso" {
return token::OP_DIV;
}
"con scappellamento a" {
/* Enter the exclusive `shift` start condition; no token is returned here. */
BEGIN(shift);
}
<shift>"per" {
/* Leave the `shift` start condition; no token is returned here either. */
BEGIN(INITIAL);
}
<shift>"sinistra" {
/* Shift-left operator. The scanner stays in `shift` until "per" is seen. */
return token::OP_SHL;
}
<shift>"destra" {
/* Shift-right operator. The scanner stays in `shift` until "per" is seen. */
return token::OP_SHR;
}
"minore "("di"|"del") {
return token::OP_LT;
}
"maggiore "("di"|"del") {
return token::OP_GT;
}
"minore o uguale "("a"|"di") {
return token::OP_LTE;
}
"maggiore o uguale "("a"|"di") {
return token::OP_GTE;
}
"a posterdati" {
return token::PRINT;
}
"mi porga" {
return token::INPUT;
}
"ho visto" {
return token::ASSERT;
}
"!" {
return token::BANG;
}
"stuzzica" {
return token::LOOP_BEGIN;
}
"e "("b"|"p")"rematura anche, se" {
return token::LOOP_CONDITION;
}
"che cos'"("è"|"e`") {
return token::BRANCH_CONDITION;
}
"?" {
return token::BRANCH_BEGIN;
}
"o tarapia tapioco" {
return token::BRANCH_ELSE;
}
"e velocit"("à"|"a`")" di esecuzione" {
return token::BRANCH_END;
}
":" {
return token::COLON;
}
"blinda la supercazzo"("r"|"l")"a" {
return token::FUN_DECL;
}
"con" {
return token::PARAMS;
}
"," {
return token::COMMA;
}
("b"|"p")"rematurata la supercazzo"("r"|"l")"a" {
return token::FUN_CALL;
}
"o scherziamo"("?")? {
return token::FUN_END;
}
"avvertite don ulrico" {
return token::ABORT;
}
"o magari" {
return token::CASE_END;
}
<INITIAL,shift>"\n" {
location->begin.lines();
}
<INITIAL,shift>[ \t\f\v\r] {
}
{CHAR}({DIGIT}|{CHAR})* {
lval->strval = new std::string(yytext);
return token::ID;
}
[-+]?(({DIGIT}*".")?{DIGIT}+|{DIGIT}+".")([eE][-+]?{DIGIT}+)? {
    /* A literal containing '.', 'e' or 'E' is a FLOAT token; any other
     * literal is an integer NUMBER token. */
    std::string value(yytext);
    try {
        if (in(".", value) || in("e", value) || in("E", value)) {
            lval->floatval = std::stod(value);
            return token::FLOAT;
        } else {
            lval->intval = std::stol(value);
            return token::NUMBER;
        }
    } catch (std::exception const&) {
        /* std::stod and std::stol throw when the literal does not fit the
         * target type. Report that as a lexical error instead of letting the
         * exception escape the scanner. */
        return token::ERROR;
    }
}
<INITIAL,shift>. {
return token::ERROR;
}
%%

View File

@ -1,434 +0,0 @@
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
%code top {
#include "Nodes.hpp"
using namespace monicelli;
}
%skeleton "lalr1.cc"
%require "3.0"
%language "c++"
%defines
%locations
%token-table
%define parse.error verbose
%define api.namespace {monicelli}
%define parser_class_name {Parser}
%lex-param {Scanner &scanner}
%parse-param {Scanner &scanner}
%parse-param {Program &program}
%code requires {
#include "Nodes.hpp"
namespace monicelli {
class Scanner;
}
}
%code {
static int yylex(Parser::semantic_type*, Parser::location_type*, Scanner&);
}
%token MAIN ERROR
%token RETURN
%token ARTICLE TYPENAME STAR
%token VARDECL ASSIGN
%token PRINT INPUT
%token ASSERT BANG
%token LOOP_BEGIN LOOP_CONDITION
%token BRANCH_CONDITION BRANCH_BEGIN BRANCH_ELSE BRANCH_END CASE_END
%token COLON COMMA
%token FUN_DECL PARAMS FUN_CALL FUN_END
%token ABORT
%token ID NUMBER FLOAT
%left OP_LT OP_GT OP_LTE OP_GTE
%left OP_PLUS OP_MINUS
%left OP_TIMES OP_DIV
%left OP_SHL OP_SHR
%nonassoc LOWER_THAN_ELSE
%nonassoc BRANCH_ELSE
%union {
    /* Integer literals: `long`, matching both the std::stol() result stored
     * by the scanner and the Integer(long) node constructor, so the value is
     * not narrowed on its way through the parser. */
    long intval;
    double floatval;
    std::string* strval;
    bool boolval;
    Type typeval;
    /* Statements. */
    Statement* statementval;
    PointerList<Statement>* statlistval;
    Assert* assertval;
    FunctionCall* callval;
    Print* printval;
    Input* inputval;
    Abort* abortval;
    Branch* branchval;
    Branch::Body* branchbodyval;
    VarDeclaration* declval;
    Assignment* assignval;
    Loop* loopval;
    BranchCase *caseval;
    PointerList<BranchCase> *caselistval;
    Return* returnval;
    /* Expressions. */
    Expression* expressionval;
    PointerList<Expression>* exprlistval;
    SemiExpression *semiexpval;
    Id* idval;
    Number* numericval;
    /* Functions. */
    Function* funval;
    FunctionPrototype* protoval;
    FunArg *argval;
    PointerList<FunArg> *arglistval;
}
%type<intval> NUMBER
%type<floatval> FLOAT
%type<strval> ID
%type<typeval> TYPENAME fun_return
%type<statementval> statement
%type<statlistval> statements
%type<branchbodyval> branch_body
%type<assertval> assert_stmt
%type<callval> fun_call
%type<argval> arg_decl
%type<arglistval> args_decl args
%type<funval> fun_decl main
%type<protoval> fun_proto
%type<printval> print_stmt
%type<inputval> input_stmt
%type<abortval> abort_stmt
%type<branchval> branch_stmt
%type<caseval> case_stmt
%type<caselistval> cases
%type<declval> var_decl
%type<assignval> assign_stmt
%type<loopval> loop_stmt
%type<returnval> return_stmt
%type<expressionval> expression maybe_expression simple_expression expression_inner
%type<expressionval> var_init
%type<exprlistval> call_arglist call_args
%type<semiexpval> semi_expression semi_expression_inner
%type<idval> variable
%type<numericval> numeric
%type<boolval> pointer
%start program
%%
program:
/* epsilon */
| fun_decls main fun_decls {
program.setMain($2);
}
;
fun_decls:
/* epsilon */
| fun_decl {
program.addFunction($1);
}
fun_decls
;
fun_decl:
fun_proto statements {
$$ = new Function($1, $2);
}
;
fun_proto:
FUN_DECL fun_return ID args FUN_END {
$$ = new FunctionPrototype(new Id($3), $2, $4);
}
;
fun_return:
/* epsilon */ { $$ = Type::VOID; } | TYPENAME { $$ = $1; }
;
args:
/* epsilon */ {
$$ = new PointerList<FunArg>();
}
| PARAMS args_decl {
$$ = $2;
}
;
args_decl:
    arg_decl {
        $$ = new PointerList<FunArg>();
        $$->push_back($1);
    }
    | args_decl COMMA arg_decl {
        /* Append to the list built so far and pass it on explicitly, as the
         * `cases` and `statements` rules do, instead of relying on $$
         * happening to hold $1 already when the action starts. */
        $1->push_back($3);
        $$ = $1;
    }
;
arg_decl:
variable pointer TYPENAME {
$$ = new FunArg($1, $3, $2);
}
;
main:
MAIN statements {
$$ = makeMain($2);
}
;
statements:
/* epsilon */ {
$$ = new PointerList<Statement>();
}
| statements statement {
if ($2 != nullptr) {
$2->setLocation(@2);
$1->push_back($2);
}
$$ = $1;
}
;
statement:
assert_stmt { $$ = $1; }
| fun_call { $$ = $1; }
| print_stmt { $$ = $1; }
| input_stmt { $$ = $1; }
| abort_stmt { $$ = $1; }
| branch_stmt { $$ = $1; }
| var_decl { $$ = $1; }
| assign_stmt { $$ = $1; }
| loop_stmt { $$ = $1; }
| return_stmt { $$ = $1; }
| COMMA { $$ = nullptr; }
;
var_decl:
VARDECL variable COMMA pointer TYPENAME var_init {
$$ = new VarDeclaration($2, $5, $4, $6);
}
;
pointer:
/* epsilon */ { $$ = false; } | STAR { $$ = true; }
;
var_init:
/* epsilon */ { $$ = nullptr; } | ASSIGN expression { $$ = $2; }
;
numeric:
NUMBER { $$ = new Integer($1); } | FLOAT { $$ = new Float($1); }
;
variable:
ID {
$$ = new Id($1);
}
| ARTICLE ID {
$$ = new Id($2);
}
;
assign_stmt:
variable ASSIGN expression {
$$ = new Assignment($1, $3);
}
;
print_stmt:
expression PRINT {
$$ = new Print($1);
program.addModule(new Module("iostream", Module::SYSTEM));
}
;
input_stmt:
INPUT variable {
$$ = new Input($2);
program.addModule(new Module("iostream", Module::SYSTEM));
}
;
return_stmt:
RETURN maybe_expression BANG {
$$ = new Return($2);
}
;
maybe_expression:
expression { $$ = $1; } | /* epsilon */ { $$ = nullptr; }
;
loop_stmt:
LOOP_BEGIN statements LOOP_CONDITION expression {
$$ = new Loop($2, $4);
}
;
branch_stmt:
BRANCH_CONDITION variable BRANCH_BEGIN branch_body BRANCH_END {
$2->setLocation(@2);
$$ = new Branch($2, $4);
}
;
branch_body:
cases %prec LOWER_THAN_ELSE {
$$ = new Branch::Body($1);
}
| cases BRANCH_ELSE COLON statements {
$$ = new Branch::Body($1, $4);
}
;
cases:
case_stmt {
$$ = new PointerList<BranchCase>();
$$->push_back($1);
}
| cases CASE_END case_stmt {
$1->push_back($3);
$$ = $1;
}
;
case_stmt:
semi_expression COLON statements {
$$ = new BranchCase($1, $3);
}
;
fun_call:
FUN_CALL ID call_args FUN_END {
$$ = new FunctionCall(new Id($2), $3);
}
;
call_args:
/* epsilon */ {
$$ = new PointerList<Expression>();
}
| PARAMS call_arglist {
$$ = $2;
}
;
call_arglist:
    expression {
        $$ = new PointerList<Expression>();
        $$->push_back($1);
    }
    | call_arglist COMMA expression {
        /* Append to the existing list ($1) and assign the result, rather
         * than dereferencing $$ before this action has given it a value. */
        $1->push_back($3);
        $$ = $1;
    }
;
abort_stmt:
ABORT {
$$ = new Abort();
program.addModule(new Module("cstdlib", Module::SYSTEM));
}
;
assert_stmt:
ASSERT expression BANG {
$$ = new Assert($2);
program.addModule(new Module("cassert", Module::SYSTEM));
}
;
expression:
expression_inner {
$1->setLocation(@1);
$$ = $1;
}
;
expression_inner:
simple_expression {
$$ = $1;
}
| expression OP_LT expression {
$$ = new ExpLt($1, $3);
}
| expression OP_GT expression {
$$ = new ExpGt($1, $3);
}
| expression OP_LTE expression {
$$ = new ExpLte($1, $3);
}
| expression OP_GTE expression {
$$ = new ExpGte($1, $3);
}
| expression OP_PLUS expression {
$$ = new ExpPlus($1, $3);
}
| expression OP_MINUS expression {
$$ = new ExpMinus($1, $3);
}
| expression OP_TIMES expression {
$$ = new ExpTimes($1, $3);
}
| expression OP_DIV expression {
$$ = new ExpDiv($1, $3);
}
| expression OP_SHL expression {
$$ = new ExpShl($1, $3);
}
| expression OP_SHR expression {
$$ = new ExpShr($1, $3);
}
;
semi_expression:
semi_expression_inner {
$1->setLocation(@1);
$$ = $1;
}
;
semi_expression_inner:
expression {
$$ = new SemiExpEq($1);
}
| OP_LT expression {
$$ = new SemiExpLt($2);
}
| OP_GT expression {
$$ = new SemiExpGt($2);
}
| OP_LTE expression {
$$ = new SemiExpLte($2);
}
| OP_GTE expression {
$$ = new SemiExpGte($2);
}
| OP_PLUS expression {
$$ = new SemiExpPlus($2);
}
| OP_MINUS expression {
$$ = new SemiExpMinus($2);
}
| OP_TIMES expression {
$$ = new SemiExpTimes($2);
}
| OP_DIV expression {
$$ = new SemiExpDiv($2);
}
| OP_SHL expression {
$$ = new SemiExpShl($2);
}
| OP_SHR expression {
$$ = new SemiExpShr($2);
}
;
simple_expression:
fun_call { $$ = $1; }
| numeric { $$ = $1; }
| variable { $$ = $1; }
;
%%
#include "Scanner.hpp"
// Print the error position and message to standard error, then terminate
// the process with exit status 1.
void Parser::error(const location_type& loc, const std::string &message) {
    std::cerr << "line " << loc.begin.line
              << ", col " << loc.begin.column
              << ": " << message << std::endl;
    std::exit(1);
}
// Free-function entry point called by the generated parser: forwards the
// semantic value and location slots to the Scanner instance and returns
// whatever its yylex() returns.
int yylex(Parser::semantic_type *lval, Parser::location_type *loc, Scanner &scanner) {
return scanner.yylex(lval, loc);
}

View File

@ -1,102 +0,0 @@
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "Nodes.hpp"
#include <string>
using namespace monicelli;
// Wrap `body` in a Function whose prototype is named "main", has return type
// Type::VOID and takes no arguments.
Function *monicelli::makeMain(PointerList<Statement> *body) {
    return new Function(
        new FunctionPrototype(new Id("main"), Type::VOID, new PointerList<FunArg>()),
        body
    );
}
// Write the textual name of `type` to `stream` and return the stream.
// Nothing is written for a value that matches no enumerator.
std::ostream& monicelli::operator<<(std::ostream &stream, Type const& type) {
    char const* text = nullptr;

    switch (type) {
        case Type::INT:     text = "int";         break;
        case Type::CHAR:    text = "char";        break;
        case Type::FLOAT:   text = "float";       break;
        case Type::BOOL:    text = "bool";        break;
        case Type::DOUBLE:  text = "double";      break;
        case Type::VOID:    text = "void";        break;
        case Type::UNKNOWN: text = "???????????"; break;
    }

    if (text != nullptr) {
        stream << text;
    }
    return stream;
}
// Write the symbol for `op` to `stream` and return the stream.
// Nothing is written for a value that matches no enumerator.
std::ostream& monicelli::operator<<(std::ostream &stream, Operator const& op) {
    char const* symbol = nullptr;

    switch (op) {
        case Operator::PLUS:  symbol = "+";  break;
        case Operator::MINUS: symbol = "-";  break;
        case Operator::TIMES: symbol = "*";  break;
        case Operator::DIV:   symbol = "/";  break;
        case Operator::SHL:   symbol = "<<"; break;
        case Operator::SHR:   symbol = ">>"; break;
        case Operator::LT:    symbol = "<";  break;
        case Operator::GT:    symbol = ">";  break;
        case Operator::GTE:   symbol = ">="; break;
        case Operator::LTE:   symbol = "<="; break;
        case Operator::EQ:    symbol = "=="; break;
    }

    if (symbol != nullptr) {
        stream << symbol;
    }
    return stream;
}

View File

@ -1,712 +0,0 @@
#ifndef NODES_HPP
#define NODES_HPP
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "Emitter.hpp"
#include "Pointers.hpp"
#include "location.hh"
#include <functional>
#include <unordered_set>
#include <boost/optional.hpp>
#define maybe_return(val) \
if ((val) != nullptr) return *(val); else return boost::none;
namespace monicelli {
enum class Type {
INT,
CHAR,
FLOAT,
BOOL,
DOUBLE,
VOID,
UNKNOWN
};
std::ostream& operator<<(std::ostream&, Type const&);
enum class Operator {
PLUS, MINUS, TIMES, DIV,
SHL, SHR,
LT, GT, GTE, LTE, EQ
};
std::ostream& operator<<(std::ostream&, Operator const&);
class Localizable {
public:
void setLocation(location const& l) {
loc = l;
}
location const& getLocation() const {
return loc;
}
private:
location loc;
};
// Base of every node that can be handed to an Emitter. Carries a source
// location through Localizable.
class Emittable: public Localizable {
public:
virtual ~Emittable() {}
// Emit this node through `emitter`; returns the emitter's success flag.
virtual bool emit(Emitter *emitter) const = 0;
};
// Statement and Expression inherit Emittable virtually, so a class deriving
// from both (presumably a function call - confirm) has a single Emittable base.
class Statement: virtual public Emittable {};
class Expression: virtual public Emittable {};
// Marker base for expressions that the C++ emitter prints without parentheses.
class SimpleExpression: public Expression {};
// An operator together with one operand expression; the other operand is
// supplied by the context in which the semi-expression is used.
class SemiExpression: public Localizable {
public:
    SemiExpression(Operator op, Expression *l): op(op), left(l) {}

    // The parser allocates subclasses (SemiExpEq, SemiExpLt, ...) and handles
    // them through SemiExpression pointers, so destruction through the base
    // must be well defined.
    virtual ~SemiExpression() {}

    // The operand expression stored in this node.
    Expression const& getLeft() const {
        return *left;
    }

    // The operator stored in this node.
    Operator getOperator() const {
        return op;
    }

private:
    Operator op;
    Pointer<Expression> left;
};
class Id: public SimpleExpression {
public:
explicit Id(std::string *c): value(c) {}
explicit Id(char const* c) {
value = Pointer<std::string>(new std::string(c));
}
explicit Id(std::string const& c) {
value = Pointer<std::string>(new std::string(c));
}
virtual bool emit(Emitter *emitter) const {
return emitter->emit(*this);
}
std::string const& getValue() const {
return *value;
}
private:
Pointer<std::string> value;
};
// Two identifiers are equal when their names are equal.
static inline bool operator==(Id const& a, Id const& b) {
  return a.getValue().compare(b.getValue()) == 0;
}
// Common base of the numeric literal nodes (Integer and Float).
class Number: public SimpleExpression {};
class Integer: public Number {
public:
Integer(long i): value(i) {}
virtual bool emit(Emitter *emitter) const {
return emitter->emit(*this);
}
long getValue() const {
return value;
}
private:
long value;
};
class Float: public Number {
public:
Float(double f): value(f) {}
virtual bool emit(Emitter *emitter) const {
return emitter->emit(*this);
}
double getValue() const {
return value;
}
private:
double value;
};
class Return: public Statement {
public:
explicit Return(Expression *e): expression(e) {}
virtual bool emit(Emitter *emitter) const {
return emitter->emit(*this);
}
boost::optional<Expression const&> getExpression() const {
maybe_return(expression);
}
private:
Pointer<Expression> expression;
};
class Loop: public Statement {
public:
Loop(PointerList<Statement> *b, Expression *c): body(b), condition(c) {}
virtual bool emit(Emitter *emitter) const {
return emitter->emit(*this);
}
PointerList<Statement> const& getBody() const {
return *body;
}
Expression const& getCondition() const {
return *condition;
}
private:
Pointer<PointerList<Statement>> body;
Pointer<Expression> condition;
};
class VarDeclaration: public Statement {
public:
VarDeclaration(Id *n, Type t, bool p, Expression *i):
name(n), point(p), init(i), type(t) {}
virtual bool emit(Emitter *emitter) const {
return emitter->emit(*this);
}
Id const& getId() const {
return *name;
}
bool isPointer() const {
return point;
}
boost::optional<Expression const&> getInitializer() const {
maybe_return(init);
}
Type getType() const {
return type;
}
private:
Pointer<Id> name;
bool point;
Pointer<Expression> init;
Type type;
};
class Assignment: public Statement {
public:
Assignment(Id *n, Expression *v): name(n), value(v) {}
virtual bool emit(Emitter *emitter) const {
return emitter->emit(*this);
}
Id const& getName() const {
return *name;
}
Expression const& getValue() const {
return *value;
}
private:
Pointer<Id> name;
Pointer<Expression> value;
};
class Print: public Statement {
public:
explicit Print(Expression *e): expression(e) {}
virtual bool emit(Emitter *emitter) const {
return emitter->emit(*this);
}
Expression const& getExpression() const {
return *expression;
}
private:
Pointer<Expression> expression;
};
class Input: public Statement {
public:
explicit Input(Id *v): variable(v) {}
virtual bool emit(Emitter *emitter) const {
return emitter->emit(*this);
}
Id const& getVariable() const {
return *variable;
}
private:
Pointer<Id> variable;
};
class Abort: public Statement {
public:
virtual bool emit(Emitter *emitter) const {
return emitter->emit(*this);
}
};
class Assert: public Statement {
public:
explicit Assert(Expression *e): expression(e) {}
virtual bool emit(Emitter *emitter) const {
return emitter->emit(*this);
}
Expression const& getExpression() const {
return *expression;
}
private:
Pointer<Expression> expression;
};
class FunctionCall: public Statement, public Expression {
public:
FunctionCall(Id *n, PointerList<Expression> *a): name(n), args(a) {}
virtual bool emit(Emitter *emitter) const {
return emitter->emit(*this);
}
Id const& getName() const {
return *name;
}
PointerList<Expression> const& getArgs() const {
return *args;
}
private:
Pointer<Id> name;
Pointer<PointerList<Expression>> args;
};
// One case of a Branch: a semi-expression condition and the statements that
// belong to it.
class BranchCase : public Localizable {
public:
  // Takes ownership of `c` and `b`.
  BranchCase(SemiExpression* c, PointerList<Statement>* b) : condition(c), body(b) {}

  PointerList<Statement> const& getBody() const { return *body; }

  SemiExpression const& getCondition() const { return *condition; }

private:
  Pointer<SemiExpression> condition;
  Pointer<PointerList<Statement>> body;
};
class Branch: public Statement {
public:
class Body: public Localizable {
public:
Body(PointerList<BranchCase> *c, PointerList<Statement> *e = nullptr): cases(c), els(e) {}
PointerList<BranchCase> const& getCases() const {
return *cases;
}
boost::optional<PointerList<Statement> const&> getElse() const {
maybe_return(els);
}
private:
Pointer<PointerList<BranchCase>> cases;
Pointer<PointerList<Statement>> els;
};
Branch(Id *v, Branch::Body *b): var(v), body(b) {}
virtual bool emit(Emitter *emitter) const {
return emitter->emit(*this);
}
Id const& getVar() const {
return *var;
}
Branch::Body const& getBody() const {
return *body;
}
private:
Pointer<Id> var;
Pointer<Branch::Body> body;
};
// Builds a Function from the given statement list; only declared here.
// NOTE(review): Function is defined further down in this header, so this
// presumably relies on a forward declaration from an included header - confirm.
Function *makeMain(PointerList<Statement> *body);
// Formal argument of a function: name, type and pointer flag.
class FunArg : public Localizable {
public:
  // Takes ownership of `n`.
  FunArg(Id* n, Type t, bool p) : name(n), type(t), pointer(p) {}

  Id const& getName() const { return *name; }

  bool isPointer() const { return pointer; }

  Type getType() const { return type; }

private:
  Pointer<Id> name;
  Type type;
  bool pointer;
};
class FunctionPrototype: public Emittable {
public:
FunctionPrototype(Id *n, Type r, PointerList<FunArg> *a):
name(n), type(r), args(a) {}
virtual bool emit(Emitter *emitter) const {
return emitter->emit(*this);
}
Id const& getName() const {
return *name;
}
Type getType() const {
return type;
}
PointerList<FunArg> const& getArgs() const {
return *args;
}
private:
Pointer<Id> name;
Type type;
Pointer<PointerList<FunArg>> args;
};
// Prototypes compare equal when their names do; result type and arguments are
// not looked at.
static inline bool operator==(const FunctionPrototype& a, const FunctionPrototype& b) {
  Id const& lhs = a.getName();
  Id const& rhs = b.getName();
  return lhs == rhs;
}
// Hashes a prototype by its name only, matching operator== above.
static inline size_t hash_value(const monicelli::FunctionPrototype& e) {
  std::string const& key = e.getName().getValue();
  return std::hash<std::string>()(key);
}
class Function: public Emittable {
public:
Function(FunctionPrototype *p, PointerList<Statement> *b):
prototype(p), body(b) {}
virtual bool emit(Emitter *emitter) const {
return emitter->emit(*this);
}
FunctionPrototype const& getPrototype() const {
return *prototype;
}
PointerList<Statement> const& getBody() const {
return *body;
}
private:
Pointer<FunctionPrototype> prototype;
Pointer<PointerList<Statement>> body;
};
class Module: public Emittable {
public:
enum ModuleType {
SYSTEM, USER
};
Module(const std::string &n, ModuleType s): name(n), type(s) {}
virtual bool emit(Emitter *emitter) const {
return emitter->emit(*this);
}
std::string const& getName() const {
return name;
}
ModuleType getType() const {
return type;
}
private:
std::string name;
ModuleType type;
};
// Modules are equal when both their name and their kind match.
static inline bool operator==(const Module& a, const Module& b) {
  if (a.getName() != b.getName()) {
    return false;
  }
  return a.getType() == b.getType();
}
// Combines the hash of the name with the hash of the kind. The kind is
// converted to bool before hashing.
static inline size_t hash_value(const monicelli::Module& e) {
  size_t const name_hash = std::hash<std::string>()(e.getName());
  size_t const type_hash = std::hash<bool>()(e.getType());
  return name_hash ^ type_hash;
}
class Program: public Emittable {
public:
virtual bool emit(Emitter *emitter) const {
return emitter->emit(*this);
}
void setMain(Function *m) {
main = Pointer<Function>(m);
}
void addFunction(Function *f) {
functions.push_back(f);
}
void addModule(Module *m) {
modules.insert(m);
}
boost::optional<Function const&> getMain() const {
maybe_return(main);
}
PointerList<Function> const& getFunctions() const {
return functions;
}
PointerSet<Module> const& getModules() const {
return modules;
}
private:
Pointer<Function> main;
PointerList<Function> functions;
PointerSet<Module> modules;
};
class BinaryExpression: public Expression {
public:
BinaryExpression(Expression *l, Operator op, Expression *r):
left(l), op(op), right(r) {}
virtual bool emit(Emitter *emitter) const {
return emitter->emit(*this);
}
Expression const& getLeft() const {
return *left;
}
Expression const& getRight() const {
return *right;
}
Operator getOperator() const {
return op;
}
private:
Pointer<Expression> left;
Operator op;
Pointer<Expression> right;
};
// Convenience subclasses of BinaryExpression. Each one only fixes the operator
// tag that matches its name and forwards both operands to the base class.

class ExpLt : public BinaryExpression {
public:
  ExpLt(Expression* l, Expression* r) : BinaryExpression(l, Operator::LT, r) {}
};

class ExpGt : public BinaryExpression {
public:
  ExpGt(Expression* l, Expression* r) : BinaryExpression(l, Operator::GT, r) {}
};

class ExpLte : public BinaryExpression {
public:
  ExpLte(Expression* l, Expression* r) : BinaryExpression(l, Operator::LTE, r) {}
};

class ExpGte : public BinaryExpression {
public:
  ExpGte(Expression* l, Expression* r) : BinaryExpression(l, Operator::GTE, r) {}
};

class ExpPlus : public BinaryExpression {
public:
  ExpPlus(Expression* l, Expression* r) : BinaryExpression(l, Operator::PLUS, r) {}
};

class ExpMinus : public BinaryExpression {
public:
  ExpMinus(Expression* l, Expression* r) : BinaryExpression(l, Operator::MINUS, r) {}
};

class ExpTimes : public BinaryExpression {
public:
  ExpTimes(Expression* l, Expression* r) : BinaryExpression(l, Operator::TIMES, r) {}
};

class ExpDiv : public BinaryExpression {
public:
  ExpDiv(Expression* l, Expression* r) : BinaryExpression(l, Operator::DIV, r) {}
};

class ExpShl : public BinaryExpression {
public:
  ExpShl(Expression* l, Expression* r) : BinaryExpression(l, Operator::SHL, r) {}
};

class ExpShr : public BinaryExpression {
public:
  ExpShr(Expression* l, Expression* r) : BinaryExpression(l, Operator::SHR, r) {}
};
// Convenience subclasses of SemiExpression. Each one only fixes the operator
// tag that matches its name and forwards the operand to the base class.

class SemiExpEq : public SemiExpression {
public:
  SemiExpEq(Expression* l) : SemiExpression(Operator::EQ, l) {}
};

class SemiExpLt : public SemiExpression {
public:
  SemiExpLt(Expression* l) : SemiExpression(Operator::LT, l) {}
};

class SemiExpGt : public SemiExpression {
public:
  SemiExpGt(Expression* l) : SemiExpression(Operator::GT, l) {}
};

class SemiExpLte : public SemiExpression {
public:
  SemiExpLte(Expression* l) : SemiExpression(Operator::LTE, l) {}
};

class SemiExpGte : public SemiExpression {
public:
  SemiExpGte(Expression* l) : SemiExpression(Operator::GTE, l) {}
};

class SemiExpPlus : public SemiExpression {
public:
  SemiExpPlus(Expression* l) : SemiExpression(Operator::PLUS, l) {}
};

class SemiExpMinus : public SemiExpression {
public:
  SemiExpMinus(Expression* l) : SemiExpression(Operator::MINUS, l) {}
};

class SemiExpTimes : public SemiExpression {
public:
  SemiExpTimes(Expression* l) : SemiExpression(Operator::TIMES, l) {}
};

class SemiExpDiv : public SemiExpression {
public:
  SemiExpDiv(Expression* l) : SemiExpression(Operator::DIV, l) {}
};
// Shift semi-expressions. Each class passes the operator tag that matches its
// own name (the two tags used to be swapped, unlike every other wrapper in
// this header, including ExpShl/ExpShr).

// Left-shift semi-expression.
class SemiExpShl : public SemiExpression {
public:
  SemiExpShl(Expression* l) : SemiExpression(Operator::SHL, l) {}
};

// Right-shift semi-expression.
class SemiExpShr : public SemiExpression {
public:
  SemiExpShr(Expression* l) : SemiExpression(Operator::SHR, l) {}
};
} // namespace
#undef maybe_return
#endif

View File

@ -1,47 +0,0 @@
#ifndef POINTERS_HPP
#define POINTERS_HPP
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <memory>
#include <initializer_list>
#include <boost/ptr_container/ptr_unordered_set.hpp>
#include <boost/ptr_container/ptr_vector.hpp>
namespace monicelli {
// Owning pointer to a single object.
template<typename T> using Pointer = std::unique_ptr<T>;
// Sequence that owns the heap-allocated objects stored in it.
template<typename T> using PointerList = boost::ptr_vector<T>;
// Unordered set that owns the heap-allocated objects stored in it.
template<typename T> using PointerSet = boost::ptr_unordered_set<T>;
// Builds a heap-allocated PointerList from the given raw pointers, in order.
// The list takes ownership of every element and the caller takes ownership of
// the returned list.
template<typename T>
PointerList<T>* plist_of(std::initializer_list<T*> elements) {
  // Keep the list in a smart pointer while it is being filled, so that it is
  // released if push_back throws instead of being leaked.
  Pointer<PointerList<T>> result(new PointerList<T>(elements.size()));
  for (T* el : elements) {
    result->push_back(el);
  }
  return result.release();
}
}
#endif

View File

@ -1,89 +0,0 @@
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "Runtime.h"
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
/* Prints "vero" for a non-zero value and "falso" for zero, followed by one
   newline. */
void __Monicelli_putBool(Monicelli_Bool value) {
  /* The literals already end in '\n'. puts() would append a second one, so
     fputs() is used to keep the output to a single line like the numeric
     printers. */
  fputs(value ? "vero\n" : "falso\n", stdout);
}
/* Writes a single character to standard output, without a trailing newline. */
void __Monicelli_putChar(Monicelli_Char value) {
  putchar(value);
}
/* Prints an integer in decimal followed by a newline. */
void __Monicelli_putInt(Monicelli_Int value) {
  /* Monicelli_Int is int64_t, which is not `long` on every platform. Widen
     to long long explicitly so the conversion specifier always matches. */
  printf("%lld\n", (long long) value);
}
/* Prints a single-precision value with %g followed by a newline. */
void __Monicelli_putFloat(Monicelli_Float value) {
  /* The float is promoted to double for the variadic call anyway; the cast
     only makes that explicit. */
  printf("%g\n", (double) value);
}
/* Prints a double-precision value followed by a newline. */
void __Monicelli_putDouble(Monicelli_Double value) {
  fprintf(stdout, "%lg\n", value);
}
/* Prompts with "? " and reads one character from standard input.
   Returns 1 when the character read is not NUL, 0 otherwise. Returns 0 when
   nothing could be read. */
Monicelli_Bool __Monicelli_getBool() {
  /* %c stores into a plain char. Initialised so that a failed read yields a
     defined value instead of an indeterminate one. */
  char raw = 0;
  printf("%s", "? ");
  if (scanf("%c", &raw) != 1) {
    return 0;
  }
  return raw != 0 ? 1 : 0;
}
Monicelli_Char __Monicelli_getChar() {
Monicelli_Char tmp;
printf("%s", "? ");
scanf("%c", &tmp);
return tmp;
}
Monicelli_Int __Monicelli_getInt() {
Monicelli_Int tmp;
printf("%s", "? ");
scanf("%ld", &tmp);
return tmp;
}
/* Prompts with "? " and reads a single-precision value from standard input.
   Returns 0 when no number could be read. */
Monicelli_Float __Monicelli_getFloat() {
  /* Initialised so that a failed read yields a defined value. */
  Monicelli_Float tmp = 0;
  printf("%s", "? ");
  if (scanf("%f", &tmp) != 1) {
    return 0;
  }
  return tmp;
}
/* Prompts with "? " and reads a double-precision value from standard input.
   Returns 0 when no number could be read. */
Monicelli_Double __Monicelli_getDouble() {
  /* Initialised so that a failed read yields a defined value. */
  Monicelli_Double tmp = 0;
  printf("%s", "? ");
  if (scanf("%lf", &tmp) != 1) {
    return 0;
  }
  return tmp;
}
/* Terminates the program by calling the C library abort(). */
void __Monicelli_abort() {
abort();
}
/* Aborts the program when `condition` is zero. */
void __Monicelli_assert(Monicelli_Bool condition) {
  /* With assertions enabled this prints the usual diagnostic and aborts. */
  assert(condition);
  /* assert() expands to nothing when NDEBUG is defined. The explicit check
     keeps the language-level assertion working in such builds too. */
  if (!condition) {
    abort();
  }
}

View File

@ -1,55 +0,0 @@
#ifndef RUNTIME_H
#define RUNTIME_H
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdint.h>
/* C representation of the language's value types, as seen by the runtime. */
typedef int8_t Monicelli_Bool;
typedef int8_t Monicelli_Char;
typedef int64_t Monicelli_Int;
typedef float Monicelli_Float;
typedef double Monicelli_Double;
/* The runtime entry points use C linkage, so C++ code can include this
   header too. */
#ifdef __cplusplus
extern "C" {
#endif
/* Output: one function per value type, each taking the value to print. */
void __Monicelli_putBool(Monicelli_Bool value);
void __Monicelli_putChar(Monicelli_Char value);
void __Monicelli_putInt(Monicelli_Int value);
void __Monicelli_putFloat(Monicelli_Float value);
void __Monicelli_putDouble(Monicelli_Double value);
/* Input: one function per value type, each returning the value read. */
Monicelli_Bool __Monicelli_getBool();
Monicelli_Char __Monicelli_getChar();
Monicelli_Int __Monicelli_getInt();
Monicelli_Float __Monicelli_getFloat();
Monicelli_Double __Monicelli_getDouble();
/* Program termination and assertion support. */
void __Monicelli_abort();
void __Monicelli_assert(Monicelli_Bool condition);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,49 +0,0 @@
#ifndef SCANNER_HPP
#define SCANNER_HPP
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef yyFlexLexerOnce
#include <FlexLexer.h>
#endif
#include "Parser.hpp"
namespace monicelli {
// Lexer built on yyFlexLexer. Adds a yylex() overload that receives the
// parser's semantic value and location slots.
class Scanner : public yyFlexLexer {
public:
  // Reads the input from `in`.
  Scanner(std::istream* in) : yyFlexLexer(in) {}

  // Remembers where the parser wants the semantic value and the location to
  // be stored, then runs the parameterless scanner and returns its result.
  int yylex(Parser::semantic_type* lval, Parser::location_type* loc) {
    location = loc;
    this->lval = lval;
    return yylex();
  }

private:
  // Parameterless scanner; only declared here.
  int yylex();

  Parser::semantic_type* lval;
  Parser::location_type* location;
};
} // monicelli
#endif

View File

@ -1,70 +0,0 @@
#ifndef SCOPE_HPP
#define SCOPE_HPP
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <boost/optional.hpp>
#include <boost/range/adaptor/reversed.hpp>
#include <vector>
#include <unordered_map>
namespace monicelli {
// A stack of symbol tables. Lookups search from the innermost table outwards;
// insertions always go into the innermost one.
template<class Key, class Value>
class Scope {
public:
  // Returns the value bound to `name` in the innermost table that contains
  // it, or boost::none when no table does.
  boost::optional<Value> lookup(Key name) {
    for (auto const& table : boost::adaptors::reverse(tables)) {
      auto result = table.find(name);
      if (result != table.end()) {
        return result->second;
      }
    }
    return boost::none;
  }

  // Binds `key` to `value` in the innermost table. An existing binding for
  // `key` in that table is left untouched.
  void push(Key const& key, Value const& value) {
    // Calling back() on an empty vector is undefined behaviour, so open an
    // outermost table first when no scope has been entered yet.
    if (tables.empty()) {
      enter();
    }
    tables.back().insert({key, value});
  }

  // Opens a new innermost table.
  void enter() {
    tables.emplace_back();
  }

  // Closes the innermost table, if there is one.
  void leave() {
    if (!tables.empty()) {
      tables.pop_back();
    }
  }

  // Closes every table.
  void drop() {
    tables.clear();
  }

private:
  std::vector<std::unordered_map<Key, Value>> tables;
};
}
#endif

124
src/asmgen.cpp Normal file
View File

@ -0,0 +1,124 @@
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include "asmgen.h"
#include "errors.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include <cstdlib>
#include <iostream>
#include <string>
#ifdef MONICELLI_ENABLE_LINKER
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#endif
namespace monicelli {
// Initializes every LLVM target that was built in: target infos, targets,
// MC layers, assembly parsers and assembly printers.
void registerTargets() {
llvm::InitializeAllTargetInfos();
llvm::InitializeAllTargets();
llvm::InitializeAllTargetMCs();
llvm::InitializeAllAsmParsers();
llvm::InitializeAllAsmPrinters();
}
// Looks up the LLVM target for `triple` and creates a target machine for the
// given CPU and feature string, using default target options and the static
// relocation model. Prints a message and exits with status 1 when the triple
// is not known.
llvm::TargetMachine* getTargetMachine(const std::string& triple, const std::string& cpu,
                                      const std::string& features) {
  std::string lookup_error;
  const llvm::Target* target = llvm::TargetRegistry::lookupTarget(triple, lookup_error);
  if (target == nullptr) {
    std::cerr << "While determining target: " << lookup_error << '\n';
    exit(1);
  }

  llvm::TargetOptions options;
  return target->createTargetMachine(triple, cpu, features, options,
                                     llvm::Reloc::Model::Static);
}
void writeAssembly(const std::string& to_filename, llvm::Module* module,
llvm::TargetMachine* target_machine) {
std::error_code error_code;
llvm::raw_fd_ostream output{to_filename, error_code, llvm::sys::fs::F_None};
if (error_code) {
std::cerr << "Could not open '" << to_filename << "' for output: " << error_code.message()
<< '\n';
exit(1);
}
llvm::legacy::PassManager asm_generator;
auto file_type = llvm::TargetMachine::CGFT_ObjectFile;
if (target_machine->addPassesToEmitFile(asm_generator, output, file_type)) {
std::cerr << "Cannot emit an object file of this type\n";
exit(1);
}
asm_generator.run(*module);
output.flush();
}
#ifdef MONICELLI_ENABLE_LINKER
// Name of the C compiler driver used for linking.
static const char* C_COMPILER = "c99";

// Links `object_files` into `output_name` by running the C compiler driver in
// a child process. When `output_name` is empty no -o option is passed. Unless
// `keep_object_files` is set, the object files are removed afterwards.
// Prints a message and exits with status 1 when the linker cannot be started
// or does not finish successfully.
void linkAssembly(const std::string& output_name, const std::vector<std::string>& object_files,
                  bool keep_object_files) {
  // Linking a C object file with certain modern libc's is so complicated that
  // we just let a C compiler do it for us. This function assumes POSIX, and
  // most recent POSIX-compliant systems will also adopt the recommendation
  // to have a C compiler installed and called c99. Very old systems will have
  // c89 instead. cc exists as well, but it's not specified by POSIX.

  // argv layout: compiler, optional "-o <output>", the objects, terminating null.
  std::vector<const char*> cc_args;
  cc_args.reserve(object_files.size() + 1 + 2 + 1);
  cc_args.push_back(C_COMPILER);
  if (!output_name.empty()) {
    cc_args.push_back("-o");
    cc_args.push_back(output_name.c_str());
  }
  for (const auto& object_file : object_files) {
    assert(object_file[0] != '-' && "The option parser allowed a filename starting with -");
    cc_args.push_back(object_file.c_str());
  }
  cc_args.push_back(nullptr);

  pid_t pid = fork();
  if (pid == 0) {
    // execvp() only returns on failure.
    execvp(C_COMPILER, const_cast<char* const*>(cc_args.data()));
    std::cerr << "Failed to launch the linker. Check that '" << C_COMPILER << "' is installed.\n";
    // _exit() instead of exit(): the forked child must not run the parent's
    // atexit handlers or flush stdio buffers it inherited from the parent.
    _exit(1);
  }
  if (pid == -1) {
    std::cerr << "Failed to spawn the linker process.\n";
    exit(1);
  }

  // Collect the linker's exit status instead of discarding it, so that a
  // failed link is reported to the caller's user.
  int status = 0;
  const bool linked =
      waitpid(pid, &status, 0) == pid && WIFEXITED(status) && WEXITSTATUS(status) == 0;

  if (!keep_object_files) {
    for (const auto& object_file : object_files) {
      unlink(object_file.c_str());
    }
  }

  if (!linked) {
    std::cerr << "The linker '" << C_COMPILER << "' did not complete successfully.\n";
    exit(1);
  }
}
#endif
} // namespace monicelli

30
src/asmgen.h Normal file
View File

@ -0,0 +1,30 @@
#ifndef MONICELLI_ASMGEN_H
#define MONICELLI_ASMGEN_H
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"
#include <string>
#include <vector>
namespace monicelli {
// Initializes the LLVM targets. Presumably has to run before
// getTargetMachine() - confirm against the call site.
void registerTargets();
// Returns a target machine for the given triple, CPU name and feature string.
llvm::TargetMachine* getTargetMachine(const std::string& triple, const std::string& cpu,
const std::string& features);
// Compiles `module` for `target_machine` and writes the output to `to_filename`.
void writeAssembly(const std::string& to_filename, llvm::Module* module,
llvm::TargetMachine* target_machine);
#ifdef MONICELLI_ENABLE_LINKER
// Links `object_files` into `output_name`. The object files are removed
// afterwards unless `keep_object_files` is true.
void linkAssembly(const std::string& output_name, const std::vector<std::string>& object_files,
bool keep_object_files = false);
#endif
} // namespace monicelli
#endif

267
src/ast-printer.cpp Normal file
View File

@ -0,0 +1,267 @@
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include "ast-visitor.h"
#include "ast.h"
#include <iostream>
using namespace monicelli;
namespace {
// Returns the printable name of a base type. The switch cases are generated
// from the BUILTIN_TYPES list: one case per type, returning the sixth field of
// its entry.
static const char* baseTypeToString(const VarType::BaseType type) {
switch (type) {
#define RETURN_BASE_NAME(NAME, _1, _2, _3, _4, STRING) \
case VarType::NAME: \
return STRING;
BUILTIN_TYPES(RETURN_BASE_NAME)
#undef RETURN_BASE_NAME
default:
UNREACHABLE("Unhandled VarType::BaseType.");
}
}
// Prints a type as its base type name, followed by '*' for pointer types.
static std::ostream& operator<<(std::ostream& stream, const VarType& type) {
  const char* base_name = baseTypeToString(type.getBaseType());
  stream << base_name;
  if (!type.isPointer()) {
    return stream;
  }
  return stream << '*';
}
// Visitor that writes a C-like, human readable rendering of an AST to a
// stream. Block nesting is tracked with indent_level_ (two columns per level)
// and expression nesting with expression_level_, which decides when a binary
// expression is parenthesized.
class AstPrinter final : public ConstAstVisitor<AstPrinter, void> {
public:
  AstPrinter(std::ostream& stream) : expression_level_(0), indent_level_(0), stream_(stream) {}

  // Prints every function of the module, then the entry point if there is one.
  void visitModule(const Module* module) {
    for (const Function* function : module->functions()) {
      visitFunction(function);
    }
    if (module->hasEntryPoint()) {
      visitFunction(module->getEntryPoint());
    }
  }

  // Prints the signature and, when the body is not empty, the body in braces.
  // The entry point is printed under the name "main".
  void visitFunction(const Function* function) {
    stream() << function->getReturnType() << ' ';
    if (function->isEntryPoint()) {
      stream() << "main";
    } else {
      stream() << function->getName();
    }
    stream() << '(';
    bool first = true;
    for (const FunctionParam& param : function->params()) {
      if (first) {
        first = false;
      } else {
        stream() << ", ";
      }
      assert(!param.getType().isVoid());
      stream() << param.getType() << ' ' << param.getArg().getName();
    }
    stream() << ')';
    if (!function->body_empty()) {
      stream(false) << " {\n";
      IndentGuard guard(this);
      for (const Statement* statement : function->body()) {
        visit(statement);
        // No indentation before the line break, as in the other statement
        // loops; otherwise every line would end in trailing blanks.
        stream(false) << '\n';
      }
      stream(false) << "}";
    }
    stream(false) << "\n\n";
  }

  void visitAssertStatement(const AssertStatement* s) {
    stream() << "assert ";
    visit(s->getExpression());
  }

  void visitInputStatement(const InputStatement* s) {
    stream() << "read " << s->getVariable().getName();
  }

  void visitExpressionStatement(const ExpressionStatement* s) { visit(s->getExpression()); }

  void visitAbortStatement(const AbortStatement*) { stream() << "abort"; }

  void visitVardeclStatement(const VardeclStatement* s) {
    assert(!s->getType().isVoid());
    stream() << s->getType() << ' ' << s->getVariable().getName();
    if (s->hasInitializer()) {
      stream(false) << " = ";
      visit(s->getInitializer());
    }
  }

  // Prints the lead variable, then each case and the optional else part as
  // nested blocks.
  void visitBranchStatement(const BranchStatement* branch) {
    stream() << "branch " << branch->getLeadVariable().getName() << " {\n";
    {
      IndentGuard guard(this);
      for (const BranchCase& c : branch->cases()) {
        stream() << "case";
        visit(c.getExpression());
        stream(false) << " {\n";
        {
          IndentGuard guard(this);
          for (const Statement* s : c.body()) {
            visit(s);
            stream(false) << '\n';
          }
        }
        stream() << "}\n";
      }
      if (branch->hasBranchElse()) {
        stream() << "else {\n";
        {
          IndentGuard guard(this);
          for (const Statement* s : branch->getBranchElse()->body()) {
            visit(s);
            stream(false) << '\n';
          }
        }
        stream() << "}\n";
      }
    }
    stream() << "}";
  }

  void visitLoopStatement(const LoopStatement* s) {
    stream() << "do {\n";
    {
      IndentGuard guard(this);
      for (const Statement* is : s->body()) {
        visit(is);
        stream(false) << '\n';
      }
    }
    stream() << "} while ";
    visit(s->getCondition());
  }

  void visitReturnStatement(const ReturnStatement* s) {
    ExpressionNestingGuard guard{this};
    stream() << "return";
    if (s->hasExpression()) {
      stream(false) << ' ';
      visit(s->getExpression());
    }
  }

  void visitPrintStatement(const PrintStatement* s) {
    ExpressionNestingGuard guard{this};
    stream() << "print ";
    visit(s->getExpression());
  }

  void visitAssignStatement(const AssignStatement* s) {
    ExpressionNestingGuard guard{this};
    stream() << s->getVariable().getName() << " = ";
    visit(s->getExpression());
  }

  // Prints a literal or an identifier, never indented.
  void visitAtomicExpression(const AtomicExpression* s) {
    ExpressionNestingGuard guard{this};
    switch (s->getType()) {
    case AtomicExpression::FLOAT:
      stream(false) << s->getFloatValue();
      break;
    case AtomicExpression::INTEGER:
      stream(false) << s->getIntValue();
      break;
    case AtomicExpression::IDENTIFIER:
      stream(false) << s->getIdentifierValue().getName();
      break;
    default:
      UNREACHABLE("Unhandled AtomicExpression type.");
    }
  }

  // Prints "left op right", wrapped in parentheses when nested inside another
  // expression. A semi-expression has no left operand to print.
  void visitBinaryExpression(const BinaryExpression* s) {
    if (isNestedExpression()) stream(false) << '(';
    {
      ExpressionNestingGuard guard{this};
      if (!s->isSemiExpression()) visit(s->getLeft());
      stream(false) << ' ' << s->getOperatorRepresentation() << ' ';
      visit(s->getRight());
    }
    if (isNestedExpression()) stream(false) << ')';
  }

  // Prints "name(arg, ...)". The call is indented only when it is not nested
  // in another expression.
  // NOTE(review): callers that print a prefix on the same line without
  // opening a nesting guard (assert, declaration initializer, loop condition)
  // get indentation in the middle of the line - confirm whether intended.
  void visitFunctionCallExpression(const FunctionCallExpression* s) {
    stream(!isNestedExpression()) << s->getFunctionName() << '(';
    ExpressionNestingGuard guard{this};
    bool first = true;
    for (const Expression* arg : s->args()) {
      if (first) {
        first = false;
      } else {
        stream(false) << ", ";
      }
      visit(arg);
    }
    stream(false) << ')';
  }

private:
  // Increases the printer's indentation for the lifetime of the guard.
  class IndentGuard final {
  public:
    IndentGuard(AstPrinter* printer) : printer_(printer) { printer_->increaseIndent(); }
    ~IndentGuard() { printer_->decreaseIndent(); }

  private:
    AstPrinter* printer_;
  };

  // Marks everything printed during the guard's lifetime as nested inside an
  // expression.
  class ExpressionNestingGuard final {
  public:
    ExpressionNestingGuard(AstPrinter* printer) : printer_(printer) {
      ++printer->expression_level_;
    }
    ~ExpressionNestingGuard() { --printer_->expression_level_; }

  private:
    AstPrinter* printer_;
  };

  bool isNestedExpression() const { return expression_level_ > 0; }

  void increaseIndent() { indent_level_ += 2; }

  void decreaseIndent() {
    if (indent_level_ >= 2) indent_level_ -= 2;
  }

  // Returns the output stream, after writing the current indentation to it
  // when `indent` is true.
  std::ostream& stream(bool indent = true) {
    if (indent) {
      // Emit the blanks one at a time. Writing chunks out of a fixed padding
      // literal only works if that literal is at least 32 characters long,
      // and reads past its end otherwise.
      for (int i = 0; i < indent_level_; ++i) {
        stream_.put(' ');
      }
    }
    return stream_;
  }

  int expression_level_;
  int indent_level_;
  std::ostream& stream_;
};
} // namespace
namespace monicelli {
void printAst(std::ostream& stream, const AstNode* node) {
AstPrinter printer{stream};
printer.visit(node);
}
} // namespace monicelli

17
src/ast-printer.h Normal file
View File

@ -0,0 +1,17 @@
#ifndef MONICELLI_AST_PRINTER_H
#define MONICELLI_AST_PRINTER_H
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include <iostream>
namespace monicelli {
// Forward declaration, so that this header does not need the AST definitions.
class AstNode;
// Writes a textual rendering of the tree rooted at `node` to `stream`.
void printAst(std::ostream& stream, const AstNode* node);
} // namespace monicelli
#endif

48
src/ast-visitor.h Normal file
View File

@ -0,0 +1,48 @@
#ifndef MONICELLI_AST_VISITOR_H
#define MONICELLI_AST_VISITOR_H
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include "ast.def"
#include "ast.h"
#include "errors.h"
namespace monicelli {
// Helper macros applied to the AST_NODES list; all three are undefined again
// at the end of this header.
// Forward-declares the node class NAME.
#define DECLARE_CLASS(NAME) class NAME;
// Fallback visit method for NAME; reaching it is reported through UNREACHABLE.
#define DEFAULT_VISIT(NAME) \
T visit##NAME(const NAME*) { UNREACHABLE("Unhandled " #NAME "."); }
// Switch case that downcasts `node` to NAME and forwards it to the concrete
// visitor's visit method for that class.
#define DISPATCH_CONST_STATEMENT(NAME) \
case AstNode::TYPE_##NAME: \
return derived().visit##NAME(static_cast<const NAME*>(node));
// Forward declarations for every node class.
AST_NODES(DECLARE_CLASS)
// CRTP base for read-only AST traversals. AstVisitorImpl is the concrete
// visitor and T is the result type of every visit method.
template<typename AstVisitorImpl, typename T> class ConstAstVisitor {
public:
// One fallback visit method per node class. Each of them reports through
// UNREACHABLE, so a concrete visitor has to provide its own version for every
// node class it can encounter.
AST_NODES(DEFAULT_VISIT)
// Looks at the node's class tag and forwards the node, downcast to its
// concrete class, to the matching visit method of the concrete visitor.
T visit(const AstNode* node) {
switch (node->getClassType()) {
AST_NODES(DISPATCH_CONST_STATEMENT)
default:
UNREACHABLE("Unknown AstNode type class.");
}
}
// Convenience overload taking a reference.
T visit(const AstNode& node) { return visit(&node); }
private:
// Downcasts this object to the concrete visitor type.
AstVisitorImpl& derived() { return *static_cast<AstVisitorImpl*>(this); }
};
#undef DECLARE_CLASS
#undef DEFAULT_VISIT
#undef DISPATCH_CONST_STATEMENT
} // namespace monicelli
#endif

22
src/ast.cpp Normal file
View File

@ -0,0 +1,22 @@
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include "ast.h"
#include "errors.h"
namespace monicelli {
// static
// Returns the printable form of a binary operator. The switch cases are
// generated from the AST_BINARY_OPERATORS list: one case per operator,
// returning the fourth field of its entry.
const char* BinaryExpression::getOperatorRepresentation(BinaryExpression::Type type) {
switch (type) {
#define RETURN_OP_STRING(_, NAME, __, STRING) \
case Type::NAME: \
return STRING;
AST_BINARY_OPERATORS(RETURN_OP_STRING)
#undef RETURN_OP_STRING
default:
UNREACHABLE("Unhandled BinaryExpression type.");
}
}
} // namespace monicelli

27
src/ast.def Normal file
View File

@ -0,0 +1,27 @@
#ifndef MONICELLI_AST_DEF
#define MONICELLI_AST_DEF
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include "operators.def"
#include "types.def"
// X-macro listing every concrete AST node class: V is applied to each name in
// turn. The last entry has no trailing backslash, so the definition ends on
// its own line and cannot absorb whatever line follows it.
#define AST_NODES(V) \
  V(AssertStatement) \
  V(ExpressionStatement) \
  V(InputStatement) \
  V(AbortStatement) \
  V(BranchStatement) \
  V(VardeclStatement) \
  V(LoopStatement) \
  V(ReturnStatement) \
  V(PrintStatement) \
  V(AssignStatement) \
  V(Function) \
  V(Module) \
  V(FunctionCallExpression) \
  V(BinaryExpression) \
  V(AtomicExpression)
#endif

448
src/ast.h Normal file
View File

@ -0,0 +1,448 @@
#ifndef MONICELLI_AST_H
#define MONICELLI_AST_H
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include "ast.def"
#include "iterators.h"
#include "location.h"
#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
namespace monicelli {
// A named variable. The name has no public setter; Parser is a friend of this
// class.
class Variable final : public LocationMixin {
  friend class Parser;

public:
  // Returns the variable's name.
  const std::string& getName() const { return name_; }

private:
  std::string name_;
};
// Type of a variable or of a function result: a base type plus a pointer flag.
// A default-constructed VarType is the non-pointer VOID type.
class VarType final {
  friend class Parser;

public:
  // One enumerator per entry of the BUILTIN_TYPES list.
  enum BaseType {
#define DECLARE_TYPE(NAME, _1, _2, _3, _4, _5) NAME,
    BUILTIN_TYPES(DECLARE_TYPE)
#undef DECLARE_TYPE
  };

  VarType() : base_type_(BaseType::VOID), pointer_(false) {}

  BaseType getBaseType() const { return base_type_; }

  bool isPointer() const { return pointer_; }

  // True only for plain VOID; a pointer with VOID base type does not count.
  bool isVoid() const { return !pointer_ && base_type_ == BaseType::VOID; }

private:
  BaseType base_type_;
  bool pointer_;
};
// A formal parameter of a function: the variable naming it plus its type.
class FunctionParam final {
public:
  FunctionParam(const Variable& name, const VarType& type) : name_(name), type_(type) {}

  // The variable that names this parameter.
  const Variable& getArg() const { return name_; }

  // The declared parameter type (returned by value).
  VarType getType() const { return type_; }

private:
  friend class Parser;

  Variable name_;
  VarType type_;
};
// Common base of all AST nodes. Each node carries a small tag identifying its
// concrete class; the tag is fixed at construction.
class AstNode {
public:
  // One TYPE_<Name> enumerator per AST_NODES entry, in table order.
  enum ClassType : uint8_t {
#define DECLARE_NODE_TAG(NAME) TYPE_##NAME,
    AST_NODES(DECLARE_NODE_TAG)
#undef DECLARE_NODE_TAG
  };

  // Returns the tag passed to the constructor by the concrete subclass.
  ClassType getClassType() const { return type_tag_; }

protected:
  // Only subclasses may create nodes; they pass their own tag.
  AstNode(ClassType type_tag) : type_tag_(type_tag) {}

private:
  ClassType type_tag_;
};
// Base class of all expression nodes. Inherits both AstNode and LocationMixin.
class Expression : public AstNode, public LocationMixin {
public:
  Expression(Expression::ClassType type) : AstNode(type) {}
  virtual ~Expression() = default;

  // True when this node's tag is TYPE_FunctionCallExpression.
  bool isFunctionCall() const {
    return Expression::TYPE_FunctionCallExpression == getClassType();
  }
};
class BinaryExpression final : public Expression {
public:
enum Type {
#define DECLARE_OP(_, NAME, __, ___) NAME,
AST_BINARY_OPERATORS(DECLARE_OP)
#undef DECLARE_OP
};
Type getType() const { return type_; }
bool isSemiExpression() const { return is_semi_; }
const Expression* getLeft() const { return left_.get(); }
const Expression* getRight() const { return right_.get(); }
static const char* getOperatorRepresentation(BinaryExpression::Type type);
const char* getOperatorRepresentation() const { return getOperatorRepresentation(type_); }
private:
BinaryExpression(Type type, std::shared_ptr<Expression> left, Expression* right, bool is_semi)
: Expression(Expression::TYPE_BinaryExpression), type_(type), is_semi_(is_semi), left_(left),
right_(right) {}
Type type_;
bool is_semi_;
std::shared_ptr<Expression> left_;
std::unique_ptr<Expression> right_;
friend class Parser;
};
// A leaf expression: an identifier, an integer literal or a floating point
// literal. The payload lives in a tagged union discriminated by `type_`.
class AtomicExpression final : public Expression {
public:
  enum Type { IDENTIFIER, INTEGER, FLOAT };

  // Variable is the only union member with a non-trivial destructor, so it
  // must be destroyed by hand when it is the active member.
  ~AtomicExpression() override {
    if (type_ == Type::IDENTIFIER) {
      identifier_value_.~Variable();
    }
  }

  // Which union member is active.
  Type getType() const { return type_; }

  // Integer payload; only valid when getType() == INTEGER (asserted).
  uint64_t getIntValue() const {
    assert(type_ == Type::INTEGER);
    return int_value_;
  }

  // Floating point payload; only valid when getType() == FLOAT (asserted).
  double getFloatValue() const {
    assert(type_ == Type::FLOAT);
    return fp_value_;
  }

  // Identifier payload; only valid when getType() == IDENTIFIER (asserted).
  const Variable& getIdentifierValue() const {
    assert(type_ == Type::IDENTIFIER);
    return identifier_value_;
  }

private:
  // Starts out as the integer literal 0 so that the tag and the union are
  // never indeterminate: the destructor reads `type_`, and an uninitialised
  // tag could make it run ~Variable() on garbage. The factories below
  // overwrite both the tag and the payload.
  AtomicExpression()
      : Expression(Expression::TYPE_AtomicExpression), type_(Type::INTEGER), int_value_(0) {}

  // Builds an INTEGER atom holding `value`.
  static std::unique_ptr<AtomicExpression> fromInt(uint64_t value) {
    std::unique_ptr<AtomicExpression> expression{new AtomicExpression};
    expression->type_ = Type::INTEGER;
    expression->int_value_ = value;
    return expression;
  }

  // Builds a FLOAT atom holding `value`.
  static std::unique_ptr<AtomicExpression> fromFloat(double value) {
    std::unique_ptr<AtomicExpression> expression{new AtomicExpression};
    expression->type_ = Type::FLOAT;
    expression->fp_value_ = value;
    return expression;
  }

  // Builds an IDENTIFIER atom holding a copy of `value`.
  static std::unique_ptr<AtomicExpression> fromIdentifier(const Variable& value) {
    std::unique_ptr<AtomicExpression> expression{new AtomicExpression};
    expression->type_ = Type::IDENTIFIER;
    // Placement new starts the lifetime of the non-trivial union member.
    new (&expression->identifier_value_) Variable{value};
    return expression;
  }

  Type type_;
  union {
    uint64_t int_value_;
    double fp_value_;
    Variable identifier_value_;
  };

  friend class Parser;
};
// Base class of all statement nodes.
class Statement : public AstNode {
public:
  // `type` is the tag of the concrete statement class.
  Statement(Statement::ClassType type) : AstNode(type) {}

  virtual ~Statement() = default;
};
// A statement holding a single expression, tagged TYPE_AssertStatement.
class AssertStatement final : public Statement {
public:
  AssertStatement() : Statement(Statement::TYPE_AssertStatement) {}

  // The held expression; owned by this node.
  const Expression* getExpression() const { return expression_.get(); }

private:
  friend class Parser;

  std::unique_ptr<Expression> expression_;
};
// A call to a named function with a list of argument expressions.
class FunctionCallExpression final : public Expression {
public:
  using FunctionArgsConstIter = PointerVectorConstIter<Expression>;

  FunctionCallExpression() : Expression(Expression::TYPE_FunctionCallExpression) {}

  // Name of the callee.
  const std::string& getFunctionName() const { return function_name_; }

  // Iteration over the argument expressions, in call order.
  FunctionArgsConstIter args_begin() const { return function_args_.cbegin(); }
  FunctionArgsConstIter args_end() const { return function_args_.cend(); }
  ConstRangeWrapper<FunctionArgsConstIter> args() const { return {args_begin(), args_end()}; }

private:
  friend class Parser;

  std::string function_name_;
  std::vector<std::unique_ptr<Expression>> function_args_;
};
// A statement that consists of a single expression.
class ExpressionStatement final : public Statement {
public:
  ExpressionStatement() : Statement(Statement::TYPE_ExpressionStatement) {}

  // The wrapped expression; owned by this node.
  const Expression* getExpression() const { return expression_.get(); }

private:
  friend class Parser;

  std::unique_ptr<Expression> expression_;
};
// A statement that refers to one variable, tagged TYPE_InputStatement.
class InputStatement final : public Statement {
public:
  InputStatement() : Statement(Statement::TYPE_InputStatement) {}

  // The variable this statement refers to.
  const Variable& getVariable() const { return variable_; }

private:
  friend class Parser;

  Variable variable_;
};
// A statement with no payload, tagged TYPE_AbortStatement.
class AbortStatement final : public Statement {
public:
  AbortStatement() : Statement(Statement::TYPE_AbortStatement) {}

private:
  friend class Parser;
};
// One alternative of a branch statement: a condition expression and the
// statements to run for it.
class BranchCase final {
public:
  using BodyConstIter = PointerVectorConstIter<Statement>;

  // The condition attached to this case; owned by the case.
  const Expression* getExpression() const { return expression_.get(); }

  // Iteration over the statements of the case body, in source order.
  BodyConstIter begin_body() const { return body_.cbegin(); }
  BodyConstIter end_body() const { return body_.cend(); }
  ConstRangeWrapper<BodyConstIter> body() const { return {begin_body(), end_body()}; }

private:
  friend class Parser;

  std::unique_ptr<Expression> expression_;
  std::vector<std::unique_ptr<Statement>> body_;
};
// The fallback alternative of a branch statement: a body with no condition.
class BranchElse final {
public:
  using BodyConstIter = PointerVectorConstIter<Statement>;

  // Iteration over the statements of the else body, in source order.
  BodyConstIter begin_body() const { return body_.cbegin(); }
  BodyConstIter end_body() const { return body_.cend(); }
  ConstRangeWrapper<BodyConstIter> body() const { return {begin_body(), end_body()}; }

private:
  friend class Parser;

  std::vector<std::unique_ptr<Statement>> body_;
};
class BranchStatement final : public Statement {
public:
typedef std::vector<BranchCase>::const_iterator BranchCaseConstIter;
BranchStatement() : Statement(Statement::TYPE_BranchStatement) {}
const Variable& getLeadVariable() const { return lead_var_; }
BranchCaseConstIter begin_cases() const { return cases_.cbegin(); }
BranchCaseConstIter end_cases() const { return cases_.cend(); }
ConstRangeWrapper<BranchCaseConstIter> cases() const { return {begin_cases(), end_cases()}; }
bool hasBranchElse() const { return static_cast<bool>(maybe_else_case_); }
const BranchElse* getBranchElse() const {
assert(hasBranchElse());
return maybe_else_case_.get();
}
private:
Variable lead_var_;
std::vector<BranchCase> cases_;
std::unique_ptr<BranchElse> maybe_else_case_;
friend class Parser;
};
class VardeclStatement final : public Statement {
public:
VardeclStatement() : Statement(Statement::TYPE_VardeclStatement) {}
const Variable& getVariable() const { return variable_; }
const VarType& getType() const { return type_; }
bool hasInitializer() const { return static_cast<bool>(maybe_init_); }
const Expression* getInitializer() const {
assert(hasInitializer());
return maybe_init_.get();
}
private:
Variable variable_;
VarType type_;
std::unique_ptr<Expression> maybe_init_;
friend class Parser;
};
// A loop: a body of statements and a condition expression.
class LoopStatement final : public Statement {
public:
  using BodyConstIter = PointerVectorConstIter<Statement>;

  LoopStatement() : Statement(Statement::TYPE_LoopStatement) {}

  // Iteration over the statements of the loop body, in source order.
  BodyConstIter begin_body() const { return body_.cbegin(); }
  BodyConstIter end_body() const { return body_.cend(); }
  ConstRangeWrapper<BodyConstIter> body() const { return {begin_body(), end_body()}; }

  // The loop condition; owned by this node.
  const Expression* getCondition() const { return condition_.get(); }

private:
  friend class Parser;

  std::vector<std::unique_ptr<Statement>> body_;
  std::unique_ptr<Expression> condition_;
};
class ReturnStatement final : public Statement {
public:
ReturnStatement() : Statement(Statement::TYPE_ReturnStatement) {}
bool hasExpression() const { return static_cast<bool>(maybe_expression_); }
const Expression* getExpression() const {
assert(hasExpression());
return maybe_expression_.get();
}
private:
std::unique_ptr<Expression> maybe_expression_;
friend class Parser;
};
// A statement holding a single expression, tagged TYPE_PrintStatement.
class PrintStatement final : public Statement {
public:
  PrintStatement() : Statement(Statement::TYPE_PrintStatement) {}

  // The held expression; owned by this node.
  const Expression* getExpression() const { return expression_.get(); }

private:
  friend class Parser;

  std::unique_ptr<Expression> expression_;
};
// An assignment: a target variable and the expression assigned to it.
class AssignStatement final : public Statement {
public:
  AssignStatement() : Statement(Statement::TYPE_AssignStatement) {}

  // The assignment target.
  const Variable& getVariable() const { return variable_; }

  // The assigned expression; owned by this node.
  const Expression* getExpression() const { return expression_.get(); }

private:
  friend class Parser;

  std::unique_ptr<Expression> expression_;
  Variable variable_;
};
class Function final : public AstNode {
public:
Function() : AstNode(Statement::TYPE_Function) {}
typedef std::vector<FunctionParam>::const_iterator FunctionParamConstIter;
typedef PointerVectorConstIter<Statement> BodyConstIter;
const std::string& getName() const { return name_; }
const VarType& getReturnType() const { return return_type_; }
bool isEntryPoint() const { return name_.empty(); }
int params_size() const { return params_.size(); }
FunctionParamConstIter begin_params() const { return params_.cbegin(); }
FunctionParamConstIter end_params() const { return params_.cend(); }
ConstRangeWrapper<FunctionParamConstIter> params() const {
return {begin_params(), end_params()};
}
BodyConstIter begin_body() const { return body_.cbegin(); }
BodyConstIter end_body() const { return body_.cend(); }
bool body_empty() const { return body_.empty(); }
ConstRangeWrapper<BodyConstIter> body() const { return {begin_body(), end_body()}; }
private:
std::string name_;
VarType return_type_;
std::vector<FunctionParam> params_;
std::vector<std::unique_ptr<Statement>> body_;
friend class Parser;
};
class Module final : public AstNode {
public:
typedef PointerVectorConstIter<Function> FunctionsConstIter;
Module() : AstNode(AstNode::TYPE_Module) {}
bool hasEntryPoint() const { return static_cast<bool>(maybe_entry_point_); }
const Function* getEntryPoint() const {
assert(hasEntryPoint());
return maybe_entry_point_.get();
}
FunctionsConstIter begin_functions() const { return functions_.cbegin(); }
FunctionsConstIter end_functions() const { return functions_.cend(); }
ConstRangeWrapper<FunctionsConstIter> functions() const {
return {begin_functions(), end_functions()};
}
const std::string& getSourceFilename() const { return source_filename_; }
private:
std::vector<std::unique_ptr<Function>> functions_;
std::unique_ptr<Function> maybe_entry_point_;
std::string source_filename_;
friend class Parser;
};
} // namespace monicelli
#endif

754
src/codegen.cpp Normal file
View File

@ -0,0 +1,754 @@
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include "codegen.def"
#include "ast-visitor.h"
#include "parser.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/TypeBuilder.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include <vector>
using namespace monicelli;
namespace {
// A stack of lexical scopes, each mapping a variable name to the IR value
// registered for it. Lookups search from the innermost scope outwards;
// definitions always go into the innermost scope.
class NestedScopes final {
public:
  // RAII helper: opens a scope on construction, closes it on destruction.
  class Guard final {
  public:
    Guard(NestedScopes& context) : context_(context) { context_.enterScope(); }
    ~Guard() { context_.leaveScope(); }

    // A copied guard would close the same scope a second time.
    Guard(const Guard&) = delete;
    Guard& operator=(const Guard&) = delete;

  private:
    NestedScopes& context_;
  };

  NestedScopes() = default;

  // Not copyable. The canonical const-reference signatures are used so that
  // copies from const objects are rejected by these declarations as well.
  NestedScopes(const NestedScopes&) = delete;
  NestedScopes& operator=(const NestedScopes&) = delete;

  // Returns the value bound to `name` in the innermost scope that defines
  // it, or nullptr if no open scope does.
  llvm::Value* lookup(const std::string& name);

  // Binds `name` to `def` in the innermost scope. Returns false (and leaves
  // the existing binding untouched) if that scope already defines `name`.
  bool define(const std::string& name, llvm::Value* def) {
    assert(!scopes_.empty() && "Trying to define outside any scope");
    auto result = scopes_.back().insert({name, def});
    return result.second;
  }

  // Pushes a new, empty innermost scope.
  void enterScope() { scopes_.emplace_back(); }

  // Pops the innermost scope together with all its bindings.
  void leaveScope() {
    assert(!scopes_.empty() && "Trying to leave a scope, but there is none");
    scopes_.pop_back();
  }

  // Drops every scope.
  void reset() { scopes_.clear(); }

  // True when no scope is open.
  bool empty() const { return scopes_.empty(); }

private:
  // Outermost scope first, innermost last.
  std::vector<llvm::StringMap<llvm::Value*>> scopes_;
};
class IRGenerator;

// Visitor that computes the IR type an expression evaluates to, without
// emitting code. It reads state from the IRGenerator it is attached to.
class ResultTypeCalculator : public ConstAstVisitor<ResultTypeCalculator, llvm::Type*>,
                             public ErrorReportingMixin {
public:
  // `codegen` is stored as a plain pointer and is not owned.
  ResultTypeCalculator(IRGenerator* codegen, const std::string& source_filename)
      : ErrorReportingMixin(source_filename), codegen_(codegen) {}

  llvm::Type* visitBinaryExpression(const BinaryExpression* e);
  llvm::Type* visitAtomicExpression(const AtomicExpression* e);
  llvm::Type* visitFunctionCallExpression(const FunctionCallExpression* e);

private:
  IRGenerator* codegen_;
};
// AST visitor that emits LLVM IR. visitModule() creates the llvm::Module;
// releaseModule() hands it over to the caller afterwards.
class IRGenerator final : public ConstAstVisitor<IRGenerator, llvm::Value*>,
                          public ErrorReportingMixin {
public:
  IRGenerator(llvm::LLVMContext& context, const std::string& source_filename)
      : ErrorReportingMixin(source_filename),
        context_(context),
        builder_(context),
        type_calculator_(this, source_filename) {}

  // Transfers ownership of the generated module; getModule() returns null
  // afterwards.
  std::unique_ptr<llvm::Module> releaseModule() { return std::move(module_); }
  llvm::Module* getModule() { return module_.get(); }

  // Visitor entry points, one per handled AST node class.
  llvm::Value* visitModule(const Module* m);
  llvm::Value* visitFunction(const Function* f);
  llvm::Value* visitVardeclStatement(const VardeclStatement* s);
  llvm::Value* visitReturnStatement(const ReturnStatement* r);
  llvm::Value* visitAssignStatement(const AssignStatement* a);
  llvm::Value* visitBranchStatement(const BranchStatement* b);
  llvm::Value* visitLoopStatement(const LoopStatement* l);
  llvm::Value* visitInputStatement(const InputStatement* s);
  llvm::Value* visitPrintStatement(const PrintStatement* p);

  // Evaluates the expression for its side effects; the value is discarded.
  llvm::Value* visitExpressionStatement(const ExpressionStatement* s) {
    visit(s->getExpression());
    return nullptr;
  }

  llvm::Value* visitBinaryExpression(const BinaryExpression* e);
  llvm::Value* visitAtomicExpression(const AtomicExpression* e);
  llvm::Value* visitFunctionCallExpression(const FunctionCallExpression* f);

private:
  // Creates the IR prototype of `f` in the current module.
  llvm::Function* declareFunction(const Function* f);

  // The entry point is emitted under the name "main"; every other function
  // keeps its source name.
  std::string getFunctionName(const Function* f) {
    if (f->isEntryPoint()) {
      return "main";
    }
    return f->getName();
  }

  // Declares printf and scanf in the current module.
  void declareBuiltins();

  // I/O helpers; `output` selects printf (true) or scanf (false).
  template<bool output> const char* getFormatSpecifier(llvm::Type* type);
  template<bool output> llvm::Value* getFormatString(llvm::Type* type);
  template<bool output> void callIOBuiltin(llvm::Type* type, llvm::Value* value);

  // Maps a source type to its IR type, adding one pointer level if needed.
  llvm::Type* getIRType(const VarType& type) {
    llvm::Type* base_type = getIRBaseType(type.getBaseType());
    return type.isPointer() ? base_type->getPointerTo() : base_type;
  }

  llvm::Type* getIRBaseType(VarType::BaseType type);

  // Converts `value` to `type`; returns nullptr when no conversion exists.
  llvm::Value* ensureType(llvm::Value* value, llvm::Type* type);

  // Source-level spelling of IR types, used in diagnostics.
  const char* getSourceBaseType(llvm::Type* type);
  std::string getSourceType(llvm::Type* type);

  // Converts `val` to a boolean condition; nullptr if not convertible.
  llvm::Value* evalTruthiness(llvm::Value* val);

  // The IR function that owns the builder's current insertion block.
  llvm::Function* current_function() { return builder_.GetInsertBlock()->getParent(); }

  llvm::LLVMContext& context_;
  llvm::IRBuilder<> builder_;
  std::unique_ptr<llvm::Module> module_;
  NestedScopes var_scopes_;

  // Format string globals already emitted, keyed by the IR type they serve.
  llvm::DenseMap<llvm::Type*, llvm::Value*> input_format_strings_cache_;
  llvm::DenseMap<llvm::Type*, llvm::Value*> output_format_strings_cache_;

  // Per-function state, set and cleared by visitFunction(): the single exit
  // block and the stack slot holding the return value (null when void).
  llvm::BasicBlock* exit_block_ = nullptr;
  llvm::AllocaInst* return_var_ = nullptr;

  ResultTypeCalculator type_calculator_;

  friend class ResultTypeCalculator;
};
} // namespace
// Searches the open scopes from the innermost to the outermost and returns
// the first binding found for `name`, or nullptr if there is none.
llvm::Value* NestedScopes::lookup(const std::string& name) {
  for (auto depth = scopes_.size(); depth > 0; --depth) {
    const auto& scope = scopes_[depth - 1];
    auto binding = scope.find(name);
    if (binding != scope.end()) {
      return binding->second;
    }
  }
  return nullptr;
}
void IRGenerator::declareBuiltins() {
llvm::FunctionType* printf_type = llvm::TypeBuilder<int(char*, ...), false>::get(context_);
auto no_alias = llvm::AttributeSet().addAttribute(context_, 1, llvm::Attribute::NoAlias);
module_->getOrInsertFunction("printf", printf_type, no_alias);
module_->getOrInsertFunction("scanf", printf_type, no_alias);
}
// Generates IR for a whole module in two passes: first every function
// (entry point last) gets a prototype, then the bodies are emitted in the
// same order. Prototypes come first so that a call can refer to a function
// whose body has not been emitted yet. Always returns nullptr.
llvm::Value* IRGenerator::visitModule(const Module* m) {
  module_ = llvm::make_unique<llvm::Module>("antani", context_);
  declareBuiltins();

  const bool has_entry_point = m->hasEntryPoint();

  // Pass 1: prototypes.
  for (const Function* function : m->functions()) {
    declareFunction(function);
  }
  if (has_entry_point) {
    declareFunction(m->getEntryPoint());
  }

  // Pass 2: bodies.
  for (const Function* function : m->functions()) {
    visit(function);
  }
  if (has_entry_point) {
    visit(m->getEntryPoint());
  }

  // The verification result is not inspected.
  llvm::verifyModule(*module_);
  return nullptr;
}
// Creates the IR prototype for `ast_f` (external linkage, not variadic) in
// the current module and names the IR arguments after the source parameters.
llvm::Function* IRGenerator::declareFunction(const Function* ast_f) {
  std::vector<llvm::Type*> ir_param_types;
  ir_param_types.reserve(ast_f->params_size());
  for (const FunctionParam& ast_param : ast_f->params()) {
    ir_param_types.push_back(getIRType(ast_param.getType()));
  }

  llvm::Type* ir_return_type = getIRType(ast_f->getReturnType());
  auto signature = llvm::FunctionType::get(ir_return_type, ir_param_types, false);
  llvm::Function* f = llvm::Function::Create(signature, llvm::Function::ExternalLinkage,
                                             getFunctionName(ast_f), module_.get());

  // Walk IR arguments and source parameters in lockstep.
  auto ast_param = ast_f->begin_params();
  for (auto ir_arg = f->arg_begin(); ir_arg != f->arg_end(); ++ir_arg, ++ast_param) {
    ir_arg->setName(ast_param->getArg().getName());
  }
  assert(ast_param == ast_f->end_params());

  return f;
}
// Emits the body of a previously declared function and returns its IR
// function. All returns funnel through one "exit" block, which loads the
// value from the "result" stack slot (or returns void).
llvm::Value* IRGenerator::visitFunction(const Function* ast_f) {
  llvm::Function* f = module_->getFunction(getFunctionName(ast_f));
  assert(f && "This function should have had a prototype defined");

  // A function without statements stays a bare declaration.
  if (ast_f->body_empty()) {
    return f;
  }

  NestedScopes::Guard function_scope{var_scopes_};

  llvm::BasicBlock* entry_bb = llvm::BasicBlock::Create(context_, "entry", f);
  builder_.SetInsertPoint(entry_bb);

  llvm::Type* result_type = f->getReturnType();
  if (result_type->isVoidTy()) {
    return_var_ = nullptr;
  } else {
    return_var_ = builder_.CreateAlloca(result_type, nullptr, "result");
    // The entry point returns 0 unless the program says otherwise.
    if (ast_f->isEntryPoint()) {
      assert(result_type->isIntegerTy());
      builder_.CreateStore(ensureType(builder_.getInt64(0), result_type), return_var_);
    }
  }

  // Spill every argument to its own stack slot and register the slot under
  // the argument's name in the function scope.
  for (auto& arg : f->args()) {
    auto arg_slot = builder_.CreateAlloca(arg.getType(), nullptr, arg.getName());
    builder_.CreateStore(&arg, arg_slot);
    var_scopes_.define(arg.getName(), arg_slot);
  }

  // The exit block is created detached and appended once the body is done.
  exit_block_ = llvm::BasicBlock::Create(context_, "exit");

  for (const Statement* statement : ast_f->body()) {
    visit(statement);
  }

  // Falling off the end of the body continues into the exit block.
  builder_.CreateBr(exit_block_);
  f->getBasicBlockList().push_back(exit_block_);
  builder_.SetInsertPoint(exit_block_);

  if (return_var_ == nullptr) {
    builder_.CreateRetVoid();
  } else {
    builder_.CreateRet(builder_.CreateLoad(return_var_));
  }

  // The verification result is not inspected.
  llvm::verifyFunction(*f);

  exit_block_ = nullptr;
  return_var_ = nullptr;
  return f;
}
// Allocates a stack slot for the declared variable, registers it in the
// innermost scope and stores the converted initializer, if there is one.
// Returns the slot.
llvm::Value* IRGenerator::visitVardeclStatement(const VardeclStatement* s) {
  const auto& var_name = s->getVariable().getName();
  llvm::AllocaInst* slot = builder_.CreateAlloca(getIRType(s->getType()), nullptr, var_name);

  if (!var_scopes_.define(var_name, slot)) {
    error(&s->getVariable(), "redefining an existing variable");
  }

  if (!s->hasInitializer()) {
    return slot;
  }

  llvm::Value* init_value = visit(s->getInitializer());
  // Remember the type before conversion for the diagnostic below.
  llvm::Type* init_type = init_value->getType();
  llvm::Type* slot_type = slot->getType()->getPointerElementType();

  llvm::Value* converted = ensureType(init_value, slot_type);
  if (!converted) {
    error(s->getInitializer(), "cannot initialize variable of type", getSourceType(slot_type),
          "with expression of type", getSourceType(init_type));
  }

  builder_.CreateStore(converted, slot);
  return slot;
}
// Emits a return: stores the converted value (if any) into the function's
// result slot, then jumps to the shared exit block. Always returns nullptr.
//
// NOTE(review): like the rest of this file, this assumes error() does not
// return to the caller - confirm against ErrorReportingMixin.
llvm::Value* IRGenerator::visitReturnStatement(const ReturnStatement* r) {
  if (r->hasExpression()) {
    // visitFunction() leaves return_var_ null for functions returning void;
    // report that instead of dereferencing a null pointer below.
    if (!return_var_) {
      error(r->getExpression(), "cannot return a value from a function without a return type");
    }
    auto return_value = visit(r->getExpression());
    // Remember the type before conversion for the diagnostic below.
    auto original_return_type = return_value->getType();
    auto return_type = return_var_->getType()->getPointerElementType();
    return_value = ensureType(return_value, return_type);
    if (!return_value) {
      // Report source-level type names, as every other diagnostic does,
      // rather than raw llvm::Type pointers.
      error(r->getExpression(), "cannot return expression of type",
            getSourceType(original_return_type), "from function of type",
            getSourceType(return_type));
    }
    builder_.CreateStore(return_value, return_var_);
  }
  builder_.CreateBr(exit_block_);
  // Code after the return will end up in this unreachable BB and DCE will
  // take care of it.
  //
  // The block is intentionally left without a terminator here: the builder
  // keeps appending to it, and whichever construct comes next emits the
  // terminator. Emitting `unreachable` at this point would leave every later
  // instruction after a terminator, which is not a well-formed basic block.
  llvm::BasicBlock* after = llvm::BasicBlock::Create(context_, "return.after", current_function());
  builder_.SetInsertPoint(after);
  return nullptr;
}
// Evaluates the right-hand side, converts it to the type of the target
// variable and stores it there. Always returns nullptr.
llvm::Value* IRGenerator::visitAssignStatement(const AssignStatement* a) {
  llvm::Value* rhs = visit(a->getExpression());
  assert(rhs && "unhandled error while building expression");

  const Variable& target = a->getVariable();
  llvm::Value* slot = var_scopes_.lookup(target.getName());
  if (!slot) {
    error(&target, "assigning to undefined variable", target.getName());
  }

  // Remember the type before conversion for the diagnostic below.
  llvm::Type* rhs_type = rhs->getType();
  llvm::Type* slot_type = slot->getType()->getPointerElementType();

  llvm::Value* converted = ensureType(rhs, slot_type);
  if (!converted) {
    error(a->getExpression(), "cannot assign expression of type", getSourceType(rhs_type),
          "to variable of type", getSourceType(slot_type));
  }

  builder_.CreateStore(converted, slot);
  return nullptr;
}
// Converts `val` into a boolean condition ("is it different from zero?").
// Comparison results are returned unchanged. Returns nullptr for values that
// are neither integers nor floating point.
llvm::Value* IRGenerator::evalTruthiness(llvm::Value* val) {
  if (llvm::isa<llvm::CmpInst>(val)) return val;

  llvm::Type* val_type = val->getType();
  llvm::Constant* zero = nullptr;
  if (val_type->isIntegerTy() || val_type->isFloatingPointTy()) {
    // All-zero constant of the operand's own type (0 or +0.0).
    zero = llvm::Constant::getNullValue(val_type);
  } else {
    return nullptr;
  }

  if (val_type->isFloatingPointTy()) {
    // Integer compares are not valid on floating point operands. The
    // unordered "not equal" predicate makes NaN truthy, matching `x != 0`.
    return builder_.CreateFCmpUNE(val, zero, "cond");
  }
  return builder_.CreateICmpNE(val, zero, "cond");
}
// Emits a chain of conditional branches: each case tests its condition and
// either runs its body and jumps to "branch.after", or falls through to the
// next case's condition block. The else body, if any, runs when no case
// matched. Always returns nullptr.
llvm::Value* IRGenerator::visitBranchStatement(const BranchStatement* b) {
  llvm::BasicBlock* entry_bb =
      llvm::BasicBlock::Create(context_, "branch.head", current_function());
  // Created detached; appended to the function once all cases are emitted.
  llvm::BasicBlock* exit_bb = llvm::BasicBlock::Create(context_, "branch.after");
  builder_.CreateBr(entry_bb);
  builder_.SetInsertPoint(entry_bb);

  llvm::BasicBlock* case_cond_bb =
      llvm::BasicBlock::Create(context_, "branch.case.cond", current_function());
  builder_.CreateBr(case_cond_bb);
  builder_.SetInsertPoint(case_cond_bb);

  for (const BranchCase& branch_case : b->cases()) {
    llvm::Value* condition = visit(branch_case.getExpression());
    // Remember the type before conversion for the diagnostic below.
    auto condition_type = condition->getType();
    condition = evalTruthiness(condition);
    if (!condition) {
      error(branch_case.getExpression(), "cannot convert expression of type",
            getSourceType(condition_type), "to boolean.");
    }

    // Condition block of the NEXT case (or the fall-through target after the
    // last one); detached until this case's body has been emitted.
    case_cond_bb = llvm::BasicBlock::Create(context_, "branch.case.cond");
    llvm::BasicBlock* case_body_bb =
        llvm::BasicBlock::Create(context_, "branch.case.body", current_function());
    builder_.CreateCondBr(condition, case_body_bb, case_cond_bb);
    builder_.SetInsertPoint(case_body_bb);

    {
      // Each case body gets its own scope, like the else body and loop
      // bodies do. Otherwise a variable declared inside one case would stay
      // visible to the following cases and to the code after the branch,
      // where its stack slot is not guaranteed to have been created.
      NestedScopes::Guard scope_guard{var_scopes_};
      for (const Statement* s : branch_case.body()) {
        visit(s);
      }
    }

    builder_.CreateBr(exit_bb);
    current_function()->getBasicBlockList().push_back(case_cond_bb);
    builder_.SetInsertPoint(case_cond_bb);
  }

  if (b->hasBranchElse()) {
    NestedScopes::Guard scope_guard{var_scopes_};
    llvm::BasicBlock* else_bb =
        llvm::BasicBlock::Create(context_, "branch.else", current_function());
    builder_.CreateBr(else_bb);
    builder_.SetInsertPoint(else_bb);
    for (const Statement* s : b->getBranchElse()->body()) {
      visit(s);
    }
  }

  builder_.CreateBr(exit_bb);
  current_function()->getBasicBlockList().push_back(exit_bb);
  builder_.SetInsertPoint(exit_bb);
  return nullptr;
}
// Emits a loop whose body runs first and whose condition is tested
// afterwards: "loop.body" -> "loop.condition" -> back to the body while the
// condition holds, else on to "loop.after". Always returns nullptr.
llvm::Value* IRGenerator::visitLoopStatement(const LoopStatement* l) {
  llvm::BasicBlock* loop_body =
      llvm::BasicBlock::Create(context_, "loop.body", current_function());
  builder_.CreateBr(loop_body);
  builder_.SetInsertPoint(loop_body);

  // The body has its own scope, closed before the condition is evaluated.
  {
    NestedScopes::Guard body_scope{var_scopes_};
    for (const Statement* statement : l->body()) {
      visit(statement);
    }
  }

  llvm::BasicBlock* loop_condition =
      llvm::BasicBlock::Create(context_, "loop.condition", current_function());
  // Created detached; appended after the conditional branch is in place.
  llvm::BasicBlock* loop_after = llvm::BasicBlock::Create(context_, "loop.after");
  builder_.CreateBr(loop_condition);
  builder_.SetInsertPoint(loop_condition);

  llvm::Value* raw_condition = visit(l->getCondition());
  llvm::Type* raw_condition_type = raw_condition->getType();
  llvm::Value* keep_looping = evalTruthiness(raw_condition);
  if (!keep_looping) {
    error(l->getCondition(), "cannot convert expression of type",
          getSourceType(raw_condition_type), "to boolean");
  }

  builder_.CreateCondBr(keep_looping, loop_body, loop_after);
  current_function()->getBasicBlockList().push_back(loop_after);
  builder_.SetInsertPoint(loop_after);
  return nullptr;
}
// Returns the source-level name (fifth column of BUILTIN_TYPES) of the
// builtin whose IR type equals `type`. Only integer and floating point types
// are accepted.
const char* IRGenerator::getSourceBaseType(llvm::Type* type) {
  assert(type->isIntegerTy() || type->isFloatingPointTy());
// One test per table entry; the first match wins.
#define MATCH_SOURCE_NAME(_1, IR_TYPE, _2, _3, SOURCE_NAME, _4) \
  if (builder_.get##IR_TYPE##Ty() == type) {                   \
    return SOURCE_NAME;                                        \
  }
  BUILTIN_TYPES(MATCH_SOURCE_NAME)
#undef MATCH_SOURCE_NAME
  UNREACHABLE("Unhandled base type.");
}
// Spells an IR type the way the source language does: one "conte " prefix
// per pointer level, followed by the name of the base type.
std::string IRGenerator::getSourceType(llvm::Type* type) {
  std::string spelling;
  llvm::Type* pointee = type;
  for (; pointee->isPointerTy(); pointee = pointee->getPointerElementType()) {
    spelling += "conte ";
  }
  spelling += getSourceBaseType(pointee);
  return spelling;
}
namespace {
// Format specifier used when READING a value of `type`: the third column of
// the matching BUILTIN_TYPES entry.
template<> const char* IRGenerator::getFormatSpecifier<false>(llvm::Type* type) {
  assert(type->isIntegerTy() || type->isFloatingPointTy());
#define MATCH_INPUT_SPECIFIER(_1, IR_TYPE, INPUT_SPEC, _2, _3, _4) \
  if (builder_.get##IR_TYPE##Ty() == type) {                      \
    return INPUT_SPEC;                                            \
  }
  BUILTIN_TYPES(MATCH_INPUT_SPECIFIER)
#undef MATCH_INPUT_SPECIFIER
  UNREACHABLE("Unhandled input format specifier");
}

// Format specifier used when PRINTING a value of `type`: the fourth column
// of the matching BUILTIN_TYPES entry.
template<> const char* IRGenerator::getFormatSpecifier<true>(llvm::Type* type) {
  assert(type->isIntegerTy() || type->isFloatingPointTy());
#define MATCH_OUTPUT_SPECIFIER(_1, IR_TYPE, _2, OUTPUT_SPEC, _3, _4) \
  if (builder_.get##IR_TYPE##Ty() == type) {                        \
    return OUTPUT_SPEC;                                             \
  }
  BUILTIN_TYPES(MATCH_OUTPUT_SPECIFIER)
#undef MATCH_OUTPUT_SPECIFIER
  UNREACHABLE("Unhandled output format specifier");
}
} // namespace
// Returns the global format string for `type`, creating it on first use.
// Input and output strings are cached separately, keyed by IR type.
template<bool output> llvm::Value* IRGenerator::getFormatString(llvm::Type* type) {
  auto& cache = output ? output_format_strings_cache_ : input_format_strings_cache_;

  auto cached = cache.find(type);
  if (cached == cache.end()) {
    auto format = builder_.CreateGlobalStringPtr(getFormatSpecifier<output>(type), "format");
    cache.insert({type, format});
    return format;
  }
  return cached->second;
}
// Emits a call to printf (output) or scanf (input) with the format string
// for `type` as first argument and `value` as second.
template<bool output> void IRGenerator::callIOBuiltin(llvm::Type* type, llvm::Value* value) {
  const char* builtin_name = output ? "printf" : "scanf";
  auto builtin = module_->getFunction(builtin_name);
  assert(builtin && "Builtin was not declared");

  llvm::Value* format = getFormatString<output>(type);
  llvm::Value* call_args[] = {format, value};
  builder_.CreateCall(builtin, call_args);
}
// Emits a scanf call that reads into the named variable. A 1-bit variable is
// read through a temporary 32-bit slot whose truthiness is then stored into
// the variable. Always returns nullptr.
llvm::Value* IRGenerator::visitInputStatement(const InputStatement* s) {
  llvm::Value* var = var_scopes_.lookup(s->getVariable().getName());
  if (!var) {
    error(&s->getVariable(), "reading an undefined variable");
  }
  assert(var->getType()->isPointerTy());

  llvm::Type* target_type = var->getType()->getPointerElementType();
  const bool reading_bool = target_type == builder_.getInt1Ty();
  if (!(target_type->isIntegerTy() || target_type->isFloatingPointTy())) {
    error(&s->getVariable(), "can only read integers and floating point");
  }

  if (!reading_bool) {
    // Common case: scanf writes straight into the variable's slot.
    callIOBuiltin<false>(target_type, var);
    return nullptr;
  }

  // scanf is not pointed at the 1-bit slot itself: read into a 32-bit
  // temporary, then collapse it to true/false.
  llvm::Value* scratch = builder_.CreateAlloca(builder_.getInt32Ty());
  callIOBuiltin<false>(target_type, scratch);
  builder_.CreateStore(evalTruthiness(builder_.CreateLoad(scratch)), var);
  return nullptr;
}
// Emits a printf call for the value of the expression. The format string is
// chosen from the expression's own type; the value itself is widened first,
// as variadic calls require. Always returns nullptr.
llvm::Value* IRGenerator::visitPrintStatement(const PrintStatement* p) {
  llvm::Value* printed = visit(p->getExpression());
  llvm::Type* printed_type = printed->getType();

  const bool is_integer = printed_type->isIntegerTy();
  if (!is_integer && !printed_type->isFloatingPointTy()) {
    error(p->getExpression(), "only integer and float valued expressions may be printed");
  }

  if (is_integer && printed_type->getIntegerBitWidth() < 32) {
    // Integer promotion for variadic call.
    printed = builder_.CreateZExt(printed, builder_.getInt32Ty());
  } else if (printed_type == builder_.getFloatTy()) {
    // Same, for floating point: float is passed as double.
    printed = builder_.CreateFPCast(printed, builder_.getDoubleTy());
  }

  callIOBuiltin<true>(printed_type, printed);
  return nullptr;
}
// Emits a binary operation. Both operands are evaluated, converted to the
// common result type computed by the ResultTypeCalculator, and combined with
// the integer or floating point flavour of the operator.
//
// NOTE(review): like the rest of this file, this assumes error() does not
// return to the caller - confirm against ErrorReportingMixin.
llvm::Value* IRGenerator::visitBinaryExpression(const BinaryExpression* e) {
  auto lhs = visit(e->getLeft());
  auto rhs = visit(e->getRight());
  if (lhs->getType()->isPointerTy() || rhs->getType()->isPointerTy()) {
    error(e, "pointer arithmetic is not supported");
  }

  llvm::Type* result_type = type_calculator_.visit(e);

  // Remember the types before conversion for the diagnostics below.
  auto original_lhs_type = lhs->getType();
  auto original_rhs_type = rhs->getType();
  lhs = ensureType(lhs, result_type);
  rhs = ensureType(rhs, result_type);

  // Each operand is converted to the RESULT type, so a failure is reported
  // with the failing operand's own type as source and the result type as
  // target.
  if (!lhs) {
    error(e->getLeft(), "cannot cast expression of type", getSourceType(original_lhs_type),
          "to type", getSourceType(result_type));
  }
  if (!rhs) {
    error(e->getRight(), "cannot cast expression of type", getSourceType(original_rhs_type),
          "to type", getSourceType(result_type));
  }

  if (result_type->isIntegerTy()) {
    switch (e->getType()) {
#define RETURN_INT_BINOP(NAME, OP) \
  case BinaryExpression::NAME:     \
    return builder_.CreateBinOp(llvm::Instruction::OP, lhs, rhs);
      IR_INT_BINARY_OPS(RETURN_INT_BINOP)
#undef RETURN_INT_BINOP
#define RETURN_INT_CMPOP(NAME, OP) \
  case BinaryExpression::NAME:     \
    return builder_.CreateICmp(llvm::CmpInst::ICMP_##OP, lhs, rhs);
      IR_INT_CMP_OPS(RETURN_INT_CMPOP)
#undef RETURN_INT_CMPOP
    default:
      error(e, "this operation cannot be applied to integers");
    }
    return nullptr;
  }

  if (result_type->isFloatingPointTy()) {
    switch (e->getType()) {
#define RETURN_FLOAT_BINOP(NAME, OP) \
  case BinaryExpression::NAME:       \
    return builder_.CreateBinOp(llvm::Instruction::OP, lhs, rhs);
      IR_FLOAT_BINARY_OPS(RETURN_FLOAT_BINOP)
#undef RETURN_FLOAT_BINOP
#define RETURN_FLOAT_CMPOP(NAME, OP) \
  case BinaryExpression::NAME:       \
    return builder_.CreateFCmp(llvm::CmpInst::FCMP_##OP, lhs, rhs);
      IR_FLOAT_CMP_OPS(RETURN_FLOAT_CMPOP)
#undef RETURN_FLOAT_CMPOP
    default:
      error(e, "this operation cannot be applied to floats");
    }
    return nullptr;
  }

  UNREACHABLE("Unimplemented operand conversion");
}
// Emits a leaf expression: a 32-bit integer constant, a double constant, or
// a load from the stack slot registered for an identifier.
llvm::Value* IRGenerator::visitAtomicExpression(const AtomicExpression* e) {
  const auto kind = e->getType();

  if (kind == AtomicExpression::INTEGER) {
    return builder_.getInt32(e->getIntValue());
  }

  if (kind == AtomicExpression::FLOAT) {
    return llvm::ConstantFP::get(builder_.getDoubleTy(), e->getFloatValue());
  }

  if (kind == AtomicExpression::IDENTIFIER) {
    const Variable& identifier = e->getIdentifierValue();
    auto slot = var_scopes_.lookup(identifier.getName());
    if (!slot) {
      error(&identifier, "undefined variable", identifier.getName());
    }
    assert(llvm::isa<llvm::AllocaInst>(slot));
    return builder_.CreateLoad(slot);
  }

  UNREACHABLE("Unhandled AtomicExpression type");
}
// Emits a call to a function of the current module. Every argument is
// evaluated and converted to the type of the matching formal parameter; a
// mismatch in the number of arguments is reported as an error.
//
// NOTE(review): like the rest of this file, this assumes error() does not
// return to the caller - confirm against ErrorReportingMixin.
llvm::Value* IRGenerator::visitFunctionCallExpression(const FunctionCallExpression* ast_f) {
  llvm::Function* f = module_->getFunction(ast_f->getFunctionName());
  if (!f) {
    error(ast_f, "call to undefined function", ast_f->getFunctionName());
  }

  std::vector<llvm::Value*> call_args;
  call_args.reserve(f->arg_size());

  auto ir_arg = f->arg_begin();
  for (const Expression* ast_arg : ast_f->args()) {
    // Check before touching ir_arg: it must not be dereferenced or advanced
    // once it has reached the end of the formal parameter list.
    if (ir_arg == f->arg_end()) {
      error(ast_arg, "too many arguments in call to", ast_f->getFunctionName());
    }
    auto arg = visit(ast_arg);
    // Remember the type before conversion for the diagnostic below.
    auto original_arg_type = arg->getType();
    arg = ensureType(arg, ir_arg->getType());
    if (!arg) {
      error(ast_arg, "cannot pass expression of type", getSourceType(original_arg_type),
            "as argument of type", getSourceType(ir_arg->getType()), "in call to",
            ast_f->getFunctionName());
    }
    call_args.push_back(arg);
    ++ir_arg;
  }

  // Formal parameters left over: the call supplied too few arguments. This
  // must be a real diagnostic, not an assert that vanishes in release builds.
  if (ir_arg != f->arg_end()) {
    error(ast_f, "too few arguments in call to", ast_f->getFunctionName());
  }

  return builder_.CreateCall(f, call_args);
}
// Maps a source base type to the IR type named in the second column of its
// BUILTIN_TYPES entry.
llvm::Type* IRGenerator::getIRBaseType(VarType::BaseType type) {
// One `case` per table entry.
#define BASE_TYPE_CASE(NAME, IR_TYPE, _1, _2, _3, _4) \
  case VarType::NAME: {                               \
    return builder_.get##IR_TYPE##Ty();               \
  }
  switch (type) {
    BUILTIN_TYPES(BASE_TYPE_CASE)
  default:
    UNREACHABLE("Unhandled VarType::BaseType on getIRBaseType");
  }
#undef BASE_TYPE_CASE
}
// Converts `value` to `type`, emitting a cast where needed. Returns nullptr
// when the conversion is not allowed (anything involving a pointer, unless
// the types are already identical). Integers are treated as signed.
llvm::Value* IRGenerator::ensureType(llvm::Value* value, llvm::Type* type) {
  llvm::Type* source_type = value->getType();

  const bool source_is_pointer = source_type->isPointerTy();
  if (source_is_pointer != type->isPointerTy()) {
    // Cannot cast pointer to int or vice versa.
    return nullptr;
  }
  if (source_is_pointer) {
    // Pointers are not castable.
    return source_type == type ? value : nullptr;
  }

  const bool source_is_int = source_type->isIntegerTy();
  const bool source_is_fp = source_type->isFloatingPointTy();
  const bool target_is_int = type->isIntegerTy();
  const bool target_is_fp = type->isFloatingPointTy();

  if (source_is_int && target_is_int) {
    return builder_.CreateSExtOrTrunc(value, type);
  }
  if (source_is_fp && target_is_int) {
    return builder_.CreateFPToSI(value, type);
  }
  if (source_is_int && target_is_fp) {
    return builder_.CreateSIToFP(value, type);
  }
  if (source_is_fp && target_is_fp) {
    return builder_.CreateFPCast(value, type);
  }

  UNREACHABLE("Unhandled IR type conversion");
}
// Computes the common type both operands of a binary expression are
// converted to: identical types are kept, any floating point operand makes
// the result double, and two integers of different width yield the type of
// the operand that is not narrower (the right one on a tie).
llvm::Type* ResultTypeCalculator::visitBinaryExpression(const BinaryExpression* e) {
  llvm::Type* left_type = visit(e->getLeft());
  llvm::Type* right_type = visit(e->getRight());

  // Void should not be here at all.
  if (left_type->isVoidTy() || right_type->isVoidTy()) {
    error(e, "cannot operate on void");
  }

  // Same type, job done.
  if (left_type == right_type) {
    return left_type;
  }

  // Pointers are not castable.
  const bool left_is_pointer = left_type->isPointerTy();
  const bool right_is_pointer = right_type->isPointerTy();
  if (left_is_pointer != right_is_pointer) {
    error(e, "cannot cast pointer to int");
  }
  if (left_is_pointer && right_is_pointer) {
    // implied: different pointer types.
    error(e, "cannot cast between pointer types");
  }

  // Double (floating point) always wins.
  if (left_type->isFloatingPointTy() || right_type->isFloatingPointTy()) {
    return codegen_->builder_.getDoubleTy();
  }

  // Integers always upcast.
  if (left_type->isIntegerTy() && right_type->isIntegerTy()) {
    int left_bits = left_type->getPrimitiveSizeInBits();
    int right_bits = right_type->getPrimitiveSizeInBits();
    if (left_bits > right_bits) {
      return left_type;
    }
    return right_type;
  }

  UNREACHABLE("Unhandled BinaryExpression type");
}
// Computes the IR type of an atomic expression: i32 for integer literals,
// double for floating point literals, and the pointee type of the variable for
// identifiers (variables are looked up in the code generator's scopes).
llvm::Type* ResultTypeCalculator::visitAtomicExpression(const AtomicExpression* e) {
  const auto kind = e->getType();

  if (kind == AtomicExpression::INTEGER) return codegen_->builder_.getInt32Ty();
  if (kind == AtomicExpression::FLOAT) return codegen_->builder_.getDoubleTy();

  if (kind == AtomicExpression::IDENTIFIER) {
    auto var = codegen_->var_scopes_.lookup(e->getIdentifierValue().getName());
    // The lookup is only checked in builds with assertions enabled.
    assert(var);
    return var->getType()->getPointerElementType();
  }

  UNREACHABLE("Unhandled AtomicExpression type");
}
// The type of a call expression is the return type of the callee, which is
// looked up by name in the module being generated.
llvm::Type* ResultTypeCalculator::visitFunctionCallExpression(const FunctionCallExpression* e) {
  auto callee = codegen_->module_->getFunction(e->getFunctionName());
  // The lookup is only checked in builds with assertions enabled.
  assert(callee);
  return callee->getReturnType();
}
namespace monicelli {
// Lowers the AST rooted at `ast` to LLVM IR.
//
// A fresh IRGenerator is created for `context` and the AST's source filename,
// the whole tree is visited, and ownership of the resulting module is handed
// to the caller.
std::unique_ptr<llvm::Module> generateIR(llvm::LLVMContext& context, Module* ast) {
  IRGenerator generator{context, ast->getSourceFilename()};
  generator.visit(ast);
  return generator.releaseModule();
}
// Runs a fixed pipeline of function-level optimizations over every function in
// `module`, modifying the module in place.
void runFunctionOptimizer(llvm::Module* module) {
  llvm::legacy::FunctionPassManager pass_manager{module};

  // Promote stack slots to SSA registers first: the scalar passes below work
  // on SSA values, so running mem2reg last would leave most of their
  // opportunities hidden behind loads and stores.
  pass_manager.add(llvm::createPromoteMemoryToRegisterPass());
  pass_manager.add(llvm::createInstructionCombiningPass());
  pass_manager.add(llvm::createReassociatePass());
  pass_manager.add(llvm::createGVNPass());
  pass_manager.add(llvm::createCFGSimplificationPass());
  pass_manager.add(llvm::createDeadCodeEliminationPass());

  pass_manager.doInitialization();
  for (llvm::Function& f : module->functions()) {
    pass_manager.run(f);
  }
  // Pairs with doInitialization() so passes can release per-module state.
  pass_manager.doFinalization();
}
void printIR(std::ostream& stream, llvm::Module* module) {
llvm::raw_os_ostream llvm_stream{stream};
module->print(llvm_stream, nullptr);
}
} // namespace monicelli

37
src/codegen.def Normal file
View File

@ -0,0 +1,37 @@
#ifndef MONICELLI_CODEGEN_DEF
#define MONICELLI_CODEGEN_DEF
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include "types.def"
// X-macro table of the binary operators supported on integer operands.
// Each row is V(operator, ir_name). The first column uses the same names as
// the ast_operator column of AST_BINARY_OPERATORS; the second presumably
// selects the matching IRBuilder Create<ir_name> call (confirm at the
// expansion site). Shift right and division use the signed variants.
#define IR_INT_BINARY_OPS(V) \
V(SHL, Shl) \
V(SHR, AShr) \
V(PLUS, Add) \
V(MINUS, Sub) \
V(TIMES, Mul) \
V(DIV, SDiv)
// X-macro table of the comparison operators supported on integer operands.
// Each row is V(operator, predicate); all ordering predicates are the signed
// ones. The last row deliberately has no trailing backslash, so the macro ends
// here regardless of what the following line contains.
#define IR_INT_CMP_OPS(V) \
  V(EQ, EQ) \
  V(GE, SGE) \
  V(GT, SGT) \
  V(LE, SLE) \
  V(LT, SLT)
// X-macro table of the binary operators supported on floating point operands.
// Each row is V(operator, ir_name), with the same layout as the integer table.
// No shift operators are listed for floating point.
#define IR_FLOAT_BINARY_OPS(V) \
V(PLUS, FAdd) \
V(MINUS, FSub) \
V(TIMES, FMul) \
V(DIV, FDiv)
// X-macro table of the comparison operators supported on floating point
// operands. Each row is V(operator, predicate); all predicates are the ordered
// (O*) ones. The last row deliberately has no trailing backslash, so the macro
// cannot swallow the line that follows it.
#define IR_FLOAT_CMP_OPS(V) \
  V(EQ, OEQ) \
  V(GE, OGE) \
  V(GT, OGT) \
  V(LE, OLE) \
  V(LT, OLT)
#endif

23
src/codegen.h Normal file
View File

@ -0,0 +1,23 @@
#ifndef MONICELLI_CODEGEN_H
#define MONICELLI_CODEGEN_H
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include <memory>
namespace monicelli {
// Root node of the AST; only forward-declared here to keep this header light.
class Module;
// Lowers the AST rooted at `ast` to a new LLVM module created in `context`.
// The caller owns the returned module.
std::unique_ptr<llvm::Module> generateIR(llvm::LLVMContext& context, Module* ast);
// Runs a fixed set of function-level optimization passes over every function
// in `module`, modifying it in place.
void runFunctionOptimizer(llvm::Module* module);
// Prints the textual IR of `module` to `stream`.
void printIR(std::ostream& stream, llvm::Module* module);
} // namespace monicelli
#endif

52
src/errors.cpp Normal file
View File

@ -0,0 +1,52 @@
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include "errors.h"
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
namespace monicelli {
// Reports an internal invariant violation: prints `message` to standard error
// and terminates the process with abort(). Never returns.
[[noreturn]] void UNREACHABLE(const std::string& message) {
std::cerr << message << '\n';
abort();
}
// Reads lines from `file` and returns the one at position `lineNumber`
// (1-based, counted from the current stream position).
//
// Returns an empty string when the stream ends before that line is reached,
// and also when `lineNumber` is zero or negative.
static std::string getNthLine(std::istream& file, int lineNumber) {
  std::string current;
  int remaining = lineNumber;
  while (remaining-- > 0) {
    if (!std::getline(file, current)) return "";
  }
  return current;
}
// Prints the header of an error message to `stream`.
//
// When the offending source line can be read back from the source file, it is
// echoed followed by a marker line: a '^' under the column of `from` and '~'
// up to the column of `to` (or to the end of the line when the range spans
// several lines). The header always ends with "<from>: error: ", leaving the
// message text to the caller.
void ErrorReportingMixin::printErrorLocation(std::ostream& stream, const Location& from,
                                             const Location& to) {
  std::ifstream source{source_filename_};
  const std::string offending_line = getNthLine(source, from.getLine());

  if (!offending_line.empty()) {
    const int first_column = from.getColumn();

    // Exclusive end of the underlined area.
    int last_column;
    if (from.getLine() == to.getLine()) {
      last_column = to.getColumn() - 1;
    } else {
      last_column = static_cast<int>(offending_line.size());
    }

    stream << offending_line << '\n';
    // Columns are 1-based, so the caret is preceded by first_column - 1 spaces.
    for (int padding = first_column - 1; padding > 0; --padding) {
      stream << ' ';
    }
    stream << '^';
    // Nothing is underlined when from and to are the same location.
    for (int column = first_column; column < last_column; ++column) {
      stream << '~';
    }
  }

  stream << '\n' << from << ": error: ";
}
} // namespace monicelli

59
src/errors.h Normal file
View File

@ -0,0 +1,59 @@
#ifndef MONICELLI_ERRORS_H
#define MONICELLI_ERRORS_H
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include "location.h"
#include "support.h"
#include <iostream>
#include <string>
namespace monicelli {
[[noreturn]] void UNREACHABLE(const std::string& message);
// Mixin giving compiler stages a uniform way to report fatal errors that point
// at a location in the source file.
//
// Every error() overload prints the location header to std::cerr, prints the
// message pieces through print() (not declared in this file; presumably
// provided by support.h), and then terminates the process with exit(1).
class ErrorReportingMixin {
protected:
// `source_filename` is the file that printErrorLocation() reopens to quote the
// offending line.
explicit ErrorReportingMixin(const std::string& source_filename)
: source_filename_(source_filename) {}
const std::string& getSourceFilename() const { return source_filename_; }
// Prints the quoted source line, the range marker and the "<from>: error: "
// prefix to `stream`.
void printErrorLocation(std::ostream& stream, const Location& from, const Location& to);
// Reports an error spanning `obj`. `obj` must be pointer-like and expose
// getFirstLocation() and getLastLocation().
template<typename Locatable, typename First>
[[noreturn]] void error(const Locatable& obj, const First& first) {
printErrorLocation(std::cerr, obj->getFirstLocation(), obj->getLastLocation());
print(std::cerr, first);
exit(1);
}
// Same as above, with a message made of several pieces.
template<typename Locatable, typename First, typename... Tail>
[[noreturn]] void error(const Locatable& obj, const First& first, Tail... tail) {
printErrorLocation(std::cerr, obj->getFirstLocation(), obj->getLastLocation());
print(std::cerr, first, tail...);
exit(1);
}
// Reports an error at the single location `where`.
template<typename First>
[[noreturn]] void error(const Location& where, const First& first) {
printErrorLocation(std::cerr, where, where);
print(std::cerr, first);
exit(1);
}
// Same as above, with a message made of several pieces.
template<typename First, typename... Tail>
[[noreturn]] void error(const Location& where, const First& first, Tail... tail) {
printErrorLocation(std::cerr, where, where);
print(std::cerr, first, tail...);
exit(1);
}
private : std::string source_filename_;
};
} // namespace monicelli
#endif

48
src/iterators.h Normal file
View File

@ -0,0 +1,48 @@
#ifndef MONICELLI_ITERATORS_H
#define MONICELLI_ITERATORS_H
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include <memory>
#include <vector>
namespace monicelli {
// Read-only iterator over a std::vector<std::unique_ptr<T>> that hides the
// smart pointers: dereferencing yields a plain `const T*` to the pointee.
//
// Supports what a range-based for loop needs (operator*, pre-increment,
// operator!=) plus operator->, post-increment and operator==. The comparison
// operators are const-qualified so they also work on const iterators.
template<typename T> class PointerVectorConstIter final {
public:
  typedef typename std::vector<std::unique_ptr<T>>::const_iterator ConstIter;

  // Intentionally implicit: call sites build the iterator from cbegin()/cend().
  PointerVectorConstIter(ConstIter iter) : internal_iter_(iter) {}

  // Both accessors return the raw pointer held by the current unique_ptr.
  const T* operator*() const { return internal_iter_->get(); }
  const T* operator->() const { return internal_iter_->get(); }

  bool operator==(const PointerVectorConstIter& other) const {
    return internal_iter_ == other.internal_iter_;
  }
  bool operator!=(const PointerVectorConstIter& other) const {
    return internal_iter_ != other.internal_iter_;
  }

  PointerVectorConstIter<T>& operator++() {
    ++internal_iter_;
    return *this;
  }
  // Returns an iterator to the element that was current before the increment.
  PointerVectorConstIter<T> operator++(int) { return {internal_iter_++}; }

private:
  ConstIter internal_iter_;
};
// Bundles a [begin, end) iterator pair into an object usable in a range-based
// for loop. The iterators are stored and returned by value.
template<typename IterT> class ConstRangeWrapper final {
public:
  ConstRangeWrapper(IterT first, IterT last) : begin_(first), end_(last) {}

  IterT begin() const { return begin_; }
  IterT end() const { return end_; }

private:
  IterT begin_;
  IterT end_;
};
} // namespace monicelli
#endif

102
src/lexer.cpp Normal file
View File

@ -0,0 +1,102 @@
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include "lexer.h"
#include "errors.h"
#include <cstring>
namespace monicelli {
// Returns the name of a builtin type as a string literal, for debug output.
// The cases are generated from BUILTIN_TYPES: each row yields its NAME column
// stringified. Values outside the table end up in UNREACHABLE.
static const char* builtinTypeToString(Token::BuiltinTypeValue type) {
switch (type) {
// Only the NAME column is used; the other five are ignored.
#define RETURN_TYPE_NAME(NAME, _1, _2, _3, _4, _5) \
case Token::BUILTIN_TYPE_##NAME: \
return #NAME;
BUILTIN_TYPES(RETURN_TYPE_NAME)
#undef RETURN_TYPE_NAME
default:
UNREACHABLE("Unhandled BuiltinType.");
}
}
// Writes a one-line debug representation of the token to `stream`, in the form
//   <NAME(value)@first-last>
// where "(value)" is present only for tokens that carry a payload and
// first/last are the token's source locations. The line ends with '\n'.
void Token::print(std::ostream& stream) {
// First part: "<" followed by the token name, generated from LEXER_TOKENS.
switch (type_) {
#define PRINT_TOKEN_NAME(TOKEN, _) \
case Token::TOKEN_##TOKEN: \
stream << "<" #TOKEN; \
break;
LEXER_TOKENS(PRINT_TOKEN_NAME)
#undef PRINT_TOKEN_NAME
}
// Second part: the payload, chosen by the value type associated with the
// token type (not by value_type_).
switch (getValueTypeForToken(type_)) {
case ValueType::INTEGER:
stream << '(' << int_value_ << ')';
break;
case ValueType::FLOAT:
stream << '(' << fp_value_ << ')';
break;
case ValueType::STRING:
stream << '(' << string_value_ << ')';
break;
case ValueType::BUILTIN_TYPE:
stream << '(' << builtinTypeToString(builtin_type_value_) << ')';
break;
case ValueType::VOID:
default:
break;
}
stream << '@' << getFirstLocation() << '-' << getLastLocation() << ">\n";
}
// Returns true when the token type is one of those listed in
// LEXER_OPERATOR_TOKENS, false otherwise.
bool Token::isOperator() const {
switch (type_) {
// Expands to one `case` label per operator token; they all fall through to
// the single `return true` below.
#define CASE_NAME(NAME, _) case Token::TOKEN_##NAME:
LEXER_OPERATOR_TOKENS(CASE_NAME)
#undef CASE_NAME
return true;
default:
return false;
}
}
// static
// Returns the kind of payload carried by tokens of the given type, as declared
// in the second column of LEXER_TOKENS. Types outside the table end up in
// UNREACHABLE.
Token::ValueType Token::getValueTypeForToken(Token::TokenType type) {
switch (type) {
#define RETURN_VALUE_TYPE(TYPE, VALUE_TYPE) \
case Token::TOKEN_##TYPE: \
return ValueType::VALUE_TYPE;
LEXER_TOKENS(RETURN_VALUE_TYPE)
#undef RETURN_VALUE_TYPE
default:
UNREACHABLE("Unknown token type.");
}
}
// Appends as much data as fits from `input` after the bytes already held, then
// rewinds the cursor to the start of the buffer.
//
// When the buffer is already full its storage is doubled first, so that every
// call makes room for at least one more byte. Growing replaces the underlying
// storage: pointers previously obtained from getData(), getDataEnd() or
// getCursor() must be fetched again after this call.
void Buffer::imbue(std::istream& input) {
  if (capacity_ - size_ <= 0) {
    const int grown_capacity = capacity_ > 0 ? capacity_ * 2 : 1;
    std::unique_ptr<char[]> grown{new char[grown_capacity]};
    memcpy(grown.get(), data_.get(), size_);
    data_.swap(grown);
    capacity_ = grown_capacity;
  }

  const int to_read = capacity_ - size_;
  input.read(data_.get() + size_, to_read);
  // gcount() is bounded by to_read, so the narrowing cast is safe.
  size_ += static_cast<int>(input.gcount());
  cursor_ = data_.get();
}
// Refills the buffer from the input stream.
//
// If a match is in progress (state_.ts is set), the bytes of the partial match
// are moved to the front of the buffer and kept; otherwise the buffer is
// emptied first. The match pointers are recomputed from offsets after the
// refill, so they stay valid even if refilling replaces the buffer's storage.
void Lexer::advanceBuffer() {
  if (state_.ts) {
    // If there is a match in progress, keep it.
    const int ts_offset = state_.ts - buffer_.getData();
    const int match_length = state_.te - state_.ts;
    buffer_.shift(ts_offset);
    buffer_.imbue(input_);
    state_.ts = buffer_.getData();
    state_.te = state_.ts + match_length;
  } else {
    buffer_.clear();
    buffer_.imbue(input_);
  }
}
} // namespace monicelli

43
src/lexer.def Normal file
View File

@ -0,0 +1,43 @@
#ifndef MONICELLI_LEXER_DEF
#define MONICELLI_LEXER_DEF
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include "operators.def"
#include "types.def"
// X-macro table of every token the lexer can produce.
// Each row is V(token_name, value_type): token_name becomes Token::TOKEN_<name>
// and value_type names the Token::ValueType payload carried by that token.
// The operator tokens are spliced in from LEXER_OPERATOR_TOKENS.
// The last row deliberately has no trailing backslash, so the macro cannot
// swallow the line that follows it.
#define LEXER_TOKENS(V) \
  V(IDENTIFIER, STRING) \
  V(INTEGER, INTEGER) \
  V(FLOAT, FLOAT) \
  V(TYPENAME, BUILTIN_TYPE) \
  LEXER_OPERATOR_TOKENS(V) \
  V(STAR, VOID) \
  V(VARDECL, VOID) \
  V(ARTICLE, VOID) \
  V(BANG, VOID) \
  V(COLON, VOID) \
  V(COMMA, VOID) \
  V(BRANCH_BEGIN, VOID) \
  V(ASSIGN, VOID) \
  V(PRINT, VOID) \
  V(INPUT, VOID) \
  V(ASSERT, VOID) \
  V(ABORT, VOID) \
  V(LOOP_BEGIN, VOID) \
  V(LOOP_CONDITION, VOID) \
  V(BRANCH_CONDITION, VOID) \
  V(BRANCH_ELSE, VOID) \
  V(BRANCH_END, VOID) \
  V(CASE_END, VOID) \
  V(ENTRY_POINT, VOID) \
  V(FUN_DECL, VOID) \
  V(FUN_CALL, VOID) \
  V(FUN_END, VOID) \
  V(FUN_PARAMS, VOID) \
  V(RETURN, VOID) \
  V(END, VOID) \
  V(UNKNOWN, VOID)
#endif

186
src/lexer.h Normal file
View File

@ -0,0 +1,186 @@
#ifndef MONICELLI_LEXER_H
#define MONICELLI_LEXER_H
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include "lexer.def"
#include "location.h"
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <memory>
#include <new>
#include <string>
#include <utility>
namespace monicelli {
class Token final : public LocationMixin {
public:
enum TokenType {
#define DECLARE_TOKEN(NAME, _) TOKEN_##NAME,
LEXER_TOKENS(DECLARE_TOKEN)
#undef DECLARE_TOKEN
};
enum BuiltinTypeValue {
#define DECLARE_TYPE(NAME, _1, _2, _3, _4, _5) BUILTIN_TYPE_##NAME,
BUILTIN_TYPES(DECLARE_TYPE)
#undef DECLARE_TYPE
};
~Token() {
if (value_type_ == ValueType::STRING) {
string_value_.std::string::~string();
}
}
TokenType getType() const { return type_; }
operator TokenType() const { return getType(); }
bool isOperator() const;
uint64_t getIntValue() const {
assert(getValueTypeForToken(type_) == ValueType::INTEGER);
return int_value_;
}
double getFloatValue() const {
assert(getValueTypeForToken(type_) == ValueType::FLOAT);
return fp_value_;
}
BuiltinTypeValue getBuiltinTypeValue() const {
assert(getValueTypeForToken(type_) == ValueType::BUILTIN_TYPE);
return builtin_type_value_;
}
const std::string& getStringValue() const {
assert(getValueTypeForToken(type_) == ValueType::STRING);
return string_value_;
}
void print(std::ostream& stream);
private:
enum class ValueType { VOID, STRING, FLOAT, INTEGER, BUILTIN_TYPE };
Token(TokenType type, Location first_location, Location last_location)
: LocationMixin(first_location, last_location), type_(type), value_type_(ValueType::VOID) {}
Token(TokenType type, Location location)
: LocationMixin(location, location), type_(type), value_type_(ValueType::VOID) {}
void setIntValue(uint64_t value) {
assert(getValueTypeForToken(type_) == ValueType::INTEGER);
int_value_ = value;
}
void setFloatValue(double value) {
assert(getValueTypeForToken(type_) == ValueType::FLOAT);
fp_value_ = value;
}
void setBuiltinTypeValue(BuiltinTypeValue value) {
assert(getValueTypeForToken(type_) == ValueType::BUILTIN_TYPE);
builtin_type_value_ = value;
}
void setStringValue(std::string&& value) {
assert(getValueTypeForToken(type_) == ValueType::STRING);
new (&string_value_) std::string(value);
}
static ValueType getValueTypeForToken(TokenType type);
TokenType type_;
ValueType value_type_;
union {
uint64_t int_value_;
double fp_value_;
BuiltinTypeValue builtin_type_value_;
std::string string_value_;
};
friend class Lexer;
};
// Fixed-capacity character buffer feeding the lexer.
//
// Holds `size_` valid bytes at the start of a heap array of `capacity_` bytes,
// plus a cursor marking how far the lexer has consumed them.
class Buffer final {
public:
  static const int DEFAULT_CAPACITY = 1024 * 1024;

  Buffer(int base_capacity = DEFAULT_CAPACITY)
      : size_(0), capacity_(base_capacity), data_(new char[base_capacity]),
        cursor_(data_.get()) {}

  // Discards the first `amount` bytes, moving the remaining ones to the front.
  // The cursor is left untouched.
  void shift(int amount) {
    assert(amount <= size_ && "Cannot shift buffer more than its size.");
    char* const base = data_.get();
    size_ -= amount;
    memmove(base, base + amount, size_);
  }

  // Reads more data from `input`; defined out of line.
  void imbue(std::istream& input);

  // Forgets the content without releasing the storage.
  void clear() { size_ = 0; }

  // True once the cursor has reached the end of the valid data.
  bool isExhausted() const { return data_.get() + size_ == cursor_; }

  char* getData() { return data_.get(); }
  char* getDataEnd() { return data_.get() + size_; }
  int getSize() const { return size_; }

  char* getCursor() { return cursor_; }
  void setCursor(char* value) {
    assert(data_.get() <= value && value <= data_.get() + size_ && "Cursor out of bounds.");
    cursor_ = value;
  }

private:
  int size_;
  int capacity_;
  std::unique_ptr<char[]> data_;
  char* cursor_;
};
// Tokenizer for Monicelli sources. Reads from an input stream through an
// internal Buffer and hands out one token per getNextToken() call.
// The scanner itself is defined out of line.
class Lexer final {
public:
// The lexer keeps a reference to `input`; it must outlive the lexer.
explicit Lexer(std::istream& input) : input_(input), trace_enabled_(false) { resetState(); }
// Returns the next token of the input.
std::unique_ptr<Token> getNextToken();
// Tracing is off by default.
bool isTraceEnabled() const { return trace_enabled_; }
void setTraceEnabled(bool enable) { trace_enabled_ = enable; }
Location getCurrentLocation() const { return current_location_; }
private:
// Advances the current column by the length of the text matched so far
// (te - ts). Requires a match to be in progress.
void advanceColumn() {
assert(state_.ts != nullptr && state_.te >= state_.ts);
current_location_.advanceColumn(state_.te - state_.ts);
}
void newLine() { current_location_.newLine(); }
// (Re)initializes the scanner state.
void resetState();
// Refills the buffer from input_, keeping any match in progress.
void advanceBuffer();
std::istream& input_;
Location current_location_;
struct {
// State of the lexer FSA. DO NOT MODIFY.
int cs;
int act;
// Start and end of the current token.
char* ts;
char* te;
} state_;
Buffer buffer_;
bool trace_enabled_;
};
} // namespace monicelli
#endif

182
src/lexer.rl Normal file
View File

@ -0,0 +1,182 @@
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
%%{
machine Lexer;
# Scanner variables (cs, act, ts, te) live in the state_ member of Lexer.
access state_.;
# Accented vowels are accepted both as UTF-8 characters and as the ASCII
# fallback "vowel followed by a backtick".
accent_a = "a`"|"à";
accent_e = "e`"|"è";
accent_u = "u`"|"ù";
accent_vowels = "à"|"è"|"é"|"ì"|"ò"|"ó"|"ù"| [aeiou] "`";
articles = "il"|"lo"|"la"|"l'"|"i"|"gli"|"le"|"un"|"un'"|"una"|"dei"|"delle";
di = "di"|"dei"|"del"|"della"|"dell'";
# Spelling variants accepted for the same keyword.
prematura = [bp] "rematura";
supercazzola = "supercazzo" [lr] "a";
# A comment starts with one of these markers and runs to the end of the line.
comment = "#"|"bituma";
identifier = (alpha | accent_vowels) (alnum | accent_vowels)*;
integer = [+\-]? digit+;
# Either a number with an exponent, or a number containing a decimal point.
float = [+\-]? (digit* "." digit+ | digit+ "."?) [eE] [+\-]? digit+
| [+\-]? (digit* "." digit+ | digit+ ".");
# Scanner entered after "con scappellamento a": picks the shift direction.
# The optional "per" that follows a shift returns to the main scanner without
# producing a token.
shift := |*
"sinistra" => { SET_TOKEN(OP_SHL); fbreak; };
"destra" => { SET_TOKEN(OP_SHR); fbreak; };
"per" => { advanceColumn(); fgoto initial; };
(space - '\n')+ => { advanceColumn(); fbreak; };
*|;
# Main scanner. Every rule that produces a token stops the scanner with
# fbreak, so that control returns to the caller with exactly one token.
initial := |*
# Punctuation and articles.
"!" => { SET_TOKEN(BANG); fbreak; };
"?" => { SET_TOKEN(BRANCH_BEGIN); fbreak; };
":" => { SET_TOKEN(COLON); fbreak; };
"," => { SET_TOKEN(COMMA); fbreak; };
"conte" => { SET_TOKEN(STAR); fbreak; };
articles => { SET_TOKEN(ARTICLE); fbreak; };
# Arithmetic, shift and comparison operators.
"pi" accent_u => { SET_TOKEN(OP_PLUS); fbreak; };
"meno" => { SET_TOKEN(OP_MINUS); fbreak; };
"per" => { SET_TOKEN(OP_TIMES); fbreak; };
"diviso" => { SET_TOKEN(OP_DIV); fbreak; };
"con scappellamento a" => { advanceColumn(); fgoto shift; };
"minore " di => { SET_TOKEN(OP_LT); fbreak; };
"maggiore " di => { SET_TOKEN(OP_GT); fbreak; };
"minore o uguale " ("a"|di) => { SET_TOKEN(OP_LE); fbreak; };
"maggiore o uguale " ("a"|di) => { SET_TOKEN(OP_GE); fbreak; };
# Statements.
"vaffanzum" => { SET_TOKEN(RETURN); fbreak; };
"voglio" => { SET_TOKEN(VARDECL); fbreak; };
"come " ("se "?) "fosse" => { SET_TOKEN(ASSIGN); fbreak; };
"a posterdati" => { SET_TOKEN(PRINT); fbreak; };
"mi porga" => { SET_TOKEN(INPUT); fbreak; };
"ho visto" => { SET_TOKEN(ASSERT); fbreak; };
"avvertite don ulrico" => { SET_TOKEN(ABORT); fbreak; };
# Loops and branches.
"stuzzica" => { SET_TOKEN(LOOP_BEGIN); fbreak; };
"e " prematura " anche, se" => { SET_TOKEN(LOOP_CONDITION); fbreak; };
"che cos'" accent_e => { SET_TOKEN(BRANCH_CONDITION); fbreak; };
"o tarapia tapioco" => { SET_TOKEN(BRANCH_ELSE); fbreak; };
"e velocit" accent_a " di esecuzione" => { SET_TOKEN(BRANCH_END); fbreak; };
"o magari" => { SET_TOKEN(CASE_END); fbreak; };
# Entry point and functions.
"Lei ha clacsonato" => { SET_TOKEN(ENTRY_POINT); fbreak; };
"blinda la " supercazzola => { SET_TOKEN(FUN_DECL); fbreak; };
"con" => { SET_TOKEN(FUN_PARAMS); fbreak; };
prematura "ta la " supercazzola => { SET_TOKEN(FUN_CALL); fbreak; };
"o scherziamo" ("?"?) => { SET_TOKEN(FUN_END); fbreak; };
# Builtin type names: a TYPENAME token carrying the builtin type.
"Necchi" => {
SET_TOKEN(TYPENAME);
token->setBuiltinTypeValue(Token::BUILTIN_TYPE_INTEGER);
fbreak;
};
"Mascetti" => {
SET_TOKEN(TYPENAME);
token->setBuiltinTypeValue(Token::BUILTIN_TYPE_CHAR);
fbreak;
};
"Perozzi" => {
SET_TOKEN(TYPENAME);
token->setBuiltinTypeValue(Token::BUILTIN_TYPE_FLOAT);
fbreak;
};
"Melandri" => {
SET_TOKEN(TYPENAME);
token->setBuiltinTypeValue(Token::BUILTIN_TYPE_BOOL);
fbreak;
};
"Sassaroli" => {
SET_TOKEN(TYPENAME);
token->setBuiltinTypeValue(Token::BUILTIN_TYPE_DOUBLE);
fbreak;
};
# Tokens carrying the matched text, converted as needed.
identifier => {
SET_TOKEN(IDENTIFIER);
token->setStringValue({state_.ts, state_.te});
fbreak;
};
float => {
SET_TOKEN(FLOAT);
token->setFloatValue(std::stod(std::string{state_.ts, state_.te}));
fbreak;
};
integer => {
SET_TOKEN(INTEGER);
token->setIntValue(std::stoll(std::string{state_.ts, state_.te}));
fbreak;
};
# Comments and blanks produce no token: they only move the position at which
# the next token will start.
comment (^"\n")* | (space - '\n')+ => {
advanceColumn();
starting_location = current_location_;
};
'\n' => {
newLine();
starting_location = current_location_;
};
*|;
}%%
#include "lexer.h"
#include <string>
namespace monicelli {
// Creates the token for the text just matched. Only usable inside
// Lexer::getNextToken(): it advances the current column past the match and
// stores in the local `token` a new Token of type TOKEN_<NAME> spanning from
// the local `starting_location` to the updated current location.
#define SET_TOKEN(NAME) \
do { \
advanceColumn(); \
auto end_location = current_location_; \
token.reset(new Token{Token::TOKEN_##NAME,\
starting_location, end_location}); \
} while (false)
%% write data nofinal;
// Puts the scanner in its initial state. The body is the initialization code
// that Ragel generates for the `write init` directive.
void Lexer::resetState() {
%% write init;
}
// Scans the input and returns the next token.
//
// The buffer is refilled first when it has been fully consumed. The scanner
// then runs until a rule produces a token, the scanner enters its error state
// (TOKEN_UNKNOWN), or the buffered data ends. Reaching the end of the input
// yields TOKEN_END.
//
// NOTE(review): if the buffered data ends while the input stream is still
// readable and no token has been produced, `token` stays null and a null
// pointer is returned. This looks reachable only for inputs that do not fit in
// a single buffer; confirm and handle in the callers or by refilling here.
std::unique_ptr<Token> Lexer::getNextToken() {
  if (buffer_.isExhausted()) advanceBuffer();

  // p, pe and eof are the variable names expected by the generated scanner.
  char* p = buffer_.getCursor();
  char* pe = buffer_.getDataEnd();
  // The end of the buffer is the end of the input only once the stream has
  // stopped being readable.
  char* eof = input_? nullptr : pe;

  Location starting_location = current_location_;
  std::unique_ptr<Token> token;

  while (p != pe && !token) {
    %% write exec noend;

    if (state_.cs == %%{ write error; }%%) {
      token.reset(new Token{Token::TOKEN_UNKNOWN, starting_location});
    }
  }

  if (p == eof) {
    token.reset(new Token{Token::TOKEN_END, starting_location});
  }

  state_.ts = nullptr;
  buffer_.setCursor(p);

  // Guard against the null token described above before tracing it.
  if (trace_enabled_ && token) token->print(std::cout);
  return token;
}
#undef SET_TOKEN
} // namespace

59
src/location.h Normal file
View File

@ -0,0 +1,59 @@
#ifndef MONICELLI_LOCATION_H
#define MONICELLI_LOCATION_H
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include <iostream>
namespace monicelli {
// A position in a source file, as a 1-based line and column pair.
//
// A default-constructed location is the start of the file (1:1). Only the
// Lexer can move a location or create one at an arbitrary position; everyone
// else gets read-only access.
class Location final {
public:
  Location() : Location(1, 1) {}

  int getLine() const { return line_; }
  int getColumn() const { return column_; }

private:
  Location(int line, int column) : line_(line), column_(column) {}

  void advanceColumn(int amount) { column_ += amount; }

  // Moves to the first column of the following line.
  void newLine() {
    ++line_;
    column_ = 1;
  }

  int line_;
  int column_;

  friend class Lexer;
};
// Mixin for objects that span a range of the source file, identified by the
// locations of their first and last character.
//
// The locations are read-only for everyone except derived classes and the
// Parser, which is a friend.
class LocationMixin {
public:
  Location getFirstLocation() const { return first_location_; }
  Location getLastLocation() const { return last_location_; }

protected:
  // Both locations start at the default location.
  LocationMixin() {}
  LocationMixin(Location first, Location last)
      : first_location_(first), last_location_(last) {}

  Location first_location_;
  Location last_location_;

  friend class Parser;
};
// Prints a location as "line:column".
static inline std::ostream& operator<<(std::ostream& stream, const Location& location) {
  stream << location.getLine();
  stream << ':';
  stream << location.getColumn();
  return stream;
}
// Two locations are equal when both their line and their column match.
static inline bool operator==(const Location& a, const Location& b) {
  if (a.getLine() != b.getLine()) return false;
  return a.getColumn() == b.getColumn();
}
} // namespace monicelli
#endif

View File

@ -1,127 +1,89 @@
/*
* Monicelli: an esoteric language compiler
*
* Copyright (C) 2014 Stefano Sanfilippo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include "Scanner.hpp"
#include "Parser.hpp"
#include "CppEmitter.hpp"
#include "ModuleRegistry.hpp"
#include "ModuleLoader.hpp"
#include "BitcodeEmitter.hpp"
#include "CLineParser.hpp"
#include "asmgen.h"
#include "ast-printer.h"
#include "codegen.h"
#include "options.h"
#include "parser.h"
#include <llvm/Bitcode/ReaderWriter.h>
#include <llvm/Support/FileSystem.h>
#include <llvm/Support/raw_os_ostream.h>
#include <boost/regex.hpp>
#include <boost/filesystem.hpp>
#include <iostream>
#include <fstream>
#include <string>
#include <functional>
#include <vector>
using namespace monicelli;
static const boost::regex NAME_RE("^(.+)\\.mc$");
static const boost::regex MODULE_RE("^(.+)\\.mm$");
int process(std::string const&, std::function<bool(std::ostream&, Program*)>);
int main(int argc, char** argv) {
parseCommandLine(argc, argv);
registerStdLib(getModuleRegistry());
if (!configHas("input")) {
std::cerr << "No input." << std::endl;
ProgramOptions options = ProgramOptions::fromCommandLine(argc, argv);
if (options.input_filenames_empty()) {
std::cerr << "No input files.\n";
return 0;
}
if (configHas("c++")) {
return process("cpp", [](std::ostream &outstream, Program *program) {
CppEmitter emitter(&outstream);
if (!program->emit(&emitter)) return false;
return true;
});
} else {
return process("bc", [](std::ostream & outstream, Program *program) {
BitcodeEmitter emitter;
if (!program->emit(&emitter)) return false;
llvm::raw_os_ostream stream(outstream);
llvm::WriteBitcodeToFile(&emitter.getModule(), stream);
return true;
});
}
if (options.shouldOnlyCompile() && options.input_filenames_size() > 1 &&
!options.getOutputFilename().empty()) {
std::cerr << "Output filename in compile mode may be specified only with a "
"single input file.\n";
return 1;
}
int process(std::string const& suffix, std::function<bool(std::ostream&, Program*)> writer) {
std::vector<std::string> sources;
std::vector<std::string> modules;
registerTargets();
for (std::string const& arg: config<std::vector<std::string>>("input")) {
if (boost::regex_match(arg, NAME_RE)) {
sources.push_back(arg);
} else if (boost::regex_match(arg, MODULE_RE)) {
modules.push_back(arg);
} else {
std::cerr << arg + ": file format not recognized. Perhaps you forgot the .mc/.mm extension?" << std::endl;
}
auto triple = llvm::sys::getDefaultTargetTriple();
auto target_machine = getTargetMachine(triple, options.getCPU(), options.getCPUFeatures());
#ifdef MONICELLI_ENABLE_LINKER
std::vector<std::string> object_filenames;
object_filenames.reserve(options.input_filenames_size());
#endif
for (const auto& input_filename : options.input_filenames()) {
std::ifstream input{input_filename};
if (!input) {
std::cerr << "Cannot open input file " << input_filename << ".\n";
return 1;
}
for (std::string const& name: modules) {
loadModule(name, getModuleRegistry());
}
Parser parser{input, input_filename};
parser.setLexerTrace(options.shouldTraceLexer());
auto ast = parser.parse();
for (std::string const& name: sources) {
std::ifstream instream(name);
if (!instream.good()) {
std::cerr << name + ": cannot open file" << std::endl;
if (options.shouldPrintAST()) {
printAst(std::cout, ast.get());
continue;
}
Program program;
Scanner scanner(&instream);
Parser parser(scanner, program);
llvm::LLVMContext context;
auto ir = generateIR(context, ast.get());
ir->setTargetTriple(triple);
ir->setDataLayout(target_machine->createDataLayout());
runFunctionOptimizer(ir.get());
# if YYDEBUG
parser.set_debug_level(1);
# endif
if (options.shouldPrintIR()) {
printIR(std::cout, ir.get());
continue;
}
parser.parse();
if (options.shouldSkipCompilation()) continue;
std::string outputname = boost::filesystem::path(name).filename().native();
if (boost::regex_match(outputname, NAME_RE)) {
outputname = boost::regex_replace(outputname, NAME_RE, "$1." + suffix);
std::string object_filename;
if (options.shouldOnlyCompile() && !options.getOutputFilename().empty()) {
object_filename = options.getOutputFilename();
} else {
outputname = outputname + '.' + suffix;
object_filename = basename(input_filename) + ".o";
}
std::ofstream outstream(outputname);
writeAssembly(object_filename, ir.get(), target_machine);
if (!writer(outstream, &program)) return 1;
#ifdef MONICELLI_ENABLE_LINKER
object_filenames.emplace_back(std::move(object_filename));
#endif
}
return 0;
#ifdef MONICELLI_ENABLE_LINKER
if (!options.shouldSkipCompilation() && !options.shouldOnlyCompile() &&
!object_filenames.empty()) {
linkAssembly(options.getOutputFilename(), object_filenames);
}
#endif
}

39
src/operators.def Normal file
View File

@ -0,0 +1,39 @@
#ifndef MONICELLI_OPERATORS_DEF
#define MONICELLI_OPERATORS_DEF
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
// We have two separate tables because it's cleaner this way.
// Please keep them in sync.
// lexer_token, lexer_value_type
// X-macro table of the operator tokens produced by the lexer.
// Each row is V(lexer_token, lexer_value_type); operators carry no payload, so
// the value type is always VOID. Keep the rows in sync with
// AST_BINARY_OPERATORS.
// The last row deliberately has no trailing backslash, so the macro cannot
// swallow the line that follows it.
#define LEXER_OPERATOR_TOKENS(V) \
  V(OP_EQ, VOID) \
  V(OP_GE, VOID) \
  V(OP_GT, VOID) \
  V(OP_LE, VOID) \
  V(OP_LT, VOID) \
  V(OP_SHL, VOID) \
  V(OP_SHR, VOID) \
  V(OP_PLUS, VOID) \
  V(OP_MINUS, VOID) \
  V(OP_TIMES, VOID) \
  V(OP_DIV, VOID)
// X-macro table of the binary operators known to the AST.
// Each row is V(lexer_token, ast_operator, priority, representation):
//   lexer_token    - the token name, as listed in LEXER_OPERATOR_TOKENS;
//   ast_operator   - the operator name used in the AST;
//   priority       - operator precedence; judging by the values, a larger
//                    number presumably binds tighter (confirm in the parser);
//   representation - the operator spelled as a string.
// All priorities must be STRICTLY LARGER than 0.
#define AST_BINARY_OPERATORS(V) \
V(OP_EQ, EQ, 1, "==") \
V(OP_GE, GE, 5, ">=") \
V(OP_GT, GT, 5, ">") \
V(OP_LE, LE, 5, "<=") \
V(OP_LT, LT, 5, "<") \
V(OP_SHL, SHL, 10, "<<") \
V(OP_SHR, SHR, 10, ">>") \
V(OP_PLUS, PLUS, 15, "+") \
V(OP_MINUS, MINUS, 15, "-") \
V(OP_TIMES, TIMES, 20, "*") \
V(OP_DIV, DIV, 20, "/")
#endif

96
src/options.cpp Normal file
View File

@ -0,0 +1,96 @@
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include "options.h"
#include <cstdlib>
#include <cstring>
#include <iostream>
namespace monicelli {
// static
// Builds the program options from the command line.
//
// Arguments that are not options are collected as input filenames, in order.
// Options that take a value (--output, --cpu, --cpu-features) consume the
// following argument, which is therefore never treated as an input file.
// A missing option value and an unknown option are both fatal: a diagnostic is
// printed and the process exits with status 1. --help prints the usage text
// and exits with status 0.
//
// When the program is built without MONICELLI_ENABLE_LINKER, compile-only mode
// is always on and the --only-compile option is not recognized.
ProgramOptions ProgramOptions::fromCommandLine(int argc, char** argv) {
  ProgramOptions options;

  // True when `arg` is either spelling of an option.
  const auto is_option = [](const char* arg, const char* short_name, const char* long_name) {
    return strcmp(arg, short_name) == 0 || strcmp(arg, long_name) == 0;
  };

  // Returns the value of the option at position `i`, advancing `i` past it.
  // Exits with `message` when the option is the last argument.
  const auto take_value = [argc, argv](int& i, const char* message) -> const char* {
    if (i == argc - 1) {
      std::cerr << message;
      exit(1);
    }
    return argv[++i];
  };

  for (int i = 1; i < argc; ++i) {
    const char* arg = argv[i];

    if (is_option(arg, "-o", "--output")) {
      options.output_filename_ = take_value(i, "--output must be followed by a filename.\n");
      continue;
    }
    if (is_option(arg, "-s", "--print-ir")) {
      options.print_ir_ = true;
      continue;
    }
    if (is_option(arg, "-p", "--print-ast")) {
      options.print_ast_ = true;
      continue;
    }
    if (is_option(arg, "-t", "--trace-lexer")) {
      options.trace_lexer_ = true;
      continue;
    }
    if (is_option(arg, "-n", "--no-compile")) {
      options.skip_compile_ = true;
      continue;
    }
#ifdef MONICELLI_ENABLE_LINKER
    if (is_option(arg, "-c", "--only-compile")) {
      options.compile_only_ = true;
      continue;
    }
#endif
    if (is_option(arg, "-m", "--cpu")) {
      options.cpu_ = take_value(i, "--cpu must be followed by a CPU name.\n");
      continue;
    }
    if (is_option(arg, "-f", "--cpu-features")) {
      options.cpu_features_ =
          take_value(i, "--cpu-features must be followed by a set of features.\n");
      continue;
    }
    if (is_option(arg, "-h", "--help")) {
      // printHelp() terminates the process.
      printHelp(argv[0]);
    }
    if (arg[0] == '-') {
      std::cerr << "Unknown option " << arg << ".\n\n";
      printHelp(argv[0]);
      exit(1);
    }

    options.input_filenames_.emplace_back(arg);
  }

#ifndef MONICELLI_ENABLE_LINKER
  options.compile_only_ = true;
#endif

  return options;
}
// static
// Prints the usage text to standard output and terminates the process with
// exit status 0; this function never returns. `program_name` is shown in the
// usage line. The --only-compile entry is listed only when the program is
// built with MONICELLI_ENABLE_LINKER.
void ProgramOptions::printHelp(const char* program_name) {
std::cout << "Usage: " << program_name
<< " [options...] [input.mc ...]\n\n"
"Options:\n"
#ifdef MONICELLI_ENABLE_LINKER
" --only-compile, -c : Compile only, do not link.\n"
#endif
" --no-compile, -n : Do not compile, only print (see below).\n"
" --output, -o out.o : Specify the output filename.\n"
" --trace-lexer, -t : Print tokens as seen by the lexer.\n"
" --print-ast, -p : Print the AST as pseudocode.\n"
" --print-ir, -s : Print the IR of the code.\n"
" --cpu, -m model : Set the CPU model to this (default: generic).\n"
" --cpu-features, -f feat : Enable these CPU features (default: none).\n"
" --help, -h : Print this message.\n"
"\n";
exit(0);
}
} // namespace monicelli

58
src/options.h Normal file
View File

@ -0,0 +1,58 @@
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#ifndef MONICELLI_OPTIONS_H
#define MONICELLI_OPTIONS_H
#include "iterators.h"
#include <string>
#include <vector>
namespace monicelli {
// Options controlling a run of the compiler, as parsed from the command line.
// Instances are created only through fromCommandLine(); once built they are
// read-only.
class ProgramOptions final {
public:
typedef std::vector<std::string>::const_iterator ConstStringIter;
// Parses the command line. argv[0] is taken as the program name.
static ProgramOptions fromCommandLine(int argc, char** argv);
// Flags, all false by default.
bool shouldPrintIR() const { return print_ir_; }
bool shouldPrintAST() const { return print_ast_; }
bool shouldTraceLexer() const { return trace_lexer_; }
bool shouldOnlyCompile() const { return compile_only_; }
bool shouldSkipCompilation() const { return skip_compile_; }
// Empty when no output filename was given.
const std::string& getOutputFilename() const { return output_filename_; }
// Input filenames, in command line order. input_filenames() wraps the
// iterator pair for use in a range-based for loop.
ConstStringIter begin_input_filenames() const { return input_filenames_.cbegin(); }
ConstStringIter end_input_filenames() const { return input_filenames_.cend(); }
ConstRangeWrapper<ConstStringIter> input_filenames() const {
return {begin_input_filenames(), end_input_filenames()};
}
int input_filenames_size() const { return input_filenames_.size(); }
bool input_filenames_empty() const { return input_filenames_.empty(); }
// Target CPU model ("generic" by default) and CPU features (empty by default).
const std::string& getCPU() const { return cpu_; }
const std::string& getCPUFeatures() const { return cpu_features_; }
private:
// Prints the usage text for `program_name`.
static void printHelp(const char* program_name);
ProgramOptions()
: print_ir_(false), print_ast_(false), trace_lexer_(false), compile_only_(false),
skip_compile_(false), cpu_("generic") {}
bool print_ir_;
bool print_ast_;
bool trace_lexer_;
bool compile_only_;
bool skip_compile_;
std::vector<std::string> input_filenames_;
std::string output_filename_;
std::string cpu_;
std::string cpu_features_;
};
} // namespace monicelli
#endif

542
src/parser.cpp Normal file
View File

@ -0,0 +1,542 @@
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include "parser.h"
#include "errors.h"
namespace monicelli {
std::unique_ptr<Module> Parser::parseModule() {
std::unique_ptr<Module> module{new Module};
while (peekNextToken()->getType() == Token::TOKEN_FUN_DECL) {
module->functions_.emplace_back(parseFunction());
}
if (peekNextToken()->getType() == Token::TOKEN_ENTRY_POINT) {
module->maybe_entry_point_ = parseEntryPoint();
}
while (peekNextToken()->getType() == Token::TOKEN_FUN_DECL) {
module->functions_.emplace_back(parseFunction());
}
auto token = getNextToken();
if (token->getType() != Token::TOKEN_END) {
error(token, "expected end of file");
}
module->source_filename_ = getSourceFilename();
return module;
}
std::unique_ptr<Function> Parser::parseEntryPoint() {
std::unique_ptr<Function> function{new Function};
auto token = getNextToken();
if (token->getType() != Token::TOKEN_ENTRY_POINT) {
error(token, "expected entry point declaration");
}
function->return_type_.base_type_ = VarType::INTEGER;
function->body_ = parseStatements();
return function;
}
std::unique_ptr<Function> Parser::parseFunction() {
std::unique_ptr<Function> function{new Function};
auto token = getNextToken();
if (token->getType() != Token::TOKEN_FUN_DECL) {
error(token, "expected function declaration");
}
switch (peekNextToken()->getType()) {
case Token::TOKEN_STAR:
case Token::TOKEN_TYPENAME:
function->return_type_ = parseType();
break;
default:
break;
}
token = getNextToken();
if (token->getType() != Token::TOKEN_IDENTIFIER) {
error(token, "expected function name");
}
function->name_ = token->getStringValue();
token = getNextToken();
switch (token->getType()) {
case Token::TOKEN_FUN_PARAMS:
for (bool done = false; !done;) {
auto var = parseVariable();
auto type = parseType();
function->params_.emplace_back(var, type);
auto token = getNextToken();
switch (token->getType()) {
case Token::TOKEN_COMMA:
break;
case Token::TOKEN_FUN_END:
done = true;
break;
default:
error(token, "expected either more parameters or function body begin");
break;
}
}
// fallthrough
case Token::TOKEN_FUN_END:
break;
default:
error(token, "expected either parameters or function body begin");
break;
}
function->body_ = parseStatements();
return function;
}
// Parses a variable reference: an optional article followed by an
// identifier. The variable's location span is that of the identifier token.
Variable Parser::parseVariable() {
  auto name_token = getNextToken();
  if (name_token->getType() == Token::TOKEN_ARTICLE) {
    // The article carries no information; move on to the identifier.
    name_token = getNextToken();
  }
  if (name_token->getType() != Token::TOKEN_IDENTIFIER) {
    error(name_token, "expected variable name");
  }

  Variable result;
  result.name_ = name_token->getStringValue();
  result.first_location_ = name_token->getFirstLocation();
  result.last_location_ = name_token->getLastLocation();
  return result;
}
// Maps the builtin type carried by a type-name token to the corresponding
// AST base type. One case per row of BUILTIN_TYPES is generated; any other
// value is treated as unreachable.
static VarType::BaseType builtinTypeToASTType(Token::BuiltinTypeValue type) {
  switch (type) {
#define RETURN_VAR_TYPE(NAME, _1, _2, _3, _4, _5) \
  case Token::BUILTIN_TYPE_##NAME: \
    return VarType::NAME;
    BUILTIN_TYPES(RETURN_VAR_TYPE)
// Keep the helper macro local to this switch, like the other X-macro users
// in this file do.
#undef RETURN_VAR_TYPE
  default:
    UNREACHABLE("Unhandled BuiltinType.");
  }
}
// Parses a type: an optional star (marking a pointer) followed by a builtin
// type name.
VarType Parser::parseType() {
  auto type_token = getNextToken();
  const bool has_star = type_token->getType() == Token::TOKEN_STAR;
  if (has_star) {
    type_token = getNextToken();
  }
  if (type_token->getType() != Token::TOKEN_TYPENAME) {
    error(type_token, "expected type name");
  }

  VarType parsed;
  if (has_star) {
    parsed.pointer_ = true;
  }
  parsed.base_type_ = builtinTypeToASTType(type_token->getBuiltinTypeValue());
  return parsed;
}
std::vector<std::unique_ptr<Statement>> Parser::parseStatements() {
std::vector<std::unique_ptr<Statement>> statements;
while (true) {
auto statement = maybeParseStatement();
if (!statement) break;
statements.emplace_back(std::move(statement));
}
return statements;
}
std::unique_ptr<Statement> Parser::parseStatement() {
auto start_location = peekNextToken()->getFirstLocation();
auto statement = maybeParseStatement();
if (!statement) {
error(start_location, "expected statement");
}
return statement;
}
std::unique_ptr<Statement> Parser::maybeParseStatement() {
switch (peekNextToken()->getType()) {
case Token::TOKEN_ASSERT:
return parseAssertStatement();
case Token::TOKEN_INPUT:
return parseInputStatement();
case Token::TOKEN_ABORT:
return parseAbortStatement();
case Token::TOKEN_BRANCH_CONDITION:
return parseBranchStatement();
case Token::TOKEN_VARDECL:
return parseVardeclStatement();
case Token::TOKEN_LOOP_BEGIN:
return parseLoopStatement();
case Token::TOKEN_RETURN:
return parseReturnStatement();
case Token::TOKEN_COMMA:
ignoreNextToken();
return maybeParseStatement();
default:
break;
}
// If we are here, the statement starts with an expression.
auto expression = maybeParseExpression();
// If there was not an expression here, then it's not a statement.
if (!expression) return nullptr;
auto token = peekNextToken();
switch (token->getType()) {
case Token::TOKEN_PRINT: {
ignoreNextToken();
std::unique_ptr<PrintStatement> statement{new PrintStatement};
statement->expression_ = std::move(expression);
return statement;
}
case Token::TOKEN_ASSIGN: {
AtomicExpression* e = dynamic_cast<AtomicExpression*>(expression.get());
if (!e || e->getType() != AtomicExpression::IDENTIFIER) {
error(token, "assignment target must be an identifier");
}
ignoreNextToken();
std::unique_ptr<AssignStatement> statement{new AssignStatement};
statement->variable_ = e->getIdentifierValue();
statement->expression_ = parseExpression();
return statement;
}
default:
if (expression->isFunctionCall()) {
std::unique_ptr<ExpressionStatement> statement{new ExpressionStatement};
statement->expression_ = std::move(expression);
return statement;
}
error(token, "only a function call can be a statement");
break;
}
UNREACHABLE("Unhandled statement type in parser");
}
std::unique_ptr<AssertStatement> Parser::parseAssertStatement() {
auto token = getNextToken();
if (token->getType() != Token::TOKEN_ASSERT) {
error(token, "expected assert statement");
}
std::unique_ptr<AssertStatement> statement{new AssertStatement};
statement->expression_ = parseExpression();
token = getNextToken();
if (token->getType() != Token::TOKEN_BANG) {
error(token, "expected final !");
}
return statement;
}
std::unique_ptr<FunctionCallExpression> Parser::parseFunctionCallExpression() {
auto token = getNextToken();
if (token->getType() != Token::TOKEN_FUN_CALL) {
error(token, "expected function call");
}
std::unique_ptr<FunctionCallExpression> statement{new FunctionCallExpression};
statement->first_location_ = token->first_location_;
token = getNextToken();
if (token->getType() != Token::TOKEN_IDENTIFIER) {
error(token, "expected name of the function to call");
}
statement->function_name_ = token->getStringValue();
token = getNextToken();
switch (token->getType()) {
case Token::TOKEN_FUN_PARAMS:
for (bool done = false; !done;) {
statement->function_args_.emplace_back(parseExpression());
auto token = getNextToken();
switch (token->getType()) {
case Token::TOKEN_FUN_END:
done = true;
break;
case Token::TOKEN_COMMA:
break;
default:
error(token, "expected either more params or end of call statement");
break;
}
}
// fallthrough
case Token::TOKEN_FUN_END:
break;
default:
error(token, "expected either call params or end of call statement");
break;
}
statement->last_location_ = peekNextToken()->first_location_;
return statement;
}
std::unique_ptr<InputStatement> Parser::parseInputStatement() {
auto token = getNextToken();
if (token->getType() != Token::TOKEN_INPUT) {
error(token, "expected input statement");
}
std::unique_ptr<InputStatement> statement{new InputStatement};
statement->variable_ = parseVariable();
return statement;
}
std::unique_ptr<AbortStatement> Parser::parseAbortStatement() {
auto token = getNextToken();
if (token->getType() != Token::TOKEN_ABORT) {
error(token, "expected abort statement");
}
return std::unique_ptr<AbortStatement>{new AbortStatement};
}
// Parses one case of a branch: a condition whose left-hand side is
// `condition_lhs`, an optional colon, and the statements of the case.
BranchCase Parser::parseBranchCase(std::shared_ptr<Expression> condition_lhs) {
  BranchCase parsed_case;
  parsed_case.expression_ = parseSemiExpression(condition_lhs);

  // The colon separating condition and body may be omitted.
  const bool has_colon = peekNextToken()->getType() == Token::TOKEN_COLON;
  if (has_colon) {
    ignoreNextToken();
  }

  parsed_case.body_ = parseStatements();
  return parsed_case;
}
// Parses the body of an else case. The else token itself has already been
// consumed by the caller.
std::unique_ptr<BranchElse> Parser::parseBranchElse() {
  std::unique_ptr<BranchElse> fallback{new BranchElse};
  fallback->body_ = parseStatements();
  return fallback;
}
std::unique_ptr<BranchStatement> Parser::parseBranchStatement() {
auto token = getNextToken();
if (token->getType() != Token::TOKEN_BRANCH_CONDITION) {
error(token, "expected branch condition");
}
std::unique_ptr<BranchStatement> statement{new BranchStatement};
statement->lead_var_ = parseVariable();
token = getNextToken();
if (token->getType() != Token::TOKEN_BRANCH_BEGIN) {
error(token, "expected begin of branch");
}
std::shared_ptr<Expression> condition_lhs{
AtomicExpression::fromIdentifier(statement->lead_var_).release()};
statement->cases_.emplace_back(parseBranchCase(condition_lhs));
for (bool done = false; !done;) {
switch (peekNextToken()->getType()) {
case Token::TOKEN_CASE_END:
ignoreNextToken();
statement->cases_.emplace_back(parseBranchCase(condition_lhs));
break;
case Token::TOKEN_BRANCH_ELSE:
case Token::TOKEN_BRANCH_END:
done = true;
break;
default:
error(peekNextToken(), "expected other cases, else case or end of branch");
break;
}
}
token = getNextToken();
switch (token->getType()) {
case Token::TOKEN_BRANCH_ELSE: {
if (peekNextToken()->getType() == Token::TOKEN_COLON) {
ignoreNextToken();
}
statement->maybe_else_case_ = parseBranchElse();
auto token = getNextToken();
if (token->getType() != Token::TOKEN_BRANCH_END) {
error(token, "expected end of branch");
}
// fallthrough
}
case Token::TOKEN_BRANCH_END:
break;
default:
error(token, "expected either else case or end of branch");
break;
}
return statement;
}
std::unique_ptr<VardeclStatement> Parser::parseVardeclStatement() {
auto token = getNextToken();
if (token->getType() != Token::TOKEN_VARDECL) {
error(token, "expected declaration");
}
std::unique_ptr<VardeclStatement> statement{new VardeclStatement};
statement->variable_ = parseVariable();
token = getNextToken();
if (token->getType() != Token::TOKEN_COMMA) {
error(token, "expected ,");
}
statement->type_ = parseType();
if (peekNextToken()->getType() == Token::TOKEN_ASSIGN) {
ignoreNextToken();
statement->maybe_init_ = parseExpression();
}
return statement;
}
std::unique_ptr<LoopStatement> Parser::parseLoopStatement() {
auto token = getNextToken();
if (token->getType() != Token::TOKEN_LOOP_BEGIN) {
error(token, "expected loop statement");
}
std::unique_ptr<LoopStatement> statement{new LoopStatement};
while (peekNextToken()->getType() != Token::TOKEN_LOOP_CONDITION) {
statement->body_.emplace_back(parseStatement());
}
ignoreNextToken(); // This was a Token::TOKEN_LOOP_CONDITION.
statement->condition_ = parseExpression();
return statement;
}
std::unique_ptr<ReturnStatement> Parser::parseReturnStatement() {
auto token = getNextToken();
if (token->getType() != Token::TOKEN_RETURN) {
error(token, "expected return statement");
}
std::unique_ptr<ReturnStatement> statement{new ReturnStatement};
if (peekNextToken()->getType() == Token::TOKEN_BANG) {
ignoreNextToken();
return statement;
}
statement->maybe_expression_ = parseExpression();
token = getNextToken();
if (token->getType() != Token::TOKEN_BANG) {
error(token, "expected !");
}
return statement;
}
std::unique_ptr<Expression> Parser::parseExpression() {
auto first_location = peekNextToken()->getFirstLocation();
auto expression = maybeParseExpression();
if (!expression) {
error(first_location, "expected expression");
}
return expression;
}
// Translates an operator token into the matching BinaryExpression operator.
// One `case` per entry of AST_BINARY_OPERATORS is generated by the helper
// macro below; a token type that is not listed there hits UNREACHABLE.
static BinaryExpression::Type getOperatorTypeFromToken(const Token* token) {
switch (token->getType()) {
// Only the first two columns (token name, expression name) are used here.
#define TOKEN_OP_TO_EXPR_OP(TOKEN_NAME, EXPR_NAME, _, __) \
case Token::TOKEN_##TOKEN_NAME: \
return BinaryExpression::EXPR_NAME;
AST_BINARY_OPERATORS(TOKEN_OP_TO_EXPR_OP)
#undef TOKEN_OP_TO_EXPR_OP
default:
UNREACHABLE("Unhandled token in operator conversion");
}
}
// Parses the remainder of a binary expression whose left side (`lhs`) is
// already known: an optional operator (equality when omitted) followed by the
// right-hand expression.
// NOTE(review): the trailing `true` passed to BinaryExpression presumably
// flags this as a "semi-expression"; confirm against the AST definition.
std::unique_ptr<Expression> Parser::parseSemiExpression(std::shared_ptr<Expression> lhs) {
  const BinaryExpression::Type op = peekNextToken()->isOperator()
                                        ? getOperatorTypeFromToken(getNextToken().get())
                                        : BinaryExpression::EQ;
  auto right_side = parseExpression();
  std::unique_ptr<Expression> combined{new BinaryExpression{op, lhs, right_side.release(), true}};
  return combined;
}
static int getOperatorPrecedenceFromToken(Token* token) {
switch (token->getType()) {
#define RETURN_OP_PRIORITY(NAME, _, PRIORITY, __) \
case Token::TOKEN_##NAME: \
return PRIORITY;
AST_BINARY_OPERATORS(RETURN_OP_PRIORITY)
#undef RETURN_OP_PRIORITY
default:
UNREACHABLE("Undefined operator priority for token");
}
}
// Tries to parse an expression made of atoms joined by binary operators whose
// precedence is at least `min_precedence`. Returns nullptr, without reporting
// an error, when the upcoming tokens do not start an atomic expression.
std::unique_ptr<Expression> Parser::maybeParseExpressionInternal(int min_precedence) {
// Position of the first token, recorded before anything is consumed.
Location first_location = peekNextToken()->getFirstLocation();
// Precedence climbing.
auto lhs = maybeParseAtomicExpression();
if (!lhs) return nullptr;
while (true) {
auto token = peekNextToken();
// Stop at the first token that is not an operator...
if (!token->isOperator()) break;
int precedence = getOperatorPrecedenceFromToken(token);
// ...or at an operator that binds too loosely for this level.
if (precedence < min_precedence) break;
// Read everything needed from the token before it is consumed below.
auto op_type = getOperatorTypeFromToken(token);
auto op_location = token->getFirstLocation();
ignoreNextToken();
// The right side may only contain operators that bind strictly tighter, so
// operators of equal precedence are folded by this loop, left to right.
auto rhs = maybeParseExpressionInternal(precedence + 1);
if (!rhs) {
error(op_location, "binary operation is missing a right side");
}
// The old lhs becomes the left operand of the new node.
// NOTE(review): the trailing `false` is presumably the counterpart of the
// `true` passed by parseSemiExpression(); confirm against the AST definition.
lhs.reset(new BinaryExpression{op_type, std::move(lhs), rhs.release(), false});
}
lhs->first_location_ = first_location;
// The span ends where the first token after the expression begins.
lhs->last_location_ = peekNextToken()->getFirstLocation();
return lhs;
}
// Tries to parse an atom: a variable (optionally preceded by an article), an
// integer or float literal, or a function call. Returns nullptr without
// consuming anything when the next token starts none of these.
std::unique_ptr<Expression> Parser::maybeParseAtomicExpression() {
  const auto kind = peekNextToken()->getType();

  if (kind == Token::TOKEN_ARTICLE || kind == Token::TOKEN_IDENTIFIER) {
    return AtomicExpression::fromIdentifier(parseVariable());
  }
  if (kind == Token::TOKEN_INTEGER) {
    return AtomicExpression::fromInt(getNextToken()->getIntValue());
  }
  if (kind == Token::TOKEN_FLOAT) {
    return AtomicExpression::fromFloat(getNextToken()->getFloatValue());
  }
  if (kind == Token::TOKEN_FUN_CALL) {
    return parseFunctionCallExpression();
  }
  return nullptr;
}
std::unique_ptr<Token> Parser::getNextToken() {
assert(current_token_ && "Cannot get from an empty stream");
auto token = std::move(current_token_);
switch (token->getType()) {
case Token::TOKEN_END:
case Token::TOKEN_UNKNOWN:
current_token_ = nullptr;
break;
default:
current_token_ = lexer_.getNextToken();
break;
}
return token;
}
} // namespace monicelli

73
src/parser.h Normal file
View File

@ -0,0 +1,73 @@
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#ifndef MONICELLI_PARSER_H
#define MONICELLI_PARSER_H
#include "ast.h"
#include "errors.h"
#include "lexer.h"
#include "support.h"
#include <iostream>
#include <memory>
#include <vector>
namespace monicelli {
// Parser that turns the token stream of a Lexer into an AST Module, keeping a
// single token of lookahead (current_token_). The error-reporting helpers come
// from ErrorReportingMixin.
class Parser final : public ErrorReportingMixin {
public:
// `input` is handed to the lexer; `source_filename` is handed to the
// error-reporting base class.
Parser(std::istream& input, const std::string& source_filename)
: ErrorReportingMixin(source_filename), lexer_{input} {}
// Loads the first token into the lookahead, then parses a whole module.
std::unique_ptr<Module> parse() {
current_token_ = lexer_.getNextToken();
return parseModule();
}
// Forwards the flag to the lexer's tracing switch.
void setLexerTrace(bool enabled) { lexer_.setTraceEnabled(enabled); }
private:
// Naming convention: parseX() reports an error when X is missing, while
// maybeParseX() returns nullptr instead.
Variable parseVariable();
VarType parseType();
std::unique_ptr<Module> parseModule();
std::unique_ptr<Function> parseFunction();
std::unique_ptr<Function> parseEntryPoint();
std::unique_ptr<Statement> parseStatement();
std::vector<std::unique_ptr<Statement>> parseStatements();
std::unique_ptr<Statement> maybeParseStatement();
std::unique_ptr<AssertStatement> parseAssertStatement();
std::unique_ptr<InputStatement> parseInputStatement();
std::unique_ptr<AbortStatement> parseAbortStatement();
BranchCase parseBranchCase(std::shared_ptr<Expression> condition_lhs);
std::unique_ptr<BranchElse> parseBranchElse();
std::unique_ptr<BranchStatement> parseBranchStatement();
std::unique_ptr<VardeclStatement> parseVardeclStatement();
std::unique_ptr<LoopStatement> parseLoopStatement();
std::unique_ptr<ReturnStatement> parseReturnStatement();
std::unique_ptr<Expression> parseExpression();
std::unique_ptr<Expression> parseSemiExpression(std::shared_ptr<Expression> lhs);
// Starts expression parsing at the lowest precedence level (0).
std::unique_ptr<Expression> maybeParseExpression() { return maybeParseExpressionInternal(0); }
std::unique_ptr<Expression> maybeParseExpressionInternal(int min_precedence);
std::unique_ptr<Expression> maybeParseAtomicExpression();
std::unique_ptr<FunctionCallExpression> parseFunctionCallExpression();
// Returns the lookahead token and advances the stream.
std::unique_ptr<Token> getNextToken();
// Returns the lookahead token without consuming it. The pointer is owned by
// the parser; the lookahead must not be empty.
Token* peekNextToken() {
assert(current_token_ && "Cannot peek into an empty stream.");
return current_token_.get();
}
// Consumes the lookahead token and discards it.
void ignoreNextToken() {
auto token = getNextToken();
// Mark the local as used to avoid an unused-variable warning.
USE(token);
}
Lexer lexer_;
// One-token lookahead; null before parse() and after the final token.
std::unique_ptr<Token> current_token_;
};
} // namespace monicelli
#endif

17
src/support.cpp Normal file
View File

@ -0,0 +1,17 @@
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#include "support.h"
namespace monicelli {
// Returns the part of `input_filename` that follows the last path separator
// ('/' or '\\'), or the whole string when it contains no separator.
//
// A path that ends with a separator yields an empty string; an empty input
// yields an empty string.
std::string basename(std::string input_filename) {
  const auto last_separator = input_filename.find_last_of("\\/");
  if (last_separator == std::string::npos) {
    return input_filename;
  }
  return input_filename.substr(last_separator + 1);
}
} // namespace monicelli

28
src/support.h Normal file
View File

@ -0,0 +1,28 @@
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
#ifndef MONICELLI_SUPPORT_H
#define MONICELLI_SUPPORT_H
#include <iostream>
#include <string>
namespace monicelli {
// Evaluates `x` and discards the result; used to mark a value as
// intentionally unused.
#define USE(x) ((void)(x))
// Writes the last (or only) value to `stream`, followed by ".\n".
template<typename First> static void print(std::ostream& stream, const First& first) {
  stream << first << ".\n";
}

// Writes all values to `stream` separated by single spaces and terminated by
// ".\n" (e.g. print(out, "a", 1) produces "a 1.\n").
//
// The tail is taken by const reference so that arguments are not copied once
// per recursion step and non-copyable streamable values are accepted too.
template<typename First, typename... Tail>
static void print(std::ostream& stream, const First& first, const Tail&... tail) {
  stream << first << ' ';
  print(stream, tail...);
}
// Returns the file-name part of a path (see support.cpp for the definition).
std::string basename(std::string input_filename);
} // namespace monicelli
#endif

16
src/types.def Normal file
View File

@ -0,0 +1,16 @@
#ifndef MONICELLI_TYPES_DEF
#define MONICELLI_TYPES_DEF
// Copyright 2017 the Monicelli project authors. All rights reserved.
// Use of this source code is governed by a GPLv3 license, see LICENSE.txt.
// X-macro table of the builtin types. V is invoked once per row with:
// symbol_name, ir_type, input_format, output_format, src_name, ast_name
//
// The last row must NOT end with a line continuation, otherwise whatever
// line follows the table is spliced into the macro body.
#define BUILTIN_TYPES(V) \
  V(VOID, Void, nullptr, nullptr, "", "void") \
  V(INTEGER, Int32, "%d", "%d\n", "Necchi", "int") \
  V(CHAR, Int8, "%c", "%c", "Mascetti", "char") \
  V(FLOAT, Float, "%f", "%f\n", "Perozzi", "float") \
  V(BOOL, Int1, "%d", "%d\n", "Melandri", "bool") \
  V(DOUBLE, Double, "%lf", "%f\n", "Sassaroli", "double")
#endif