3596 lines
107 KiB
C
3596 lines
107 KiB
C
#include <errno.h>
|
|
#include <stdbool.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
#include "wren_common.h"
|
|
#include "wren_compiler.h"
|
|
#include "wren_vm.h"
|
|
|
|
#if WREN_DEBUG_DUMP_COMPILED_CODE
|
|
#include "wren_debug.h"
|
|
#endif
|
|
|
|
// This is written in bottom-up order, so the tokenization comes first, then
|
|
// parsing/code generation. This minimizes the number of explicit forward
|
|
// declarations needed.
|
|
|
|
// The maximum number of local (i.e. not module level) variables that can be
// declared in a single function, method, or chunk of top level code. This is
// the maximum number of variables in scope at one time, and spans block scopes.
//
// Note that this limitation is also explicit in the bytecode. Since
// `CODE_LOAD_LOCAL` and `CODE_STORE_LOCAL` use a single argument byte to
// identify the local, only 256 can be in scope at one time.
#define MAX_LOCALS 256

// The maximum number of upvalues (i.e. variables from enclosing functions)
// that a function can close over.
#define MAX_UPVALUES 256

// The maximum number of distinct constants that a function can contain. This
// value is explicit in the bytecode since `CODE_CONSTANT` only takes a single
// two-byte argument.
#define MAX_CONSTANTS (1 << 16)

// The maximum distance a CODE_JUMP or CODE_JUMP_IF instruction can move the
// instruction pointer.
#define MAX_JUMP (1 << 16)

// The maximum depth that interpolation can nest. For example, this string has
// three levels:
//
//      "outside %(one + "%(two + "%(three)")")"
#define MAX_INTERPOLATION_NESTING 8

// The buffer size used to format a compile error message, excluding the header
// with the module name and error location. Using a hardcoded buffer for this
// is kind of hairy, but fortunately we can control what the longest possible
// message is and handle that. Ideally, we'd use `snprintf()`, but that's not
// available in standard C++98.
#define ERROR_MESSAGE_SIZE (80 + MAX_VARIABLE_NAME + 15)

// The kinds of token the lexer can produce.
typedef enum
{
  // Punctuation and operators.
  TOKEN_LEFT_PAREN,
  TOKEN_RIGHT_PAREN,
  TOKEN_LEFT_BRACKET,
  TOKEN_RIGHT_BRACKET,
  TOKEN_LEFT_BRACE,
  TOKEN_RIGHT_BRACE,
  TOKEN_COLON,
  TOKEN_DOT,
  TOKEN_DOTDOT,
  TOKEN_DOTDOTDOT,
  TOKEN_COMMA,
  TOKEN_STAR,
  TOKEN_SLASH,
  TOKEN_PERCENT,
  TOKEN_PLUS,
  TOKEN_MINUS,
  TOKEN_LTLT,
  TOKEN_GTGT,
  TOKEN_PIPE,
  TOKEN_PIPEPIPE,
  TOKEN_CARET,
  TOKEN_AMP,
  TOKEN_AMPAMP,
  TOKEN_BANG,
  TOKEN_TILDE,
  TOKEN_QUESTION,
  TOKEN_EQ,
  TOKEN_LT,
  TOKEN_GT,
  TOKEN_LTEQ,
  TOKEN_GTEQ,
  TOKEN_EQEQ,
  TOKEN_BANGEQ,

  // Reserved words (see the keyword table used by the lexer).
  TOKEN_BREAK,
  TOKEN_CLASS,
  TOKEN_CONSTRUCT,
  TOKEN_ELSE,
  TOKEN_FALSE,
  TOKEN_FOR,
  TOKEN_FOREIGN,
  TOKEN_IF,
  TOKEN_IMPORT,
  TOKEN_IN,
  TOKEN_IS,
  TOKEN_NULL,
  TOKEN_RETURN,
  TOKEN_STATIC,
  TOKEN_SUPER,
  TOKEN_THIS,
  TOKEN_TRUE,
  TOKEN_VAR,
  TOKEN_WHILE,

  // Names and literals. A name starting with "_" is a field; one starting
  // with "__" is a static field.
  TOKEN_FIELD,
  TOKEN_STATIC_FIELD,
  TOKEN_NAME,
  TOKEN_NUMBER,

  // A string literal without any interpolation, or the last section of a
  // string following the last interpolated expression.
  TOKEN_STRING,

  // A portion of a string literal preceding an interpolated expression. This
  // string:
  //
  //     "a %(b) c %(d) e"
  //
  // is tokenized to:
  //
  //     TOKEN_INTERPOLATION "a "
  //     TOKEN_NAME          b
  //     TOKEN_INTERPOLATION " c "
  //     TOKEN_NAME          d
  //     TOKEN_STRING        " e"
  TOKEN_INTERPOLATION,

  // A newline.
  TOKEN_LINE,

  // Produced when the lexer hits a character it cannot tokenize.
  TOKEN_ERROR,

  // End of the source.
  TOKEN_EOF
} TokenType;

typedef struct
|
|
{
|
|
TokenType type;
|
|
|
|
// The beginning of the token, pointing directly into the source.
|
|
const char* start;
|
|
|
|
// The length of the token in characters.
|
|
int length;
|
|
|
|
// The 1-based line where the token appears.
|
|
int line;
|
|
|
|
// The parsed value if the token is a literal.
|
|
Value value;
|
|
} Token;
|
|
|
|
typedef struct
|
|
{
|
|
WrenVM* vm;
|
|
|
|
// The module being parsed.
|
|
ObjModule* module;
|
|
|
|
// The source code being parsed.
|
|
const char* source;
|
|
|
|
// The beginning of the currently-being-lexed token in [source].
|
|
const char* tokenStart;
|
|
|
|
// The current character being lexed in [source].
|
|
const char* currentChar;
|
|
|
|
// The 1-based line number of [currentChar].
|
|
int currentLine;
|
|
|
|
// The most recently lexed token.
|
|
Token current;
|
|
|
|
// The most recently consumed/advanced token.
|
|
Token previous;
|
|
|
|
// Tracks the lexing state when tokenizing interpolated strings.
|
|
//
|
|
// Interpolated strings make the lexer not strictly regular: we don't know
|
|
// whether a ")" should be treated as a RIGHT_PAREN token or as ending an
|
|
// interpolated expression unless we know whether we are inside a string
|
|
// interpolation and how many unmatched "(" there are. This is particularly
|
|
// complex because interpolation can nest:
|
|
//
|
|
// " %( " %( inner ) " ) "
|
|
//
|
|
// This tracks that state. The parser maintains a stack of ints, one for each
|
|
// level of current interpolation nesting. Each value is the number of
|
|
// unmatched "(" that are waiting to be closed.
|
|
int parens[MAX_INTERPOLATION_NESTING];
|
|
int numParens;
|
|
|
|
// If subsequent newline tokens should be discarded.
|
|
bool skipNewlines;
|
|
|
|
// Whether compile errors should be printed to stderr or discarded.
|
|
bool printErrors;
|
|
|
|
// If a syntax or compile error has occurred.
|
|
bool hasError;
|
|
} Parser;
|
|
|
|
// A local variable that is currently in scope in a function being compiled.
typedef struct
{
  // The name of the local variable. This points directly into the original
  // source code string.
  const char* name;

  // The length of the local variable's name.
  int length;

  // The depth in the scope chain that this variable was declared at. Zero is
  // the outermost scope--parameters for a method, or the first local block in
  // top level code. One is the scope within that, etc.
  int depth;

  // If this local variable is being used as an upvalue.
  bool isUpvalue;
} Local;

// Describes one variable captured by a function from an enclosing function.
typedef struct
{
  // True if this upvalue is capturing a local variable from the enclosing
  // function. False if it's capturing an upvalue.
  bool isLocal;

  // The index of the local or upvalue being captured in the enclosing function.
  int index;
} CompilerUpvalue;

// Bookkeeping information for the current loop being compiled.
typedef struct sLoop
{
  // Index of the instruction that the loop should jump back to.
  int start;

  // Index of the argument for the CODE_JUMP_IF instruction used to exit the
  // loop. Stored so we can patch it once we know where the loop ends.
  int exitJump;

  // Index of the first instruction of the body of the loop.
  int body;

  // Depth of the scope(s) that need to be exited if a break is hit inside the
  // loop.
  int scopeDepth;

  // The loop enclosing this one, or NULL if this is the outermost loop.
  struct sLoop* enclosing;
} Loop;

// The different signature syntaxes for different kinds of methods.
typedef enum
{
  // A name followed by a (possibly empty) parenthesized parameter list. Also
  // used for binary operators.
  SIG_METHOD,

  // Just a name. Also used for unary operators.
  SIG_GETTER,

  // A name followed by "=".
  SIG_SETTER,

  // A square bracketed parameter list.
  SIG_SUBSCRIPT,

  // A square bracketed parameter list followed by "=".
  SIG_SUBSCRIPT_SETTER,

  // A constructor initializer function. This has a distinct signature to
  // prevent it from being invoked directly outside of the constructor on the
  // metaclass.
  SIG_INITIALIZER
} SignatureType;

typedef struct
|
|
{
|
|
const char* name;
|
|
int length;
|
|
SignatureType type;
|
|
int arity;
|
|
} Signature;
|
|
|
|
// Bookkeeping information for compiling a class definition.
|
|
typedef struct
|
|
{
|
|
// The name of the class.
|
|
ObjString* name;
|
|
|
|
// Symbol table for the fields of the class.
|
|
SymbolTable fields;
|
|
|
|
// Symbols for the methods defined by the class. Used to detect duplicate
|
|
// method definitions.
|
|
IntBuffer methods;
|
|
IntBuffer staticMethods;
|
|
|
|
// True if the class being compiled is a foreign class.
|
|
bool isForeign;
|
|
|
|
// True if the current method being compiled is static.
|
|
bool inStatic;
|
|
|
|
// The signature of the method being compiled.
|
|
Signature* signature;
|
|
} ClassInfo;
|
|
|
|
struct sCompiler
|
|
{
|
|
Parser* parser;
|
|
|
|
// The compiler for the function enclosing this one, or NULL if it's the
|
|
// top level.
|
|
struct sCompiler* parent;
|
|
|
|
// The currently in scope local variables.
|
|
Local locals[MAX_LOCALS];
|
|
|
|
// The number of local variables currently in scope.
|
|
int numLocals;
|
|
|
|
// The upvalues that this function has captured from outer scopes. The count
|
|
// of them is stored in [numUpvalues].
|
|
CompilerUpvalue upvalues[MAX_UPVALUES];
|
|
|
|
// The current level of block scope nesting, where zero is no nesting. A -1
|
|
// here means top-level code is being compiled and there is no block scope
|
|
// in effect at all. Any variables declared will be module-level.
|
|
int scopeDepth;
|
|
|
|
// The current number of slots (locals and temporaries) in use.
|
|
//
|
|
// We use this and maxSlots to track the maximum number of additional slots
|
|
// a function may need while executing. When the function is called, the
|
|
// fiber will check to ensure its stack has enough room to cover that worst
|
|
// case and grow the stack if needed.
|
|
//
|
|
// This value here doesn't include parameters to the function. Since those
|
|
// are already pushed onto the stack by the caller and tracked there, we
|
|
// don't need to double count them here.
|
|
int numSlots;
|
|
|
|
// The current innermost loop being compiled, or NULL if not in a loop.
|
|
Loop* loop;
|
|
|
|
// If this is a compiler for a method, keeps track of the class enclosing it.
|
|
ClassInfo* enclosingClass;
|
|
|
|
// The function being compiled.
|
|
ObjFn* fn;
|
|
|
|
ObjMap* constants;
|
|
};
|
|
|
|
// Describes where a variable is declared.
typedef enum
{
  // A local variable in the current function.
  SCOPE_LOCAL,

  // A local variable declared in an enclosing function.
  SCOPE_UPVALUE,

  // A top-level module variable.
  SCOPE_MODULE
} Scope;

// A reference to a variable and the scope where it is defined. This contains
|
|
// enough information to emit correct code to load or store the variable.
|
|
typedef struct
|
|
{
|
|
// The stack slot, upvalue slot, or module symbol defining the variable.
|
|
int index;
|
|
|
|
// Where the variable is declared.
|
|
Scope scope;
|
|
} Variable;
|
|
|
|
// The stack effect of each opcode. The index in the array is the opcode, and
// the value is the stack effect of that instruction.
//
// The table is generated by including "wren_opcodes.h" with OPCODE() defined
// to expand to just the effect argument of each entry.
static const int stackEffects[] = {
  #define OPCODE(_, effect) effect,
  #include "wren_opcodes.h"
  #undef OPCODE
};

// Records that a compile error occurred and, if errors are being printed and
// the VM has an error callback configured, reports it through that callback.
//
// The reported text is "[label]: " followed by [format] expanded with [args].
// It is built in a fixed-size stack buffer using `sprintf()`/`vsprintf()`, so
// callers must keep the formatted text shorter than ERROR_MESSAGE_SIZE; the
// length is only checked after formatting.
static void printError(Parser* parser, int line, const char* label,
                       const char* format, va_list args)
{
  parser->hasError = true;
  if (!parser->printErrors) return;

  // Only report errors if there is a WrenErrorFn to handle them.
  if (parser->vm->config.errorFn == NULL) return;

  // Format the label and message.
  char message[ERROR_MESSAGE_SIZE];
  int length = sprintf(message, "%s: ", label);
  length += vsprintf(message + length, format, args);
  ASSERT(length < ERROR_MESSAGE_SIZE, "Error should not exceed buffer.");

  // Modules without a name are reported as "<unknown>".
  ObjString* module = parser->module->name;
  const char* module_name = module ? module->value : "<unknown>";

  parser->vm->config.errorFn(parser->vm, WREN_ERROR_COMPILE,
                             module_name, line, message);
}

// Outputs a lexical error.
|
|
static void lexError(Parser* parser, const char* format, ...)
|
|
{
|
|
va_list args;
|
|
va_start(args, format);
|
|
printError(parser, parser->currentLine, "Error", format, args);
|
|
va_end(args);
|
|
}
|
|
|
|
// Outputs a compile or syntax error. This also marks the compilation as having
|
|
// an error, which ensures that the resulting code will be discarded and never
|
|
// run. This means that after calling error(), it's fine to generate whatever
|
|
// invalid bytecode you want since it won't be used.
|
|
//
|
|
// You'll note that most places that call error() continue to parse and compile
|
|
// after that. That's so that we can try to find as many compilation errors in
|
|
// one pass as possible instead of just bailing at the first one.
|
|
static void error(Compiler* compiler, const char* format, ...)
|
|
{
|
|
Token* token = &compiler->parser->previous;
|
|
|
|
// If the parse error was caused by an error token, the lexer has already
|
|
// reported it.
|
|
if (token->type == TOKEN_ERROR) return;
|
|
|
|
va_list args;
|
|
va_start(args, format);
|
|
if (token->type == TOKEN_LINE)
|
|
{
|
|
printError(compiler->parser, token->line, "Error at newline", format, args);
|
|
}
|
|
else if (token->type == TOKEN_EOF)
|
|
{
|
|
printError(compiler->parser, token->line,
|
|
"Error at end of file", format, args);
|
|
}
|
|
else
|
|
{
|
|
// Make sure we don't exceed the buffer with a very long token.
|
|
char label[10 + MAX_VARIABLE_NAME + 4 + 1];
|
|
if (token->length <= MAX_VARIABLE_NAME)
|
|
{
|
|
sprintf(label, "Error at '%.*s'", token->length, token->start);
|
|
}
|
|
else
|
|
{
|
|
sprintf(label, "Error at '%.*s...'", MAX_VARIABLE_NAME, token->start);
|
|
}
|
|
printError(compiler->parser, token->line, label, format, args);
|
|
}
|
|
va_end(args);
|
|
}
|
|
|
|
// Adds [constant] to the constant pool and returns its index.
|
|
static int addConstant(Compiler* compiler, Value constant)
|
|
{
|
|
if (compiler->parser->hasError) return -1;
|
|
|
|
// See if we already have a constant for the value. If so, reuse it.
|
|
if (compiler->constants != NULL)
|
|
{
|
|
Value existing = wrenMapGet(compiler->constants, constant);
|
|
if (IS_NUM(existing)) return (int)AS_NUM(existing);
|
|
}
|
|
|
|
// It's a new constant.
|
|
if (compiler->fn->constants.count < MAX_CONSTANTS)
|
|
{
|
|
if (IS_OBJ(constant)) wrenPushRoot(compiler->parser->vm, AS_OBJ(constant));
|
|
wrenValueBufferWrite(compiler->parser->vm, &compiler->fn->constants,
|
|
constant);
|
|
if (IS_OBJ(constant)) wrenPopRoot(compiler->parser->vm);
|
|
|
|
if (compiler->constants == NULL)
|
|
{
|
|
compiler->constants = wrenNewMap(compiler->parser->vm);
|
|
}
|
|
wrenMapSet(compiler->parser->vm, compiler->constants, constant,
|
|
NUM_VAL(compiler->fn->constants.count - 1));
|
|
}
|
|
else
|
|
{
|
|
error(compiler, "A function may only contain %d unique constants.",
|
|
MAX_CONSTANTS);
|
|
}
|
|
|
|
return compiler->fn->constants.count - 1;
|
|
}
|
|
|
|
// Initializes [compiler].
|
|
static void initCompiler(Compiler* compiler, Parser* parser, Compiler* parent,
|
|
bool isMethod)
|
|
{
|
|
compiler->parser = parser;
|
|
compiler->parent = parent;
|
|
compiler->loop = NULL;
|
|
compiler->enclosingClass = NULL;
|
|
|
|
// Initialize these to NULL before allocating in case a GC gets triggered in
|
|
// the middle of initializing the compiler.
|
|
compiler->fn = NULL;
|
|
compiler->constants = NULL;
|
|
|
|
parser->vm->compiler = compiler;
|
|
|
|
// Declare a local slot for either the closure or method receiver so that we
|
|
// don't try to reuse that slot for a user-defined local variable. For
|
|
// methods, we name it "this", so that we can resolve references to that like
|
|
// a normal variable. For functions, they have no explicit "this", so we use
|
|
// an empty name. That way references to "this" inside a function walks up
|
|
// the parent chain to find a method enclosing the function whose "this" we
|
|
// can close over.
|
|
compiler->numLocals = 1;
|
|
compiler->numSlots = compiler->numLocals;
|
|
|
|
if (isMethod)
|
|
{
|
|
compiler->locals[0].name = "this";
|
|
compiler->locals[0].length = 4;
|
|
}
|
|
else
|
|
{
|
|
compiler->locals[0].name = NULL;
|
|
compiler->locals[0].length = 0;
|
|
}
|
|
|
|
compiler->locals[0].depth = -1;
|
|
compiler->locals[0].isUpvalue = false;
|
|
|
|
if (parent == NULL)
|
|
{
|
|
// Compiling top-level code, so the initial scope is module-level.
|
|
compiler->scopeDepth = -1;
|
|
}
|
|
else
|
|
{
|
|
// The initial scope for functions and methods is local scope.
|
|
compiler->scopeDepth = 0;
|
|
}
|
|
|
|
compiler->fn = wrenNewFunction(parser->vm, parser->module,
|
|
compiler->numLocals);
|
|
}
|
|
|
|
// Lexing ----------------------------------------------------------------------
|
|
|
|
typedef struct
|
|
{
|
|
const char* identifier;
|
|
size_t length;
|
|
TokenType tokenType;
|
|
} Keyword;
|
|
|
|
// The table of reserved words and their associated token types.
|
|
static Keyword keywords[] =
|
|
{
|
|
{"break", 5, TOKEN_BREAK},
|
|
{"class", 5, TOKEN_CLASS},
|
|
{"construct", 9, TOKEN_CONSTRUCT},
|
|
{"else", 4, TOKEN_ELSE},
|
|
{"false", 5, TOKEN_FALSE},
|
|
{"for", 3, TOKEN_FOR},
|
|
{"foreign", 7, TOKEN_FOREIGN},
|
|
{"if", 2, TOKEN_IF},
|
|
{"import", 6, TOKEN_IMPORT},
|
|
{"in", 2, TOKEN_IN},
|
|
{"is", 2, TOKEN_IS},
|
|
{"null", 4, TOKEN_NULL},
|
|
{"return", 6, TOKEN_RETURN},
|
|
{"static", 6, TOKEN_STATIC},
|
|
{"super", 5, TOKEN_SUPER},
|
|
{"this", 4, TOKEN_THIS},
|
|
{"true", 4, TOKEN_TRUE},
|
|
{"var", 3, TOKEN_VAR},
|
|
{"while", 5, TOKEN_WHILE},
|
|
{NULL, 0, TOKEN_EOF} // Sentinel to mark the end of the array.
|
|
};
|
|
|
|
// Returns true if [c] is an ASCII letter or an underscore, i.e. a character
// that may start an identifier. Digits are not accepted here; code that also
// allows digits inside a name checks isDigit() separately.
static bool isName(char c)
{
  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
}

// Returns true if [c] is a decimal digit ('0' through '9').
static bool isDigit(char c)
{
  return c >= '0' && c <= '9';
}

// Returns the current character the parser is sitting on.
|
|
static char peekChar(Parser* parser)
|
|
{
|
|
return *parser->currentChar;
|
|
}
|
|
|
|
// Returns the character after the current character.
|
|
static char peekNextChar(Parser* parser)
|
|
{
|
|
// If we're at the end of the source, don't read past it.
|
|
if (peekChar(parser) == '\0') return '\0';
|
|
return *(parser->currentChar + 1);
|
|
}
|
|
|
|
// Advances the parser forward one character.
|
|
static char nextChar(Parser* parser)
|
|
{
|
|
char c = peekChar(parser);
|
|
parser->currentChar++;
|
|
if (c == '\n') parser->currentLine++;
|
|
return c;
|
|
}
|
|
|
|
// If the current character is [c], consumes it and returns `true`.
|
|
static bool matchChar(Parser* parser, char c)
|
|
{
|
|
if (peekChar(parser) != c) return false;
|
|
nextChar(parser);
|
|
return true;
|
|
}
|
|
|
|
// Sets the parser's current token to the given [type] and current character
|
|
// range.
|
|
static void makeToken(Parser* parser, TokenType type)
|
|
{
|
|
parser->current.type = type;
|
|
parser->current.start = parser->tokenStart;
|
|
parser->current.length = (int)(parser->currentChar - parser->tokenStart);
|
|
parser->current.line = parser->currentLine;
|
|
|
|
// Make line tokens appear on the line containing the "\n".
|
|
if (type == TOKEN_LINE) parser->current.line--;
|
|
}
|
|
|
|
// If the current character is [c], then consumes it and makes a token of type
|
|
// [two]. Otherwise makes a token of type [one].
|
|
static void twoCharToken(Parser* parser, char c, TokenType two, TokenType one)
|
|
{
|
|
makeToken(parser, matchChar(parser, c) ? two : one);
|
|
}
|
|
|
|
// Skips the rest of the current line.
|
|
static void skipLineComment(Parser* parser)
|
|
{
|
|
while (peekChar(parser) != '\n' && peekChar(parser) != '\0')
|
|
{
|
|
nextChar(parser);
|
|
}
|
|
}
|
|
|
|
// Skips the rest of a block comment.
|
|
static void skipBlockComment(Parser* parser)
|
|
{
|
|
int nesting = 1;
|
|
while (nesting > 0)
|
|
{
|
|
if (peekChar(parser) == '\0')
|
|
{
|
|
lexError(parser, "Unterminated block comment.");
|
|
return;
|
|
}
|
|
|
|
if (peekChar(parser) == '/' && peekNextChar(parser) == '*')
|
|
{
|
|
nextChar(parser);
|
|
nextChar(parser);
|
|
nesting++;
|
|
continue;
|
|
}
|
|
|
|
if (peekChar(parser) == '*' && peekNextChar(parser) == '/')
|
|
{
|
|
nextChar(parser);
|
|
nextChar(parser);
|
|
nesting--;
|
|
continue;
|
|
}
|
|
|
|
// Regular comment character.
|
|
nextChar(parser);
|
|
}
|
|
}
|
|
|
|
// Reads the next character, which should be a hex digit (0-9, a-f, or A-F) and
|
|
// returns its numeric value. If the character isn't a hex digit, returns -1.
|
|
static int readHexDigit(Parser* parser)
|
|
{
|
|
char c = nextChar(parser);
|
|
if (c >= '0' && c <= '9') return c - '0';
|
|
if (c >= 'a' && c <= 'f') return c - 'a' + 10;
|
|
if (c >= 'A' && c <= 'F') return c - 'A' + 10;
|
|
|
|
// Don't consume it if it isn't expected. Keeps us from reading past the end
|
|
// of an unterminated string.
|
|
parser->currentChar--;
|
|
return -1;
|
|
}
|
|
|
|
// Parses the numeric value of the current token.
|
|
static void makeNumber(Parser* parser, bool isHex)
|
|
{
|
|
errno = 0;
|
|
|
|
if (isHex)
|
|
{
|
|
parser->current.value = NUM_VAL((double)strtoll(parser->tokenStart, NULL, 16));
|
|
}
|
|
else
|
|
{
|
|
parser->current.value = NUM_VAL(strtod(parser->tokenStart, NULL));
|
|
}
|
|
|
|
if (errno == ERANGE)
|
|
{
|
|
lexError(parser, "Number literal was too large (%d).", sizeof(long int));
|
|
parser->current.value = NUM_VAL(0);
|
|
}
|
|
|
|
// We don't check that the entire token is consumed after calling strtoll()
|
|
// or strtod() because we've already scanned it ourselves and know it's valid.
|
|
|
|
makeToken(parser, TOKEN_NUMBER);
|
|
}
|
|
|
|
// Finishes lexing a hexadecimal number literal.
|
|
static void readHexNumber(Parser* parser)
|
|
{
|
|
// Skip past the `x` used to denote a hexadecimal literal.
|
|
nextChar(parser);
|
|
|
|
// Iterate over all the valid hexadecimal digits found.
|
|
while (readHexDigit(parser) != -1) continue;
|
|
|
|
makeNumber(parser, true);
|
|
}
|
|
|
|
// Finishes lexing a number literal.
|
|
static void readNumber(Parser* parser)
|
|
{
|
|
while (isDigit(peekChar(parser))) nextChar(parser);
|
|
|
|
// See if it has a floating point. Make sure there is a digit after the "."
|
|
// so we don't get confused by method calls on number literals.
|
|
if (peekChar(parser) == '.' && isDigit(peekNextChar(parser)))
|
|
{
|
|
nextChar(parser);
|
|
while (isDigit(peekChar(parser))) nextChar(parser);
|
|
}
|
|
|
|
// See if the number is in scientific notation.
|
|
if (matchChar(parser, 'e') || matchChar(parser, 'E'))
|
|
{
|
|
// Allow a negative exponent.
|
|
matchChar(parser, '-');
|
|
|
|
if (!isDigit(peekChar(parser)))
|
|
{
|
|
lexError(parser, "Unterminated scientific notation.");
|
|
}
|
|
|
|
while (isDigit(peekChar(parser))) nextChar(parser);
|
|
}
|
|
|
|
makeNumber(parser, false);
|
|
}
|
|
|
|
// Finishes lexing an identifier. Handles reserved words.
|
|
static void readName(Parser* parser, TokenType type)
|
|
{
|
|
while (isName(peekChar(parser)) || isDigit(peekChar(parser)))
|
|
{
|
|
nextChar(parser);
|
|
}
|
|
|
|
// Update the type if it's a keyword.
|
|
size_t length = parser->currentChar - parser->tokenStart;
|
|
for (int i = 0; keywords[i].identifier != NULL; i++)
|
|
{
|
|
if (length == keywords[i].length &&
|
|
memcmp(parser->tokenStart, keywords[i].identifier, length) == 0)
|
|
{
|
|
type = keywords[i].tokenType;
|
|
break;
|
|
}
|
|
}
|
|
|
|
makeToken(parser, type);
|
|
}
|
|
|
|
// Reads [digits] hex digits in a string literal and returns their number value.
|
|
static int readHexEscape(Parser* parser, int digits, const char* description)
|
|
{
|
|
int value = 0;
|
|
for (int i = 0; i < digits; i++)
|
|
{
|
|
if (peekChar(parser) == '"' || peekChar(parser) == '\0')
|
|
{
|
|
lexError(parser, "Incomplete %s escape sequence.", description);
|
|
|
|
// Don't consume it if it isn't expected. Keeps us from reading past the
|
|
// end of an unterminated string.
|
|
parser->currentChar--;
|
|
break;
|
|
}
|
|
|
|
int digit = readHexDigit(parser);
|
|
if (digit == -1)
|
|
{
|
|
lexError(parser, "Invalid %s escape sequence.", description);
|
|
break;
|
|
}
|
|
|
|
value = (value * 16) | digit;
|
|
}
|
|
|
|
return value;
|
|
}
|
|
|
|
// Reads a hex digit Unicode escape sequence in a string literal.
|
|
static void readUnicodeEscape(Parser* parser, ByteBuffer* string, int length)
|
|
{
|
|
int value = readHexEscape(parser, length, "Unicode");
|
|
|
|
// Grow the buffer enough for the encoded result.
|
|
int numBytes = wrenUtf8EncodeNumBytes(value);
|
|
if (numBytes != 0)
|
|
{
|
|
wrenByteBufferFill(parser->vm, string, 0, numBytes);
|
|
wrenUtf8Encode(value, string->data + string->count - numBytes);
|
|
}
|
|
}
|
|
|
|
// Finishes lexing a string literal.
|
|
static void readString(Parser* parser)
|
|
{
|
|
ByteBuffer string;
|
|
TokenType type = TOKEN_STRING;
|
|
wrenByteBufferInit(&string);
|
|
|
|
for (;;)
|
|
{
|
|
char c = nextChar(parser);
|
|
if (c == '"') break;
|
|
|
|
if (c == '\0')
|
|
{
|
|
lexError(parser, "Unterminated string.");
|
|
|
|
// Don't consume it if it isn't expected. Keeps us from reading past the
|
|
// end of an unterminated string.
|
|
parser->currentChar--;
|
|
break;
|
|
}
|
|
|
|
if (c == '%')
|
|
{
|
|
if (parser->numParens < MAX_INTERPOLATION_NESTING)
|
|
{
|
|
// TODO: Allow format string.
|
|
if (nextChar(parser) != '(') lexError(parser, "Expect '(' after '%%'.");
|
|
|
|
parser->parens[parser->numParens++] = 1;
|
|
type = TOKEN_INTERPOLATION;
|
|
break;
|
|
}
|
|
|
|
lexError(parser, "Interpolation may only nest %d levels deep.",
|
|
MAX_INTERPOLATION_NESTING);
|
|
}
|
|
|
|
if (c == '\\')
|
|
{
|
|
switch (nextChar(parser))
|
|
{
|
|
case '"': wrenByteBufferWrite(parser->vm, &string, '"'); break;
|
|
case '\\': wrenByteBufferWrite(parser->vm, &string, '\\'); break;
|
|
case '%': wrenByteBufferWrite(parser->vm, &string, '%'); break;
|
|
case '0': wrenByteBufferWrite(parser->vm, &string, '\0'); break;
|
|
case 'a': wrenByteBufferWrite(parser->vm, &string, '\a'); break;
|
|
case 'b': wrenByteBufferWrite(parser->vm, &string, '\b'); break;
|
|
case 'f': wrenByteBufferWrite(parser->vm, &string, '\f'); break;
|
|
case 'n': wrenByteBufferWrite(parser->vm, &string, '\n'); break;
|
|
case 'r': wrenByteBufferWrite(parser->vm, &string, '\r'); break;
|
|
case 't': wrenByteBufferWrite(parser->vm, &string, '\t'); break;
|
|
case 'u': readUnicodeEscape(parser, &string, 4); break;
|
|
case 'U': readUnicodeEscape(parser, &string, 8); break;
|
|
case 'v': wrenByteBufferWrite(parser->vm, &string, '\v'); break;
|
|
case 'x':
|
|
wrenByteBufferWrite(parser->vm, &string,
|
|
(uint8_t)readHexEscape(parser, 2, "byte"));
|
|
break;
|
|
|
|
default:
|
|
lexError(parser, "Invalid escape character '%c'.",
|
|
*(parser->currentChar - 1));
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
wrenByteBufferWrite(parser->vm, &string, c);
|
|
}
|
|
}
|
|
|
|
parser->current.value = wrenNewStringLength(parser->vm,
|
|
(char*)string.data, string.count);
|
|
|
|
wrenByteBufferClear(parser->vm, &string);
|
|
makeToken(parser, type);
|
|
}
|
|
|
|
// Lex the next token and store it in [parser.current].
|
|
static void nextToken(Parser* parser)
|
|
{
|
|
parser->previous = parser->current;
|
|
|
|
// If we are out of tokens, don't try to tokenize any more. We *do* still
|
|
// copy the TOKEN_EOF to previous so that code that expects it to be consumed
|
|
// will still work.
|
|
if (parser->current.type == TOKEN_EOF) return;
|
|
|
|
while (peekChar(parser) != '\0')
|
|
{
|
|
parser->tokenStart = parser->currentChar;
|
|
|
|
char c = nextChar(parser);
|
|
switch (c)
|
|
{
|
|
case '(':
|
|
// If we are inside an interpolated expression, count the unmatched "(".
|
|
if (parser->numParens > 0) parser->parens[parser->numParens - 1]++;
|
|
makeToken(parser, TOKEN_LEFT_PAREN);
|
|
return;
|
|
|
|
case ')':
|
|
// If we are inside an interpolated expression, count the ")".
|
|
if (parser->numParens > 0 &&
|
|
--parser->parens[parser->numParens - 1] == 0)
|
|
{
|
|
// This is the final ")", so the interpolation expression has ended.
|
|
// This ")" now begins the next section of the template string.
|
|
parser->numParens--;
|
|
readString(parser);
|
|
return;
|
|
}
|
|
|
|
makeToken(parser, TOKEN_RIGHT_PAREN);
|
|
return;
|
|
|
|
case '[': makeToken(parser, TOKEN_LEFT_BRACKET); return;
|
|
case ']': makeToken(parser, TOKEN_RIGHT_BRACKET); return;
|
|
case '{': makeToken(parser, TOKEN_LEFT_BRACE); return;
|
|
case '}': makeToken(parser, TOKEN_RIGHT_BRACE); return;
|
|
case ':': makeToken(parser, TOKEN_COLON); return;
|
|
case ',': makeToken(parser, TOKEN_COMMA); return;
|
|
case '*': makeToken(parser, TOKEN_STAR); return;
|
|
case '%': makeToken(parser, TOKEN_PERCENT); return;
|
|
case '^': makeToken(parser, TOKEN_CARET); return;
|
|
case '+': makeToken(parser, TOKEN_PLUS); return;
|
|
case '-': makeToken(parser, TOKEN_MINUS); return;
|
|
case '~': makeToken(parser, TOKEN_TILDE); return;
|
|
case '?': makeToken(parser, TOKEN_QUESTION); return;
|
|
|
|
case '|': twoCharToken(parser, '|', TOKEN_PIPEPIPE, TOKEN_PIPE); return;
|
|
case '&': twoCharToken(parser, '&', TOKEN_AMPAMP, TOKEN_AMP); return;
|
|
case '=': twoCharToken(parser, '=', TOKEN_EQEQ, TOKEN_EQ); return;
|
|
case '!': twoCharToken(parser, '=', TOKEN_BANGEQ, TOKEN_BANG); return;
|
|
|
|
case '.':
|
|
if (matchChar(parser, '.'))
|
|
{
|
|
twoCharToken(parser, '.', TOKEN_DOTDOTDOT, TOKEN_DOTDOT);
|
|
return;
|
|
}
|
|
|
|
makeToken(parser, TOKEN_DOT);
|
|
return;
|
|
|
|
case '/':
|
|
if (matchChar(parser, '/'))
|
|
{
|
|
skipLineComment(parser);
|
|
break;
|
|
}
|
|
|
|
if (matchChar(parser, '*'))
|
|
{
|
|
skipBlockComment(parser);
|
|
break;
|
|
}
|
|
|
|
makeToken(parser, TOKEN_SLASH);
|
|
return;
|
|
|
|
case '<':
|
|
if (matchChar(parser, '<'))
|
|
{
|
|
makeToken(parser, TOKEN_LTLT);
|
|
}
|
|
else
|
|
{
|
|
twoCharToken(parser, '=', TOKEN_LTEQ, TOKEN_LT);
|
|
}
|
|
return;
|
|
|
|
case '>':
|
|
if (matchChar(parser, '>'))
|
|
{
|
|
makeToken(parser, TOKEN_GTGT);
|
|
}
|
|
else
|
|
{
|
|
twoCharToken(parser, '=', TOKEN_GTEQ, TOKEN_GT);
|
|
}
|
|
return;
|
|
|
|
case '\n':
|
|
makeToken(parser, TOKEN_LINE);
|
|
return;
|
|
|
|
case ' ':
|
|
case '\r':
|
|
case '\t':
|
|
// Skip forward until we run out of whitespace.
|
|
while (peekChar(parser) == ' ' ||
|
|
peekChar(parser) == '\r' ||
|
|
peekChar(parser) == '\t')
|
|
{
|
|
nextChar(parser);
|
|
}
|
|
break;
|
|
|
|
case '"': readString(parser); return;
|
|
case '_':
|
|
readName(parser,
|
|
peekChar(parser) == '_' ? TOKEN_STATIC_FIELD : TOKEN_FIELD);
|
|
return;
|
|
|
|
case '0':
|
|
if (peekChar(parser) == 'x')
|
|
{
|
|
readHexNumber(parser);
|
|
return;
|
|
}
|
|
|
|
readNumber(parser);
|
|
return;
|
|
|
|
default:
|
|
if (parser->currentLine == 1 && c == '#' && peekChar(parser) == '!')
|
|
{
|
|
// Ignore shebang on the first line.
|
|
skipLineComment(parser);
|
|
break;
|
|
}
|
|
if (isName(c))
|
|
{
|
|
readName(parser, TOKEN_NAME);
|
|
}
|
|
else if (isDigit(c))
|
|
{
|
|
readNumber(parser);
|
|
}
|
|
else
|
|
{
|
|
if (c >= 32 && c <= 126)
|
|
{
|
|
lexError(parser, "Invalid character '%c'.", c);
|
|
}
|
|
else
|
|
{
|
|
// Don't show non-ASCII values since we didn't UTF-8 decode the
|
|
// bytes. Since there are no non-ASCII byte values that are
|
|
// meaningful code units in Wren, the lexer works on raw bytes,
|
|
// even though the source code and console output are UTF-8.
|
|
lexError(parser, "Invalid byte 0x%x.", (uint8_t)c);
|
|
}
|
|
parser->current.type = TOKEN_ERROR;
|
|
parser->current.length = 0;
|
|
}
|
|
return;
|
|
}
|
|
}
|
|
|
|
// If we get here, we're out of source, so just make EOF tokens.
|
|
parser->tokenStart = parser->currentChar;
|
|
makeToken(parser, TOKEN_EOF);
|
|
}
|
|
|
|
// Parsing ---------------------------------------------------------------------
|
|
|
|
// Returns the type of the current token.
|
|
static TokenType peek(Compiler* compiler)
|
|
{
|
|
return compiler->parser->current.type;
|
|
}
|
|
|
|
// Consumes the current token if its type is [expected]. Returns true if a
|
|
// token was consumed.
|
|
static bool match(Compiler* compiler, TokenType expected)
|
|
{
|
|
if (peek(compiler) != expected) return false;
|
|
|
|
nextToken(compiler->parser);
|
|
return true;
|
|
}
|
|
|
|
// Consumes the current token. Emits an error if its type is not [expected].
|
|
static void consume(Compiler* compiler, TokenType expected,
|
|
const char* errorMessage)
|
|
{
|
|
nextToken(compiler->parser);
|
|
if (compiler->parser->previous.type != expected)
|
|
{
|
|
error(compiler, errorMessage);
|
|
|
|
// If the next token is the one we want, assume the current one is just a
|
|
// spurious error and discard it to minimize the number of cascaded errors.
|
|
if (compiler->parser->current.type == expected) nextToken(compiler->parser);
|
|
}
|
|
}
|
|
|
|
// Matches one or more newlines. Returns true if at least one was found.
|
|
static bool matchLine(Compiler* compiler)
|
|
{
|
|
if (!match(compiler, TOKEN_LINE)) return false;
|
|
|
|
while (match(compiler, TOKEN_LINE));
|
|
return true;
|
|
}
|
|
|
|
// Discards any newlines starting at the current token.
|
|
static void ignoreNewlines(Compiler* compiler)
|
|
{
|
|
matchLine(compiler);
|
|
}
|
|
|
|
// Consumes the current token. Emits an error if it is not a newline. Then
|
|
// discards any duplicate newlines following it.
|
|
static void consumeLine(Compiler* compiler, const char* errorMessage)
|
|
{
|
|
consume(compiler, TOKEN_LINE, errorMessage);
|
|
ignoreNewlines(compiler);
|
|
}
|
|
|
|
// Variables and scopes --------------------------------------------------------
|
|
|
|
// Emits one single-byte argument. Returns its index.
|
|
static int emitByte(Compiler* compiler, int byte)
|
|
{
|
|
wrenByteBufferWrite(compiler->parser->vm, &compiler->fn->code, (uint8_t)byte);
|
|
|
|
// Assume the instruction is associated with the most recently consumed token.
|
|
wrenIntBufferWrite(compiler->parser->vm, &compiler->fn->debug->sourceLines,
|
|
compiler->parser->previous.line);
|
|
|
|
return compiler->fn->code.count - 1;
|
|
}
|
|
|
|
// Emits one bytecode instruction.
|
|
static void emitOp(Compiler* compiler, Code instruction)
|
|
{
|
|
emitByte(compiler, instruction);
|
|
|
|
// Keep track of the stack's high water mark.
|
|
compiler->numSlots += stackEffects[instruction];
|
|
if (compiler->numSlots > compiler->fn->maxSlots)
|
|
{
|
|
compiler->fn->maxSlots = compiler->numSlots;
|
|
}
|
|
}
|
|
|
|
// Emits one 16-bit argument, which will be written big endian.
|
|
static void emitShort(Compiler* compiler, int arg)
|
|
{
|
|
emitByte(compiler, (arg >> 8) & 0xff);
|
|
emitByte(compiler, arg & 0xff);
|
|
}
|
|
|
|
// Emits one bytecode instruction followed by a 8-bit argument. Returns the
|
|
// index of the argument in the bytecode.
|
|
static int emitByteArg(Compiler* compiler, Code instruction, int arg)
|
|
{
|
|
emitOp(compiler, instruction);
|
|
return emitByte(compiler, arg);
|
|
}
|
|
|
|
// Emits one bytecode instruction followed by a 16-bit argument, which will be
|
|
// written big endian.
|
|
static void emitShortArg(Compiler* compiler, Code instruction, int arg)
|
|
{
|
|
emitOp(compiler, instruction);
|
|
emitShort(compiler, arg);
|
|
}
|
|
|
|
// Emits [instruction] followed by a placeholder for a jump offset. The
|
|
// placeholder can be patched by calling [jumpPatch]. Returns the index of the
|
|
// placeholder.
|
|
static int emitJump(Compiler* compiler, Code instruction)
|
|
{
|
|
emitOp(compiler, instruction);
|
|
emitByte(compiler, 0xff);
|
|
return emitByte(compiler, 0xff) - 1;
|
|
}
|
|
|
|
// Creates a new constant for the current value and emits the bytecode to load
|
|
// it from the constant table.
|
|
static void emitConstant(Compiler* compiler, Value value)
|
|
{
|
|
int constant = addConstant(compiler, value);
|
|
|
|
// Compile the code to load the constant.
|
|
emitShortArg(compiler, CODE_CONSTANT, constant);
|
|
}
|
|
|
|
// Create a new local variable with [name]. Assumes the current scope is local
|
|
// and the name is unique.
|
|
static int addLocal(Compiler* compiler, const char* name, int length)
|
|
{
|
|
Local* local = &compiler->locals[compiler->numLocals];
|
|
local->name = name;
|
|
local->length = length;
|
|
local->depth = compiler->scopeDepth;
|
|
local->isUpvalue = false;
|
|
return compiler->numLocals++;
|
|
}
|
|
|
|
// Declares a variable in the current scope whose name is the given token.
|
|
//
|
|
// If [token] is `NULL`, uses the previously consumed token. Returns its symbol.
|
|
static int declareVariable(Compiler* compiler, Token* token)
|
|
{
|
|
if (token == NULL) token = &compiler->parser->previous;
|
|
|
|
if (token->length > MAX_VARIABLE_NAME)
|
|
{
|
|
error(compiler, "Variable name cannot be longer than %d characters.",
|
|
MAX_VARIABLE_NAME);
|
|
}
|
|
|
|
// Top-level module scope.
|
|
if (compiler->scopeDepth == -1)
|
|
{
|
|
int symbol = wrenDefineVariable(compiler->parser->vm,
|
|
compiler->parser->module,
|
|
token->start, token->length, NULL_VAL);
|
|
|
|
if (symbol == -1)
|
|
{
|
|
error(compiler, "Module variable is already defined.");
|
|
}
|
|
else if (symbol == -2)
|
|
{
|
|
error(compiler, "Too many module variables defined.");
|
|
}
|
|
|
|
return symbol;
|
|
}
|
|
|
|
// See if there is already a variable with this name declared in the current
|
|
// scope. (Outer scopes are OK: those get shadowed.)
|
|
for (int i = compiler->numLocals - 1; i >= 0; i--)
|
|
{
|
|
Local* local = &compiler->locals[i];
|
|
|
|
// Once we escape this scope and hit an outer one, we can stop.
|
|
if (local->depth < compiler->scopeDepth) break;
|
|
|
|
if (local->length == token->length &&
|
|
memcmp(local->name, token->start, token->length) == 0)
|
|
{
|
|
error(compiler, "Variable is already declared in this scope.");
|
|
return i;
|
|
}
|
|
}
|
|
|
|
if (compiler->numLocals == MAX_LOCALS)
|
|
{
|
|
error(compiler, "Cannot declare more than %d variables in one scope.",
|
|
MAX_LOCALS);
|
|
return -1;
|
|
}
|
|
|
|
return addLocal(compiler, token->start, token->length);
|
|
}
|
|
|
|
// Parses a name token and declares a variable in the current scope with that
|
|
// name. Returns its slot.
|
|
static int declareNamedVariable(Compiler* compiler)
|
|
{
|
|
consume(compiler, TOKEN_NAME, "Expect variable name.");
|
|
return declareVariable(compiler, NULL);
|
|
}
|
|
|
|
// Stores a variable with the previously defined symbol in the current scope.
|
|
static void defineVariable(Compiler* compiler, int symbol)
|
|
{
|
|
// Store the variable. If it's a local, the result of the initializer is
|
|
// in the correct slot on the stack already so we're done.
|
|
if (compiler->scopeDepth >= 0) return;
|
|
|
|
// It's a module-level variable, so store the value in the module slot and
|
|
// then discard the temporary for the initializer.
|
|
emitShortArg(compiler, CODE_STORE_MODULE_VAR, symbol);
|
|
emitOp(compiler, CODE_POP);
|
|
}
|
|
|
|
// Starts a new local block scope.
|
|
static void pushScope(Compiler* compiler)
|
|
{
|
|
compiler->scopeDepth++;
|
|
}
|
|
|
|
// Generates code to discard local variables at [depth] or greater. Does *not*
|
|
// actually undeclare variables or pop any scopes, though. This is called
|
|
// directly when compiling "break" statements to ditch the local variables
|
|
// before jumping out of the loop even though they are still in scope *past*
|
|
// the break instruction.
|
|
//
|
|
// Returns the number of local variables that were eliminated.
|
|
static int discardLocals(Compiler* compiler, int depth)
|
|
{
|
|
ASSERT(compiler->scopeDepth > -1, "Cannot exit top-level scope.");
|
|
|
|
int local = compiler->numLocals - 1;
|
|
while (local >= 0 && compiler->locals[local].depth >= depth)
|
|
{
|
|
// If the local was closed over, make sure the upvalue gets closed when it
|
|
// goes out of scope on the stack. We use emitByte() and not emitOp() here
|
|
// because we don't want to track that stack effect of these pops since the
|
|
// variables are still in scope after the break.
|
|
if (compiler->locals[local].isUpvalue)
|
|
{
|
|
emitByte(compiler, CODE_CLOSE_UPVALUE);
|
|
}
|
|
else
|
|
{
|
|
emitByte(compiler, CODE_POP);
|
|
}
|
|
|
|
|
|
local--;
|
|
}
|
|
|
|
return compiler->numLocals - local - 1;
|
|
}
|
|
|
|
// Closes the last pushed block scope and discards any local variables declared
|
|
// in that scope. This should only be called in a statement context where no
|
|
// temporaries are still on the stack.
|
|
static void popScope(Compiler* compiler)
|
|
{
|
|
int popped = discardLocals(compiler, compiler->scopeDepth);
|
|
compiler->numLocals -= popped;
|
|
compiler->numSlots -= popped;
|
|
compiler->scopeDepth--;
|
|
}
|
|
|
|
// Attempts to look up the name in the local variables of [compiler]. If found,
|
|
// returns its index, otherwise returns -1.
|
|
static int resolveLocal(Compiler* compiler, const char* name, int length)
|
|
{
|
|
// Look it up in the local scopes. Look in reverse order so that the most
|
|
// nested variable is found first and shadows outer ones.
|
|
for (int i = compiler->numLocals - 1; i >= 0; i--)
|
|
{
|
|
if (compiler->locals[i].length == length &&
|
|
memcmp(name, compiler->locals[i].name, length) == 0)
|
|
{
|
|
return i;
|
|
}
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
// Adds an upvalue to [compiler]'s function with the given properties. Does not
|
|
// add one if an upvalue for that variable is already in the list. Returns the
|
|
// index of the upvalue.
|
|
static int addUpvalue(Compiler* compiler, bool isLocal, int index)
|
|
{
|
|
// Look for an existing one.
|
|
for (int i = 0; i < compiler->fn->numUpvalues; i++)
|
|
{
|
|
CompilerUpvalue* upvalue = &compiler->upvalues[i];
|
|
if (upvalue->index == index && upvalue->isLocal == isLocal) return i;
|
|
}
|
|
|
|
// If we got here, it's a new upvalue.
|
|
compiler->upvalues[compiler->fn->numUpvalues].isLocal = isLocal;
|
|
compiler->upvalues[compiler->fn->numUpvalues].index = index;
|
|
return compiler->fn->numUpvalues++;
|
|
}
|
|
|
|
// Attempts to look up [name] in the functions enclosing the one being compiled
|
|
// by [compiler]. If found, it adds an upvalue for it to this compiler's list
|
|
// of upvalues (unless it's already in there) and returns its index. If not
|
|
// found, returns -1.
|
|
//
|
|
// If the name is found outside of the immediately enclosing function, this
|
|
// will flatten the closure and add upvalues to all of the intermediate
|
|
// functions so that it gets walked down to this one.
|
|
//
|
|
// If it reaches a method boundary, this stops and returns -1 since methods do
|
|
// not close over local variables.
|
|
static int findUpvalue(Compiler* compiler, const char* name, int length)
|
|
{
|
|
// If we are at the top level, we didn't find it.
|
|
if (compiler->parent == NULL) return -1;
|
|
|
|
// If we hit the method boundary (and the name isn't a static field), then
|
|
// stop looking for it. We'll instead treat it as a self send.
|
|
if (name[0] != '_' && compiler->parent->enclosingClass != NULL) return -1;
|
|
|
|
// See if it's a local variable in the immediately enclosing function.
|
|
int local = resolveLocal(compiler->parent, name, length);
|
|
if (local != -1)
|
|
{
|
|
// Mark the local as an upvalue so we know to close it when it goes out of
|
|
// scope.
|
|
compiler->parent->locals[local].isUpvalue = true;
|
|
|
|
return addUpvalue(compiler, true, local);
|
|
}
|
|
|
|
// See if it's an upvalue in the immediately enclosing function. In other
|
|
// words, if it's a local variable in a non-immediately enclosing function.
|
|
// This "flattens" closures automatically: it adds upvalues to all of the
|
|
// intermediate functions to get from the function where a local is declared
|
|
// all the way into the possibly deeply nested function that is closing over
|
|
// it.
|
|
int upvalue = findUpvalue(compiler->parent, name, length);
|
|
if (upvalue != -1)
|
|
{
|
|
return addUpvalue(compiler, false, upvalue);
|
|
}
|
|
|
|
// If we got here, we walked all the way up the parent chain and couldn't
|
|
// find it.
|
|
return -1;
|
|
}
|
|
|
|
// Look up [name] in the current scope to see what variable it refers to.
|
|
// Returns the variable either in local scope, or the enclosing function's
|
|
// upvalue list. Does not search the module scope. Returns a variable with
|
|
// index -1 if not found.
|
|
static Variable resolveNonmodule(Compiler* compiler,
|
|
const char* name, int length)
|
|
{
|
|
// Look it up in the local scopes.
|
|
Variable variable;
|
|
variable.scope = SCOPE_LOCAL;
|
|
variable.index = resolveLocal(compiler, name, length);
|
|
if (variable.index != -1) return variable;
|
|
|
|
// Tt's not a local, so guess that it's an upvalue.
|
|
variable.scope = SCOPE_UPVALUE;
|
|
variable.index = findUpvalue(compiler, name, length);
|
|
return variable;
|
|
}
|
|
|
|
// Look up [name] in the current scope to see what variable it refers to.
|
|
// Returns the variable either in module scope, local scope, or the enclosing
|
|
// function's upvalue list. Returns a variable with index -1 if not found.
|
|
static Variable resolveName(Compiler* compiler, const char* name, int length)
|
|
{
|
|
Variable variable = resolveNonmodule(compiler, name, length);
|
|
if (variable.index != -1) return variable;
|
|
|
|
variable.scope = SCOPE_MODULE;
|
|
variable.index = wrenSymbolTableFind(&compiler->parser->module->variableNames,
|
|
name, length);
|
|
return variable;
|
|
}
|
|
|
|
// Emits the code to load the local variable in [slot]. Slots 0 through 8 are
// encoded as a single opcode offset from CODE_LOAD_LOCAL_0; any higher slot
// uses CODE_LOAD_LOCAL with the slot as a one-byte operand.
static void loadLocal(Compiler* compiler, int slot)
{
  if (slot > 8)
  {
    emitByteArg(compiler, CODE_LOAD_LOCAL, slot);
    return;
  }

  emitOp(compiler, (Code)(CODE_LOAD_LOCAL_0 + slot));
}
|
|
|
|
// Finishes [compiler], which is compiling a function, method, or chunk of top
|
|
// level code. If there is a parent compiler, then this emits code in the
|
|
// parent compiler to load the resulting function.
|
|
static ObjFn* endCompiler(Compiler* compiler,
|
|
const char* debugName, int debugNameLength)
|
|
{
|
|
// If we hit an error, don't finish the function since it's borked anyway.
|
|
if (compiler->parser->hasError)
|
|
{
|
|
compiler->parser->vm->compiler = compiler->parent;
|
|
return NULL;
|
|
}
|
|
|
|
// Mark the end of the bytecode. Since it may contain multiple early returns,
|
|
// we can't rely on CODE_RETURN to tell us we're at the end.
|
|
emitOp(compiler, CODE_END);
|
|
|
|
wrenFunctionBindName(compiler->parser->vm, compiler->fn,
|
|
debugName, debugNameLength);
|
|
|
|
// In the function that contains this one, load the resulting function object.
|
|
if (compiler->parent != NULL)
|
|
{
|
|
int constant = addConstant(compiler->parent, OBJ_VAL(compiler->fn));
|
|
|
|
// Wrap the function in a closure. We do this even if it has no upvalues so
|
|
// that the VM can uniformly assume all called objects are closures. This
|
|
// makes creating a function a little slower, but makes invoking them
|
|
// faster. Given that functions are invoked more often than they are
|
|
// created, this is a win.
|
|
emitShortArg(compiler->parent, CODE_CLOSURE, constant);
|
|
|
|
// Emit arguments for each upvalue to know whether to capture a local or
|
|
// an upvalue.
|
|
for (int i = 0; i < compiler->fn->numUpvalues; i++)
|
|
{
|
|
emitByte(compiler->parent, compiler->upvalues[i].isLocal ? 1 : 0);
|
|
emitByte(compiler->parent, compiler->upvalues[i].index);
|
|
}
|
|
}
|
|
|
|
// Pop this compiler off the stack.
|
|
compiler->parser->vm->compiler = compiler->parent;
|
|
|
|
#if WREN_DEBUG_DUMP_COMPILED_CODE
|
|
wrenDumpCode(compiler->parser->vm, compiler->fn);
|
|
#endif
|
|
|
|
return compiler->fn;
|
|
}
|
|
|
|
// Grammar ---------------------------------------------------------------------
|
|
|
|
// Operator precedence levels, listed from loosest binding to tightest. The
// declaration order matters: code does arithmetic on these values (for
// example `PREC_UNARY + 1`), so enumerators must not be reordered.
typedef enum
{
  PREC_NONE,
  PREC_LOWEST,
  PREC_ASSIGNMENT,    // =
  PREC_CONDITIONAL,   // ?:
  PREC_LOGICAL_OR,    // ||
  PREC_LOGICAL_AND,   // &&
  PREC_EQUALITY,      // == !=
  PREC_IS,            // is
  PREC_COMPARISON,    // < > <= >=
  PREC_BITWISE_OR,    // |
  PREC_BITWISE_XOR,   // ^
  PREC_BITWISE_AND,   // &
  PREC_BITWISE_SHIFT, // << >>
  PREC_RANGE,         // .. ...
  PREC_TERM,          // + -
  PREC_FACTOR,        // * / %
  PREC_UNARY,         // unary - ! ~
  PREC_CALL,          // . () []
  PREC_PRIMARY
} Precedence;
|
|
|
|
typedef void (*GrammarFn)(Compiler*, bool canAssign);
|
|
|
|
typedef void (*SignatureFn)(Compiler* compiler, Signature* signature);
|
|
|
|
typedef struct
|
|
{
|
|
GrammarFn prefix;
|
|
GrammarFn infix;
|
|
SignatureFn method;
|
|
Precedence precedence;
|
|
const char* name;
|
|
} GrammarRule;
|
|
|
|
// Forward declarations since the grammar is recursive.
|
|
static GrammarRule* getRule(TokenType type);
|
|
static void expression(Compiler* compiler);
|
|
static void statement(Compiler* compiler);
|
|
static void definition(Compiler* compiler);
|
|
static void parsePrecedence(Compiler* compiler, Precedence precedence);
|
|
|
|
// Replaces the placeholder argument for a previous CODE_JUMP or CODE_JUMP_IF
|
|
// instruction with an offset that jumps to the current end of bytecode.
|
|
static void patchJump(Compiler* compiler, int offset)
|
|
{
|
|
// -2 to adjust for the bytecode for the jump offset itself.
|
|
int jump = compiler->fn->code.count - offset - 2;
|
|
if (jump > MAX_JUMP) error(compiler, "Too much code to jump over.");
|
|
|
|
compiler->fn->code.data[offset] = (jump >> 8) & 0xff;
|
|
compiler->fn->code.data[offset + 1] = jump & 0xff;
|
|
}
|
|
|
|
// Parses a block body, after the initial "{" has been consumed.
|
|
//
|
|
// Returns true if it was a expression body, false if it was a statement body.
|
|
// (More precisely, returns true if a value was left on the stack. An empty
|
|
// block returns false.)
|
|
static bool finishBlock(Compiler* compiler)
|
|
{
|
|
// Empty blocks do nothing.
|
|
if (match(compiler, TOKEN_RIGHT_BRACE)) return false;
|
|
|
|
// If there's no line after the "{", it's a single-expression body.
|
|
if (!matchLine(compiler))
|
|
{
|
|
expression(compiler);
|
|
consume(compiler, TOKEN_RIGHT_BRACE, "Expect '}' at end of block.");
|
|
return true;
|
|
}
|
|
|
|
// Empty blocks (with just a newline inside) do nothing.
|
|
if (match(compiler, TOKEN_RIGHT_BRACE)) return false;
|
|
|
|
// Compile the definition list.
|
|
do
|
|
{
|
|
definition(compiler);
|
|
consumeLine(compiler, "Expect newline after statement.");
|
|
}
|
|
while (peek(compiler) != TOKEN_RIGHT_BRACE && peek(compiler) != TOKEN_EOF);
|
|
|
|
consume(compiler, TOKEN_RIGHT_BRACE, "Expect '}' at end of block.");
|
|
return false;
|
|
}
|
|
|
|
// Parses a method or function body, after the initial "{" has been consumed.
|
|
//
|
|
// It [isInitializer] is `true`, this is the body of a constructor initializer.
|
|
// In that case, this adds the code to ensure it returns `this`.
|
|
static void finishBody(Compiler* compiler, bool isInitializer)
|
|
{
|
|
bool isExpressionBody = finishBlock(compiler);
|
|
|
|
if (isInitializer)
|
|
{
|
|
// If the initializer body evaluates to a value, discard it.
|
|
if (isExpressionBody) emitOp(compiler, CODE_POP);
|
|
|
|
// The receiver is always stored in the first local slot.
|
|
emitOp(compiler, CODE_LOAD_LOCAL_0);
|
|
}
|
|
else if (!isExpressionBody)
|
|
{
|
|
// Implicitly return null in statement bodies.
|
|
emitOp(compiler, CODE_NULL);
|
|
}
|
|
|
|
emitOp(compiler, CODE_RETURN);
|
|
}
|
|
|
|
// The VM can only handle a certain number of parameters, so check that we
|
|
// haven't exceeded that and give a usable error.
|
|
static void validateNumParameters(Compiler* compiler, int numArgs)
|
|
{
|
|
if (numArgs == MAX_PARAMETERS + 1)
|
|
{
|
|
// Only show an error at exactly max + 1 so that we can keep parsing the
|
|
// parameters and minimize cascaded errors.
|
|
error(compiler, "Methods cannot have more than %d parameters.",
|
|
MAX_PARAMETERS);
|
|
}
|
|
}
|
|
|
|
// Parses the rest of a comma-separated parameter list after the opening
|
|
// delimeter. Updates `arity` in [signature] with the number of parameters.
|
|
static void finishParameterList(Compiler* compiler, Signature* signature)
|
|
{
|
|
do
|
|
{
|
|
ignoreNewlines(compiler);
|
|
validateNumParameters(compiler, ++signature->arity);
|
|
|
|
// Define a local variable in the method for the parameter.
|
|
declareNamedVariable(compiler);
|
|
}
|
|
while (match(compiler, TOKEN_COMMA));
|
|
}
|
|
|
|
// Gets the symbol for a method [name] with [length].
|
|
static int methodSymbol(Compiler* compiler, const char* name, int length)
|
|
{
|
|
return wrenSymbolTableEnsure(compiler->parser->vm,
|
|
&compiler->parser->vm->methodNames, name, length);
|
|
}
|
|
|
|
// Appends characters to [name] (and updates [length]) for [numParams] "_"
|
|
// surrounded by [leftBracket] and [rightBracket].
|
|
static void signatureParameterList(char name[MAX_METHOD_SIGNATURE], int* length,
|
|
int numParams, char leftBracket, char rightBracket)
|
|
{
|
|
name[(*length)++] = leftBracket;
|
|
|
|
// This function may be called with too many parameters. When that happens,
|
|
// a compile error has already been reported, but we need to make sure we
|
|
// don't overflow the string too, hence the MAX_PARAMETERS check.
|
|
for (int i = 0; i < numParams && i < MAX_PARAMETERS; i++)
|
|
{
|
|
if (i > 0) name[(*length)++] = ',';
|
|
name[(*length)++] = '_';
|
|
}
|
|
name[(*length)++] = rightBracket;
|
|
}
|
|
|
|
// Fills [name] with the stringified version of [signature] and updates
|
|
// [length] to the resulting length.
|
|
static void signatureToString(Signature* signature,
|
|
char name[MAX_METHOD_SIGNATURE], int* length)
|
|
{
|
|
*length = 0;
|
|
|
|
// Build the full name from the signature.
|
|
memcpy(name + *length, signature->name, signature->length);
|
|
*length += signature->length;
|
|
|
|
switch (signature->type)
|
|
{
|
|
case SIG_METHOD:
|
|
signatureParameterList(name, length, signature->arity, '(', ')');
|
|
break;
|
|
|
|
case SIG_GETTER:
|
|
// The signature is just the name.
|
|
break;
|
|
|
|
case SIG_SETTER:
|
|
name[(*length)++] = '=';
|
|
signatureParameterList(name, length, 1, '(', ')');
|
|
break;
|
|
|
|
case SIG_SUBSCRIPT:
|
|
signatureParameterList(name, length, signature->arity, '[', ']');
|
|
break;
|
|
|
|
case SIG_SUBSCRIPT_SETTER:
|
|
signatureParameterList(name, length, signature->arity - 1, '[', ']');
|
|
name[(*length)++] = '=';
|
|
signatureParameterList(name, length, 1, '(', ')');
|
|
break;
|
|
|
|
case SIG_INITIALIZER:
|
|
memcpy(name, "init ", 5);
|
|
memcpy(name + 5, signature->name, signature->length);
|
|
*length = 5 + signature->length;
|
|
signatureParameterList(name, length, signature->arity, '(', ')');
|
|
break;
|
|
}
|
|
|
|
name[*length] = '\0';
|
|
}
|
|
|
|
// Gets the symbol for a method with [signature].
|
|
static int signatureSymbol(Compiler* compiler, Signature* signature)
|
|
{
|
|
// Build the full name from the signature.
|
|
char name[MAX_METHOD_SIGNATURE];
|
|
int length;
|
|
signatureToString(signature, name, &length);
|
|
|
|
return methodSymbol(compiler, name, length);
|
|
}
|
|
|
|
// Returns a signature with [type] whose name is from the last consumed token.
|
|
static Signature signatureFromToken(Compiler* compiler, SignatureType type)
|
|
{
|
|
Signature signature;
|
|
|
|
// Get the token for the method name.
|
|
Token* token = &compiler->parser->previous;
|
|
signature.name = token->start;
|
|
signature.length = token->length;
|
|
signature.type = type;
|
|
signature.arity = 0;
|
|
|
|
if (signature.length > MAX_METHOD_NAME)
|
|
{
|
|
error(compiler, "Method names cannot be longer than %d characters.",
|
|
MAX_METHOD_NAME);
|
|
signature.length = MAX_METHOD_NAME;
|
|
}
|
|
|
|
return signature;
|
|
}
|
|
|
|
// Parses a comma-separated list of arguments. Modifies [signature] to include
|
|
// the arity of the argument list.
|
|
static void finishArgumentList(Compiler* compiler, Signature* signature)
|
|
{
|
|
do
|
|
{
|
|
ignoreNewlines(compiler);
|
|
validateNumParameters(compiler, ++signature->arity);
|
|
expression(compiler);
|
|
}
|
|
while (match(compiler, TOKEN_COMMA));
|
|
|
|
// Allow a newline before the closing delimiter.
|
|
ignoreNewlines(compiler);
|
|
}
|
|
|
|
// Compiles a method call with [signature] using [instruction].
|
|
static void callSignature(Compiler* compiler, Code instruction,
|
|
Signature* signature)
|
|
{
|
|
int symbol = signatureSymbol(compiler, signature);
|
|
emitShortArg(compiler, (Code)(instruction + signature->arity), symbol);
|
|
|
|
if (instruction == CODE_SUPER_0)
|
|
{
|
|
// Super calls need to be statically bound to the class's superclass. This
|
|
// ensures we call the right method even when a method containing a super
|
|
// call is inherited by another subclass.
|
|
//
|
|
// We bind it at class definition time by storing a reference to the
|
|
// superclass in a constant. So, here, we create a slot in the constant
|
|
// table and store NULL in it. When the method is bound, we'll look up the
|
|
// superclass then and store it in the constant slot.
|
|
emitShort(compiler, addConstant(compiler, NULL_VAL));
|
|
}
|
|
}
|
|
|
|
// Compiles a method call with [numArgs] for a method with [name] with [length].
|
|
static void callMethod(Compiler* compiler, int numArgs, const char* name,
|
|
int length)
|
|
{
|
|
int symbol = methodSymbol(compiler, name, length);
|
|
emitShortArg(compiler, (Code)(CODE_CALL_0 + numArgs), symbol);
|
|
}
|
|
|
|
// Compiles an (optional) argument list for a method call with [methodSignature]
|
|
// and then calls it.
|
|
static void methodCall(Compiler* compiler, Code instruction,
|
|
Signature* signature)
|
|
{
|
|
// Make a new signature that contains the updated arity and type based on
|
|
// the arguments we find.
|
|
Signature called = { signature->name, signature->length, SIG_GETTER, 0 };
|
|
|
|
// Parse the argument list, if any.
|
|
if (match(compiler, TOKEN_LEFT_PAREN))
|
|
{
|
|
called.type = SIG_METHOD;
|
|
|
|
// Allow empty an argument list.
|
|
if (peek(compiler) != TOKEN_RIGHT_PAREN)
|
|
{
|
|
finishArgumentList(compiler, &called);
|
|
}
|
|
consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after arguments.");
|
|
}
|
|
|
|
// Parse the block argument, if any.
|
|
if (match(compiler, TOKEN_LEFT_BRACE))
|
|
{
|
|
// Include the block argument in the arity.
|
|
called.type = SIG_METHOD;
|
|
called.arity++;
|
|
|
|
Compiler fnCompiler;
|
|
initCompiler(&fnCompiler, compiler->parser, compiler, false);
|
|
|
|
// Make a dummy signature to track the arity.
|
|
Signature fnSignature = { "", 0, SIG_METHOD, 0 };
|
|
|
|
// Parse the parameter list, if any.
|
|
if (match(compiler, TOKEN_PIPE))
|
|
{
|
|
finishParameterList(&fnCompiler, &fnSignature);
|
|
consume(compiler, TOKEN_PIPE, "Expect '|' after function parameters.");
|
|
}
|
|
|
|
fnCompiler.fn->arity = fnSignature.arity;
|
|
|
|
finishBody(&fnCompiler, false);
|
|
|
|
// Name the function based on the method its passed to.
|
|
char blockName[MAX_METHOD_SIGNATURE + 15];
|
|
int blockLength;
|
|
signatureToString(&called, blockName, &blockLength);
|
|
memmove(blockName + blockLength, " block argument", 16);
|
|
|
|
endCompiler(&fnCompiler, blockName, blockLength + 15);
|
|
}
|
|
|
|
// TODO: Allow Grace-style mixfix methods?
|
|
|
|
// If this is a super() call for an initializer, make sure we got an actual
|
|
// argument list.
|
|
if (signature->type == SIG_INITIALIZER)
|
|
{
|
|
if (called.type != SIG_METHOD)
|
|
{
|
|
error(compiler, "A superclass constructor must have an argument list.");
|
|
}
|
|
|
|
called.type = SIG_INITIALIZER;
|
|
}
|
|
|
|
callSignature(compiler, instruction, &called);
|
|
}
|
|
|
|
// Compiles a call whose name is the previously consumed token. This includes
|
|
// getters, method calls with arguments, and setter calls.
|
|
static void namedCall(Compiler* compiler, bool canAssign, Code instruction)
|
|
{
|
|
// Get the token for the method name.
|
|
Signature signature = signatureFromToken(compiler, SIG_GETTER);
|
|
|
|
if (canAssign && match(compiler, TOKEN_EQ))
|
|
{
|
|
ignoreNewlines(compiler);
|
|
|
|
// Build the setter signature.
|
|
signature.type = SIG_SETTER;
|
|
signature.arity = 1;
|
|
|
|
// Compile the assigned value.
|
|
expression(compiler);
|
|
callSignature(compiler, instruction, &signature);
|
|
}
|
|
else
|
|
{
|
|
methodCall(compiler, instruction, &signature);
|
|
}
|
|
}
|
|
|
|
// Emits the code to load [variable] onto the stack.
|
|
static void loadVariable(Compiler* compiler, Variable variable)
|
|
{
|
|
switch (variable.scope)
|
|
{
|
|
case SCOPE_LOCAL:
|
|
loadLocal(compiler, variable.index);
|
|
break;
|
|
case SCOPE_UPVALUE:
|
|
emitByteArg(compiler, CODE_LOAD_UPVALUE, variable.index);
|
|
break;
|
|
case SCOPE_MODULE:
|
|
emitShortArg(compiler, CODE_LOAD_MODULE_VAR, variable.index);
|
|
break;
|
|
default:
|
|
UNREACHABLE();
|
|
}
|
|
}
|
|
|
|
// Loads the receiver of the currently enclosing method. Correctly handles
|
|
// functions defined inside methods.
|
|
static void loadThis(Compiler* compiler)
|
|
{
|
|
loadVariable(compiler, resolveNonmodule(compiler, "this", 4));
|
|
}
|
|
|
|
// Pushes the value for a module-level variable implicitly imported from core.
|
|
static void loadCoreVariable(Compiler* compiler, const char* name)
|
|
{
|
|
int symbol = wrenSymbolTableFind(&compiler->parser->module->variableNames,
|
|
name, strlen(name));
|
|
ASSERT(symbol != -1, "Should have already defined core name.");
|
|
emitShortArg(compiler, CODE_LOAD_MODULE_VAR, symbol);
|
|
}
|
|
|
|
// A parenthesized expression.
|
|
static void grouping(Compiler* compiler, bool canAssign)
|
|
{
|
|
expression(compiler);
|
|
consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after expression.");
|
|
}
|
|
|
|
// A list literal.
|
|
static void list(Compiler* compiler, bool canAssign)
|
|
{
|
|
// Instantiate a new list.
|
|
loadCoreVariable(compiler, "List");
|
|
callMethod(compiler, 0, "new()", 5);
|
|
|
|
// Compile the list elements. Each one compiles to a ".add()" call.
|
|
do
|
|
{
|
|
ignoreNewlines(compiler);
|
|
|
|
// Stop if we hit the end of the list.
|
|
if (peek(compiler) == TOKEN_RIGHT_BRACKET) break;
|
|
|
|
// The element.
|
|
expression(compiler);
|
|
callMethod(compiler, 1, "addCore_(_)", 11);
|
|
} while (match(compiler, TOKEN_COMMA));
|
|
|
|
// Allow newlines before the closing ']'.
|
|
ignoreNewlines(compiler);
|
|
consume(compiler, TOKEN_RIGHT_BRACKET, "Expect ']' after list elements.");
|
|
}
|
|
|
|
// A map literal.
|
|
static void map(Compiler* compiler, bool canAssign)
|
|
{
|
|
// Instantiate a new map.
|
|
loadCoreVariable(compiler, "Map");
|
|
callMethod(compiler, 0, "new()", 5);
|
|
|
|
// Compile the map elements. Each one is compiled to just invoke the
|
|
// subscript setter on the map.
|
|
do
|
|
{
|
|
ignoreNewlines(compiler);
|
|
|
|
// Stop if we hit the end of the map.
|
|
if (peek(compiler) == TOKEN_RIGHT_BRACE) break;
|
|
|
|
// The key.
|
|
parsePrecedence(compiler, PREC_UNARY);
|
|
consume(compiler, TOKEN_COLON, "Expect ':' after map key.");
|
|
ignoreNewlines(compiler);
|
|
|
|
// The value.
|
|
expression(compiler);
|
|
callMethod(compiler, 2, "addCore_(_,_)", 13);
|
|
} while (match(compiler, TOKEN_COMMA));
|
|
|
|
// Allow newlines before the closing '}'.
|
|
ignoreNewlines(compiler);
|
|
consume(compiler, TOKEN_RIGHT_BRACE, "Expect '}' after map entries.");
|
|
}
|
|
|
|
// Unary operators like `-foo`.
|
|
static void unaryOp(Compiler* compiler, bool canAssign)
|
|
{
|
|
GrammarRule* rule = getRule(compiler->parser->previous.type);
|
|
|
|
ignoreNewlines(compiler);
|
|
|
|
// Compile the argument.
|
|
parsePrecedence(compiler, (Precedence)(PREC_UNARY + 1));
|
|
|
|
// Call the operator method on the left-hand side.
|
|
callMethod(compiler, 0, rule->name, 1);
|
|
}
|
|
|
|
// Compiles a `true` or `false` literal, chosen by the type of the token that
// was just consumed. [canAssign] is unused.
static void boolean(Compiler* compiler, bool canAssign)
{
  Code op = CODE_TRUE;
  if (compiler->parser->previous.type == TOKEN_FALSE) op = CODE_FALSE;

  emitOp(compiler, op);
}
|
|
|
|
// Walks the compiler chain to find the compiler for the nearest class
|
|
// enclosing this one. Returns NULL if not currently inside a class definition.
|
|
static Compiler* getEnclosingClassCompiler(Compiler* compiler)
|
|
{
|
|
while (compiler != NULL)
|
|
{
|
|
if (compiler->enclosingClass != NULL) return compiler;
|
|
compiler = compiler->parent;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
// Walks the compiler chain to find the nearest class enclosing this one.
|
|
// Returns NULL if not currently inside a class definition.
|
|
static ClassInfo* getEnclosingClass(Compiler* compiler)
|
|
{
|
|
compiler = getEnclosingClassCompiler(compiler);
|
|
return compiler == NULL ? NULL : compiler->enclosingClass;
|
|
}
|
|
|
|
// Compiles a read of, or an assignment to, an instance field. The field token
// has already been consumed.
//
// Reports an error if the field is used outside of a class, in a foreign
// class, or in a static method, but keeps compiling either way.
static void field(Compiler* compiler, bool canAssign)
{
  // Start from a placeholder index so that parsing can continue after an
  // error without piling up follow-on errors.
  int slot = 255;

  ClassInfo* classInfo = getEnclosingClass(compiler);

  if (classInfo == NULL)
  {
    error(compiler, "Cannot reference a field outside of a class definition.");
  }
  else if (classInfo->isForeign)
  {
    error(compiler, "Cannot define fields in a foreign class.");
  }
  else if (classInfo->inStatic)
  {
    error(compiler, "Cannot use an instance field in a static method.");
  }
  else
  {
    // Find the field's symbol, implicitly defining it on first use.
    Token* token = &compiler->parser->previous;
    slot = wrenSymbolTableEnsure(compiler->parser->vm, &classInfo->fields,
                                 token->start, token->length);

    if (slot >= MAX_FIELDS)
    {
      error(compiler, "A class can only have %d fields.", MAX_FIELDS);
    }
  }

  // An "=" following the field name makes this a store. The right-hand side
  // is compiled before the store instruction is emitted.
  bool isStore = false;
  if (canAssign && match(compiler, TOKEN_EQ))
  {
    expression(compiler);
    isStore = true;
  }

  // When the immediate parent compiler is the class's own, a specialized
  // instruction can be used that does not need `this` loaded first.
  bool directlyInMethod = compiler->parent != NULL &&
                          compiler->parent->enclosingClass == classInfo;
  if (!directlyInMethod)
  {
    loadThis(compiler);
    emitByteArg(compiler, isStore ? CODE_STORE_FIELD : CODE_LOAD_FIELD, slot);
    return;
  }

  emitByteArg(compiler, isStore ? CODE_STORE_FIELD_THIS : CODE_LOAD_FIELD_THIS,
              slot);
}
|
|
|
|
// Compiles a read or assignment to [variable].
|
|
static void bareName(Compiler* compiler, bool canAssign, Variable variable)
|
|
{
|
|
// If there's an "=" after a bare name, it's a variable assignment.
|
|
if (canAssign && match(compiler, TOKEN_EQ))
|
|
{
|
|
// Compile the right-hand side.
|
|
expression(compiler);
|
|
|
|
// Emit the store instruction.
|
|
switch (variable.scope)
|
|
{
|
|
case SCOPE_LOCAL:
|
|
emitByteArg(compiler, CODE_STORE_LOCAL, variable.index);
|
|
break;
|
|
case SCOPE_UPVALUE:
|
|
emitByteArg(compiler, CODE_STORE_UPVALUE, variable.index);
|
|
break;
|
|
case SCOPE_MODULE:
|
|
emitShortArg(compiler, CODE_STORE_MODULE_VAR, variable.index);
|
|
break;
|
|
default:
|
|
UNREACHABLE();
|
|
}
|
|
return;
|
|
}
|
|
|
|
// Emit the load instruction.
|
|
loadVariable(compiler, variable);
|
|
}
|
|
|
|
// Compiles a read of, or an assignment to, a static field. The static field
// token has already been consumed.
//
// A static field is stored as a local variable of the compiler that encloses
// the class; it is declared there, initialized to null, the first time it is
// seen.
static void staticField(Compiler* compiler, bool canAssign)
{
  Compiler* owner = getEnclosingClassCompiler(compiler);
  if (owner == NULL)
  {
    error(compiler, "Cannot use a static field outside of a class definition.");
    return;
  }

  Token* fieldToken = &compiler->parser->previous;

  // First use of this static field: implicitly define it as a variable in the
  // scope surrounding the class definition.
  bool isFirstUse =
      resolveLocal(owner, fieldToken->start, fieldToken->length) == -1;
  if (isFirstUse)
  {
    int symbol = declareVariable(owner, NULL);

    // Give it an implicit initial value of null.
    emitOp(owner, CODE_NULL);
    defineVariable(owner, symbol);
  }

  // The variable is now guaranteed to exist. Resolve it through the normal
  // name lookup instead of reusing the resolveLocal() result, because it may
  // already have been closed over as an upvalue.
  Variable variable = resolveName(compiler, fieldToken->start,
                                  fieldToken->length);
  bareName(compiler, canAssign, variable);
}
|
|
|
|
// Returns `true` if the first character of [name] is a lowercase ASCII
// letter ('a' through 'z'), which marks it as a local variable name.
static bool isLocalName(const char* name)
{
  const char first = name[0];
  return !(first < 'a' || first > 'z');
}
|
|
|
|
// Compiles a variable name or method call with an implicit receiver.
|
|
static void name(Compiler* compiler, bool canAssign)
|
|
{
|
|
// Look for the name in the scope chain up to the nearest enclosing method.
|
|
Token* token = &compiler->parser->previous;
|
|
|
|
Variable variable = resolveNonmodule(compiler, token->start, token->length);
|
|
if (variable.index != -1)
|
|
{
|
|
bareName(compiler, canAssign, variable);
|
|
return;
|
|
}
|
|
|
|
// TODO: The fact that we return above here if the variable is known and parse
|
|
// an optional argument list below if not means that the grammar is not
|
|
// context-free. A line of code in a method like "someName(foo)" is a parse
|
|
// error if "someName" is a defined variable in the surrounding scope and not
|
|
// if it isn't. Fix this. One option is to have "someName(foo)" always
|
|
// resolve to a self-call if there is an argument list, but that makes
|
|
// getters a little confusing.
|
|
|
|
// If we're inside a method and the name is lowercase, treat it as a method
|
|
// on this.
|
|
if (isLocalName(token->start) && getEnclosingClass(compiler) != NULL)
|
|
{
|
|
loadThis(compiler);
|
|
namedCall(compiler, canAssign, CODE_CALL_0);
|
|
return;
|
|
}
|
|
|
|
// Otherwise, look for a module-level variable with the name.
|
|
variable.scope = SCOPE_MODULE;
|
|
variable.index = wrenSymbolTableFind(&compiler->parser->module->variableNames,
|
|
token->start, token->length);
|
|
if (variable.index == -1)
|
|
{
|
|
if (isLocalName(token->start))
|
|
{
|
|
error(compiler, "Undefined variable.");
|
|
return;
|
|
}
|
|
|
|
// If it's a nonlocal name, implicitly define a module-level variable in
|
|
// the hopes that we get a real definition later.
|
|
variable.index = wrenDeclareVariable(compiler->parser->vm,
|
|
compiler->parser->module,
|
|
token->start, token->length,
|
|
token->line);
|
|
|
|
if (variable.index == -2)
|
|
{
|
|
error(compiler, "Too many module variables defined.");
|
|
}
|
|
}
|
|
|
|
bareName(compiler, canAssign, variable);
|
|
}
|
|
|
|
// Compiles a `null` literal by emitting CODE_NULL. [canAssign] is unused.
static void null(Compiler* compiler, bool canAssign)
{
  emitOp(compiler, CODE_NULL);
}
|
|
|
|
// A number or string literal.
|
|
static void literal(Compiler* compiler, bool canAssign)
|
|
{
|
|
emitConstant(compiler, compiler->parser->previous.value);
|
|
}
|
|
|
|
// A string literal that contains interpolated expressions.
|
|
//
|
|
// Interpolation is syntactic sugar for calling ".join()" on a list. So the
|
|
// string:
|
|
//
|
|
// "a %(b + c) d"
|
|
//
|
|
// is compiled roughly like:
|
|
//
|
|
// ["a ", b + c, " d"].join()
|
|
static void stringInterpolation(Compiler* compiler, bool canAssign)
|
|
{
|
|
// Instantiate a new list.
|
|
loadCoreVariable(compiler, "List");
|
|
callMethod(compiler, 0, "new()", 5);
|
|
|
|
do
|
|
{
|
|
// The opening string part.
|
|
literal(compiler, false);
|
|
callMethod(compiler, 1, "addCore_(_)", 11);
|
|
|
|
// The interpolated expression.
|
|
ignoreNewlines(compiler);
|
|
expression(compiler);
|
|
callMethod(compiler, 1, "addCore_(_)", 11);
|
|
|
|
ignoreNewlines(compiler);
|
|
} while (match(compiler, TOKEN_INTERPOLATION));
|
|
|
|
// The trailing string part.
|
|
consume(compiler, TOKEN_STRING, "Expect end of string interpolation.");
|
|
literal(compiler, false);
|
|
callMethod(compiler, 1, "addCore_(_)", 11);
|
|
|
|
// The list of interpolated parts.
|
|
callMethod(compiler, 0, "join()", 6);
|
|
}
|
|
|
|
// Compiles a `super` expression. The `super` token has already been
// consumed.
//
// "super.name(...)" compiles to a named superclass call. A bare "super(...)"
// calls the superclass method with the same signature as the enclosing
// method.
static void super_(Compiler* compiler, bool canAssign)
{
  ClassInfo* classInfo = getEnclosingClass(compiler);
  if (classInfo == NULL)
  {
    error(compiler, "Cannot use 'super' outside of a method.");
  }

  loadThis(compiler);

  // TODO: Super operator calls.
  // TODO: There's no syntax for invoking a superclass constructor with a
  // different name from the enclosing one. Figure that out.

  // A '.' means the method name is given explicitly.
  if (match(compiler, TOKEN_DOT))
  {
    consume(compiler, TOKEN_NAME, "Expect method name after 'super.'.");
    namedCall(compiler, canAssign, CODE_SUPER_0);
    return;
  }

  // No explicit name, so reuse the enclosing method's signature. The NULL
  // check matters: the error was already reported above, but we still must
  // not dereference a missing class here.
  if (classInfo != NULL)
  {
    methodCall(compiler, CODE_SUPER_0, classInfo->signature);
  }
}
|
|
|
|
// Compiles a `this` expression. Reports an error, and emits nothing, if it is
// used outside of a class. [canAssign] is unused.
static void this_(Compiler* compiler, bool canAssign)
{
  if (getEnclosingClass(compiler) != NULL)
  {
    loadThis(compiler);
    return;
  }

  error(compiler, "Cannot use 'this' outside of a method.");
}
|
|
|
|
// Subscript or "array indexing" operator like `foo[bar]`.
|
|
static void subscript(Compiler* compiler, bool canAssign)
|
|
{
|
|
Signature signature = { "", 0, SIG_SUBSCRIPT, 0 };
|
|
|
|
// Parse the argument list.
|
|
finishArgumentList(compiler, &signature);
|
|
consume(compiler, TOKEN_RIGHT_BRACKET, "Expect ']' after arguments.");
|
|
|
|
if (canAssign && match(compiler, TOKEN_EQ))
|
|
{
|
|
signature.type = SIG_SUBSCRIPT_SETTER;
|
|
|
|
// Compile the assigned value.
|
|
validateNumParameters(compiler, ++signature.arity);
|
|
expression(compiler);
|
|
}
|
|
|
|
callSignature(compiler, CODE_CALL_0, &signature);
|
|
}
|
|
|
|
// Compiles a method call that follows a '.', which has already been
// consumed. Newlines between the '.' and the method name are skipped.
static void call(Compiler* compiler, bool canAssign)
{
  ignoreNewlines(compiler);

  consume(compiler, TOKEN_NAME, "Expect method name after '.'.");

  namedCall(compiler, canAssign, CODE_CALL_0);
}
|
|
|
|
// Compiles the right-hand side of a logical "&&". The operator has already
// been consumed and the left operand already compiled. [canAssign] is unused.
static void and_(Compiler* compiler, bool canAssign)
{
  ignoreNewlines(compiler);

  // CODE_AND jumps past the right operand when the left one is false.
  int skipRight = emitJump(compiler, CODE_AND);
  parsePrecedence(compiler, PREC_LOGICAL_AND);
  patchJump(compiler, skipRight);
}
|
|
|
|
// Compiles the right-hand side of a logical "||". The operator has already
// been consumed and the left operand already compiled. [canAssign] is unused.
static void or_(Compiler* compiler, bool canAssign)
{
  ignoreNewlines(compiler);

  // CODE_OR jumps past the right operand when the left one is true.
  int skipRight = emitJump(compiler, CODE_OR);
  parsePrecedence(compiler, PREC_LOGICAL_OR);
  patchJump(compiler, skipRight);
}
|
|
|
|
// Compiles the "? then : else" part of a conditional operator. The '?' has
// already been consumed and the condition already compiled. [canAssign] is
// unused.
static void conditional(Compiler* compiler, bool canAssign)
{
  // Newlines may follow the '?'.
  ignoreNewlines(compiler);

  // If the condition is false, skip ahead to the else branch.
  int toElse = emitJump(compiler, CODE_JUMP_IF);

  // The then branch.
  parsePrecedence(compiler, PREC_CONDITIONAL);

  consume(compiler, TOKEN_COLON,
          "Expect ':' after then branch of conditional operator.");
  ignoreNewlines(compiler);

  // After the then branch has run, hop over the else branch.
  int pastElse = emitJump(compiler, CODE_JUMP);

  // The else branch starts here.
  patchJump(compiler, toElse);

  parsePrecedence(compiler, PREC_ASSIGNMENT);

  // The jump over the else branch lands here.
  patchJump(compiler, pastElse);
}
|
|
|
|
// Compiles a binary infix operator expression. The operator token has already
// been consumed and the left operand already compiled. The operator is
// invoked as a one-argument method named after the operator. [canAssign] is
// unused.
void infixOp(Compiler* compiler, bool canAssign)
{
  GrammarRule* rule = getRule(compiler->parser->previous.type);

  // The expression cannot end on an infix operator, so the right operand may
  // start on a later line.
  ignoreNewlines(compiler);

  // Compile the right operand, binding tighter than this operator.
  parsePrecedence(compiler, (Precedence)(rule->precedence + 1));

  // Invoke the operator method on the left operand.
  const char* opName = rule->name;
  int opLength = (int)strlen(opName);
  Signature signature = { opName, opLength, SIG_METHOD, 1 };
  callSignature(compiler, CODE_CALL_0, &signature);
}
|
|
|
|
// Compiles a method signature for an infix operator.
|
|
void infixSignature(Compiler* compiler, Signature* signature)
|
|
{
|
|
// Add the RHS parameter.
|
|
signature->type = SIG_METHOD;
|
|
signature->arity = 1;
|
|
|
|
// Parse the parameter name.
|
|
consume(compiler, TOKEN_LEFT_PAREN, "Expect '(' after operator name.");
|
|
declareNamedVariable(compiler);
|
|
consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after parameter name.");
|
|
}
|
|
|
|
// Compiles a method signature for an unary operator (i.e. "!").
|
|
void unarySignature(Compiler* compiler, Signature* signature)
|
|
{
|
|
// Do nothing. The name is already complete.
|
|
signature->type = SIG_GETTER;
|
|
}
|
|
|
|
// Compiles a method signature for an operator that can either be unary or
|
|
// infix (i.e. "-").
|
|
void mixedSignature(Compiler* compiler, Signature* signature)
|
|
{
|
|
signature->type = SIG_GETTER;
|
|
|
|
// If there is a parameter, it's an infix operator, otherwise it's unary.
|
|
if (match(compiler, TOKEN_LEFT_PAREN))
|
|
{
|
|
// Add the RHS parameter.
|
|
signature->type = SIG_METHOD;
|
|
signature->arity = 1;
|
|
|
|
// Parse the parameter name.
|
|
declareNamedVariable(compiler);
|
|
consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after parameter name.");
|
|
}
|
|
}
|
|
|
|
// Compiles an optional setter parameter in a method [signature].
|
|
//
|
|
// Returns `true` if it was a setter.
|
|
static bool maybeSetter(Compiler* compiler, Signature* signature)
|
|
{
|
|
// See if it's a setter.
|
|
if (!match(compiler, TOKEN_EQ)) return false;
|
|
|
|
// It's a setter.
|
|
if (signature->type == SIG_SUBSCRIPT)
|
|
{
|
|
signature->type = SIG_SUBSCRIPT_SETTER;
|
|
}
|
|
else
|
|
{
|
|
signature->type = SIG_SETTER;
|
|
}
|
|
|
|
// Parse the value parameter.
|
|
consume(compiler, TOKEN_LEFT_PAREN, "Expect '(' after '='.");
|
|
declareNamedVariable(compiler);
|
|
consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after parameter name.");
|
|
|
|
signature->arity++;
|
|
|
|
return true;
|
|
}
|
|
|
|
// Compiles a method signature for a subscript operator.
|
|
void subscriptSignature(Compiler* compiler, Signature* signature)
|
|
{
|
|
signature->type = SIG_SUBSCRIPT;
|
|
|
|
// The signature currently has "[" as its name since that was the token that
|
|
// matched it. Clear that out.
|
|
signature->length = 0;
|
|
|
|
// Parse the parameters inside the subscript.
|
|
finishParameterList(compiler, signature);
|
|
consume(compiler, TOKEN_RIGHT_BRACKET, "Expect ']' after parameters.");
|
|
|
|
maybeSetter(compiler, signature);
|
|
}
|
|
|
|
// Parses an optional parenthesized parameter list. Updates `type` and `arity`
|
|
// in [signature] to match what was parsed.
|
|
static void parameterList(Compiler* compiler, Signature* signature)
|
|
{
|
|
// The parameter list is optional.
|
|
if (!match(compiler, TOKEN_LEFT_PAREN)) return;
|
|
|
|
signature->type = SIG_METHOD;
|
|
|
|
// Allow an empty parameter list.
|
|
if (match(compiler, TOKEN_RIGHT_PAREN)) return;
|
|
|
|
finishParameterList(compiler, signature);
|
|
consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after parameters.");
|
|
}
|
|
|
|
// Compiles a method signature for a named method or setter.
|
|
void namedSignature(Compiler* compiler, Signature* signature)
|
|
{
|
|
signature->type = SIG_GETTER;
|
|
|
|
// If it's a setter, it can't also have a parameter list.
|
|
if (maybeSetter(compiler, signature)) return;
|
|
|
|
// Regular named method with an optional parameter list.
|
|
parameterList(compiler, signature);
|
|
}
|
|
|
|
// Compiles a method signature for a constructor.
|
|
void constructorSignature(Compiler* compiler, Signature* signature)
|
|
{
|
|
consume(compiler, TOKEN_NAME, "Expect constructor name after 'construct'.");
|
|
|
|
// Capture the name.
|
|
*signature = signatureFromToken(compiler, SIG_INITIALIZER);
|
|
|
|
if (match(compiler, TOKEN_EQ))
|
|
{
|
|
error(compiler, "A constructor cannot be a setter.");
|
|
}
|
|
|
|
if (!match(compiler, TOKEN_LEFT_PAREN))
|
|
{
|
|
error(compiler, "A constructor cannot be a getter.");
|
|
return;
|
|
}
|
|
|
|
// Allow an empty parameter list.
|
|
if (match(compiler, TOKEN_RIGHT_PAREN)) return;
|
|
|
|
finishParameterList(compiler, signature);
|
|
consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after parameters.");
|
|
}
|
|
|
|
// This table defines all of the parsing rules for the prefix and infix
|
|
// expressions in the grammar. Expressions are parsed using a Pratt parser.
|
|
//
|
|
// See: http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
|
|
#define UNUSED { NULL, NULL, NULL, PREC_NONE, NULL }
|
|
#define PREFIX(fn) { fn, NULL, NULL, PREC_NONE, NULL }
|
|
#define INFIX(prec, fn) { NULL, fn, NULL, prec, NULL }
|
|
#define INFIX_OPERATOR(prec, name) { NULL, infixOp, infixSignature, prec, name }
|
|
#define PREFIX_OPERATOR(name) { unaryOp, NULL, unarySignature, PREC_NONE, name }
|
|
#define OPERATOR(name) { unaryOp, infixOp, mixedSignature, PREC_TERM, name }
|
|
|
|
GrammarRule rules[] =
|
|
{
|
|
/* TOKEN_LEFT_PAREN */ PREFIX(grouping),
|
|
/* TOKEN_RIGHT_PAREN */ UNUSED,
|
|
/* TOKEN_LEFT_BRACKET */ { list, subscript, subscriptSignature, PREC_CALL, NULL },
|
|
/* TOKEN_RIGHT_BRACKET */ UNUSED,
|
|
/* TOKEN_LEFT_BRACE */ PREFIX(map),
|
|
/* TOKEN_RIGHT_BRACE */ UNUSED,
|
|
/* TOKEN_COLON */ UNUSED,
|
|
/* TOKEN_DOT */ INFIX(PREC_CALL, call),
|
|
/* TOKEN_DOTDOT */ INFIX_OPERATOR(PREC_RANGE, ".."),
|
|
/* TOKEN_DOTDOTDOT */ INFIX_OPERATOR(PREC_RANGE, "..."),
|
|
/* TOKEN_COMMA */ UNUSED,
|
|
/* TOKEN_STAR */ INFIX_OPERATOR(PREC_FACTOR, "*"),
|
|
/* TOKEN_SLASH */ INFIX_OPERATOR(PREC_FACTOR, "/"),
|
|
/* TOKEN_PERCENT */ INFIX_OPERATOR(PREC_FACTOR, "%"),
|
|
/* TOKEN_PLUS */ INFIX_OPERATOR(PREC_TERM, "+"),
|
|
/* TOKEN_MINUS */ OPERATOR("-"),
|
|
/* TOKEN_LTLT */ INFIX_OPERATOR(PREC_BITWISE_SHIFT, "<<"),
|
|
/* TOKEN_GTGT */ INFIX_OPERATOR(PREC_BITWISE_SHIFT, ">>"),
|
|
/* TOKEN_PIPE */ INFIX_OPERATOR(PREC_BITWISE_OR, "|"),
|
|
/* TOKEN_PIPEPIPE */ INFIX(PREC_LOGICAL_OR, or_),
|
|
/* TOKEN_CARET */ INFIX_OPERATOR(PREC_BITWISE_XOR, "^"),
|
|
/* TOKEN_AMP */ INFIX_OPERATOR(PREC_BITWISE_AND, "&"),
|
|
/* TOKEN_AMPAMP */ INFIX(PREC_LOGICAL_AND, and_),
|
|
/* TOKEN_BANG */ PREFIX_OPERATOR("!"),
|
|
/* TOKEN_TILDE */ PREFIX_OPERATOR("~"),
|
|
/* TOKEN_QUESTION */ INFIX(PREC_ASSIGNMENT, conditional),
|
|
/* TOKEN_EQ */ UNUSED,
|
|
/* TOKEN_LT */ INFIX_OPERATOR(PREC_COMPARISON, "<"),
|
|
/* TOKEN_GT */ INFIX_OPERATOR(PREC_COMPARISON, ">"),
|
|
/* TOKEN_LTEQ */ INFIX_OPERATOR(PREC_COMPARISON, "<="),
|
|
/* TOKEN_GTEQ */ INFIX_OPERATOR(PREC_COMPARISON, ">="),
|
|
/* TOKEN_EQEQ */ INFIX_OPERATOR(PREC_EQUALITY, "=="),
|
|
/* TOKEN_BANGEQ */ INFIX_OPERATOR(PREC_EQUALITY, "!="),
|
|
/* TOKEN_BREAK */ UNUSED,
|
|
/* TOKEN_CLASS */ UNUSED,
|
|
/* TOKEN_CONSTRUCT */ { NULL, NULL, constructorSignature, PREC_NONE, NULL },
|
|
/* TOKEN_ELSE */ UNUSED,
|
|
/* TOKEN_FALSE */ PREFIX(boolean),
|
|
/* TOKEN_FOR */ UNUSED,
|
|
/* TOKEN_FOREIGN */ UNUSED,
|
|
/* TOKEN_IF */ UNUSED,
|
|
/* TOKEN_IMPORT */ UNUSED,
|
|
/* TOKEN_IN */ UNUSED,
|
|
/* TOKEN_IS */ INFIX_OPERATOR(PREC_IS, "is"),
|
|
/* TOKEN_NULL */ PREFIX(null),
|
|
/* TOKEN_RETURN */ UNUSED,
|
|
/* TOKEN_STATIC */ UNUSED,
|
|
/* TOKEN_SUPER */ PREFIX(super_),
|
|
/* TOKEN_THIS */ PREFIX(this_),
|
|
/* TOKEN_TRUE */ PREFIX(boolean),
|
|
/* TOKEN_VAR */ UNUSED,
|
|
/* TOKEN_WHILE */ UNUSED,
|
|
/* TOKEN_FIELD */ PREFIX(field),
|
|
/* TOKEN_STATIC_FIELD */ PREFIX(staticField),
|
|
/* TOKEN_NAME */ { name, NULL, namedSignature, PREC_NONE, NULL },
|
|
/* TOKEN_NUMBER */ PREFIX(literal),
|
|
/* TOKEN_STRING */ PREFIX(literal),
|
|
/* TOKEN_INTERPOLATION */ PREFIX(stringInterpolation),
|
|
/* TOKEN_LINE */ UNUSED,
|
|
/* TOKEN_ERROR */ UNUSED,
|
|
/* TOKEN_EOF */ UNUSED
|
|
};
|
|
|
|
// Gets the [GrammarRule] associated with tokens of [type].
|
|
static GrammarRule* getRule(TokenType type)
|
|
{
|
|
return &rules[type];
|
|
}
|
|
|
|
// The main entrypoint for the top-down operator precedence parser.
|
|
void parsePrecedence(Compiler* compiler, Precedence precedence)
|
|
{
|
|
nextToken(compiler->parser);
|
|
GrammarFn prefix = rules[compiler->parser->previous.type].prefix;
|
|
|
|
if (prefix == NULL)
|
|
{
|
|
error(compiler, "Expected expression.");
|
|
return;
|
|
}
|
|
|
|
// Track if the precendence of the surrounding expression is low enough to
|
|
// allow an assignment inside this one. We can't compile an assignment like
|
|
// a normal expression because it requires us to handle the LHS specially --
|
|
// it needs to be an lvalue, not an rvalue. So, for each of the kinds of
|
|
// expressions that are valid lvalues -- names, subscripts, fields, etc. --
|
|
// we pass in whether or not it appears in a context loose enough to allow
|
|
// "=". If so, it will parse the "=" itself and handle it appropriately.
|
|
bool canAssign = precedence <= PREC_CONDITIONAL;
|
|
prefix(compiler, canAssign);
|
|
|
|
while (precedence <= rules[compiler->parser->current.type].precedence)
|
|
{
|
|
nextToken(compiler->parser);
|
|
GrammarFn infix = rules[compiler->parser->previous.type].infix;
|
|
infix(compiler, canAssign);
|
|
}
|
|
}
|
|
|
|
// Parses an expression. Unlike statements, expressions leave a resulting value
|
|
// on the stack.
|
|
void expression(Compiler* compiler)
|
|
{
|
|
parsePrecedence(compiler, PREC_LOWEST);
|
|
}
|
|
|
|
// Returns the number of arguments to the instruction at [ip] in [fn]'s
|
|
// bytecode.
|
|
static int getNumArguments(const uint8_t* bytecode, const Value* constants,
|
|
int ip)
|
|
{
|
|
Code instruction = (Code)bytecode[ip];
|
|
switch (instruction)
|
|
{
|
|
case CODE_NULL:
|
|
case CODE_FALSE:
|
|
case CODE_TRUE:
|
|
case CODE_POP:
|
|
case CODE_CLOSE_UPVALUE:
|
|
case CODE_RETURN:
|
|
case CODE_END:
|
|
case CODE_LOAD_LOCAL_0:
|
|
case CODE_LOAD_LOCAL_1:
|
|
case CODE_LOAD_LOCAL_2:
|
|
case CODE_LOAD_LOCAL_3:
|
|
case CODE_LOAD_LOCAL_4:
|
|
case CODE_LOAD_LOCAL_5:
|
|
case CODE_LOAD_LOCAL_6:
|
|
case CODE_LOAD_LOCAL_7:
|
|
case CODE_LOAD_LOCAL_8:
|
|
case CODE_CONSTRUCT:
|
|
case CODE_FOREIGN_CONSTRUCT:
|
|
case CODE_FOREIGN_CLASS:
|
|
case CODE_END_MODULE:
|
|
return 0;
|
|
|
|
case CODE_LOAD_LOCAL:
|
|
case CODE_STORE_LOCAL:
|
|
case CODE_LOAD_UPVALUE:
|
|
case CODE_STORE_UPVALUE:
|
|
case CODE_LOAD_FIELD_THIS:
|
|
case CODE_STORE_FIELD_THIS:
|
|
case CODE_LOAD_FIELD:
|
|
case CODE_STORE_FIELD:
|
|
case CODE_CLASS:
|
|
return 1;
|
|
|
|
case CODE_CONSTANT:
|
|
case CODE_LOAD_MODULE_VAR:
|
|
case CODE_STORE_MODULE_VAR:
|
|
case CODE_CALL_0:
|
|
case CODE_CALL_1:
|
|
case CODE_CALL_2:
|
|
case CODE_CALL_3:
|
|
case CODE_CALL_4:
|
|
case CODE_CALL_5:
|
|
case CODE_CALL_6:
|
|
case CODE_CALL_7:
|
|
case CODE_CALL_8:
|
|
case CODE_CALL_9:
|
|
case CODE_CALL_10:
|
|
case CODE_CALL_11:
|
|
case CODE_CALL_12:
|
|
case CODE_CALL_13:
|
|
case CODE_CALL_14:
|
|
case CODE_CALL_15:
|
|
case CODE_CALL_16:
|
|
case CODE_JUMP:
|
|
case CODE_LOOP:
|
|
case CODE_JUMP_IF:
|
|
case CODE_AND:
|
|
case CODE_OR:
|
|
case CODE_METHOD_INSTANCE:
|
|
case CODE_METHOD_STATIC:
|
|
case CODE_IMPORT_MODULE:
|
|
return 2;
|
|
|
|
case CODE_SUPER_0:
|
|
case CODE_SUPER_1:
|
|
case CODE_SUPER_2:
|
|
case CODE_SUPER_3:
|
|
case CODE_SUPER_4:
|
|
case CODE_SUPER_5:
|
|
case CODE_SUPER_6:
|
|
case CODE_SUPER_7:
|
|
case CODE_SUPER_8:
|
|
case CODE_SUPER_9:
|
|
case CODE_SUPER_10:
|
|
case CODE_SUPER_11:
|
|
case CODE_SUPER_12:
|
|
case CODE_SUPER_13:
|
|
case CODE_SUPER_14:
|
|
case CODE_SUPER_15:
|
|
case CODE_SUPER_16:
|
|
case CODE_IMPORT_VARIABLE:
|
|
return 4;
|
|
|
|
case CODE_CLOSURE:
|
|
{
|
|
int constant = (bytecode[ip + 1] << 8) | bytecode[ip + 2];
|
|
ObjFn* loadedFn = AS_FN(constants[constant]);
|
|
|
|
// There are two bytes for the constant, then two for each upvalue.
|
|
return 2 + (loadedFn->numUpvalues * 2);
|
|
}
|
|
}
|
|
|
|
UNREACHABLE();
|
|
return 0;
|
|
}
|
|
|
|
// Marks the beginning of a loop. Keeps track of the current instruction so we
|
|
// know what to loop back to at the end of the body.
|
|
static void startLoop(Compiler* compiler, Loop* loop)
|
|
{
|
|
loop->enclosing = compiler->loop;
|
|
loop->start = compiler->fn->code.count - 1;
|
|
loop->scopeDepth = compiler->scopeDepth;
|
|
compiler->loop = loop;
|
|
}
|
|
|
|
// Emits the [CODE_JUMP_IF] instruction used to test the loop condition and
|
|
// potentially exit the loop. Keeps track of the instruction so we can patch it
|
|
// later once we know where the end of the body is.
|
|
static void testExitLoop(Compiler* compiler)
|
|
{
|
|
compiler->loop->exitJump = emitJump(compiler, CODE_JUMP_IF);
|
|
}
|
|
|
|
// Compiles the body of the loop and tracks its extent so that contained "break"
|
|
// statements can be handled correctly.
|
|
static void loopBody(Compiler* compiler)
|
|
{
|
|
compiler->loop->body = compiler->fn->code.count;
|
|
statement(compiler);
|
|
}
|
|
|
|
// Ends the current innermost loop. Patches up all jumps and breaks now that
|
|
// we know where the end of the loop is.
|
|
static void endLoop(Compiler* compiler)
|
|
{
|
|
// We don't check for overflow here since the forward jump over the loop body
|
|
// will report an error for the same problem.
|
|
int loopOffset = compiler->fn->code.count - compiler->loop->start + 2;
|
|
emitShortArg(compiler, CODE_LOOP, loopOffset);
|
|
|
|
patchJump(compiler, compiler->loop->exitJump);
|
|
|
|
// Find any break placeholder instructions (which will be CODE_END in the
|
|
// bytecode) and replace them with real jumps.
|
|
int i = compiler->loop->body;
|
|
while (i < compiler->fn->code.count)
|
|
{
|
|
if (compiler->fn->code.data[i] == CODE_END)
|
|
{
|
|
compiler->fn->code.data[i] = CODE_JUMP;
|
|
patchJump(compiler, i + 1);
|
|
i += 3;
|
|
}
|
|
else
|
|
{
|
|
// Skip this instruction and its arguments.
|
|
i += 1 + getNumArguments(compiler->fn->code.data,
|
|
compiler->fn->constants.data, i);
|
|
}
|
|
}
|
|
|
|
compiler->loop = compiler->loop->enclosing;
|
|
}
|
|
|
|
// Compiles a "for" statement. The `for` keyword has already been consumed.
//
// A loop like:
//
//     for (i in sequence.expression) {
//       System.print(i)
//     }
//
// is compiled to bytecode almost as if the source had been:
//
//     {
//       var seq_ = sequence.expression
//       var iter_
//       while (iter_ = seq_.iterate(iter_)) {
//         var i = seq_.iteratorValue(iter_)
//         System.print(i)
//       }
//     }
//
// It's not exactly that, because the synthetic variables `seq_` and `iter_`
// really get names that aren't valid Wren identifiers, but that's the basic
// idea.
//
// The important parts are:
// - The sequence expression is only evaluated once.
// - The .iterate() method is used to advance the iterator and determine if
//   the loop should exit.
// - The .iteratorValue() method is used to get the value at the current
//   iterator position.
static void forStatement(Compiler* compiler)
{
  // The hidden iteration variables get a scope of their own.
  pushScope(compiler);

  consume(compiler, TOKEN_LEFT_PAREN, "Expect '(' after 'for'.");
  consume(compiler, TOKEN_NAME, "Expect for loop variable name.");

  // Hold on to the loop variable's name; it is declared further down.
  const char* loopVarName = compiler->parser->previous.start;
  int loopVarLength = compiler->parser->previous.length;

  consume(compiler, TOKEN_IN, "Expect 'in' after loop variable.");
  ignoreNewlines(compiler);

  // Evaluate the sequence expression. Its value is stored in a hidden local
  // below.
  expression(compiler);

  // Make sure there is room for both hidden locals up front. This relies on
  // exactly two addLocal() calls following.
  if (compiler->numLocals + 2 > MAX_LOCALS)
  {
    error(compiler, "Cannot declare more than %d variables in one scope. (Not enough space for for-loops internal variables)",
          MAX_LOCALS);
    return;
  }

  // The trailing space in each hidden name ensures it can't collide with a
  // user-defined variable.
  int seqSlot = addLocal(compiler, "seq ", 4);

  // The second hidden local holds the iterator and starts out as null.
  null(compiler, false);
  int iterSlot = addLocal(compiler, "iter ", 5);

  consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after loop expression.");

  Loop loop;
  startLoop(compiler, &loop);

  // Advance the iterator: iter = seq.iterate(iter).
  loadLocal(compiler, seqSlot);
  loadLocal(compiler, iterSlot);
  callMethod(compiler, 1, "iterate(_)", 10);
  emitByteArg(compiler, CODE_STORE_LOCAL, iterSlot);

  // Exit the loop when the iterator's new value is false.
  testExitLoop(compiler);

  // Fetch the current element: seq.iteratorValue(iter).
  loadLocal(compiler, seqSlot);
  loadLocal(compiler, iterSlot);
  callMethod(compiler, 1, "iteratorValue(_)", 16);

  // Bind the loop variable in its own scope so each iteration gets a fresh
  // variable and closures over it don't all see the same one.
  pushScope(compiler);
  addLocal(compiler, loopVarName, loopVarLength);

  loopBody(compiler);

  // Close the loop variable's scope.
  popScope(compiler);

  endLoop(compiler);

  // Close the hidden variables' scope.
  popScope(compiler);
}
|
|
|
|
// Compiles an "if" statement with an optional "else" branch. The `if` keyword
// has already been consumed.
static void ifStatement(Compiler* compiler)
{
  // The parenthesized condition.
  consume(compiler, TOKEN_LEFT_PAREN, "Expect '(' after 'if'.");
  expression(compiler);
  consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after if condition.");

  // When the condition is false, skip the then branch.
  int skipThen = emitJump(compiler, CODE_JUMP_IF);

  // The then branch.
  statement(compiler);

  // With no else branch, a false condition simply lands right here.
  if (!match(compiler, TOKEN_ELSE))
  {
    patchJump(compiler, skipThen);
    return;
  }

  // After the then branch has run, hop over the else branch.
  int skipElse = emitJump(compiler, CODE_JUMP);
  patchJump(compiler, skipThen);

  // The else branch.
  statement(compiler);

  // The jump over the else branch lands here.
  patchJump(compiler, skipElse);
}
|
|
|
|
// Compiles a "while" statement. The `while` keyword has already been
// consumed.
static void whileStatement(Compiler* compiler)
{
  // The loop starts before the condition so that the condition is
  // re-evaluated on every iteration.
  Loop loop;
  startLoop(compiler, &loop);

  // The parenthesized condition.
  consume(compiler, TOKEN_LEFT_PAREN, "Expect '(' after 'while'.");
  expression(compiler);
  consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after while condition.");

  // Leave the loop if the condition is false, otherwise run the body and
  // jump back.
  testExitLoop(compiler);
  loopBody(compiler);
  endLoop(compiler);
}
|
|
|
|
// Compiles a simple statement. These can only appear at the top-level or
|
|
// within curly blocks. Simple statements exclude variable binding statements
|
|
// like "var" and "class" which are not allowed directly in places like the
|
|
// branches of an "if" statement.
|
|
//
|
|
// Unlike expressions, statements do not leave a value on the stack.
|
|
void statement(Compiler* compiler)
|
|
{
|
|
if (match(compiler, TOKEN_BREAK))
|
|
{
|
|
if (compiler->loop == NULL)
|
|
{
|
|
error(compiler, "Cannot use 'break' outside of a loop.");
|
|
return;
|
|
}
|
|
|
|
// Since we will be jumping out of the scope, make sure any locals in it
|
|
// are discarded first.
|
|
discardLocals(compiler, compiler->loop->scopeDepth + 1);
|
|
|
|
// Emit a placeholder instruction for the jump to the end of the body. When
|
|
// we're done compiling the loop body and know where the end is, we'll
|
|
// replace these with `CODE_JUMP` instructions with appropriate offsets.
|
|
// We use `CODE_END` here because that can't occur in the middle of
|
|
// bytecode.
|
|
emitJump(compiler, CODE_END);
|
|
}
|
|
else if (match(compiler, TOKEN_FOR))
|
|
{
|
|
forStatement(compiler);
|
|
}
|
|
else if (match(compiler, TOKEN_IF))
|
|
{
|
|
ifStatement(compiler);
|
|
}
|
|
else if (match(compiler, TOKEN_RETURN))
|
|
{
|
|
// Compile the return value.
|
|
if (peek(compiler) == TOKEN_LINE)
|
|
{
|
|
// Implicitly return null if there is no value.
|
|
emitOp(compiler, CODE_NULL);
|
|
}
|
|
else
|
|
{
|
|
expression(compiler);
|
|
}
|
|
|
|
emitOp(compiler, CODE_RETURN);
|
|
}
|
|
else if (match(compiler, TOKEN_WHILE))
|
|
{
|
|
whileStatement(compiler);
|
|
}
|
|
else if (match(compiler, TOKEN_LEFT_BRACE))
|
|
{
|
|
// Block statement.
|
|
pushScope(compiler);
|
|
if (finishBlock(compiler))
|
|
{
|
|
// Block was an expression, so discard it.
|
|
emitOp(compiler, CODE_POP);
|
|
}
|
|
popScope(compiler);
|
|
}
|
|
else
|
|
{
|
|
// Expression statement.
|
|
expression(compiler);
|
|
emitOp(compiler, CODE_POP);
|
|
}
|
|
}
|
|
|
|
// Creates a matching constructor method for an initializer with [signature]
|
|
// and [initializerSymbol].
|
|
//
|
|
// Construction is a two-stage process in Wren that involves two separate
|
|
// methods. There is a static method that allocates a new instance of the class.
|
|
// It then invokes an initializer method on the new instance, forwarding all of
|
|
// the constructor arguments to it.
|
|
//
|
|
// The allocator method always has a fixed implementation:
|
|
//
|
|
// CODE_CONSTRUCT - Replace the class in slot 0 with a new instance of it.
|
|
// CODE_CALL - Invoke the initializer on the new instance.
|
|
//
|
|
// This creates that method and calls the initializer with [initializerSymbol].
|
|
static void createConstructor(Compiler* compiler, Signature* signature,
|
|
int initializerSymbol)
|
|
{
|
|
Compiler methodCompiler;
|
|
initCompiler(&methodCompiler, compiler->parser, compiler, true);
|
|
|
|
// Allocate the instance.
|
|
emitOp(&methodCompiler, compiler->enclosingClass->isForeign
|
|
? CODE_FOREIGN_CONSTRUCT : CODE_CONSTRUCT);
|
|
|
|
// Run its initializer.
|
|
emitShortArg(&methodCompiler, (Code)(CODE_CALL_0 + signature->arity),
|
|
initializerSymbol);
|
|
|
|
// Return the instance.
|
|
emitOp(&methodCompiler, CODE_RETURN);
|
|
|
|
endCompiler(&methodCompiler, "", 0);
|
|
}
|
|
|
|
// Loads the enclosing class onto the stack and then binds the function already
|
|
// on the stack as a method on that class.
|
|
static void defineMethod(Compiler* compiler, Variable classVariable,
|
|
bool isStatic, int methodSymbol)
|
|
{
|
|
// Load the class. We have to do this for each method because we can't
|
|
// keep the class on top of the stack. If there are static fields, they
|
|
// will be locals above the initial variable slot for the class on the
|
|
// stack. To skip past those, we just load the class each time right before
|
|
// defining a method.
|
|
loadVariable(compiler, classVariable);
|
|
|
|
// Define the method.
|
|
Code instruction = isStatic ? CODE_METHOD_STATIC : CODE_METHOD_INSTANCE;
|
|
emitShortArg(compiler, instruction, methodSymbol);
|
|
}
|
|
|
|
// Declares a method in the enclosing class with [signature].
|
|
//
|
|
// Reports an error if a method with that signature is already declared.
|
|
// Returns the symbol for the method.
|
|
static int declareMethod(Compiler* compiler, Signature* signature,
|
|
const char* name, int length)
|
|
{
|
|
int symbol = signatureSymbol(compiler, signature);
|
|
|
|
// See if the class has already declared method with this signature.
|
|
ClassInfo* classInfo = compiler->enclosingClass;
|
|
IntBuffer* methods = classInfo->inStatic
|
|
? &classInfo->staticMethods : &classInfo->methods;
|
|
for (int i = 0; i < methods->count; i++)
|
|
{
|
|
if (methods->data[i] == symbol)
|
|
{
|
|
const char* staticPrefix = classInfo->inStatic ? "static " : "";
|
|
error(compiler, "Class %s already defines a %smethod '%s'.",
|
|
&compiler->enclosingClass->name->value, staticPrefix, name);
|
|
break;
|
|
}
|
|
}
|
|
|
|
wrenIntBufferWrite(compiler->parser->vm, methods, symbol);
|
|
return symbol;
|
|
}
|
|
|
|
// Compiles a method definition inside a class body.
|
|
//
|
|
// Returns `true` if it compiled successfully, or `false` if the method couldn't
|
|
// be parsed.
|
|
static bool method(Compiler* compiler, Variable classVariable)
|
|
{
|
|
// TODO: What about foreign constructors?
|
|
bool isForeign = match(compiler, TOKEN_FOREIGN);
|
|
bool isStatic = match(compiler, TOKEN_STATIC);
|
|
compiler->enclosingClass->inStatic = isStatic;
|
|
|
|
SignatureFn signatureFn = rules[compiler->parser->current.type].method;
|
|
nextToken(compiler->parser);
|
|
|
|
if (signatureFn == NULL)
|
|
{
|
|
error(compiler, "Expect method definition.");
|
|
return false;
|
|
}
|
|
|
|
// Build the method signature.
|
|
Signature signature = signatureFromToken(compiler, SIG_GETTER);
|
|
compiler->enclosingClass->signature = &signature;
|
|
|
|
Compiler methodCompiler;
|
|
initCompiler(&methodCompiler, compiler->parser, compiler, true);
|
|
|
|
// Compile the method signature.
|
|
signatureFn(&methodCompiler, &signature);
|
|
|
|
if (isStatic && signature.type == SIG_INITIALIZER)
|
|
{
|
|
error(compiler, "A constructor cannot be static.");
|
|
}
|
|
|
|
// Include the full signature in debug messages in stack traces.
|
|
char fullSignature[MAX_METHOD_SIGNATURE];
|
|
int length;
|
|
signatureToString(&signature, fullSignature, &length);
|
|
|
|
// Check for duplicate methods. Doesn't matter that it's already been
|
|
// defined, error will discard bytecode anyway.
|
|
// Check if the method table already contains this symbol
|
|
int methodSymbol = declareMethod(compiler, &signature, fullSignature, length);
|
|
|
|
if (isForeign)
|
|
{
|
|
// Define a constant for the signature.
|
|
emitConstant(compiler, wrenNewStringLength(compiler->parser->vm,
|
|
fullSignature, length));
|
|
|
|
// We don't need the function we started compiling in the parameter list
|
|
// any more.
|
|
methodCompiler.parser->vm->compiler = methodCompiler.parent;
|
|
}
|
|
else
|
|
{
|
|
consume(compiler, TOKEN_LEFT_BRACE, "Expect '{' to begin method body.");
|
|
finishBody(&methodCompiler, signature.type == SIG_INITIALIZER);
|
|
endCompiler(&methodCompiler, fullSignature, length);
|
|
}
|
|
|
|
// Define the method. For a constructor, this defines the instance
|
|
// initializer method.
|
|
defineMethod(compiler, classVariable, isStatic, methodSymbol);
|
|
|
|
if (signature.type == SIG_INITIALIZER)
|
|
{
|
|
// Also define a matching constructor method on the metaclass.
|
|
signature.type = SIG_METHOD;
|
|
int constructorSymbol = signatureSymbol(compiler, &signature);
|
|
|
|
createConstructor(compiler, &signature, methodSymbol);
|
|
defineMethod(compiler, classVariable, true, constructorSymbol);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Compiles a class definition. Assumes the "class" token has already been
|
|
// consumed (along with a possibly preceding "foreign" token).
|
|
static void classDefinition(Compiler* compiler, bool isForeign)
|
|
{
|
|
// Create a variable to store the class in.
|
|
Variable classVariable;
|
|
classVariable.scope = compiler->scopeDepth == -1 ? SCOPE_MODULE : SCOPE_LOCAL;
|
|
classVariable.index = declareNamedVariable(compiler);
|
|
|
|
// Create shared class name value
|
|
Value classNameString = wrenNewStringLength(compiler->parser->vm,
|
|
compiler->parser->previous.start, compiler->parser->previous.length);
|
|
|
|
// Create class name string to track method duplicates
|
|
ObjString* className = AS_STRING(classNameString);
|
|
|
|
// Make a string constant for the name.
|
|
emitConstant(compiler, classNameString);
|
|
|
|
// Load the superclass (if there is one).
|
|
if (match(compiler, TOKEN_IS))
|
|
{
|
|
parsePrecedence(compiler, PREC_CALL);
|
|
}
|
|
else
|
|
{
|
|
// Implicitly inherit from Object.
|
|
loadCoreVariable(compiler, "Object");
|
|
}
|
|
|
|
// Store a placeholder for the number of fields argument. We don't know the
|
|
// count until we've compiled all the methods to see which fields are used.
|
|
int numFieldsInstruction = -1;
|
|
if (isForeign)
|
|
{
|
|
emitOp(compiler, CODE_FOREIGN_CLASS);
|
|
}
|
|
else
|
|
{
|
|
numFieldsInstruction = emitByteArg(compiler, CODE_CLASS, 255);
|
|
}
|
|
|
|
// Store it in its name.
|
|
defineVariable(compiler, classVariable.index);
|
|
|
|
// Push a local variable scope. Static fields in a class body are hoisted out
|
|
// into local variables declared in this scope. Methods that use them will
|
|
// have upvalues referencing them.
|
|
pushScope(compiler);
|
|
|
|
ClassInfo classInfo;
|
|
classInfo.isForeign = isForeign;
|
|
classInfo.name = className;
|
|
|
|
// Set up a symbol table for the class's fields. We'll initially compile
|
|
// them to slots starting at zero. When the method is bound to the class, the
|
|
// bytecode will be adjusted by [wrenBindMethod] to take inherited fields
|
|
// into account.
|
|
wrenSymbolTableInit(&classInfo.fields);
|
|
|
|
// Set up symbol buffers to track duplicate static and instance methods.
|
|
wrenIntBufferInit(&classInfo.methods);
|
|
wrenIntBufferInit(&classInfo.staticMethods);
|
|
compiler->enclosingClass = &classInfo;
|
|
|
|
// Compile the method definitions.
|
|
consume(compiler, TOKEN_LEFT_BRACE, "Expect '{' after class declaration.");
|
|
matchLine(compiler);
|
|
|
|
while (!match(compiler, TOKEN_RIGHT_BRACE))
|
|
{
|
|
if (!method(compiler, classVariable)) break;
|
|
|
|
// Don't require a newline after the last definition.
|
|
if (match(compiler, TOKEN_RIGHT_BRACE)) break;
|
|
|
|
consumeLine(compiler, "Expect newline after definition in class.");
|
|
}
|
|
|
|
// Update the class with the number of fields.
|
|
if (!isForeign)
|
|
{
|
|
compiler->fn->code.data[numFieldsInstruction] =
|
|
(uint8_t)classInfo.fields.count;
|
|
}
|
|
|
|
// Clear symbol tables for tracking field and method names.
|
|
wrenSymbolTableClear(compiler->parser->vm, &classInfo.fields);
|
|
wrenIntBufferClear(compiler->parser->vm, &classInfo.methods);
|
|
wrenIntBufferClear(compiler->parser->vm, &classInfo.staticMethods);
|
|
compiler->enclosingClass = NULL;
|
|
popScope(compiler);
|
|
}
|
|
|
|
// Compiles an "import" statement.
|
|
//
|
|
// An import compiles to a series of instructions. Given:
|
|
//
|
|
// import "foo" for Bar, Baz
|
|
//
|
|
// We compile a single IMPORT_MODULE "foo" instruction to load the module
|
|
// itself. When that finishes executing the imported module, it leaves the
|
|
// ObjModule in vm->lastModule. Then, for Bar and Baz, we:
|
|
//
|
|
// * Declare a variable in the current scope with that name.
|
|
// * Emit an IMPORT_VARIABLE instruction to load the variable's value from the
|
|
// other module.
|
|
// * Compile the code to store that value in the variable in this scope.
|
|
static void import(Compiler* compiler)
|
|
{
|
|
ignoreNewlines(compiler);
|
|
consume(compiler, TOKEN_STRING, "Expect a string after 'import'.");
|
|
int moduleConstant = addConstant(compiler, compiler->parser->previous.value);
|
|
|
|
// Load the module.
|
|
emitShortArg(compiler, CODE_IMPORT_MODULE, moduleConstant);
|
|
|
|
// Discard the unused result value from calling the module body's closure.
|
|
emitOp(compiler, CODE_POP);
|
|
|
|
// The for clause is optional.
|
|
if (!match(compiler, TOKEN_FOR)) return;
|
|
|
|
// Compile the comma-separated list of variables to import.
|
|
do
|
|
{
|
|
ignoreNewlines(compiler);
|
|
int slot = declareNamedVariable(compiler);
|
|
|
|
// Define a string constant for the variable name.
|
|
int variableConstant = addConstant(compiler,
|
|
wrenNewStringLength(compiler->parser->vm,
|
|
compiler->parser->previous.start,
|
|
compiler->parser->previous.length));
|
|
|
|
// Load the variable from the other module.
|
|
emitShortArg(compiler, CODE_IMPORT_VARIABLE, variableConstant);
|
|
|
|
// Store the result in the variable here.
|
|
defineVariable(compiler, slot);
|
|
} while (match(compiler, TOKEN_COMMA));
|
|
}
|
|
|
|
// Compiles a "var" variable definition statement.
|
|
static void variableDefinition(Compiler* compiler)
|
|
{
|
|
// Grab its name, but don't declare it yet. A (local) variable shouldn't be
|
|
// in scope in its own initializer.
|
|
consume(compiler, TOKEN_NAME, "Expect variable name.");
|
|
Token nameToken = compiler->parser->previous;
|
|
|
|
// Compile the initializer.
|
|
if (match(compiler, TOKEN_EQ))
|
|
{
|
|
ignoreNewlines(compiler);
|
|
expression(compiler);
|
|
}
|
|
else
|
|
{
|
|
// Default initialize it to null.
|
|
null(compiler, false);
|
|
}
|
|
|
|
// Now put it in scope.
|
|
int symbol = declareVariable(compiler, &nameToken);
|
|
defineVariable(compiler, symbol);
|
|
}
|
|
|
|
// Compiles a "definition". These are the statements that bind new variables.
|
|
// They can only appear at the top level of a block and are prohibited in places
|
|
// like the non-curly body of an if or while.
|
|
void definition(Compiler* compiler)
|
|
{
|
|
if (match(compiler, TOKEN_CLASS))
|
|
{
|
|
classDefinition(compiler, false);
|
|
}
|
|
else if (match(compiler, TOKEN_FOREIGN))
|
|
{
|
|
consume(compiler, TOKEN_CLASS, "Expect 'class' after 'foreign'.");
|
|
classDefinition(compiler, true);
|
|
}
|
|
else if (match(compiler, TOKEN_IMPORT))
|
|
{
|
|
import(compiler);
|
|
}
|
|
else if (match(compiler, TOKEN_VAR))
|
|
{
|
|
variableDefinition(compiler);
|
|
}
|
|
else
|
|
{
|
|
statement(compiler);
|
|
}
|
|
}
|
|
|
|
// Compiles [source] in the context of [module] and returns the result of
// finishing the top-level compiler (named "(script)").
//
// When [isExpression] is true, the source is compiled as a single expression
// that must be followed by end of file. Otherwise it is compiled as a series
// of definitions separated by newlines, followed by CODE_END_MODULE.
// [printErrors] is stored on the parser.
//
// After compiling, any module-level variable added during this compile whose
// value is still a number is reported as used but never defined.
ObjFn* wrenCompile(WrenVM* vm, ObjModule* module, const char* source,
                   bool isExpression, bool printErrors)
{
  // Step over a leading UTF-8 byte order mark, if present.
  if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3;

  Parser parser;
  parser.vm = vm;
  parser.module = module;
  parser.source = source;
  parser.printErrors = printErrors;
  parser.hasError = false;

  // Lexer position.
  parser.tokenStart = source;
  parser.currentChar = source;
  parser.currentLine = 1;
  parser.numParens = 0;

  // Leading newlines are ignored.
  parser.skipNewlines = true;

  // Give the current token a well-defined empty state. It gets copied to
  // previous when the first token is read below.
  parser.current.type = TOKEN_ERROR;
  parser.current.start = source;
  parser.current.length = 0;
  parser.current.line = 0;
  parser.current.value = UNDEFINED_VAL;

  // Prime the parser with the first token.
  nextToken(&parser);

  // Module variables at or past this index were introduced by this compile.
  int firstNewVariable = module->variables.count;

  Compiler compiler;
  initCompiler(&compiler, &parser, NULL, false);
  ignoreNewlines(&compiler);

  if (!isExpression)
  {
    while (!match(&compiler, TOKEN_EOF))
    {
      definition(&compiler);

      // Definitions are separated by newlines. If there isn't one, the only
      // thing allowed next is end of file on the same line.
      if (matchLine(&compiler)) continue;

      consume(&compiler, TOKEN_EOF, "Expect end of file.");
      break;
    }

    emitOp(&compiler, CODE_END_MODULE);
  }
  else
  {
    expression(&compiler);
    consume(&compiler, TOKEN_EOF, "Expect end of expression.");
  }

  emitOp(&compiler, CODE_RETURN);

  // Look for implicitly declared module-level variables that never received
  // an explicit definition. Such a variable still holds a number: the line
  // where it was first used.
  for (int i = firstNewVariable; i < parser.module->variables.count; i++)
  {
    if (!IS_NUM(parser.module->variables.data[i])) continue;

    // Synthesize a name token for the original use site so the error points
    // at the right name and line.
    parser.previous.type = TOKEN_NAME;
    parser.previous.start = parser.module->variableNames.data[i]->value;
    parser.previous.length = parser.module->variableNames.data[i]->length;
    parser.previous.line = (int)AS_NUM(parser.module->variables.data[i]);
    error(&compiler, "Variable is used but not defined.");
  }

  return endCompiler(&compiler, "(script)", 8);
}
|
|
|
|
// Walks the bytecode of [fn] from the first instruction up to CODE_END and
// patches the instructions that depend on the class the method is bound to:
//
// * Field loads and stores have the superclass's field count added to their
//   one-byte field argument.
// * Super calls get their constant slot filled with the superclass.
// * Functions referenced by CODE_CLOSURE are patched recursively.
//
// Every other instruction is left untouched.
void wrenBindMethodCode(ObjClass* classObj, ObjFn* fn)
{
  // Each pass handles the instruction at [ip], then steps over the opcode
  // byte plus however many argument bytes that instruction has.
  for (int ip = 0; ;
       ip += 1 + getNumArguments(fn->code.data, fn->constants.data, ip))
  {
    switch ((Code)fn->code.data[ip])
    {
      case CODE_LOAD_FIELD:
      case CODE_STORE_FIELD:
      case CODE_LOAD_FIELD_THIS:
      case CODE_STORE_FIELD_THIS:
        // Move this class's fields past the inherited ones. Overflow isn't
        // checked here; the field count is checked when the subclass is
        // created.
        fn->code.data[ip + 1] += classObj->superclass->numFields;
        break;

      case CODE_SUPER_0:
      case CODE_SUPER_1:
      case CODE_SUPER_2:
      case CODE_SUPER_3:
      case CODE_SUPER_4:
      case CODE_SUPER_5:
      case CODE_SUPER_6:
      case CODE_SUPER_7:
      case CODE_SUPER_8:
      case CODE_SUPER_9:
      case CODE_SUPER_10:
      case CODE_SUPER_11:
      case CODE_SUPER_12:
      case CODE_SUPER_13:
      case CODE_SUPER_14:
      case CODE_SUPER_15:
      case CODE_SUPER_16:
      {
        // The constant index is stored high byte first in the third and
        // fourth bytes after the opcode. Point that constant at the
        // superclass.
        int superSlot = (fn->code.data[ip + 3] << 8) | fn->code.data[ip + 4];
        fn->constants.data[superSlot] = OBJ_VAL(classObj->superclass);
        break;
      }

      case CODE_CLOSURE:
      {
        // The closure's function is a constant whose index is stored high
        // byte first right after the opcode. Bind its code as well.
        int fnSlot = (fn->code.data[ip + 1] << 8) | fn->code.data[ip + 2];
        wrenBindMethodCode(classObj, AS_FN(fn->constants.data[fnSlot]));
        break;
      }

      case CODE_END:
        // Reached the end of the bytecode.
        return;

      default:
        // Nothing to patch; just step over the instruction.
        break;
    }
  }
}
|
|
|
|
// Marks the objects reachable from [compiler] for the garbage collector: the
// values of the parser's current and previous tokens, and, for this compiler
// and each of its parents, the function being built, its constants, and the
// field symbol table of the enclosing class (when there is one).
void wrenMarkCompiler(WrenVM* vm, Compiler* compiler)
{
  // Token values are held by the parser.
  wrenGrayValue(vm, compiler->parser->previous.value);
  wrenGrayValue(vm, compiler->parser->current.value);

  // The VM only tracks the innermost compiler, so follow the parent links to
  // reach the outer ones too.
  for (Compiler* current = compiler; current != NULL;
       current = current->parent)
  {
    wrenGrayObj(vm, (Obj*)current->fn);
    wrenGrayObj(vm, (Obj*)current->constants);

    if (current->enclosingClass == NULL) continue;

    wrenBlackenSymbolTable(vm, &current->enclosingClass->fields);
  }
}
|