#include #include #include #include #include "wren_common.h" #include "wren_compiler.h" #include "wren_vm.h" #if WREN_DEBUG_DUMP_COMPILED_CODE #include "wren_debug.h" #endif // This is written in bottom-up order, so the tokenization comes first, then // parsing/code generation. This minimizes the number of explicit forward // declarations needed. // The maximum number of local (i.e. not module level) variables that can be // declared in a single function, method, or chunk of top level code. This is // the maximum number of variables in scope at one time, and spans block scopes. // // Note that this limitation is also explicit in the bytecode. Since // `CODE_LOAD_LOCAL` and `CODE_STORE_LOCAL` use a single argument byte to // identify the local, only 256 can be in scope at one time. #define MAX_LOCALS 256 // The maximum number of upvalues (i.e. variables from enclosing functions) // that a function can close over. #define MAX_UPVALUES 256 // The maximum number of distinct constants that a function can contain. This // value is explicit in the bytecode since `CODE_CONSTANT` only takes a single // two-byte argument. #define MAX_CONSTANTS (1 << 16) // The maximum distance a CODE_JUMP or CODE_JUMP_IF instruction can move the // instruction pointer. #define MAX_JUMP (1 << 16) // The maximum depth that interpolation can nest. For example, this string has // three levels: // // "outside %(one + "%(two + "%(three)")")" #define MAX_INTERPOLATION_NESTING 8 // The buffer size used to format a compile error message, excluding the header // with the module name and error location. Using a hardcoded buffer for this // is kind of hairy, but fortunately we can control what the longest possible // message is and handle that. Ideally, we'd use `snprintf()`, but that's not // available in standard C++98. 
#define ERROR_MESSAGE_SIZE (80 + MAX_VARIABLE_NAME + 15) typedef enum { TOKEN_LEFT_PAREN, TOKEN_RIGHT_PAREN, TOKEN_LEFT_BRACKET, TOKEN_RIGHT_BRACKET, TOKEN_LEFT_BRACE, TOKEN_RIGHT_BRACE, TOKEN_COLON, TOKEN_DOT, TOKEN_DOTDOT, TOKEN_DOTDOTDOT, TOKEN_COMMA, TOKEN_STAR, TOKEN_SLASH, TOKEN_PERCENT, TOKEN_PLUS, TOKEN_MINUS, TOKEN_LTLT, TOKEN_GTGT, TOKEN_PIPE, TOKEN_PIPEPIPE, TOKEN_CARET, TOKEN_AMP, TOKEN_AMPAMP, TOKEN_BANG, TOKEN_TILDE, TOKEN_QUESTION, TOKEN_EQ, TOKEN_LT, TOKEN_GT, TOKEN_LTEQ, TOKEN_GTEQ, TOKEN_EQEQ, TOKEN_BANGEQ, TOKEN_BREAK, TOKEN_CLASS, TOKEN_CONSTRUCT, TOKEN_ELSE, TOKEN_FALSE, TOKEN_FOR, TOKEN_FOREIGN, TOKEN_IF, TOKEN_IMPORT, TOKEN_IN, TOKEN_IS, TOKEN_NULL, TOKEN_RETURN, TOKEN_STATIC, TOKEN_SUPER, TOKEN_THIS, TOKEN_TRUE, TOKEN_VAR, TOKEN_WHILE, TOKEN_FIELD, TOKEN_STATIC_FIELD, TOKEN_NAME, TOKEN_NUMBER, // A string literal without any interpolation, or the last section of a // string following the last interpolated expression. TOKEN_STRING, // A portion of a string literal preceding an interpolated expression. This // string: // // "a %(b) c %(d) e" // // is tokenized to: // // TOKEN_INTERPOLATION "a " // TOKEN_NAME b // TOKEN_INTERPOLATION " c " // TOKEN_NAME d // TOKEN_STRING " e" TOKEN_INTERPOLATION, TOKEN_LINE, TOKEN_ERROR, TOKEN_EOF } TokenType; typedef struct { TokenType type; // The beginning of the token, pointing directly into the source. const char* start; // The length of the token in characters. int length; // The 1-based line where the token appears. int line; // The parsed value if the token is a literal. Value value; } Token; typedef struct { WrenVM* vm; // The module being parsed. ObjModule* module; // The source code being parsed. const char* source; // The beginning of the currently-being-lexed token in [source]. const char* tokenStart; // The current character being lexed in [source]. const char* currentChar; // The 1-based line number of [currentChar]. int currentLine; // The most recently lexed token. 
Token current; // The most recently consumed/advanced token. Token previous; // Tracks the lexing state when tokenizing interpolated strings. // // Interpolated strings make the lexer not strictly regular: we don't know // whether a ")" should be treated as a RIGHT_PAREN token or as ending an // interpolated expression unless we know whether we are inside a string // interpolation and how many unmatched "(" there are. This is particularly // complex because interpolation can nest: // // " %( " %( inner ) " ) " // // This tracks that state. The parser maintains a stack of ints, one for each // level of current interpolation nesting. Each value is the number of // unmatched "(" that are waiting to be closed. int parens[MAX_INTERPOLATION_NESTING]; int numParens; // If subsequent newline tokens should be discarded. bool skipNewlines; // Whether compile errors should be printed to stderr or discarded. bool printErrors; // If a syntax or compile error has occurred. bool hasError; } Parser; typedef struct { // The name of the local variable. This points directly into the original // source code string. const char* name; // The length of the local variable's name. int length; // The depth in the scope chain that this variable was declared at. Zero is // the outermost scope--parameters for a method, or the first local block in // top level code. One is the scope within that, etc. int depth; // If this local variable is being used as an upvalue. bool isUpvalue; } Local; typedef struct { // True if this upvalue is capturing a local variable from the enclosing // function. False if it's capturing an upvalue. bool isLocal; // The index of the local or upvalue being captured in the enclosing function. int index; } CompilerUpvalue; // Bookkeeping information for the current loop being compiled. typedef struct sLoop { // Index of the instruction that the loop should jump back to. int start; // Index of the argument for the CODE_JUMP_IF instruction used to exit the // loop. 
Stored so we can patch it once we know where the loop ends. int exitJump; // Index of the first instruction of the body of the loop. int body; // Depth of the scope(s) that need to be exited if a break is hit inside the // loop. int scopeDepth; // The loop enclosing this one, or NULL if this is the outermost loop. struct sLoop* enclosing; } Loop; // The different signature syntaxes for different kinds of methods. typedef enum { // A name followed by a (possibly empty) parenthesized parameter list. Also // used for binary operators. SIG_METHOD, // Just a name. Also used for unary operators. SIG_GETTER, // A name followed by "=". SIG_SETTER, // A square bracketed parameter list. SIG_SUBSCRIPT, // A square bracketed parameter list followed by "=". SIG_SUBSCRIPT_SETTER, // A constructor initializer function. This has a distinct signature to // prevent it from being invoked directly outside of the constructor on the // metaclass. SIG_INITIALIZER } SignatureType; typedef struct { const char* name; int length; SignatureType type; int arity; } Signature; // Bookkeeping information for compiling a class definition. typedef struct { // The name of the class. ObjString* name; // Symbol table for the fields of the class. SymbolTable fields; // Symbols for the methods defined by the class. Used to detect duplicate // method definitions. IntBuffer methods; IntBuffer staticMethods; // True if the class being compiled is a foreign class. bool isForeign; // True if the current method being compiled is static. bool inStatic; // The signature of the method being compiled. Signature* signature; } ClassInfo; struct sCompiler { Parser* parser; // The compiler for the function enclosing this one, or NULL if it's the // top level. struct sCompiler* parent; // The currently in scope local variables. Local locals[MAX_LOCALS]; // The number of local variables currently in scope. int numLocals; // The upvalues that this function has captured from outer scopes. 
The count // of them is stored in [numUpvalues]. CompilerUpvalue upvalues[MAX_UPVALUES]; // The current level of block scope nesting, where zero is no nesting. A -1 // here means top-level code is being compiled and there is no block scope // in effect at all. Any variables declared will be module-level. int scopeDepth; // The current number of slots (locals and temporaries) in use. // // We use this and maxSlots to track the maximum number of additional slots // a function may need while executing. When the function is called, the // fiber will check to ensure its stack has enough room to cover that worst // case and grow the stack if needed. // // This value here doesn't include parameters to the function. Since those // are already pushed onto the stack by the caller and tracked there, we // don't need to double count them here. int numSlots; // The current innermost loop being compiled, or NULL if not in a loop. Loop* loop; // If this is a compiler for a method, keeps track of the class enclosing it. ClassInfo* enclosingClass; // The function being compiled. ObjFn* fn; ObjMap* constants; }; // Describes where a variable is declared. typedef enum { // A local variable in the current function. SCOPE_LOCAL, // A local variable declared in an enclosing function. SCOPE_UPVALUE, // A top-level module variable. SCOPE_MODULE } Scope; // A reference to a variable and the scope where it is defined. This contains // enough information to emit correct code to load or store the variable. typedef struct { // The stack slot, upvalue slot, or module symbol defining the variable. int index; // Where the variable is declared. Scope scope; } Variable; // The stack effect of each opcode. The index in the array is the opcode, and // the value is the stack effect of that instruction. 
static const int stackEffects[] = { #define OPCODE(_, effect) effect, #include "wren_opcodes.h" #undef OPCODE }; static void printError(Parser* parser, int line, const char* label, const char* format, va_list args) { parser->hasError = true; if (!parser->printErrors) return; // Only report errors if there is a WrenErrorFn to handle them. if (parser->vm->config.errorFn == NULL) return; // Format the label and message. char message[ERROR_MESSAGE_SIZE]; int length = sprintf(message, "%s: ", label); length += vsprintf(message + length, format, args); ASSERT(length < ERROR_MESSAGE_SIZE, "Error should not exceed buffer."); ObjString* module = parser->module->name; const char* module_name = module ? module->value : ""; parser->vm->config.errorFn(parser->vm, WREN_ERROR_COMPILE, module_name, line, message); } // Outputs a lexical error. static void lexError(Parser* parser, const char* format, ...) { va_list args; va_start(args, format); printError(parser, parser->currentLine, "Error", format, args); va_end(args); } // Outputs a compile or syntax error. This also marks the compilation as having // an error, which ensures that the resulting code will be discarded and never // run. This means that after calling error(), it's fine to generate whatever // invalid bytecode you want since it won't be used. // // You'll note that most places that call error() continue to parse and compile // after that. That's so that we can try to find as many compilation errors in // one pass as possible instead of just bailing at the first one. static void error(Compiler* compiler, const char* format, ...) { Token* token = &compiler->parser->previous; // If the parse error was caused by an error token, the lexer has already // reported it. 
if (token->type == TOKEN_ERROR) return; va_list args; va_start(args, format); if (token->type == TOKEN_LINE) { printError(compiler->parser, token->line, "Error at newline", format, args); } else if (token->type == TOKEN_EOF) { printError(compiler->parser, token->line, "Error at end of file", format, args); } else { // Make sure we don't exceed the buffer with a very long token. char label[10 + MAX_VARIABLE_NAME + 4 + 1]; if (token->length <= MAX_VARIABLE_NAME) { sprintf(label, "Error at '%.*s'", token->length, token->start); } else { sprintf(label, "Error at '%.*s...'", MAX_VARIABLE_NAME, token->start); } printError(compiler->parser, token->line, label, format, args); } va_end(args); } // Adds [constant] to the constant pool and returns its index. static int addConstant(Compiler* compiler, Value constant) { if (compiler->parser->hasError) return -1; // See if we already have a constant for the value. If so, reuse it. if (compiler->constants != NULL) { Value existing = wrenMapGet(compiler->constants, constant); if (IS_NUM(existing)) return (int)AS_NUM(existing); } // It's a new constant. if (compiler->fn->constants.count < MAX_CONSTANTS) { if (IS_OBJ(constant)) wrenPushRoot(compiler->parser->vm, AS_OBJ(constant)); wrenValueBufferWrite(compiler->parser->vm, &compiler->fn->constants, constant); if (IS_OBJ(constant)) wrenPopRoot(compiler->parser->vm); if (compiler->constants == NULL) { compiler->constants = wrenNewMap(compiler->parser->vm); } wrenMapSet(compiler->parser->vm, compiler->constants, constant, NUM_VAL(compiler->fn->constants.count - 1)); } else { error(compiler, "A function may only contain %d unique constants.", MAX_CONSTANTS); } return compiler->fn->constants.count - 1; } // Initializes [compiler]. 
static void initCompiler(Compiler* compiler, Parser* parser, Compiler* parent, bool isMethod) { compiler->parser = parser; compiler->parent = parent; compiler->loop = NULL; compiler->enclosingClass = NULL; // Initialize these to NULL before allocating in case a GC gets triggered in // the middle of initializing the compiler. compiler->fn = NULL; compiler->constants = NULL; parser->vm->compiler = compiler; // Declare a local slot for either the closure or method receiver so that we // don't try to reuse that slot for a user-defined local variable. For // methods, we name it "this", so that we can resolve references to that like // a normal variable. For functions, they have no explicit "this", so we use // an empty name. That way references to "this" inside a function walks up // the parent chain to find a method enclosing the function whose "this" we // can close over. compiler->numLocals = 1; compiler->numSlots = compiler->numLocals; if (isMethod) { compiler->locals[0].name = "this"; compiler->locals[0].length = 4; } else { compiler->locals[0].name = NULL; compiler->locals[0].length = 0; } compiler->locals[0].depth = -1; compiler->locals[0].isUpvalue = false; if (parent == NULL) { // Compiling top-level code, so the initial scope is module-level. compiler->scopeDepth = -1; } else { // The initial scope for functions and methods is local scope. compiler->scopeDepth = 0; } compiler->fn = wrenNewFunction(parser->vm, parser->module, compiler->numLocals); } // Lexing ---------------------------------------------------------------------- typedef struct { const char* identifier; size_t length; TokenType tokenType; } Keyword; // The table of reserved words and their associated token types. 
static Keyword keywords[] = { {"break", 5, TOKEN_BREAK}, {"class", 5, TOKEN_CLASS}, {"construct", 9, TOKEN_CONSTRUCT}, {"else", 4, TOKEN_ELSE}, {"false", 5, TOKEN_FALSE}, {"for", 3, TOKEN_FOR}, {"foreign", 7, TOKEN_FOREIGN}, {"if", 2, TOKEN_IF}, {"import", 6, TOKEN_IMPORT}, {"in", 2, TOKEN_IN}, {"is", 2, TOKEN_IS}, {"null", 4, TOKEN_NULL}, {"return", 6, TOKEN_RETURN}, {"static", 6, TOKEN_STATIC}, {"super", 5, TOKEN_SUPER}, {"this", 4, TOKEN_THIS}, {"true", 4, TOKEN_TRUE}, {"var", 3, TOKEN_VAR}, {"while", 5, TOKEN_WHILE}, {NULL, 0, TOKEN_EOF} // Sentinel to mark the end of the array. }; // Returns true if [c] is a valid (non-initial) identifier character. static bool isName(char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; } // Returns true if [c] is a digit. static bool isDigit(char c) { return c >= '0' && c <= '9'; } // Returns the current character the parser is sitting on. static char peekChar(Parser* parser) { return *parser->currentChar; } // Returns the character after the current character. static char peekNextChar(Parser* parser) { // If we're at the end of the source, don't read past it. if (peekChar(parser) == '\0') return '\0'; return *(parser->currentChar + 1); } // Advances the parser forward one character. static char nextChar(Parser* parser) { char c = peekChar(parser); parser->currentChar++; if (c == '\n') parser->currentLine++; return c; } // If the current character is [c], consumes it and returns `true`. static bool matchChar(Parser* parser, char c) { if (peekChar(parser) != c) return false; nextChar(parser); return true; } // Sets the parser's current token to the given [type] and current character // range. static void makeToken(Parser* parser, TokenType type) { parser->current.type = type; parser->current.start = parser->tokenStart; parser->current.length = (int)(parser->currentChar - parser->tokenStart); parser->current.line = parser->currentLine; // Make line tokens appear on the line containing the "\n". 
if (type == TOKEN_LINE) parser->current.line--; } // If the current character is [c], then consumes it and makes a token of type // [two]. Otherwise makes a token of type [one]. static void twoCharToken(Parser* parser, char c, TokenType two, TokenType one) { makeToken(parser, matchChar(parser, c) ? two : one); } // Skips the rest of the current line. static void skipLineComment(Parser* parser) { while (peekChar(parser) != '\n' && peekChar(parser) != '\0') { nextChar(parser); } } // Skips the rest of a block comment. static void skipBlockComment(Parser* parser) { int nesting = 1; while (nesting > 0) { if (peekChar(parser) == '\0') { lexError(parser, "Unterminated block comment."); return; } if (peekChar(parser) == '/' && peekNextChar(parser) == '*') { nextChar(parser); nextChar(parser); nesting++; continue; } if (peekChar(parser) == '*' && peekNextChar(parser) == '/') { nextChar(parser); nextChar(parser); nesting--; continue; } // Regular comment character. nextChar(parser); } } // Reads the next character, which should be a hex digit (0-9, a-f, or A-F) and // returns its numeric value. If the character isn't a hex digit, returns -1. static int readHexDigit(Parser* parser) { char c = nextChar(parser); if (c >= '0' && c <= '9') return c - '0'; if (c >= 'a' && c <= 'f') return c - 'a' + 10; if (c >= 'A' && c <= 'F') return c - 'A' + 10; // Don't consume it if it isn't expected. Keeps us from reading past the end // of an unterminated string. parser->currentChar--; return -1; } // Parses the numeric value of the current token. 
static void makeNumber(Parser* parser, bool isHex) { errno = 0; if (isHex) { parser->current.value = NUM_VAL((double)strtoll(parser->tokenStart, NULL, 16)); } else { parser->current.value = NUM_VAL(strtod(parser->tokenStart, NULL)); } if (errno == ERANGE) { lexError(parser, "Number literal was too large (%d).", sizeof(long int)); parser->current.value = NUM_VAL(0); } // We don't check that the entire token is consumed after calling strtoll() // or strtod() because we've already scanned it ourselves and know it's valid. makeToken(parser, TOKEN_NUMBER); } // Finishes lexing a hexadecimal number literal. static void readHexNumber(Parser* parser) { // Skip past the `x` used to denote a hexadecimal literal. nextChar(parser); // Iterate over all the valid hexadecimal digits found. while (readHexDigit(parser) != -1) continue; makeNumber(parser, true); } // Finishes lexing a number literal. static void readNumber(Parser* parser) { while (isDigit(peekChar(parser))) nextChar(parser); // See if it has a floating point. Make sure there is a digit after the "." // so we don't get confused by method calls on number literals. if (peekChar(parser) == '.' && isDigit(peekNextChar(parser))) { nextChar(parser); while (isDigit(peekChar(parser))) nextChar(parser); } // See if the number is in scientific notation. if (matchChar(parser, 'e') || matchChar(parser, 'E')) { // Allow a negative exponent. matchChar(parser, '-'); if (!isDigit(peekChar(parser))) { lexError(parser, "Unterminated scientific notation."); } while (isDigit(peekChar(parser))) nextChar(parser); } makeNumber(parser, false); } // Finishes lexing an identifier. Handles reserved words. static void readName(Parser* parser, TokenType type) { while (isName(peekChar(parser)) || isDigit(peekChar(parser))) { nextChar(parser); } // Update the type if it's a keyword. 
size_t length = parser->currentChar - parser->tokenStart; for (int i = 0; keywords[i].identifier != NULL; i++) { if (length == keywords[i].length && memcmp(parser->tokenStart, keywords[i].identifier, length) == 0) { type = keywords[i].tokenType; break; } } makeToken(parser, type); } // Reads [digits] hex digits in a string literal and returns their number value. static int readHexEscape(Parser* parser, int digits, const char* description) { int value = 0; for (int i = 0; i < digits; i++) { if (peekChar(parser) == '"' || peekChar(parser) == '\0') { lexError(parser, "Incomplete %s escape sequence.", description); // Don't consume it if it isn't expected. Keeps us from reading past the // end of an unterminated string. parser->currentChar--; break; } int digit = readHexDigit(parser); if (digit == -1) { lexError(parser, "Invalid %s escape sequence.", description); break; } value = (value * 16) | digit; } return value; } // Reads a hex digit Unicode escape sequence in a string literal. static void readUnicodeEscape(Parser* parser, ByteBuffer* string, int length) { int value = readHexEscape(parser, length, "Unicode"); // Grow the buffer enough for the encoded result. int numBytes = wrenUtf8EncodeNumBytes(value); if (numBytes != 0) { wrenByteBufferFill(parser->vm, string, 0, numBytes); wrenUtf8Encode(value, string->data + string->count - numBytes); } } // Finishes lexing a string literal. static void readString(Parser* parser) { ByteBuffer string; TokenType type = TOKEN_STRING; wrenByteBufferInit(&string); for (;;) { char c = nextChar(parser); if (c == '"') break; if (c == '\0') { lexError(parser, "Unterminated string."); // Don't consume it if it isn't expected. Keeps us from reading past the // end of an unterminated string. parser->currentChar--; break; } if (c == '%') { if (parser->numParens < MAX_INTERPOLATION_NESTING) { // TODO: Allow format string. 
if (nextChar(parser) != '(') lexError(parser, "Expect '(' after '%%'."); parser->parens[parser->numParens++] = 1; type = TOKEN_INTERPOLATION; break; } lexError(parser, "Interpolation may only nest %d levels deep.", MAX_INTERPOLATION_NESTING); } if (c == '\\') { switch (nextChar(parser)) { case '"': wrenByteBufferWrite(parser->vm, &string, '"'); break; case '\\': wrenByteBufferWrite(parser->vm, &string, '\\'); break; case '%': wrenByteBufferWrite(parser->vm, &string, '%'); break; case '0': wrenByteBufferWrite(parser->vm, &string, '\0'); break; case 'a': wrenByteBufferWrite(parser->vm, &string, '\a'); break; case 'b': wrenByteBufferWrite(parser->vm, &string, '\b'); break; case 'f': wrenByteBufferWrite(parser->vm, &string, '\f'); break; case 'n': wrenByteBufferWrite(parser->vm, &string, '\n'); break; case 'r': wrenByteBufferWrite(parser->vm, &string, '\r'); break; case 't': wrenByteBufferWrite(parser->vm, &string, '\t'); break; case 'u': readUnicodeEscape(parser, &string, 4); break; case 'U': readUnicodeEscape(parser, &string, 8); break; case 'v': wrenByteBufferWrite(parser->vm, &string, '\v'); break; case 'x': wrenByteBufferWrite(parser->vm, &string, (uint8_t)readHexEscape(parser, 2, "byte")); break; default: lexError(parser, "Invalid escape character '%c'.", *(parser->currentChar - 1)); break; } } else { wrenByteBufferWrite(parser->vm, &string, c); } } parser->current.value = wrenNewStringLength(parser->vm, (char*)string.data, string.count); wrenByteBufferClear(parser->vm, &string); makeToken(parser, type); } // Lex the next token and store it in [parser.current]. static void nextToken(Parser* parser) { parser->previous = parser->current; // If we are out of tokens, don't try to tokenize any more. We *do* still // copy the TOKEN_EOF to previous so that code that expects it to be consumed // will still work. 
if (parser->current.type == TOKEN_EOF) return; while (peekChar(parser) != '\0') { parser->tokenStart = parser->currentChar; char c = nextChar(parser); switch (c) { case '(': // If we are inside an interpolated expression, count the unmatched "(". if (parser->numParens > 0) parser->parens[parser->numParens - 1]++; makeToken(parser, TOKEN_LEFT_PAREN); return; case ')': // If we are inside an interpolated expression, count the ")". if (parser->numParens > 0 && --parser->parens[parser->numParens - 1] == 0) { // This is the final ")", so the interpolation expression has ended. // This ")" now begins the next section of the template string. parser->numParens--; readString(parser); return; } makeToken(parser, TOKEN_RIGHT_PAREN); return; case '[': makeToken(parser, TOKEN_LEFT_BRACKET); return; case ']': makeToken(parser, TOKEN_RIGHT_BRACKET); return; case '{': makeToken(parser, TOKEN_LEFT_BRACE); return; case '}': makeToken(parser, TOKEN_RIGHT_BRACE); return; case ':': makeToken(parser, TOKEN_COLON); return; case ',': makeToken(parser, TOKEN_COMMA); return; case '*': makeToken(parser, TOKEN_STAR); return; case '%': makeToken(parser, TOKEN_PERCENT); return; case '^': makeToken(parser, TOKEN_CARET); return; case '+': makeToken(parser, TOKEN_PLUS); return; case '-': makeToken(parser, TOKEN_MINUS); return; case '~': makeToken(parser, TOKEN_TILDE); return; case '?': makeToken(parser, TOKEN_QUESTION); return; case '|': twoCharToken(parser, '|', TOKEN_PIPEPIPE, TOKEN_PIPE); return; case '&': twoCharToken(parser, '&', TOKEN_AMPAMP, TOKEN_AMP); return; case '=': twoCharToken(parser, '=', TOKEN_EQEQ, TOKEN_EQ); return; case '!': twoCharToken(parser, '=', TOKEN_BANGEQ, TOKEN_BANG); return; case '.': if (matchChar(parser, '.')) { twoCharToken(parser, '.', TOKEN_DOTDOTDOT, TOKEN_DOTDOT); return; } makeToken(parser, TOKEN_DOT); return; case '/': if (matchChar(parser, '/')) { skipLineComment(parser); break; } if (matchChar(parser, '*')) { skipBlockComment(parser); break; } 
makeToken(parser, TOKEN_SLASH); return; case '<': if (matchChar(parser, '<')) { makeToken(parser, TOKEN_LTLT); } else { twoCharToken(parser, '=', TOKEN_LTEQ, TOKEN_LT); } return; case '>': if (matchChar(parser, '>')) { makeToken(parser, TOKEN_GTGT); } else { twoCharToken(parser, '=', TOKEN_GTEQ, TOKEN_GT); } return; case '\n': makeToken(parser, TOKEN_LINE); return; case ' ': case '\r': case '\t': // Skip forward until we run out of whitespace. while (peekChar(parser) == ' ' || peekChar(parser) == '\r' || peekChar(parser) == '\t') { nextChar(parser); } break; case '"': readString(parser); return; case '_': readName(parser, peekChar(parser) == '_' ? TOKEN_STATIC_FIELD : TOKEN_FIELD); return; case '0': if (peekChar(parser) == 'x') { readHexNumber(parser); return; } readNumber(parser); return; default: if (parser->currentLine == 1 && c == '#' && peekChar(parser) == '!') { // Ignore shebang on the first line. skipLineComment(parser); break; } if (isName(c)) { readName(parser, TOKEN_NAME); } else if (isDigit(c)) { readNumber(parser); } else { if (c >= 32 && c <= 126) { lexError(parser, "Invalid character '%c'.", c); } else { // Don't show non-ASCII values since we didn't UTF-8 decode the // bytes. Since there are no non-ASCII byte values that are // meaningful code units in Wren, the lexer works on raw bytes, // even though the source code and console output are UTF-8. lexError(parser, "Invalid byte 0x%x.", (uint8_t)c); } parser->current.type = TOKEN_ERROR; parser->current.length = 0; } return; } } // If we get here, we're out of source, so just make EOF tokens. parser->tokenStart = parser->currentChar; makeToken(parser, TOKEN_EOF); } // Parsing --------------------------------------------------------------------- // Returns the type of the current token. static TokenType peek(Compiler* compiler) { return compiler->parser->current.type; } // Consumes the current token if its type is [expected]. Returns true if a // token was consumed. 
static bool match(Compiler* compiler, TokenType expected) { if (peek(compiler) != expected) return false; nextToken(compiler->parser); return true; } // Consumes the current token. Emits an error if its type is not [expected]. static void consume(Compiler* compiler, TokenType expected, const char* errorMessage) { nextToken(compiler->parser); if (compiler->parser->previous.type != expected) { error(compiler, errorMessage); // If the next token is the one we want, assume the current one is just a // spurious error and discard it to minimize the number of cascaded errors. if (compiler->parser->current.type == expected) nextToken(compiler->parser); } } // Matches one or more newlines. Returns true if at least one was found. static bool matchLine(Compiler* compiler) { if (!match(compiler, TOKEN_LINE)) return false; while (match(compiler, TOKEN_LINE)); return true; } // Discards any newlines starting at the current token. static void ignoreNewlines(Compiler* compiler) { matchLine(compiler); } // Consumes the current token. Emits an error if it is not a newline. Then // discards any duplicate newlines following it. static void consumeLine(Compiler* compiler, const char* errorMessage) { consume(compiler, TOKEN_LINE, errorMessage); ignoreNewlines(compiler); } // Variables and scopes -------------------------------------------------------- // Emits one single-byte argument. Returns its index. static int emitByte(Compiler* compiler, int byte) { wrenByteBufferWrite(compiler->parser->vm, &compiler->fn->code, (uint8_t)byte); // Assume the instruction is associated with the most recently consumed token. wrenIntBufferWrite(compiler->parser->vm, &compiler->fn->debug->sourceLines, compiler->parser->previous.line); return compiler->fn->code.count - 1; } // Emits one bytecode instruction. static void emitOp(Compiler* compiler, Code instruction) { emitByte(compiler, instruction); // Keep track of the stack's high water mark. 
compiler->numSlots += stackEffects[instruction]; if (compiler->numSlots > compiler->fn->maxSlots) { compiler->fn->maxSlots = compiler->numSlots; } } // Emits one 16-bit argument, which will be written big endian. static void emitShort(Compiler* compiler, int arg) { emitByte(compiler, (arg >> 8) & 0xff); emitByte(compiler, arg & 0xff); } // Emits one bytecode instruction followed by a 8-bit argument. Returns the // index of the argument in the bytecode. static int emitByteArg(Compiler* compiler, Code instruction, int arg) { emitOp(compiler, instruction); return emitByte(compiler, arg); } // Emits one bytecode instruction followed by a 16-bit argument, which will be // written big endian. static void emitShortArg(Compiler* compiler, Code instruction, int arg) { emitOp(compiler, instruction); emitShort(compiler, arg); } // Emits [instruction] followed by a placeholder for a jump offset. The // placeholder can be patched by calling [jumpPatch]. Returns the index of the // placeholder. static int emitJump(Compiler* compiler, Code instruction) { emitOp(compiler, instruction); emitByte(compiler, 0xff); return emitByte(compiler, 0xff) - 1; } // Creates a new constant for the current value and emits the bytecode to load // it from the constant table. static void emitConstant(Compiler* compiler, Value value) { int constant = addConstant(compiler, value); // Compile the code to load the constant. emitShortArg(compiler, CODE_CONSTANT, constant); } // Create a new local variable with [name]. Assumes the current scope is local // and the name is unique. static int addLocal(Compiler* compiler, const char* name, int length) { Local* local = &compiler->locals[compiler->numLocals]; local->name = name; local->length = length; local->depth = compiler->scopeDepth; local->isUpvalue = false; return compiler->numLocals++; } // Declares a variable in the current scope whose name is the given token. // // If [token] is `NULL`, uses the previously consumed token. Returns its symbol. 
static int declareVariable(Compiler* compiler, Token* token)
{
  if (token == NULL) token = &compiler->parser->previous;

  // An over-long name is reported, but the variable is still declared.
  if (token->length > MAX_VARIABLE_NAME) {
    error(compiler, "Variable name cannot be longer than %d characters.",
          MAX_VARIABLE_NAME);
  }

  // A scope depth of -1 means top-level module scope: define a module variable.
  if (compiler->scopeDepth == -1) {
    int symbol = wrenDefineVariable(compiler->parser->vm,
                                    compiler->parser->module,
                                    token->start, token->length, NULL_VAL);
    switch (symbol) {
      case -1:
        error(compiler, "Module variable is already defined.");
        break;

      case -2:
        error(compiler, "Too many module variables defined.");
        break;

      default:
        break;
    }

    return symbol;
  }

  // Walk the locals from newest to oldest looking for a clash inside the
  // current scope. Names in outer scopes are fine: they simply get shadowed.
  int slot = compiler->numLocals;
  while (slot-- > 0) {
    Local* candidate = &compiler->locals[slot];

    // The first local from an outer scope ends the search.
    if (candidate->depth < compiler->scopeDepth) break;

    bool sameName = candidate->length == token->length &&
                    memcmp(candidate->name, token->start, token->length) == 0;
    if (sameName) {
      error(compiler, "Variable is already declared in this scope.");
      return slot;
    }
  }

  if (compiler->numLocals == MAX_LOCALS) {
    error(compiler, "Cannot declare more than %d variables in one scope.",
          MAX_LOCALS);
    return -1;
  }

  return addLocal(compiler, token->start, token->length);
}

// Consumes a name token and declares a variable with that name in the current
// scope. Returns what [declareVariable] returns for it.
static int declareNamedVariable(Compiler* compiler)
{
  consume(compiler, TOKEN_NAME, "Expect variable name.");
  return declareVariable(compiler, NULL);
}

// Stores a value into the variable previously declared as [symbol] in the
// current scope.
static void defineVariable(Compiler* compiler, int symbol)
{
  // Only module-level variables need code here. For a local, the initializer's
  // result is already sitting in the right stack slot.
  if (compiler->scopeDepth < 0) {
    // Write the value into the module slot, then drop the temporary that the
    // initializer left behind.
    emitShortArg(compiler, CODE_STORE_MODULE_VAR, symbol);
    emitOp(compiler, CODE_POP);
  }
}

// Enters a new local block scope.
static void pushScope(Compiler* compiler)
{
  compiler->scopeDepth += 1;
}

// Emits code that discards every local variable declared at [depth] or deeper.
// The compiler's own bookkeeping is left untouched: no variable is undeclared
// and no scope is popped. "break" statements call this directly to drop locals
// before jumping out of a loop, even though those locals remain in scope for
// the code that follows the break.
//
// Returns how many local variables were discarded.
static int discardLocals(Compiler* compiler, int depth)
{
  ASSERT(compiler->scopeDepth > -1, "Cannot exit top-level scope.");

  int discarded = 0;
  for (int i = compiler->numLocals - 1;
       i >= 0 && compiler->locals[i].depth >= depth;
       i--) {
    // A captured local must have its upvalue closed as it leaves the stack;
    // anything else is just popped. emitByte() is used instead of emitOp() on
    // purpose so that these pops are not counted as a stack effect: the
    // variables are still in scope after a break.
    Code discardOp =
        compiler->locals[i].isUpvalue ? CODE_CLOSE_UPVALUE : CODE_POP;
    emitByte(compiler, discardOp);
    discarded++;
  }

  return discarded;
}

// Leaves the innermost block scope, discarding the locals declared in it. Only
// valid in statement position, when no temporaries remain on the stack.
static void popScope(Compiler* compiler)
{
  int removed = discardLocals(compiler, compiler->scopeDepth);

  compiler->numLocals -= removed;
  compiler->numSlots -= removed;
  compiler->scopeDepth -= 1;
}

// Attempts to look up the name in the local variables of [compiler]. If found,
// returns its index, otherwise returns -1.
static int resolveLocal(Compiler* compiler, const char* name, int length) { // Look it up in the local scopes. Look in reverse order so that the most // nested variable is found first and shadows outer ones. for (int i = compiler->numLocals - 1; i >= 0; i--) { if (compiler->locals[i].length == length && memcmp(name, compiler->locals[i].name, length) == 0) { return i; } } return -1; } // Adds an upvalue to [compiler]'s function with the given properties. Does not // add one if an upvalue for that variable is already in the list. Returns the // index of the upvalue. static int addUpvalue(Compiler* compiler, bool isLocal, int index) { // Look for an existing one. for (int i = 0; i < compiler->fn->numUpvalues; i++) { CompilerUpvalue* upvalue = &compiler->upvalues[i]; if (upvalue->index == index && upvalue->isLocal == isLocal) return i; } // If we got here, it's a new upvalue. compiler->upvalues[compiler->fn->numUpvalues].isLocal = isLocal; compiler->upvalues[compiler->fn->numUpvalues].index = index; return compiler->fn->numUpvalues++; } // Attempts to look up [name] in the functions enclosing the one being compiled // by [compiler]. If found, it adds an upvalue for it to this compiler's list // of upvalues (unless it's already in there) and returns its index. If not // found, returns -1. // // If the name is found outside of the immediately enclosing function, this // will flatten the closure and add upvalues to all of the intermediate // functions so that it gets walked down to this one. // // If it reaches a method boundary, this stops and returns -1 since methods do // not close over local variables. static int findUpvalue(Compiler* compiler, const char* name, int length) { // If we are at the top level, we didn't find it. if (compiler->parent == NULL) return -1; // If we hit the method boundary (and the name isn't a static field), then // stop looking for it. We'll instead treat it as a self send. 
if (name[0] != '_' && compiler->parent->enclosingClass != NULL) return -1; // See if it's a local variable in the immediately enclosing function. int local = resolveLocal(compiler->parent, name, length); if (local != -1) { // Mark the local as an upvalue so we know to close it when it goes out of // scope. compiler->parent->locals[local].isUpvalue = true; return addUpvalue(compiler, true, local); } // See if it's an upvalue in the immediately enclosing function. In other // words, if it's a local variable in a non-immediately enclosing function. // This "flattens" closures automatically: it adds upvalues to all of the // intermediate functions to get from the function where a local is declared // all the way into the possibly deeply nested function that is closing over // it. int upvalue = findUpvalue(compiler->parent, name, length); if (upvalue != -1) { return addUpvalue(compiler, false, upvalue); } // If we got here, we walked all the way up the parent chain and couldn't // find it. return -1; } // Look up [name] in the current scope to see what variable it refers to. // Returns the variable either in local scope, or the enclosing function's // upvalue list. Does not search the module scope. Returns a variable with // index -1 if not found. static Variable resolveNonmodule(Compiler* compiler, const char* name, int length) { // Look it up in the local scopes. Variable variable; variable.scope = SCOPE_LOCAL; variable.index = resolveLocal(compiler, name, length); if (variable.index != -1) return variable; // Tt's not a local, so guess that it's an upvalue. variable.scope = SCOPE_UPVALUE; variable.index = findUpvalue(compiler, name, length); return variable; } // Look up [name] in the current scope to see what variable it refers to. // Returns the variable either in module scope, local scope, or the enclosing // function's upvalue list. Returns a variable with index -1 if not found. 
static Variable resolveName(Compiler* compiler, const char* name, int length) { Variable variable = resolveNonmodule(compiler, name, length); if (variable.index != -1) return variable; variable.scope = SCOPE_MODULE; variable.index = wrenSymbolTableFind(&compiler->parser->module->variableNames, name, length); return variable; } static void loadLocal(Compiler* compiler, int slot) { if (slot <= 8) { emitOp(compiler, (Code)(CODE_LOAD_LOCAL_0 + slot)); return; } emitByteArg(compiler, CODE_LOAD_LOCAL, slot); } // Finishes [compiler], which is compiling a function, method, or chunk of top // level code. If there is a parent compiler, then this emits code in the // parent compiler to load the resulting function. static ObjFn* endCompiler(Compiler* compiler, const char* debugName, int debugNameLength) { // If we hit an error, don't finish the function since it's borked anyway. if (compiler->parser->hasError) { compiler->parser->vm->compiler = compiler->parent; return NULL; } // Mark the end of the bytecode. Since it may contain multiple early returns, // we can't rely on CODE_RETURN to tell us we're at the end. emitOp(compiler, CODE_END); wrenFunctionBindName(compiler->parser->vm, compiler->fn, debugName, debugNameLength); // In the function that contains this one, load the resulting function object. if (compiler->parent != NULL) { int constant = addConstant(compiler->parent, OBJ_VAL(compiler->fn)); // Wrap the function in a closure. We do this even if it has no upvalues so // that the VM can uniformly assume all called objects are closures. This // makes creating a function a little slower, but makes invoking them // faster. Given that functions are invoked more often than they are // created, this is a win. emitShortArg(compiler->parent, CODE_CLOSURE, constant); // Emit arguments for each upvalue to know whether to capture a local or // an upvalue. for (int i = 0; i < compiler->fn->numUpvalues; i++) { emitByte(compiler->parent, compiler->upvalues[i].isLocal ? 
1 : 0); emitByte(compiler->parent, compiler->upvalues[i].index); } } // Pop this compiler off the stack. compiler->parser->vm->compiler = compiler->parent; #if WREN_DEBUG_DUMP_COMPILED_CODE wrenDumpCode(compiler->parser->vm, compiler->fn); #endif return compiler->fn; } // Grammar --------------------------------------------------------------------- typedef enum { PREC_NONE, PREC_LOWEST, PREC_ASSIGNMENT, // = PREC_CONDITIONAL, // ?: PREC_LOGICAL_OR, // || PREC_LOGICAL_AND, // && PREC_EQUALITY, // == != PREC_IS, // is PREC_COMPARISON, // < > <= >= PREC_BITWISE_OR, // | PREC_BITWISE_XOR, // ^ PREC_BITWISE_AND, // & PREC_BITWISE_SHIFT, // << >> PREC_RANGE, // .. ... PREC_TERM, // + - PREC_FACTOR, // * / % PREC_UNARY, // unary - ! ~ PREC_CALL, // . () [] PREC_PRIMARY } Precedence; typedef void (*GrammarFn)(Compiler*, bool canAssign); typedef void (*SignatureFn)(Compiler* compiler, Signature* signature); typedef struct { GrammarFn prefix; GrammarFn infix; SignatureFn method; Precedence precedence; const char* name; } GrammarRule; // Forward declarations since the grammar is recursive. static GrammarRule* getRule(TokenType type); static void expression(Compiler* compiler); static void statement(Compiler* compiler); static void definition(Compiler* compiler); static void parsePrecedence(Compiler* compiler, Precedence precedence); // Replaces the placeholder argument for a previous CODE_JUMP or CODE_JUMP_IF // instruction with an offset that jumps to the current end of bytecode. static void patchJump(Compiler* compiler, int offset) { // -2 to adjust for the bytecode for the jump offset itself. int jump = compiler->fn->code.count - offset - 2; if (jump > MAX_JUMP) error(compiler, "Too much code to jump over."); compiler->fn->code.data[offset] = (jump >> 8) & 0xff; compiler->fn->code.data[offset + 1] = jump & 0xff; } // Parses a block body, after the initial "{" has been consumed. // // Returns true if it was a expression body, false if it was a statement body. 
// (More precisely, returns true if a value was left on the stack. An empty // block returns false.) static bool finishBlock(Compiler* compiler) { // Empty blocks do nothing. if (match(compiler, TOKEN_RIGHT_BRACE)) return false; // If there's no line after the "{", it's a single-expression body. if (!matchLine(compiler)) { expression(compiler); consume(compiler, TOKEN_RIGHT_BRACE, "Expect '}' at end of block."); return true; } // Empty blocks (with just a newline inside) do nothing. if (match(compiler, TOKEN_RIGHT_BRACE)) return false; // Compile the definition list. do { definition(compiler); consumeLine(compiler, "Expect newline after statement."); } while (peek(compiler) != TOKEN_RIGHT_BRACE && peek(compiler) != TOKEN_EOF); consume(compiler, TOKEN_RIGHT_BRACE, "Expect '}' at end of block."); return false; } // Parses a method or function body, after the initial "{" has been consumed. // // It [isInitializer] is `true`, this is the body of a constructor initializer. // In that case, this adds the code to ensure it returns `this`. static void finishBody(Compiler* compiler, bool isInitializer) { bool isExpressionBody = finishBlock(compiler); if (isInitializer) { // If the initializer body evaluates to a value, discard it. if (isExpressionBody) emitOp(compiler, CODE_POP); // The receiver is always stored in the first local slot. emitOp(compiler, CODE_LOAD_LOCAL_0); } else if (!isExpressionBody) { // Implicitly return null in statement bodies. emitOp(compiler, CODE_NULL); } emitOp(compiler, CODE_RETURN); } // The VM can only handle a certain number of parameters, so check that we // haven't exceeded that and give a usable error. static void validateNumParameters(Compiler* compiler, int numArgs) { if (numArgs == MAX_PARAMETERS + 1) { // Only show an error at exactly max + 1 so that we can keep parsing the // parameters and minimize cascaded errors. 
error(compiler, "Methods cannot have more than %d parameters.", MAX_PARAMETERS); } } // Parses the rest of a comma-separated parameter list after the opening // delimeter. Updates `arity` in [signature] with the number of parameters. static void finishParameterList(Compiler* compiler, Signature* signature) { do { ignoreNewlines(compiler); validateNumParameters(compiler, ++signature->arity); // Define a local variable in the method for the parameter. declareNamedVariable(compiler); } while (match(compiler, TOKEN_COMMA)); } // Gets the symbol for a method [name] with [length]. static int methodSymbol(Compiler* compiler, const char* name, int length) { return wrenSymbolTableEnsure(compiler->parser->vm, &compiler->parser->vm->methodNames, name, length); } // Appends characters to [name] (and updates [length]) for [numParams] "_" // surrounded by [leftBracket] and [rightBracket]. static void signatureParameterList(char name[MAX_METHOD_SIGNATURE], int* length, int numParams, char leftBracket, char rightBracket) { name[(*length)++] = leftBracket; // This function may be called with too many parameters. When that happens, // a compile error has already been reported, but we need to make sure we // don't overflow the string too, hence the MAX_PARAMETERS check. for (int i = 0; i < numParams && i < MAX_PARAMETERS; i++) { if (i > 0) name[(*length)++] = ','; name[(*length)++] = '_'; } name[(*length)++] = rightBracket; } // Fills [name] with the stringified version of [signature] and updates // [length] to the resulting length. static void signatureToString(Signature* signature, char name[MAX_METHOD_SIGNATURE], int* length) { *length = 0; // Build the full name from the signature. memcpy(name + *length, signature->name, signature->length); *length += signature->length; switch (signature->type) { case SIG_METHOD: signatureParameterList(name, length, signature->arity, '(', ')'); break; case SIG_GETTER: // The signature is just the name. 
break; case SIG_SETTER: name[(*length)++] = '='; signatureParameterList(name, length, 1, '(', ')'); break; case SIG_SUBSCRIPT: signatureParameterList(name, length, signature->arity, '[', ']'); break; case SIG_SUBSCRIPT_SETTER: signatureParameterList(name, length, signature->arity - 1, '[', ']'); name[(*length)++] = '='; signatureParameterList(name, length, 1, '(', ')'); break; case SIG_INITIALIZER: memcpy(name, "init ", 5); memcpy(name + 5, signature->name, signature->length); *length = 5 + signature->length; signatureParameterList(name, length, signature->arity, '(', ')'); break; } name[*length] = '\0'; } // Gets the symbol for a method with [signature]. static int signatureSymbol(Compiler* compiler, Signature* signature) { // Build the full name from the signature. char name[MAX_METHOD_SIGNATURE]; int length; signatureToString(signature, name, &length); return methodSymbol(compiler, name, length); } // Returns a signature with [type] whose name is from the last consumed token. static Signature signatureFromToken(Compiler* compiler, SignatureType type) { Signature signature; // Get the token for the method name. Token* token = &compiler->parser->previous; signature.name = token->start; signature.length = token->length; signature.type = type; signature.arity = 0; if (signature.length > MAX_METHOD_NAME) { error(compiler, "Method names cannot be longer than %d characters.", MAX_METHOD_NAME); signature.length = MAX_METHOD_NAME; } return signature; } // Parses a comma-separated list of arguments. Modifies [signature] to include // the arity of the argument list. static void finishArgumentList(Compiler* compiler, Signature* signature) { do { ignoreNewlines(compiler); validateNumParameters(compiler, ++signature->arity); expression(compiler); } while (match(compiler, TOKEN_COMMA)); // Allow a newline before the closing delimiter. ignoreNewlines(compiler); } // Compiles a method call with [signature] using [instruction]. 
static void callSignature(Compiler* compiler, Code instruction,
                          Signature* signature)
{
  int symbol = signatureSymbol(compiler, signature);

  // The opcode actually emitted is [instruction] offset by the arity.
  Code opcode = (Code)(instruction + signature->arity);
  emitShortArg(compiler, opcode, symbol);

  if (instruction != CODE_SUPER_0) return;

  // A super call has to be bound statically to the superclass of the class
  // that defines the method, so that the right method is called even when the
  // method containing the super call is inherited by another subclass.
  //
  // The binding happens at class definition time through a constant that
  // refers to the superclass. Here we only reserve that constant slot, filled
  // with NULL; it gets replaced with the superclass when the method is bound.
  emitShort(compiler, addConstant(compiler, NULL_VAL));
}

// Compiles a call, with [numArgs] arguments, to the method whose full name is
// [name] with [length].
static void callMethod(Compiler* compiler, int numArgs,
                       const char* name, int length)
{
  int symbol = methodSymbol(compiler, name, length);
  Code opcode = (Code)(CODE_CALL_0 + numArgs);
  emitShortArg(compiler, opcode, symbol);
}

// Compiles the optional argument list and optional block argument of a call to
// the method named by [signature], then emits the call using [instruction].
static void methodCall(Compiler* compiler, Code instruction,
                       Signature* signature)
{
  // Work on a copy of the signature whose type and arity reflect the arguments
  // that are actually found.
  Signature called = { signature->name, signature->length, SIG_GETTER, 0 };

  // A parenthesized argument list, if present.
  if (match(compiler, TOKEN_LEFT_PAREN)) {
    called.type = SIG_METHOD;

    // The list is allowed to be empty.
    if (peek(compiler) != TOKEN_RIGHT_PAREN) {
      finishArgumentList(compiler, &called);
    }

    consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after arguments.");
  }

  // A trailing block argument, if present.
  if (match(compiler, TOKEN_LEFT_BRACE)) {
    // The block counts as one more argument.
    called.type = SIG_METHOD;
    called.arity++;

    Compiler fnCompiler;
    initCompiler(&fnCompiler, compiler->parser, compiler, false);

    // This signature exists only to count the block's parameters.
    Signature fnSignature = { "", 0, SIG_METHOD, 0 };

    // The block's own parameter list, if present.
    if (match(compiler, TOKEN_PIPE)) {
      finishParameterList(&fnCompiler, &fnSignature);
      consume(compiler, TOKEN_PIPE, "Expect '|' after function parameters.");
    }

    fnCompiler.fn->arity = fnSignature.arity;

    finishBody(&fnCompiler, false);

    // The function's debug name is the signature of the method it is passed
    // to, followed by " block argument". The copy is 16 bytes so that the
    // literal's terminator comes along too.
    char blockName[MAX_METHOD_SIGNATURE + 15];
    int blockLength;
    signatureToString(&called, blockName, &blockLength);
    memmove(blockName + blockLength, " block argument", 16);

    endCompiler(&fnCompiler, blockName, blockLength + 15);
  }

  // TODO: Allow Grace-style mixfix methods?

  // A super() call to an initializer must have been given an actual argument
  // list.
  if (signature->type == SIG_INITIALIZER) {
    if (called.type != SIG_METHOD) {
      error(compiler, "A superclass constructor must have an argument list.");
    }

    called.type = SIG_INITIALIZER;
  }

  callSignature(compiler, instruction, &called);
}

// Compiles a call whose method name is the most recently consumed token. That
// covers getters, setter calls, and method calls with arguments.
static void namedCall(Compiler* compiler, bool canAssign, Code instruction)
{
  Signature signature = signatureFromToken(compiler, SIG_GETTER);

  // Anything that is not followed by an assignable "=" is a getter or a
  // regular method call.
  if (!canAssign || !match(compiler, TOKEN_EQ)) {
    methodCall(compiler, instruction, &signature);
    return;
  }

  ignoreNewlines(compiler);

  // It is a setter, which takes exactly one argument.
  signature.type = SIG_SETTER;
  signature.arity = 1;

  // The assigned value.
  expression(compiler);

  callSignature(compiler, instruction, &signature);
}

// Emits the code to load [variable] onto the stack.
static void loadVariable(Compiler* compiler, Variable variable) { switch (variable.scope) { case SCOPE_LOCAL: loadLocal(compiler, variable.index); break; case SCOPE_UPVALUE: emitByteArg(compiler, CODE_LOAD_UPVALUE, variable.index); break; case SCOPE_MODULE: emitShortArg(compiler, CODE_LOAD_MODULE_VAR, variable.index); break; default: UNREACHABLE(); } } // Loads the receiver of the currently enclosing method. Correctly handles // functions defined inside methods. static void loadThis(Compiler* compiler) { loadVariable(compiler, resolveNonmodule(compiler, "this", 4)); } // Pushes the value for a module-level variable implicitly imported from core. static void loadCoreVariable(Compiler* compiler, const char* name) { int symbol = wrenSymbolTableFind(&compiler->parser->module->variableNames, name, strlen(name)); ASSERT(symbol != -1, "Should have already defined core name."); emitShortArg(compiler, CODE_LOAD_MODULE_VAR, symbol); } // A parenthesized expression. static void grouping(Compiler* compiler, bool canAssign) { expression(compiler); consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after expression."); } // A list literal. static void list(Compiler* compiler, bool canAssign) { // Instantiate a new list. loadCoreVariable(compiler, "List"); callMethod(compiler, 0, "new()", 5); // Compile the list elements. Each one compiles to a ".add()" call. do { ignoreNewlines(compiler); // Stop if we hit the end of the list. if (peek(compiler) == TOKEN_RIGHT_BRACKET) break; // The element. expression(compiler); callMethod(compiler, 1, "addCore_(_)", 11); } while (match(compiler, TOKEN_COMMA)); // Allow newlines before the closing ']'. ignoreNewlines(compiler); consume(compiler, TOKEN_RIGHT_BRACKET, "Expect ']' after list elements."); } // A map literal. static void map(Compiler* compiler, bool canAssign) { // Instantiate a new map. loadCoreVariable(compiler, "Map"); callMethod(compiler, 0, "new()", 5); // Compile the map elements. 
Each one is compiled to just invoke the // subscript setter on the map. do { ignoreNewlines(compiler); // Stop if we hit the end of the map. if (peek(compiler) == TOKEN_RIGHT_BRACE) break; // The key. parsePrecedence(compiler, PREC_UNARY); consume(compiler, TOKEN_COLON, "Expect ':' after map key."); ignoreNewlines(compiler); // The value. expression(compiler); callMethod(compiler, 2, "addCore_(_,_)", 13); } while (match(compiler, TOKEN_COMMA)); // Allow newlines before the closing '}'. ignoreNewlines(compiler); consume(compiler, TOKEN_RIGHT_BRACE, "Expect '}' after map entries."); } // Unary operators like `-foo`. static void unaryOp(Compiler* compiler, bool canAssign) { GrammarRule* rule = getRule(compiler->parser->previous.type); ignoreNewlines(compiler); // Compile the argument. parsePrecedence(compiler, (Precedence)(PREC_UNARY + 1)); // Call the operator method on the left-hand side. callMethod(compiler, 0, rule->name, 1); } static void boolean(Compiler* compiler, bool canAssign) { emitOp(compiler, compiler->parser->previous.type == TOKEN_FALSE ? CODE_FALSE : CODE_TRUE); } // Walks the compiler chain to find the compiler for the nearest class // enclosing this one. Returns NULL if not currently inside a class definition. static Compiler* getEnclosingClassCompiler(Compiler* compiler) { while (compiler != NULL) { if (compiler->enclosingClass != NULL) return compiler; compiler = compiler->parent; } return NULL; } // Walks the compiler chain to find the nearest class enclosing this one. // Returns NULL if not currently inside a class definition. static ClassInfo* getEnclosingClass(Compiler* compiler) { compiler = getEnclosingClassCompiler(compiler); return compiler == NULL ? NULL : compiler->enclosingClass; } static void field(Compiler* compiler, bool canAssign) { // Initialize it with a fake value so we can keep parsing and minimize the // number of cascaded errors. 
int field = 255; ClassInfo* enclosingClass = getEnclosingClass(compiler); if (enclosingClass == NULL) { error(compiler, "Cannot reference a field outside of a class definition."); } else if (enclosingClass->isForeign) { error(compiler, "Cannot define fields in a foreign class."); } else if (enclosingClass->inStatic) { error(compiler, "Cannot use an instance field in a static method."); } else { // Look up the field, or implicitly define it. field = wrenSymbolTableEnsure(compiler->parser->vm, &enclosingClass->fields, compiler->parser->previous.start, compiler->parser->previous.length); if (field >= MAX_FIELDS) { error(compiler, "A class can only have %d fields.", MAX_FIELDS); } } // If there's an "=" after a field name, it's an assignment. bool isLoad = true; if (canAssign && match(compiler, TOKEN_EQ)) { // Compile the right-hand side. expression(compiler); isLoad = false; } // If we're directly inside a method, use a more optimal instruction. if (compiler->parent != NULL && compiler->parent->enclosingClass == enclosingClass) { emitByteArg(compiler, isLoad ? CODE_LOAD_FIELD_THIS : CODE_STORE_FIELD_THIS, field); } else { loadThis(compiler); emitByteArg(compiler, isLoad ? CODE_LOAD_FIELD : CODE_STORE_FIELD, field); } } // Compiles a read or assignment to [variable]. static void bareName(Compiler* compiler, bool canAssign, Variable variable) { // If there's an "=" after a bare name, it's a variable assignment. if (canAssign && match(compiler, TOKEN_EQ)) { // Compile the right-hand side. expression(compiler); // Emit the store instruction. switch (variable.scope) { case SCOPE_LOCAL: emitByteArg(compiler, CODE_STORE_LOCAL, variable.index); break; case SCOPE_UPVALUE: emitByteArg(compiler, CODE_STORE_UPVALUE, variable.index); break; case SCOPE_MODULE: emitShortArg(compiler, CODE_STORE_MODULE_VAR, variable.index); break; default: UNREACHABLE(); } return; } // Emit the load instruction. 
loadVariable(compiler, variable); } static void staticField(Compiler* compiler, bool canAssign) { Compiler* classCompiler = getEnclosingClassCompiler(compiler); if (classCompiler == NULL) { error(compiler, "Cannot use a static field outside of a class definition."); return; } // Look up the name in the scope chain. Token* token = &compiler->parser->previous; // If this is the first time we've seen this static field, implicitly // define it as a variable in the scope surrounding the class definition. if (resolveLocal(classCompiler, token->start, token->length) == -1) { int symbol = declareVariable(classCompiler, NULL); // Implicitly initialize it to null. emitOp(classCompiler, CODE_NULL); defineVariable(classCompiler, symbol); } // It definitely exists now, so resolve it properly. This is different from // the above resolveLocal() call because we may have already closed over it // as an upvalue. Variable variable = resolveName(compiler, token->start, token->length); bareName(compiler, canAssign, variable); } // Returns `true` if [name] is a local variable name (starts with a lowercase // letter). static bool isLocalName(const char* name) { return name[0] >= 'a' && name[0] <= 'z'; } // Compiles a variable name or method call with an implicit receiver. static void name(Compiler* compiler, bool canAssign) { // Look for the name in the scope chain up to the nearest enclosing method. Token* token = &compiler->parser->previous; Variable variable = resolveNonmodule(compiler, token->start, token->length); if (variable.index != -1) { bareName(compiler, canAssign, variable); return; } // TODO: The fact that we return above here if the variable is known and parse // an optional argument list below if not means that the grammar is not // context-free. A line of code in a method like "someName(foo)" is a parse // error if "someName" is a defined variable in the surrounding scope and not // if it isn't. Fix this. 
One option is to have "someName(foo)" always // resolve to a self-call if there is an argument list, but that makes // getters a little confusing. // If we're inside a method and the name is lowercase, treat it as a method // on this. if (isLocalName(token->start) && getEnclosingClass(compiler) != NULL) { loadThis(compiler); namedCall(compiler, canAssign, CODE_CALL_0); return; } // Otherwise, look for a module-level variable with the name. variable.scope = SCOPE_MODULE; variable.index = wrenSymbolTableFind(&compiler->parser->module->variableNames, token->start, token->length); if (variable.index == -1) { if (isLocalName(token->start)) { error(compiler, "Undefined variable."); return; } // If it's a nonlocal name, implicitly define a module-level variable in // the hopes that we get a real definition later. variable.index = wrenDeclareVariable(compiler->parser->vm, compiler->parser->module, token->start, token->length, token->line); if (variable.index == -2) { error(compiler, "Too many module variables defined."); } } bareName(compiler, canAssign, variable); } static void null(Compiler* compiler, bool canAssign) { emitOp(compiler, CODE_NULL); } // A number or string literal. static void literal(Compiler* compiler, bool canAssign) { emitConstant(compiler, compiler->parser->previous.value); } // A string literal that contains interpolated expressions. // // Interpolation is syntactic sugar for calling ".join()" on a list. So the // string: // // "a %(b + c) d" // // is compiled roughly like: // // ["a ", b + c, " d"].join() static void stringInterpolation(Compiler* compiler, bool canAssign) { // Instantiate a new list. loadCoreVariable(compiler, "List"); callMethod(compiler, 0, "new()", 5); do { // The opening string part. literal(compiler, false); callMethod(compiler, 1, "addCore_(_)", 11); // The interpolated expression. 
ignoreNewlines(compiler); expression(compiler); callMethod(compiler, 1, "addCore_(_)", 11); ignoreNewlines(compiler); } while (match(compiler, TOKEN_INTERPOLATION)); // The trailing string part. consume(compiler, TOKEN_STRING, "Expect end of string interpolation."); literal(compiler, false); callMethod(compiler, 1, "addCore_(_)", 11); // The list of interpolated parts. callMethod(compiler, 0, "join()", 6); } static void super_(Compiler* compiler, bool canAssign) { ClassInfo* enclosingClass = getEnclosingClass(compiler); if (enclosingClass == NULL) { error(compiler, "Cannot use 'super' outside of a method."); } loadThis(compiler); // TODO: Super operator calls. // TODO: There's no syntax for invoking a superclass constructor with a // different name from the enclosing one. Figure that out. // See if it's a named super call, or an unnamed one. if (match(compiler, TOKEN_DOT)) { // Compile the superclass call. consume(compiler, TOKEN_NAME, "Expect method name after 'super.'."); namedCall(compiler, canAssign, CODE_SUPER_0); } else if (enclosingClass != NULL) { // No explicit name, so use the name of the enclosing method. Make sure we // check that enclosingClass isn't NULL first. We've already reported the // error, but we don't want to crash here. methodCall(compiler, CODE_SUPER_0, enclosingClass->signature); } } static void this_(Compiler* compiler, bool canAssign) { if (getEnclosingClass(compiler) == NULL) { error(compiler, "Cannot use 'this' outside of a method."); return; } loadThis(compiler); } // Subscript or "array indexing" operator like `foo[bar]`. static void subscript(Compiler* compiler, bool canAssign) { Signature signature = { "", 0, SIG_SUBSCRIPT, 0 }; // Parse the argument list. finishArgumentList(compiler, &signature); consume(compiler, TOKEN_RIGHT_BRACKET, "Expect ']' after arguments."); if (canAssign && match(compiler, TOKEN_EQ)) { signature.type = SIG_SUBSCRIPT_SETTER; // Compile the assigned value. 
validateNumParameters(compiler, ++signature.arity); expression(compiler); } callSignature(compiler, CODE_CALL_0, &signature); } static void call(Compiler* compiler, bool canAssign) { ignoreNewlines(compiler); consume(compiler, TOKEN_NAME, "Expect method name after '.'."); namedCall(compiler, canAssign, CODE_CALL_0); } static void and_(Compiler* compiler, bool canAssign) { ignoreNewlines(compiler); // Skip the right argument if the left is false. int jump = emitJump(compiler, CODE_AND); parsePrecedence(compiler, PREC_LOGICAL_AND); patchJump(compiler, jump); } static void or_(Compiler* compiler, bool canAssign) { ignoreNewlines(compiler); // Skip the right argument if the left is true. int jump = emitJump(compiler, CODE_OR); parsePrecedence(compiler, PREC_LOGICAL_OR); patchJump(compiler, jump); } static void conditional(Compiler* compiler, bool canAssign) { // Ignore newline after '?'. ignoreNewlines(compiler); // Jump to the else branch if the condition is false. int ifJump = emitJump(compiler, CODE_JUMP_IF); // Compile the then branch. parsePrecedence(compiler, PREC_CONDITIONAL); consume(compiler, TOKEN_COLON, "Expect ':' after then branch of conditional operator."); ignoreNewlines(compiler); // Jump over the else branch when the if branch is taken. int elseJump = emitJump(compiler, CODE_JUMP); // Compile the else branch. patchJump(compiler, ifJump); parsePrecedence(compiler, PREC_ASSIGNMENT); // Patch the jump over the else. patchJump(compiler, elseJump); } void infixOp(Compiler* compiler, bool canAssign) { GrammarRule* rule = getRule(compiler->parser->previous.type); // An infix operator cannot end an expression. ignoreNewlines(compiler); // Compile the right-hand side. parsePrecedence(compiler, (Precedence)(rule->precedence + 1)); // Call the operator method on the left-hand side. Signature signature = { rule->name, (int)strlen(rule->name), SIG_METHOD, 1 }; callSignature(compiler, CODE_CALL_0, &signature); } // Compiles a method signature for an infix operator. 
void infixSignature(Compiler* compiler, Signature* signature)
{
  // An infix operator is a method taking exactly one parameter: the RHS.
  signature->type = SIG_METHOD;
  signature->arity = 1;

  // "(" name ")"
  consume(compiler, TOKEN_LEFT_PAREN, "Expect '(' after operator name.");
  declareNamedVariable(compiler);
  consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after parameter name.");
}

// Compiles a method signature for an unary operator (i.e. "!").
void unarySignature(Compiler* compiler, Signature* signature)
{
  // Nothing to parse: the name is already complete, so only the type is set.
  signature->type = SIG_GETTER;
}

// Compiles a method signature for an operator that can either be unary or
// infix (i.e. "-").
void mixedSignature(Compiler* compiler, Signature* signature)
{
  // Start out assuming the unary form.
  signature->type = SIG_GETTER;

  // No parameter list means it really is unary and there is nothing more to do.
  if (!match(compiler, TOKEN_LEFT_PAREN)) return;

  // Otherwise it is the infix form, with the RHS as its single parameter.
  signature->type = SIG_METHOD;
  signature->arity = 1;

  // name ")"
  declareNamedVariable(compiler);
  consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after parameter name.");
}

// Compiles an optional setter parameter in a method [signature].
//
// Returns `true` if it was a setter.
static bool maybeSetter(Compiler* compiler, Signature* signature)
{
  // A setter is introduced by '='. Without one there is nothing to do.
  if (!match(compiler, TOKEN_EQ)) return false;

  // Subscripts get their own setter type; everything else is a plain setter.
  signature->type = (signature->type == SIG_SUBSCRIPT) ? SIG_SUBSCRIPT_SETTER
                                                       : SIG_SETTER;

  // "(" value ")"
  consume(compiler, TOKEN_LEFT_PAREN, "Expect '(' after '='.");
  declareNamedVariable(compiler);
  consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after parameter name.");

  // The assigned value counts as one more parameter.
  signature->arity++;

  return true;
}

// Compiles a method signature for a subscript operator.
void subscriptSignature(Compiler* compiler, Signature* signature)
{
  signature->type = SIG_SUBSCRIPT;

  // The signature currently has "[" as its name since that was the token that
  // matched it. Clear that out.
  signature->length = 0;

  // Parameters between the brackets.
  finishParameterList(compiler, signature);
  consume(compiler, TOKEN_RIGHT_BRACKET, "Expect ']' after parameters.");

  // A subscript may also be a setter: `[a, b] = (value)`.
  maybeSetter(compiler, signature);
}

// Parses an optional parenthesized parameter list. Updates `type` and `arity`
// in [signature] to match what was parsed.
static void parameterList(Compiler* compiler, Signature* signature)
{
  // Only a '(' starts a parameter list; anything else leaves the signature
  // untouched.
  if (match(compiler, TOKEN_LEFT_PAREN))
  {
    signature->type = SIG_METHOD;

    // "()" is a valid, empty parameter list.
    if (!match(compiler, TOKEN_RIGHT_PAREN))
    {
      finishParameterList(compiler, signature);
      consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after parameters.");
    }
  }
}

// Compiles a method signature for a named method or setter.
void namedSignature(Compiler* compiler, Signature* signature)
{
  signature->type = SIG_GETTER;

  // A setter cannot also have a parameter list, so the list is only
  // attempted when no setter was parsed.
  if (!maybeSetter(compiler, signature))
  {
    parameterList(compiler, signature);
  }
}

// Compiles a method signature for a constructor.
void constructorSignature(Compiler* compiler, Signature* signature)
{
  consume(compiler, TOKEN_NAME, "Expect constructor name after 'construct'.");

  // The signature is rebuilt from the name token just consumed.
  *signature = signatureFromToken(compiler, SIG_INITIALIZER);

  if (match(compiler, TOKEN_EQ))
  {
    error(compiler, "A constructor cannot be a setter.");
  }

  if (match(compiler, TOKEN_LEFT_PAREN))
  {
    // "()" is a valid, empty parameter list.
    if (!match(compiler, TOKEN_RIGHT_PAREN))
    {
      finishParameterList(compiler, signature);
      consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after parameters.");
    }
  }
  else
  {
    error(compiler, "A constructor cannot be a getter.");
  }
}

// This table defines all of the parsing rules for the prefix and infix
// expressions in the grammar. Expressions are parsed using a Pratt parser.
// // See: http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ #define UNUSED { NULL, NULL, NULL, PREC_NONE, NULL } #define PREFIX(fn) { fn, NULL, NULL, PREC_NONE, NULL } #define INFIX(prec, fn) { NULL, fn, NULL, prec, NULL } #define INFIX_OPERATOR(prec, name) { NULL, infixOp, infixSignature, prec, name } #define PREFIX_OPERATOR(name) { unaryOp, NULL, unarySignature, PREC_NONE, name } #define OPERATOR(name) { unaryOp, infixOp, mixedSignature, PREC_TERM, name } GrammarRule rules[] = { /* TOKEN_LEFT_PAREN */ PREFIX(grouping), /* TOKEN_RIGHT_PAREN */ UNUSED, /* TOKEN_LEFT_BRACKET */ { list, subscript, subscriptSignature, PREC_CALL, NULL }, /* TOKEN_RIGHT_BRACKET */ UNUSED, /* TOKEN_LEFT_BRACE */ PREFIX(map), /* TOKEN_RIGHT_BRACE */ UNUSED, /* TOKEN_COLON */ UNUSED, /* TOKEN_DOT */ INFIX(PREC_CALL, call), /* TOKEN_DOTDOT */ INFIX_OPERATOR(PREC_RANGE, ".."), /* TOKEN_DOTDOTDOT */ INFIX_OPERATOR(PREC_RANGE, "..."), /* TOKEN_COMMA */ UNUSED, /* TOKEN_STAR */ INFIX_OPERATOR(PREC_FACTOR, "*"), /* TOKEN_SLASH */ INFIX_OPERATOR(PREC_FACTOR, "/"), /* TOKEN_PERCENT */ INFIX_OPERATOR(PREC_FACTOR, "%"), /* TOKEN_PLUS */ INFIX_OPERATOR(PREC_TERM, "+"), /* TOKEN_MINUS */ OPERATOR("-"), /* TOKEN_LTLT */ INFIX_OPERATOR(PREC_BITWISE_SHIFT, "<<"), /* TOKEN_GTGT */ INFIX_OPERATOR(PREC_BITWISE_SHIFT, ">>"), /* TOKEN_PIPE */ INFIX_OPERATOR(PREC_BITWISE_OR, "|"), /* TOKEN_PIPEPIPE */ INFIX(PREC_LOGICAL_OR, or_), /* TOKEN_CARET */ INFIX_OPERATOR(PREC_BITWISE_XOR, "^"), /* TOKEN_AMP */ INFIX_OPERATOR(PREC_BITWISE_AND, "&"), /* TOKEN_AMPAMP */ INFIX(PREC_LOGICAL_AND, and_), /* TOKEN_BANG */ PREFIX_OPERATOR("!"), /* TOKEN_TILDE */ PREFIX_OPERATOR("~"), /* TOKEN_QUESTION */ INFIX(PREC_ASSIGNMENT, conditional), /* TOKEN_EQ */ UNUSED, /* TOKEN_LT */ INFIX_OPERATOR(PREC_COMPARISON, "<"), /* TOKEN_GT */ INFIX_OPERATOR(PREC_COMPARISON, ">"), /* TOKEN_LTEQ */ INFIX_OPERATOR(PREC_COMPARISON, "<="), /* TOKEN_GTEQ */ INFIX_OPERATOR(PREC_COMPARISON, ">="), /* 
TOKEN_EQEQ */ INFIX_OPERATOR(PREC_EQUALITY, "=="), /* TOKEN_BANGEQ */ INFIX_OPERATOR(PREC_EQUALITY, "!="), /* TOKEN_BREAK */ UNUSED, /* TOKEN_CLASS */ UNUSED, /* TOKEN_CONSTRUCT */ { NULL, NULL, constructorSignature, PREC_NONE, NULL }, /* TOKEN_ELSE */ UNUSED, /* TOKEN_FALSE */ PREFIX(boolean), /* TOKEN_FOR */ UNUSED, /* TOKEN_FOREIGN */ UNUSED, /* TOKEN_IF */ UNUSED, /* TOKEN_IMPORT */ UNUSED, /* TOKEN_IN */ UNUSED, /* TOKEN_IS */ INFIX_OPERATOR(PREC_IS, "is"), /* TOKEN_NULL */ PREFIX(null), /* TOKEN_RETURN */ UNUSED, /* TOKEN_STATIC */ UNUSED, /* TOKEN_SUPER */ PREFIX(super_), /* TOKEN_THIS */ PREFIX(this_), /* TOKEN_TRUE */ PREFIX(boolean), /* TOKEN_VAR */ UNUSED, /* TOKEN_WHILE */ UNUSED, /* TOKEN_FIELD */ PREFIX(field), /* TOKEN_STATIC_FIELD */ PREFIX(staticField), /* TOKEN_NAME */ { name, NULL, namedSignature, PREC_NONE, NULL }, /* TOKEN_NUMBER */ PREFIX(literal), /* TOKEN_STRING */ PREFIX(literal), /* TOKEN_INTERPOLATION */ PREFIX(stringInterpolation), /* TOKEN_LINE */ UNUSED, /* TOKEN_ERROR */ UNUSED, /* TOKEN_EOF */ UNUSED }; // Gets the [GrammarRule] associated with tokens of [type]. static GrammarRule* getRule(TokenType type) { return &rules[type]; } // The main entrypoint for the top-down operator precedence parser. void parsePrecedence(Compiler* compiler, Precedence precedence) { nextToken(compiler->parser); GrammarFn prefix = rules[compiler->parser->previous.type].prefix; if (prefix == NULL) { error(compiler, "Expected expression."); return; } // Track if the precendence of the surrounding expression is low enough to // allow an assignment inside this one. We can't compile an assignment like // a normal expression because it requires us to handle the LHS specially -- // it needs to be an lvalue, not an rvalue. So, for each of the kinds of // expressions that are valid lvalues -- names, subscripts, fields, etc. -- // we pass in whether or not it appears in a context loose enough to allow // "=". 
If so, it will parse the "=" itself and handle it appropriately. bool canAssign = precedence <= PREC_CONDITIONAL; prefix(compiler, canAssign); while (precedence <= rules[compiler->parser->current.type].precedence) { nextToken(compiler->parser); GrammarFn infix = rules[compiler->parser->previous.type].infix; infix(compiler, canAssign); } } // Parses an expression. Unlike statements, expressions leave a resulting value // on the stack. void expression(Compiler* compiler) { parsePrecedence(compiler, PREC_LOWEST); } // Returns the number of arguments to the instruction at [ip] in [fn]'s // bytecode. static int getNumArguments(const uint8_t* bytecode, const Value* constants, int ip) { Code instruction = (Code)bytecode[ip]; switch (instruction) { case CODE_NULL: case CODE_FALSE: case CODE_TRUE: case CODE_POP: case CODE_CLOSE_UPVALUE: case CODE_RETURN: case CODE_END: case CODE_LOAD_LOCAL_0: case CODE_LOAD_LOCAL_1: case CODE_LOAD_LOCAL_2: case CODE_LOAD_LOCAL_3: case CODE_LOAD_LOCAL_4: case CODE_LOAD_LOCAL_5: case CODE_LOAD_LOCAL_6: case CODE_LOAD_LOCAL_7: case CODE_LOAD_LOCAL_8: case CODE_CONSTRUCT: case CODE_FOREIGN_CONSTRUCT: case CODE_FOREIGN_CLASS: case CODE_END_MODULE: return 0; case CODE_LOAD_LOCAL: case CODE_STORE_LOCAL: case CODE_LOAD_UPVALUE: case CODE_STORE_UPVALUE: case CODE_LOAD_FIELD_THIS: case CODE_STORE_FIELD_THIS: case CODE_LOAD_FIELD: case CODE_STORE_FIELD: case CODE_CLASS: return 1; case CODE_CONSTANT: case CODE_LOAD_MODULE_VAR: case CODE_STORE_MODULE_VAR: case CODE_CALL_0: case CODE_CALL_1: case CODE_CALL_2: case CODE_CALL_3: case CODE_CALL_4: case CODE_CALL_5: case CODE_CALL_6: case CODE_CALL_7: case CODE_CALL_8: case CODE_CALL_9: case CODE_CALL_10: case CODE_CALL_11: case CODE_CALL_12: case CODE_CALL_13: case CODE_CALL_14: case CODE_CALL_15: case CODE_CALL_16: case CODE_JUMP: case CODE_LOOP: case CODE_JUMP_IF: case CODE_AND: case CODE_OR: case CODE_METHOD_INSTANCE: case CODE_METHOD_STATIC: case CODE_IMPORT_MODULE: return 2; case CODE_SUPER_0: case 
CODE_SUPER_1: case CODE_SUPER_2: case CODE_SUPER_3: case CODE_SUPER_4: case CODE_SUPER_5: case CODE_SUPER_6: case CODE_SUPER_7: case CODE_SUPER_8: case CODE_SUPER_9: case CODE_SUPER_10: case CODE_SUPER_11: case CODE_SUPER_12: case CODE_SUPER_13: case CODE_SUPER_14: case CODE_SUPER_15: case CODE_SUPER_16: case CODE_IMPORT_VARIABLE: return 4; case CODE_CLOSURE: { int constant = (bytecode[ip + 1] << 8) | bytecode[ip + 2]; ObjFn* loadedFn = AS_FN(constants[constant]); // There are two bytes for the constant, then two for each upvalue. return 2 + (loadedFn->numUpvalues * 2); } } UNREACHABLE(); return 0; } // Marks the beginning of a loop. Keeps track of the current instruction so we // know what to loop back to at the end of the body. static void startLoop(Compiler* compiler, Loop* loop) { loop->enclosing = compiler->loop; loop->start = compiler->fn->code.count - 1; loop->scopeDepth = compiler->scopeDepth; compiler->loop = loop; } // Emits the [CODE_JUMP_IF] instruction used to test the loop condition and // potentially exit the loop. Keeps track of the instruction so we can patch it // later once we know where the end of the body is. static void testExitLoop(Compiler* compiler) { compiler->loop->exitJump = emitJump(compiler, CODE_JUMP_IF); } // Compiles the body of the loop and tracks its extent so that contained "break" // statements can be handled correctly. static void loopBody(Compiler* compiler) { compiler->loop->body = compiler->fn->code.count; statement(compiler); } // Ends the current innermost loop. Patches up all jumps and breaks now that // we know where the end of the loop is. static void endLoop(Compiler* compiler) { // We don't check for overflow here since the forward jump over the loop body // will report an error for the same problem. 
int loopOffset = compiler->fn->code.count - compiler->loop->start + 2; emitShortArg(compiler, CODE_LOOP, loopOffset); patchJump(compiler, compiler->loop->exitJump); // Find any break placeholder instructions (which will be CODE_END in the // bytecode) and replace them with real jumps. int i = compiler->loop->body; while (i < compiler->fn->code.count) { if (compiler->fn->code.data[i] == CODE_END) { compiler->fn->code.data[i] = CODE_JUMP; patchJump(compiler, i + 1); i += 3; } else { // Skip this instruction and its arguments. i += 1 + getNumArguments(compiler->fn->code.data, compiler->fn->constants.data, i); } } compiler->loop = compiler->loop->enclosing; } static void forStatement(Compiler* compiler) { // A for statement like: // // for (i in sequence.expression) { // System.print(i) // } // // Is compiled to bytecode almost as if the source looked like this: // // { // var seq_ = sequence.expression // var iter_ // while (iter_ = seq_.iterate(iter_)) { // var i = seq_.iteratorValue(iter_) // System.print(i) // } // } // // It's not exactly this, because the synthetic variables `seq_` and `iter_` // actually get names that aren't valid Wren identfiers, but that's the basic // idea. // // The important parts are: // - The sequence expression is only evaluated once. // - The .iterate() method is used to advance the iterator and determine if // it should exit the loop. // - The .iteratorValue() method is used to get the value at the current // iterator position. // Create a scope for the hidden local variables used for the iterator. pushScope(compiler); consume(compiler, TOKEN_LEFT_PAREN, "Expect '(' after 'for'."); consume(compiler, TOKEN_NAME, "Expect for loop variable name."); // Remember the name of the loop variable. 
const char* name = compiler->parser->previous.start; int length = compiler->parser->previous.length; consume(compiler, TOKEN_IN, "Expect 'in' after loop variable."); ignoreNewlines(compiler); // Evaluate the sequence expression and store it in a hidden local variable. // The space in the variable name ensures it won't collide with a user-defined // variable. expression(compiler); // Verify that there is space to hidden local variables. // Note that we expect only two addLocal calls next to each other in the // following code. if (compiler->numLocals + 2 > MAX_LOCALS) { error(compiler, "Cannot declare more than %d variables in one scope. (Not enough space for for-loops internal variables)", MAX_LOCALS); return; } int seqSlot = addLocal(compiler, "seq ", 4); // Create another hidden local for the iterator object. null(compiler, false); int iterSlot = addLocal(compiler, "iter ", 5); consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after loop expression."); Loop loop; startLoop(compiler, &loop); // Advance the iterator by calling the ".iterate" method on the sequence. loadLocal(compiler, seqSlot); loadLocal(compiler, iterSlot); // Update and test the iterator. callMethod(compiler, 1, "iterate(_)", 10); emitByteArg(compiler, CODE_STORE_LOCAL, iterSlot); testExitLoop(compiler); // Get the current value in the sequence by calling ".iteratorValue". loadLocal(compiler, seqSlot); loadLocal(compiler, iterSlot); callMethod(compiler, 1, "iteratorValue(_)", 16); // Bind the loop variable in its own scope. This ensures we get a fresh // variable each iteration so that closures for it don't all see the same one. pushScope(compiler); addLocal(compiler, name, length); loopBody(compiler); // Loop variable. popScope(compiler); endLoop(compiler); // Hidden variables. popScope(compiler); } static void ifStatement(Compiler* compiler) { // Compile the condition. 
consume(compiler, TOKEN_LEFT_PAREN, "Expect '(' after 'if'."); expression(compiler); consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after if condition."); // Jump to the else branch if the condition is false. int ifJump = emitJump(compiler, CODE_JUMP_IF); // Compile the then branch. statement(compiler); // Compile the else branch if there is one. if (match(compiler, TOKEN_ELSE)) { // Jump over the else branch when the if branch is taken. int elseJump = emitJump(compiler, CODE_JUMP); patchJump(compiler, ifJump); statement(compiler); // Patch the jump over the else. patchJump(compiler, elseJump); } else { patchJump(compiler, ifJump); } } static void whileStatement(Compiler* compiler) { Loop loop; startLoop(compiler, &loop); // Compile the condition. consume(compiler, TOKEN_LEFT_PAREN, "Expect '(' after 'while'."); expression(compiler); consume(compiler, TOKEN_RIGHT_PAREN, "Expect ')' after while condition."); testExitLoop(compiler); loopBody(compiler); endLoop(compiler); } // Compiles a simple statement. These can only appear at the top-level or // within curly blocks. Simple statements exclude variable binding statements // like "var" and "class" which are not allowed directly in places like the // branches of an "if" statement. // // Unlike expressions, statements do not leave a value on the stack. void statement(Compiler* compiler) { if (match(compiler, TOKEN_BREAK)) { if (compiler->loop == NULL) { error(compiler, "Cannot use 'break' outside of a loop."); return; } // Since we will be jumping out of the scope, make sure any locals in it // are discarded first. discardLocals(compiler, compiler->loop->scopeDepth + 1); // Emit a placeholder instruction for the jump to the end of the body. When // we're done compiling the loop body and know where the end is, we'll // replace these with `CODE_JUMP` instructions with appropriate offsets. // We use `CODE_END` here because that can't occur in the middle of // bytecode. 
emitJump(compiler, CODE_END); } else if (match(compiler, TOKEN_FOR)) { forStatement(compiler); } else if (match(compiler, TOKEN_IF)) { ifStatement(compiler); } else if (match(compiler, TOKEN_RETURN)) { // Compile the return value. if (peek(compiler) == TOKEN_LINE) { // Implicitly return null if there is no value. emitOp(compiler, CODE_NULL); } else { expression(compiler); } emitOp(compiler, CODE_RETURN); } else if (match(compiler, TOKEN_WHILE)) { whileStatement(compiler); } else if (match(compiler, TOKEN_LEFT_BRACE)) { // Block statement. pushScope(compiler); if (finishBlock(compiler)) { // Block was an expression, so discard it. emitOp(compiler, CODE_POP); } popScope(compiler); } else { // Expression statement. expression(compiler); emitOp(compiler, CODE_POP); } } // Creates a matching constructor method for an initializer with [signature] // and [initializerSymbol]. // // Construction is a two-stage process in Wren that involves two separate // methods. There is a static method that allocates a new instance of the class. // It then invokes an initializer method on the new instance, forwarding all of // the constructor arguments to it. // // The allocator method always has a fixed implementation: // // CODE_CONSTRUCT - Replace the class in slot 0 with a new instance of it. // CODE_CALL - Invoke the initializer on the new instance. // // This creates that method and calls the initializer with [initializerSymbol]. static void createConstructor(Compiler* compiler, Signature* signature, int initializerSymbol) { Compiler methodCompiler; initCompiler(&methodCompiler, compiler->parser, compiler, true); // Allocate the instance. emitOp(&methodCompiler, compiler->enclosingClass->isForeign ? CODE_FOREIGN_CONSTRUCT : CODE_CONSTRUCT); // Run its initializer. emitShortArg(&methodCompiler, (Code)(CODE_CALL_0 + signature->arity), initializerSymbol); // Return the instance. 
emitOp(&methodCompiler, CODE_RETURN); endCompiler(&methodCompiler, "", 0); } // Loads the enclosing class onto the stack and then binds the function already // on the stack as a method on that class. static void defineMethod(Compiler* compiler, Variable classVariable, bool isStatic, int methodSymbol) { // Load the class. We have to do this for each method because we can't // keep the class on top of the stack. If there are static fields, they // will be locals above the initial variable slot for the class on the // stack. To skip past those, we just load the class each time right before // defining a method. loadVariable(compiler, classVariable); // Define the method. Code instruction = isStatic ? CODE_METHOD_STATIC : CODE_METHOD_INSTANCE; emitShortArg(compiler, instruction, methodSymbol); } // Declares a method in the enclosing class with [signature]. // // Reports an error if a method with that signature is already declared. // Returns the symbol for the method. static int declareMethod(Compiler* compiler, Signature* signature, const char* name, int length) { int symbol = signatureSymbol(compiler, signature); // See if the class has already declared method with this signature. ClassInfo* classInfo = compiler->enclosingClass; IntBuffer* methods = classInfo->inStatic ? &classInfo->staticMethods : &classInfo->methods; for (int i = 0; i < methods->count; i++) { if (methods->data[i] == symbol) { const char* staticPrefix = classInfo->inStatic ? "static " : ""; error(compiler, "Class %s already defines a %smethod '%s'.", &compiler->enclosingClass->name->value, staticPrefix, name); break; } } wrenIntBufferWrite(compiler->parser->vm, methods, symbol); return symbol; } // Compiles a method definition inside a class body. // // Returns `true` if it compiled successfully, or `false` if the method couldn't // be parsed. static bool method(Compiler* compiler, Variable classVariable) { // TODO: What about foreign constructors? 
bool isForeign = match(compiler, TOKEN_FOREIGN); bool isStatic = match(compiler, TOKEN_STATIC); compiler->enclosingClass->inStatic = isStatic; SignatureFn signatureFn = rules[compiler->parser->current.type].method; nextToken(compiler->parser); if (signatureFn == NULL) { error(compiler, "Expect method definition."); return false; } // Build the method signature. Signature signature = signatureFromToken(compiler, SIG_GETTER); compiler->enclosingClass->signature = &signature; Compiler methodCompiler; initCompiler(&methodCompiler, compiler->parser, compiler, true); // Compile the method signature. signatureFn(&methodCompiler, &signature); if (isStatic && signature.type == SIG_INITIALIZER) { error(compiler, "A constructor cannot be static."); } // Include the full signature in debug messages in stack traces. char fullSignature[MAX_METHOD_SIGNATURE]; int length; signatureToString(&signature, fullSignature, &length); // Check for duplicate methods. Doesn't matter that it's already been // defined, error will discard bytecode anyway. // Check if the method table already contains this symbol int methodSymbol = declareMethod(compiler, &signature, fullSignature, length); if (isForeign) { // Define a constant for the signature. emitConstant(compiler, wrenNewStringLength(compiler->parser->vm, fullSignature, length)); // We don't need the function we started compiling in the parameter list // any more. methodCompiler.parser->vm->compiler = methodCompiler.parent; } else { consume(compiler, TOKEN_LEFT_BRACE, "Expect '{' to begin method body."); finishBody(&methodCompiler, signature.type == SIG_INITIALIZER); endCompiler(&methodCompiler, fullSignature, length); } // Define the method. For a constructor, this defines the instance // initializer method. defineMethod(compiler, classVariable, isStatic, methodSymbol); if (signature.type == SIG_INITIALIZER) { // Also define a matching constructor method on the metaclass. 
signature.type = SIG_METHOD; int constructorSymbol = signatureSymbol(compiler, &signature); createConstructor(compiler, &signature, methodSymbol); defineMethod(compiler, classVariable, true, constructorSymbol); } return true; } // Compiles a class definition. Assumes the "class" token has already been // consumed (along with a possibly preceding "foreign" token). static void classDefinition(Compiler* compiler, bool isForeign) { // Create a variable to store the class in. Variable classVariable; classVariable.scope = compiler->scopeDepth == -1 ? SCOPE_MODULE : SCOPE_LOCAL; classVariable.index = declareNamedVariable(compiler); // Create shared class name value Value classNameString = wrenNewStringLength(compiler->parser->vm, compiler->parser->previous.start, compiler->parser->previous.length); // Create class name string to track method duplicates ObjString* className = AS_STRING(classNameString); // Make a string constant for the name. emitConstant(compiler, classNameString); // Load the superclass (if there is one). if (match(compiler, TOKEN_IS)) { parsePrecedence(compiler, PREC_CALL); } else { // Implicitly inherit from Object. loadCoreVariable(compiler, "Object"); } // Store a placeholder for the number of fields argument. We don't know the // count until we've compiled all the methods to see which fields are used. int numFieldsInstruction = -1; if (isForeign) { emitOp(compiler, CODE_FOREIGN_CLASS); } else { numFieldsInstruction = emitByteArg(compiler, CODE_CLASS, 255); } // Store it in its name. defineVariable(compiler, classVariable.index); // Push a local variable scope. Static fields in a class body are hoisted out // into local variables declared in this scope. Methods that use them will // have upvalues referencing them. pushScope(compiler); ClassInfo classInfo; classInfo.isForeign = isForeign; classInfo.name = className; // Set up a symbol table for the class's fields. We'll initially compile // them to slots starting at zero. 
When the method is bound to the class, the // bytecode will be adjusted by [wrenBindMethod] to take inherited fields // into account. wrenSymbolTableInit(&classInfo.fields); // Set up symbol buffers to track duplicate static and instance methods. wrenIntBufferInit(&classInfo.methods); wrenIntBufferInit(&classInfo.staticMethods); compiler->enclosingClass = &classInfo; // Compile the method definitions. consume(compiler, TOKEN_LEFT_BRACE, "Expect '{' after class declaration."); matchLine(compiler); while (!match(compiler, TOKEN_RIGHT_BRACE)) { if (!method(compiler, classVariable)) break; // Don't require a newline after the last definition. if (match(compiler, TOKEN_RIGHT_BRACE)) break; consumeLine(compiler, "Expect newline after definition in class."); } // Update the class with the number of fields. if (!isForeign) { compiler->fn->code.data[numFieldsInstruction] = (uint8_t)classInfo.fields.count; } // Clear symbol tables for tracking field and method names. wrenSymbolTableClear(compiler->parser->vm, &classInfo.fields); wrenIntBufferClear(compiler->parser->vm, &classInfo.methods); wrenIntBufferClear(compiler->parser->vm, &classInfo.staticMethods); compiler->enclosingClass = NULL; popScope(compiler); } // Compiles an "import" statement. // // An import compiles to a series of instructions. Given: // // import "foo" for Bar, Baz // // We compile a single IMPORT_MODULE "foo" instruction to load the module // itself. When that finishes executing the imported module, it leaves the // ObjModule in vm->lastModule. Then, for Bar and Baz, we: // // * Declare a variable in the current scope with that name. // * Emit an IMPORT_VARIABLE instruction to load the variable's value from the // other module. // * Compile the code to store that value in the variable in this scope. 
static void import(Compiler* compiler)
{
  ignoreNewlines(compiler);
  consume(compiler, TOKEN_STRING, "Expect a string after 'import'.");

  // The module name is the value of the string token just consumed.
  int moduleNameConstant =
      addConstant(compiler, compiler->parser->previous.value);

  // Load the module.
  emitShortArg(compiler, CODE_IMPORT_MODULE, moduleNameConstant);

  // Discard the unused result value from calling the module body's closure.
  emitOp(compiler, CODE_POP);

  // Without a "for" clause there are no variables to bring in.
  if (!match(compiler, TOKEN_FOR)) return;

  // One iteration per name in the comma-separated list.
  do
  {
    ignoreNewlines(compiler);
    int localSlot = declareNamedVariable(compiler);

    // The name token just declared doubles as the name to look up in the
    // other module, so store it as a string constant.
    Value importedName = wrenNewStringLength(compiler->parser->vm,
                                             compiler->parser->previous.start,
                                             compiler->parser->previous.length);
    int importedNameConstant = addConstant(compiler, importedName);

    // Load the variable from the other module.
    emitShortArg(compiler, CODE_IMPORT_VARIABLE, importedNameConstant);

    // Store the result in the variable here.
    defineVariable(compiler, localSlot);
  } while (match(compiler, TOKEN_COMMA));
}

// Compiles a "var" variable definition statement.
static void variableDefinition(Compiler* compiler)
{
  // Only remember the name for now. A (local) variable shouldn't be in scope
  // in its own initializer, so it is declared after the initializer.
  consume(compiler, TOKEN_NAME, "Expect variable name.");
  Token declaredName = compiler->parser->previous;

  if (!match(compiler, TOKEN_EQ))
  {
    // No initializer: the variable starts out as null.
    null(compiler, false);
  }
  else
  {
    // Compile the initializer expression.
    ignoreNewlines(compiler);
    expression(compiler);
  }

  // Now put it in scope.
  int variable = declareVariable(compiler, &declaredName);
  defineVariable(compiler, variable);
}

// Compiles a "definition". These are the statements that bind new variables.
// They can only appear at the top level of a block and are prohibited in places
// like the non-curly body of an if or while.
void definition(Compiler* compiler) { if (match(compiler, TOKEN_CLASS)) { classDefinition(compiler, false); } else if (match(compiler, TOKEN_FOREIGN)) { consume(compiler, TOKEN_CLASS, "Expect 'class' after 'foreign'."); classDefinition(compiler, true); } else if (match(compiler, TOKEN_IMPORT)) { import(compiler); } else if (match(compiler, TOKEN_VAR)) { variableDefinition(compiler); } else { statement(compiler); } } ObjFn* wrenCompile(WrenVM* vm, ObjModule* module, const char* source, bool isExpression, bool printErrors) { // Skip the UTF-8 BOM if there is one. if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3; Parser parser; parser.vm = vm; parser.module = module; parser.source = source; parser.tokenStart = source; parser.currentChar = source; parser.currentLine = 1; parser.numParens = 0; // Zero-init the current token. This will get copied to previous when // advance() is called below. parser.current.type = TOKEN_ERROR; parser.current.start = source; parser.current.length = 0; parser.current.line = 0; parser.current.value = UNDEFINED_VAL; // Ignore leading newlines. parser.skipNewlines = true; parser.printErrors = printErrors; parser.hasError = false; // Read the first token. nextToken(&parser); int numExistingVariables = module->variables.count; Compiler compiler; initCompiler(&compiler, &parser, NULL, false); ignoreNewlines(&compiler); if (isExpression) { expression(&compiler); consume(&compiler, TOKEN_EOF, "Expect end of expression."); } else { while (!match(&compiler, TOKEN_EOF)) { definition(&compiler); // If there is no newline, it must be the end of file on the same line. if (!matchLine(&compiler)) { consume(&compiler, TOKEN_EOF, "Expect end of file."); break; } } emitOp(&compiler, CODE_END_MODULE); } emitOp(&compiler, CODE_RETURN); // See if there are any implicitly declared module-level variables that never // got an explicit definition. They will have values that are numbers // indicating the line where the variable was first used. 
for (int i = numExistingVariables; i < parser.module->variables.count; i++) { if (IS_NUM(parser.module->variables.data[i])) { // Synthesize a token for the original use site. parser.previous.type = TOKEN_NAME; parser.previous.start = parser.module->variableNames.data[i]->value; parser.previous.length = parser.module->variableNames.data[i]->length; parser.previous.line = (int)AS_NUM(parser.module->variables.data[i]); error(&compiler, "Variable is used but not defined."); } } return endCompiler(&compiler, "(script)", 8); } void wrenBindMethodCode(ObjClass* classObj, ObjFn* fn) { int ip = 0; for (;;) { Code instruction = (Code)fn->code.data[ip]; switch (instruction) { case CODE_LOAD_FIELD: case CODE_STORE_FIELD: case CODE_LOAD_FIELD_THIS: case CODE_STORE_FIELD_THIS: // Shift this class's fields down past the inherited ones. We don't // check for overflow here because we'll see if the number of fields // overflows when the subclass is created. fn->code.data[ip + 1] += classObj->superclass->numFields; break; case CODE_SUPER_0: case CODE_SUPER_1: case CODE_SUPER_2: case CODE_SUPER_3: case CODE_SUPER_4: case CODE_SUPER_5: case CODE_SUPER_6: case CODE_SUPER_7: case CODE_SUPER_8: case CODE_SUPER_9: case CODE_SUPER_10: case CODE_SUPER_11: case CODE_SUPER_12: case CODE_SUPER_13: case CODE_SUPER_14: case CODE_SUPER_15: case CODE_SUPER_16: { // Fill in the constant slot with a reference to the superclass. int constant = (fn->code.data[ip + 3] << 8) | fn->code.data[ip + 4]; fn->constants.data[constant] = OBJ_VAL(classObj->superclass); break; } case CODE_CLOSURE: { // Bind the nested closure too. int constant = (fn->code.data[ip + 1] << 8) | fn->code.data[ip + 2]; wrenBindMethodCode(classObj, AS_FN(fn->constants.data[constant])); break; } case CODE_END: return; default: // Other instructions are unaffected, so just skip over them. 
break; } ip += 1 + getNumArguments(fn->code.data, fn->constants.data, ip); } } void wrenMarkCompiler(WrenVM* vm, Compiler* compiler) { wrenGrayValue(vm, compiler->parser->current.value); wrenGrayValue(vm, compiler->parser->previous.value); // Walk up the parent chain to mark the outer compilers too. The VM only // tracks the innermost one. do { wrenGrayObj(vm, (Obj*)compiler->fn); wrenGrayObj(vm, (Obj*)compiler->constants); if (compiler->enclosingClass != NULL) { wrenBlackenSymbolTable(vm, &compiler->enclosingClass->fields); } compiler = compiler->parent; } while (compiler != NULL); }