finish first pass of scanner.

This commit is contained in:
publicmatt 2024-05-03 11:14:50 -07:00
parent c80c8248a2
commit 9b0ad58e71
7 changed files with 713 additions and 39 deletions

244
Cargo.lock generated
View File

@ -2,6 +2,250 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "anstream"
version = "0.6.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b"
[[package]]
name = "anstyle-parse"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a64c907d4e79225ac72e2a354c9ce84d50ebb4586dee56c82b3ee73004f537f5"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19"
dependencies = [
"anstyle",
"windows-sys",
]
[[package]]
name = "clap"
version = "4.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0"
dependencies = [
"clap_builder",
]
[[package]]
name = "clap_builder"
version = "4.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_lex"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce"
[[package]]
name = "colorchoice"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422"
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "is_terminal_polyfill"
version = "1.70.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800"
[[package]]
name = "mox"
version = "0.1.0"
dependencies = [
"clap",
"strum",
"strum_macros",
]
[[package]]
name = "proc-macro2"
version = "1.0.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rustversion"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80af6f9131f277a45a3fba6ce8e2258037bb0477a67e610d3c1fe046ab31de47"
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "strum"
version = "0.26.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29"
[[package]]
name = "strum_macros"
version = "0.26.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn",
]
[[package]]
name = "syn"
version = "2.0.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "utf8parse"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
[[package]]
name = "windows_i686_gnu"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
[[package]]
name = "windows_i686_msvc"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"

View File

@ -6,3 +6,6 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
clap = "4.5.4"
strum = "0.26"
strum_macros = "0.26"

26
src/compiler.rs Normal file
View File

@ -0,0 +1,26 @@
use crate::scanner::Scanner;
use crate::scanner::TokenType;
pub fn compile(source: &str) {
let mut scanner: Scanner = Scanner::new(source);
let mut line = 0;
loop {
let token = scanner.scan_token();
if token.line != line {
print!("{:4}", token.line);
line = token.line;
} else {
print!(" |");
}
println!(
":{:<3} {:20} {:20}",
token.start, token.token_type, token.lexeme
);
if let TokenType::TokenEof = token.token_type {
break;
}
}
}

View File

@ -1,13 +1,14 @@
pub mod compiler;
pub mod debug;
pub mod scanner;
pub mod value;
pub mod vm;
use std::fmt;
use std::u16;
use value::Value;
#[derive(Debug, PartialEq, Eq)]
#[derive(Debug, PartialEq, Eq, strum_macros::Display)]
pub enum OptCode {
OpReturn,
OpConstant,
@ -37,23 +38,6 @@ impl From<OptCode> for u8 {
}
}
}
// Hand-written `Display` for `OptCode`: writes a fixed name for each variant.
// Note that the `OpSubstract` variant is rendered as "OpSubtract", so the
// printed text differs from the variant's own spelling.
// NOTE(review): the derive list shown for `OptCode` in this listing also
// includes `strum_macros::Display`; this impl looks like the one the commit
// removes in favour of that derive — confirm before keeping both.
impl fmt::Display for OptCode {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"{}",
match self {
OptCode::OpConstant => "OpConstant",
OptCode::OpNegate => "OpNegate",
OptCode::OpReturn => "OpReturn",
OptCode::OpAdd => "OpAdd",
OptCode::OpSubstract => "OpSubtract",
OptCode::OpMultiply => "OpMultiply",
OptCode::OpDivide => "OpDivide",
}
)
}
}
#[derive(Debug)]
pub struct ConversionError {
invalid_value: u8,

View File

@ -1,22 +1,87 @@
use mox::vm::{InterpretResult, VM};
use mox::Chunk;
use mox::OptCode;
use clap::{Arg, ArgAction, Command};
use mox::vm::InterpretResult;
use mox::vm::VM;
use std::fs;
use std::io;
use std::io::Write;
use std::process;
use std::process::exit;
// Entry point: builds the `mox` command-line definition (`-f/--file` and
// `-c/--command`), parses the arguments, and dispatches to `run_content`,
// `run_file`, or the interactive `repl`.
// NOTE(review): this listing also contains statements that build and run a
// hard-coded `Chunk`; they look like the previous body of `main` carried along
// by the diff view — confirm which lines belong to the current version.
fn main() {
let mut chunk = Chunk::new();
// Command-line definition: both options take a single value.
let app = Command::new("mox")
.version("1.0")
.author("publicmatt")
.about("mox interpreter!!")
.arg(
Arg::new("file")
.short('f')
.long("file")
.action(ArgAction::Set)
.value_name("FILE")
.help("file to run"),
)
.arg(
Arg::new("command")
.short('c')
.long("command")
.action(ArgAction::Set)
.value_name("COMMAND")
.help("command to run"),
);
let constant_idx: u8 = chunk.write_value(1.2);
chunk.write(OptCode::OpConstant.into(), 123);
chunk.write(constant_idx, 123);
let matches = app.get_matches();
let constant_idx: u8 = chunk.write_value(5.2);
chunk.write(OptCode::OpConstant.into(), 125);
chunk.write(constant_idx, 125);
chunk.write(OptCode::OpAdd.into(), 124);
chunk.write(OptCode::OpReturn.into(), 126);
let mut vm: VM = VM::new();
let _: InterpretResult = vm.interpret(&chunk);
// `command` is checked first, so it wins when both options are supplied;
// with neither option the REPL is started.
if let Some(command) = matches.get_one::<String>("command") {
run_content(command);
} else if let Some(file) = matches.get_one::<String>("file") {
run_file(file);
} else {
repl();
}
}
/// Interactive read-eval-print loop.
///
/// Prompts with `> `, reads one line from stdin and hands it to
/// `VM::interpret`. The loop ends on end-of-input, when the trimmed line is
/// `exit`, or when reading from stdin fails with an error other than an
/// invalid-UTF-8 line.
///
/// # Panics
///
/// Panics if stdout cannot be flushed.
fn repl() {
    let mut input = String::new();
    let mut vm: VM = VM::new();
    loop {
        input.clear();
        print!("> ");
        // The prompt has no trailing newline, so flush to make it visible
        // before blocking on stdin.
        io::stdout().flush().unwrap();
        match io::stdin().read_line(&mut input) {
            Ok(bytes) => {
                // Zero bytes read means stdin reached end-of-input.
                if bytes == 0 || input.trim() == "exit" {
                    println!("Bye!");
                    break;
                }
                vm.interpret(&input);
            }
            // A line that is not valid UTF-8 is reported and skipped; the
            // next iteration reads the following line.
            Err(error) if error.kind() == io::ErrorKind::InvalidData => {
                eprintln!("Invalid input: {}", error);
            }
            // Any other failure is unlikely to clear up on retry; stop
            // instead of spinning on the prompt forever.
            Err(error) => {
                eprintln!("Error reading input: {}", error);
                break;
            }
        }
    }
}
/// Interprets `source` in a fresh VM and terminates the process.
///
/// Exit status: 0 on success, 65 on a compile error, 70 on a runtime error.
/// This function never returns to its caller.
fn run_content(source: &str) {
    let mut vm: VM = VM::new();
    let status = match vm.interpret(source) {
        InterpretResult::InterpretOk => 0,
        InterpretResult::InterpretCompileError => 65,
        InterpretResult::InterpretRuntimeError => 70,
    };
    exit(status)
}
/// Reads the file at `path`, interprets its contents in a fresh VM and
/// terminates the process.
///
/// Exit status: 74 if the file cannot be read (the error is printed to
/// stderr), otherwise 0 on success, 65 on a compile error and 70 on a runtime
/// error. This function never returns to its caller.
fn run_file(path: &str) {
    let mut vm: VM = VM::new();
    let content = match fs::read_to_string(path) {
        Ok(text) => text,
        Err(err) => {
            eprintln!("Error reading '{}': {}", path, err);
            process::exit(74);
        }
    };
    let status = match vm.interpret(&content) {
        InterpretResult::InterpretOk => 0,
        InterpretResult::InterpretCompileError => 65,
        InterpretResult::InterpretRuntimeError => 70,
    };
    exit(status)
}

347
src/scanner.rs Normal file
View File

@ -0,0 +1,347 @@
use std::fmt;
/// Tokenizer state over a borrowed source string; each call to `scan_token`
/// produces the next `Token`.
pub struct Scanner<'a> {
/// Offset in `source` where the token currently being scanned begins.
start: usize,
/// Offset in `source` of the next character to be consumed.
current: usize,
/// Current line number; starts at 1 and is incremented when a newline is seen.
line: u16,
/// The text being scanned.
source: &'a str,
}
/// Debug-style rendering of the scanner position:
/// `line:start..current 'pending lexeme'`.
impl<'a> fmt::Display for Scanner<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Text consumed so far for the token in progress.
        let pending = &self.source[self.start..self.current];
        write!(
            f,
            "{}:{}..{} '{}'",
            self.line, self.start, self.current, pending
        )
    }
}
impl<'a> Scanner<'a> {
pub fn new(source: &'a str) -> Self {
Scanner {
start: 0,
current: 0,
line: 1,
source,
}
}
pub fn scan_token(&mut self) -> Token {
self.skip_whitespace();
self.start = self.current;
if self.is_at_end() {
return self.make_token(TokenType::TokenEof);
}
let c: char = self.advance();
match c {
c if c.is_digit(10) => return self.make_number(),
c if c.is_alphabetic() || c == '_' => return self.make_identifier(),
'(' => return self.make_token(TokenType::TokenLeftParen),
')' => return self.make_token(TokenType::TokenRightParen),
'{' => return self.make_token(TokenType::TokenLeftBrace),
'}' => return self.make_token(TokenType::TokenRightBrace),
';' => return self.make_token(TokenType::TokenSemicolon),
',' => return self.make_token(TokenType::TokenComma),
'.' => return self.make_token(TokenType::TokenDot),
'-' => return self.make_token(TokenType::TokenMinus),
'+' => return self.make_token(TokenType::TokenPlus),
'/' => return self.make_token(TokenType::TokenSlash),
'*' => return self.make_token(TokenType::TokenStar),
'!' => {
if self.matches('=') {
return self.make_token(TokenType::TokenBangEqual);
} else {
return self.make_token(TokenType::TokenBang);
}
}
'=' => {
if self.matches('=') {
return self.make_token(TokenType::TokenEqualEqual);
} else {
return self.make_token(TokenType::TokenEqual);
}
}
'<' => {
if self.matches('=') {
return self.make_token(TokenType::TokenLessEqual);
} else {
return self.make_token(TokenType::TokenLess);
}
}
'>' => {
if self.matches('=') {
return self.make_token(TokenType::TokenGreaterEqual);
} else {
return self.make_token(TokenType::TokenGreater);
}
}
'"' => return self.make_string(),
_ => return self.make_error_token("Unexpected character."),
};
}
fn make_identifier(&mut self) -> Token {
while let Some(c) = self.peek() {
if c.is_alphabetic() || c == '_' || c.is_digit(10) {
self.advance();
} else {
break;
}
}
return self.make_token(self.identifier_type());
}
fn identifier_type(&self) -> TokenType {
let c = self.source.chars().nth(self.start);
match c {
Some('a') => return self.check_keyword(1, 2, "nd", TokenType::TokenAnd),
Some('c') if self.current - self.start > 1 => {
match self.source.chars().nth(self.start + 1) {
Some('l') => return self.check_keyword(2, 3, "ass", TokenType::TokenClass),
Some('a') => return self.check_keyword(2, 1, "p", TokenType::TokenFalse),
_ => panic!("bad keyword"),
}
}
Some('e') => return self.check_keyword(1, 3, "lse", TokenType::TokenElse),
Some('i') => return self.check_keyword(1, 1, "f", TokenType::TokenIf),
Some('n') if self.current - self.start > 1 => {
match self.source.chars().nth(self.start + 1) {
Some('i') => return self.check_keyword(2, 1, "l", TokenType::TokenNil),
Some('o') => return self.check_keyword(2, 0, "", TokenType::TokenFalse),
_ => panic!("bad keyword"),
}
}
Some('o') => return self.check_keyword(1, 1, "r", TokenType::TokenOr),
Some('p') => return self.check_keyword(1, 4, "rint", TokenType::TokenPrint),
Some('r') => return self.check_keyword(1, 5, "eturn", TokenType::TokenReturn),
Some('s') => return self.check_keyword(1, 4, "uper", TokenType::TokenSuper),
Some('v') => return self.check_keyword(1, 2, "ar", TokenType::TokenVar),
Some('w') => return self.check_keyword(1, 4, "hile", TokenType::TokenWhile),
Some('f') if self.current - self.start > 1 => {
match self.source.chars().nth(self.start + 1) {
Some('a') => return self.check_keyword(2, 3, "lse", TokenType::TokenFalse),
Some('o') => return self.check_keyword(2, 1, "r", TokenType::TokenFor),
Some('u') => return self.check_keyword(2, 1, "n", TokenType::TokenFun),
_ => panic!("bad keyword"),
}
}
Some('t') if self.current - self.start > 1 => {
match self.source.chars().nth(self.start + 1) {
Some('h') => return self.check_keyword(2, 2, "is", TokenType::TokenThis),
Some('r') => return self.check_keyword(2, 2, "ue", TokenType::TokenTrue),
_ => panic!("bad keyword"),
}
}
_ => return TokenType::TokenIdentifier,
};
}
fn check_keyword(
&self,
start: usize,
length: usize,
rest: &str,
token_type: TokenType,
) -> TokenType {
let end = self.start + start + length;
let s = &self.source[self.start + start..end];
let next = self.source.chars().nth(end);
if (self.current == end) && (s == rest) {
match next {
Some(n) if n.is_whitespace() => {
return token_type;
}
_ => return TokenType::TokenIdentifier,
}
} else {
return TokenType::TokenIdentifier;
}
}
fn make_string(&mut self) -> Token {
loop {
match self.peek() {
Some(c) => match c {
'\n' => {
self.line += 1;
continue;
}
'"' => {
self.advance();
return self.make_token(TokenType::TokenString);
}
_ => {
self.advance();
continue;
}
},
None => return self.make_error_token("Unterminated string."),
}
}
}
fn make_number(&mut self) -> Token {
while let Some(c) = self.peek() {
if c.is_digit(10) {
self.advance();
} else {
break;
}
}
match (self.peek(), self.peek_next()) {
(Some(c), Some(cc)) if c == '.' && cc.is_digit(10) => {
self.advance();
while let Some(c) = self.peek() {
if c.is_digit(10) {
self.advance();
} else {
break;
}
}
return self.make_token(TokenType::TokenNumber);
}
_ => {
return self.make_token(TokenType::TokenNumber);
}
}
}
fn peek(&self) -> Option<char> {
self.source.chars().nth(self.current)
}
fn peek_next(&self) -> Option<char> {
if self.is_at_end() {
return None;
}
return self.source.chars().nth(self.current + 1);
}
fn skip_whitespace(&mut self) {
loop {
let peek = self.peek();
match peek {
None => return,
Some(c) => match c {
' ' | '\r' | '\t' => {
self.advance();
continue;
}
'\n' => {
self.line += 1;
self.advance();
continue;
}
'/' => match self.peek_next() {
Some(c) if c == '/' => {
while let Some(_) = self.peek() {
if !self.is_at_end() {
self.advance();
}
}
}
_ => return,
},
_ => return,
},
}
}
}
fn is_at_end(&self) -> bool {
return self.current == self.source.len();
}
fn make_token(&self, token_type: TokenType) -> Token {
Token {
token_type,
start: self.start,
lexeme: self.source[self.start..self.current].to_string(),
line: self.line,
}
}
fn advance(&mut self) -> char {
self.current += 1;
match self.source.chars().nth(self.current - 1) {
Some(c) => c,
None => panic!(
"advance failed: fell off the end. {}:{}",
self.line, self.current
),
}
}
fn matches(&mut self, expected: char) -> bool {
match self.source.chars().nth(self.current) {
Some(c) => {
if c != expected {
return false;
}
self.current += 1;
return true;
}
None => return false,
}
}
fn make_error_token(&self, message: &str) -> Token {
Token {
token_type: TokenType::TokenError,
start: self.start,
lexeme: message.to_string(),
line: self.line,
}
}
}
/// A single token produced by `Scanner::scan_token`.
pub struct Token {
/// Kind of token.
pub token_type: TokenType,
/// Value of the scanner's `start` offset when the token was created.
pub start: usize,
/// Source text of the token; for `TokenError` tokens this holds the error
/// message instead.
pub lexeme: String,
/// Scanner line number at the time the token was created.
pub line: u16,
}
// No inherent methods are defined on `Token` yet.
impl Token {}
/// Kinds of token the scanner can produce. `Display` is derived through
/// `strum_macros`.
#[derive(Clone, Copy, strum_macros::Display)]
pub enum TokenType {
// Single-character tokens.
TokenLeftParen,
TokenRightParen,
TokenLeftBrace,
TokenRightBrace,
TokenComma,
TokenDot,
TokenMinus,
TokenPlus,
TokenSemicolon,
TokenSlash,
TokenStar,
// One or two character tokens.
TokenBang,
TokenBangEqual,
TokenEqual,
TokenEqualEqual,
TokenGreater,
TokenGreaterEqual,
TokenLess,
TokenLessEqual,
// Literals.
TokenIdentifier,
TokenString,
TokenNumber,
// Keywords.
TokenAnd,
TokenClass,
TokenElse,
TokenFalse,
TokenFor,
TokenFun,
TokenIf,
TokenNil,
TokenOr,
TokenPrint,
TokenReturn,
TokenSuper,
TokenThis,
TokenTrue,
TokenVar,
TokenWhile,
// Scanner status: `TokenError` carries an error message as its lexeme,
// `TokenEof` marks the end of the input.
TokenError,
TokenEof,
}

View File

@ -1,3 +1,4 @@
use crate::compiler::compile;
use crate::debug::{disassemble_instruction, print_value, trace_enabled};
use crate::Value;
use crate::{Chunk, ConversionError, OptCode};
@ -23,11 +24,15 @@ impl<'a> VM<'a> {
stack: vec![],
}
}
pub fn interpret(&mut self, chunk: &'a Chunk) -> InterpretResult {
pub fn interpret_chunk(&mut self, chunk: &'a Chunk) -> InterpretResult {
self.chunk = Some(chunk);
self.ip = 0;
self.run()
}
/// Runs `compile` on `source` and returns `InterpretOk`.
///
/// `compile` returns nothing that is inspected here, so this method currently
/// never reports a compile or runtime error.
pub fn interpret(&mut self, source: &str) -> InterpretResult {
compile(source);
InterpretResult::InterpretOk
}
fn read_byte(&mut self) -> u8 {
match self.chunk {
@ -142,7 +147,7 @@ mod tests {
chunk.write(OptCode::OpReturn.into(), 125);
let mut vm: VM = VM::new();
let result: InterpretResult = vm.interpret(&chunk);
let result: InterpretResult = vm.interpret_chunk(&chunk);
assert_eq!(result, InterpretResult::InterpretOk);
}
#[test]
@ -168,7 +173,7 @@ mod tests {
chunk.write(OptCode::OpReturn.into(), 126);
let mut vm: VM = VM::new();
let result: InterpretResult = vm.interpret(&chunk);
let result: InterpretResult = vm.interpret_chunk(&chunk);
assert_eq!(result, InterpretResult::InterpretOk);
}
}