diff --git a/Cargo.lock b/Cargo.lock index a68d0db..15b700f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "anstream" version = "0.6.14" @@ -51,6 +60,44 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "assert_cmd" +version = "2.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed72493ac66d5804837f480ab3766c72bdfab91a65e565fc54fa9e42db0073a8" +dependencies = [ + "anstyle", + "bstr", + "doc-comment", + "predicates", + "predicates-core", + "predicates-tree", + "wait-timeout", +] + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + +[[package]] +name = "bstr" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "clap" version = "4.5.4" @@ -84,6 +131,33 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" +[[package]] +name = "difflib" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" + +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + +[[package]] +name = "float-cmp" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" +dependencies = [ + "num-traits", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "heck" version = "0.4.1" @@ -96,22 +170,121 @@ version = "1.70.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "logos" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c6b6e02facda28ca5fb8dbe4b152496ba3b1bd5a4b40bb2b1b2d8ad74e0f39b" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-codegen" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b32eb6b5f26efacd015b000bfc562186472cd9b34bdba3f6b264e2a052676d10" +dependencies = [ + "beef", + "fnv", + "lazy_static", + "proc-macro2", + "quote", + "regex-syntax", + "syn", +] + +[[package]] +name = "logos-derive" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e5d0c5463c911ef55624739fc353238b4e310f0144be1f875dc42fec6bfd5ec" +dependencies = [ + "logos-codegen", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + [[package]] name = "mox" version = "0.1.0" dependencies = [ + "assert_cmd", "clap", + "logos", "peekmore", + "predicates", "strum", "strum_macros", ] +[[package]] +name = "normalize-line-endings" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "peekmore" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9163e1259760e83d528d1b3171e5100c1767f10c52e1c4d6afad26e63d47d758" +[[package]] +name = "predicates" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b87bfd4605926cdfefc1c3b5f8fe560e3feca9d5552cf68c466d3d8236c7e8" +dependencies = [ + "anstyle", + "difflib", + "float-cmp", + "normalize-line-endings", + "predicates-core", + "regex", +] + +[[package]] +name = "predicates-core" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b794032607612e7abeb4db69adb4e33590fa6cf1149e95fd7cb00e634b92f174" + +[[package]] +name = "predicates-tree" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368ba315fb8c5052ab692e68a0eefec6ec57b23a36959c14496f0b0df2c0cecf" +dependencies = [ + "predicates-core", + "termtree", +] + [[package]] name = "proc-macro2" version = "1.0.81" @@ -130,12 +303,61 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "regex" +version = "1.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + [[package]] name = "rustversion" version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80af6f9131f277a45a3fba6ce8e2258037bb0477a67e610d3c1fe046ab31de47" +[[package]] +name = "serde" +version = "1.0.204" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.204" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "strsim" version = "0.11.1" @@ -172,6 +394,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "termtree" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" + [[package]] name = "unicode-ident" version = "1.0.12" @@ -184,6 +412,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "wait-timeout" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" +dependencies = [ + "libc", +] + [[package]] name = "windows-sys" version = "0.52.0" diff --git a/Cargo.toml b/Cargo.toml index 793379d..5a44a3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,11 @@ edition = "2021" [dependencies] clap = "4.5.4" +logos = { version = "0.14.2", features = [] } peekmore = "1.3.0" strum = "0.26" strum_macros = "0.26" + +[dev-dependencies] +assert_cmd = "2.0.14" +predicates = "3.1.0" diff --git a/src/compiler.rs b/src/compiler.rs index e93cfe5..33139cf 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -1,22 +1,137 @@ -use strum_macros::Display; +use crate::{ + chunk::{Chunk, OptCode}, + scanner::Scanner, + token::Token, +}; -use crate::chunk::Chunk; - -pub fn compile(source: &str, chunk: &Chunk) -> bool { - return true; +pub struct Compiler<'a> { + pub scanner: Scanner<'a>, + pub current: Option, + pub previous: Option, + pub had_error: bool, + pub panic_mode: bool, + pub chunk: Option, } -#[derive(Display, PartialEq, Eq, PartialOrd, Ord)] -enum Precedence { - None, - Assignment, - Or, - And, - Equality, - Comparison, - Term, - Factor, - Unary, - Call, - Primary, +impl<'a> Compiler<'a> { + pub fn from_source(source: &'a String) -> Self { + Compiler { + scanner: Scanner::new(source), + chunk: None, + current: None, + previous: None, + had_error: false, + panic_mode: false, + } + } + + pub fn compile(&mut self) -> bool { + // let chunk = Chunk::new(); + self.advance(); + self.expression(); + //self.consume(TokenType::TokenEof, "Expect end of expression"); + self.emit_return(); + return !self.had_error; + } + + fn advance(&mut self) { + self.previous = self.current.clone(); + while let Some(r) = self.scanner.next() { + match r { + Ok(token) => { + self.current = Some(token); + break; + } + _ => { + self.error_at_current("error as current token"); + } + } + } + } + + fn expression(&mut self) { + () + } + + fn emit_return(&mut self) { + self.emit_byte(OptCode::OpReturn.into()); + } + + fn consume(&mut self, expected: Token, message: &str) { + match self.current { + t if t == Some(expected) => { + self.error_at_current(message); + } + _ => {} + } + } + fn number(&mut self) { + if let Some(Token::TokenNumber(v)) = self.previous { + self.emit_constant(v); + } + } + fn emit_constant(&mut self, value: f64) { + let idx = self.make_constant(value); + self.emit_bytes(OptCode::OpConstant.into(), idx); + } + fn current_chunk(&mut self) -> &mut Chunk { + match &mut self.chunk { + Some(chunk) => { + let c: &mut Chunk = chunk; + return c; + } + None => panic!("oh no!"), + } + } + fn make_constant(&mut self, value: f64) -> u8 { + let chunk: &mut Chunk = self.current_chunk(); + chunk.write_value(value) + } + fn emit_byte(&mut self, byte: u8) { + // self.write_byte(self.chunk, self.previous.unwrap().line); + } + fn emit_bytes(&mut self, first: u8, second: u8) { + // self.write_byte(self.chunk, self.previous.unwrap().line); + } + fn error_at_current(&mut self, message: &str) { + self.error_at(self.current.clone().unwrap(), message); + } + fn error(&mut self, message: &str) { + self.error_at(self.previous.clone().unwrap(), message); + } + fn error_at(&mut self, token: Token, message: &str) { + if self.panic_mode { + return; + } + // print!("[line {:}] Error", token.line); + // match token.token_type { + // TokenType::TokenEof => { + // print!(" at end") + // } + // TokenType::TokenError => { + // todo!(); + // } + // _ => { + // print!(" at '{:}'", token.lexeme); + // } + // } + // println!(": {:}", message); + self.had_error = true; + } } + +// use strum_macros::Display; +// #[derive(Display, PartialEq, Eq, PartialOrd, Ord)] +// enum Precedence { +// None, +// Assignment, +// Or, +// And, +// Equality, +// Comparison, +// Term, +// Factor, +// Unary, +// Call, +// Primary, +// } diff --git a/src/lib.rs b/src/lib.rs index 3fb087c..f4a387e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,5 +2,6 @@ pub mod chunk; pub mod compiler; pub mod debug; pub mod scanner; +pub mod token; pub mod value; pub mod vm; diff --git a/src/main.rs b/src/main.rs index c3806be..1b95ddd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,66 +1,71 @@ use clap::{Arg, ArgAction, Command}; -use mox::chunk::Chunk; +use mox::compiler::Compiler; use mox::scanner::Scanner; -use mox::vm::InterpretResult; use mox::vm::VM; use std::fs; -// use std::io; -// use std::io::Write; use std::process; -use std::process::exit; fn main() { + let file_arg = Arg::new("file") + .short('f') + .long("file") + .action(ArgAction::Set) + .value_name("FILE") + .help("File to run"); + + let command_arg = Arg::new("commands") + .value_name("COMMANDS") + .index(1) + .action(ArgAction::Set) + .help("Commands to run"); + let app = Command::new("mox") .version("1.0") .author("publicmatt") .about("mox interpreter!!") - .arg( - Arg::new("file") - .short('f') - .long("file") - .action(ArgAction::Set) - .value_name("FILE") - .help("file to run"), + .subcommand( + Command::new("run") + .about("Run commands") + .arg(&file_arg) + .arg(&command_arg), ) - .arg( - Arg::new("command") - .short('c') - .long("command") - .action(ArgAction::Set) - .value_name("COMMAND") - .help("command to run"), - ) - .arg( - Arg::new("scan") - .short('s') - .long("scan") - .action(ArgAction::SetTrue) - .value_name("SCAN") - .help("scan only"), + .subcommand( + Command::new("scan") + .about("Scan commands") + .arg(&file_arg) + .arg(&command_arg), ); - let matches = app.get_matches(); - - if let Some(source) = matches.get_one::("command") { - if let Some(_) = matches.get_one::("scan") { - scan_content(source); - } else { - run_content(source); + match matches.subcommand() { + Some(("run", sub_m)) => { + if let Some(file) = sub_m.get_one::("file") { + let commands = fs::read_to_string(file).unwrap_or_else(|err| { + eprintln!("Error reading '{}': {}", file, err); + process::exit(74); + }); + run_content(&commands); + } else if let Some(commands) = sub_m.get_one::("commands") { + run_content(commands); + } else { + println!("No file or commands provided for run."); + } } - } else if let Some(file) = matches.get_one::("file") { - let source = fs::read_to_string(file).unwrap_or_else(|err| { - eprintln!("Error reading '{}': {}", file, err); - process::exit(74); - }); - println!("{}", source); - if let Some(_) = matches.get_one::("scan") { - scan_content(&source); - } else { - run_content(&source); + Some(("scan", sub_m)) => { + if let Some(file) = sub_m.get_one::("file") { + let commands = fs::read_to_string(file).unwrap_or_else(|err| { + eprintln!("Error reading '{}': {}", file, err); + process::exit(74); + }); + scan_content(&commands); + } else if let Some(commands) = sub_m.get_one::("commands") { + scan_content(commands); + } else { + println!("No file or commands provided for scan."); + } + } + _ => { + todo!("repl not done yet") } - } else { - todo!("repl not done yet") - // repl(); } } @@ -94,11 +99,15 @@ fn scan_content(source: &str) { scanner.compile(); } fn run_content(source: &str) { - let mut vm: VM = VM::new(); - let mut chunk: Chunk = Chunk::new(); - match vm.interpret(source, &mut chunk) { - InterpretResult::InterpretOk => exit(0), - InterpretResult::InterpretCompileError => exit(65), - InterpretResult::InterpretRuntimeError => exit(70), - } + let mut _vm: VM = VM::new(); + let owned = &source.to_owned(); + // let mut compiler: Compiler = Compiler::from_source(owned); + // compiler.compile(); + todo!("run_content is not hooked up yet"); + // let mut chunk: Chunk = compiler.compile(); + // match vm.interpret(&mut chunk) { + // InterpretResult::InterpretOk => exit(0), + // InterpretResult::InterpretCompileError => exit(65), + // InterpretResult::InterpretRuntimeError => exit(70), + // } } diff --git a/src/scanner.rs b/src/scanner.rs index 12f5011..e94b302 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -1,425 +1,138 @@ +use crate::token::Token; +use logos::{Lexer, Logos}; use peekmore::{PeekMore, PeekMoreIterator}; use std::fmt; use std::str::Chars; pub struct Scanner<'a> { - source: &'a String, - start: usize, - chars: PeekMoreIterator>, - current: usize, + lexer: Lexer<'a, Token>, line: u16, } impl<'a> fmt::Display for Scanner<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "{}:{}..{} '{}'", - self.line, - self.start, - self.current, - &self.source[self.start..self.current] - ) + write!(f, "{} '{}'", self.line, self.lexer.slice()) } } impl<'a> Scanner<'a> { - pub fn new(source: &'a String) -> Self { + pub fn new(source: &'a str) -> Self { Scanner { - start: 0, - current: 0, line: 1, - chars: source.chars().peekmore(), - source, + lexer: Token::lexer(source), } } + pub fn next(&mut self) -> Option> { + self.lexer.next() + } + pub fn compile(&mut self) { - let mut line = 0; - loop { - let token = self.scan_token(); - - if token.line != line { - print!("{:4}", token.line); - line = token.line; - } else { - print!(" |"); + let mut line = 1; + while let Some(Ok(token)) = self.lexer.next() { + match token { + Token::TokenNewline => { + line += 1; + print!("{:4}", line); + } + _ => { + if self.lexer.span().next().unwrap() == 0 { + print!("{:4}", line); + } else { + print!(" |"); + } + } } - println!( ":{:<3} {:20} {:20}", - token.start, token.token_type, token.lexeme + self.lexer.span().next().unwrap(), + token, + format!("{:?}", self.lexer.slice()) ); - - if let TokenType::TokenEof = token.token_type { - break; - } } } - pub fn scan_token(&mut self) -> Token { - self.skip_whitespace(); - self.start = self.current; - match self.advance() { - Some(c) => match c { - c if c.is_digit(10) => return self.make_number(), - c if c.is_ascii_alphabetic() || c == '_' => return self.make_identifier(), - '(' => return self.make_token(TokenType::TokenLeftParen), - ')' => return self.make_token(TokenType::TokenRightParen), - '{' => return self.make_token(TokenType::TokenLeftBrace), - '}' => return self.make_token(TokenType::TokenRightBrace), - ';' => return self.make_token(TokenType::TokenSemicolon), - ',' => return self.make_token(TokenType::TokenComma), - '.' => return self.make_token(TokenType::TokenDot), - '-' => return self.make_token(TokenType::TokenMinus), - '+' => return self.make_token(TokenType::TokenPlus), - '/' => return self.make_token(TokenType::TokenSlash), - '*' => return self.make_token(TokenType::TokenStar), - '!' => { - return self.make_token_if_matches( - '=', - TokenType::TokenBangEqual, - TokenType::TokenBang, - ) - } - '=' => { - return self.make_token_if_matches( - '=', - TokenType::TokenEqualEqual, - TokenType::TokenEqual, - ) - } - '<' => { - return self.make_token_if_matches( - '=', - TokenType::TokenLessEqual, - TokenType::TokenLess, - ) - } - '>' => { - return self.make_token_if_matches( - '=', - TokenType::TokenGreaterEqual, - TokenType::TokenGreater, - ) - } - '"' => return self.make_string(), - _ => return self.make_error_token("Unexpected character."), - }, - None => return self.make_eof(), - }; - } - fn make_identifier(&mut self) -> Token { - while let Some(c) = self.chars.peek() { - if c.is_alphabetic() || *c == '_' || c.is_digit(10) { - self.advance(); - } else { - break; - } - } - let lexeme = &self.source[self.start..self.current]; - - match lexeme { - "and" => self.make_token(TokenType::TokenAnd), - "class" => self.make_token(TokenType::TokenClass), - "cap" => self.make_token(TokenType::TokenFalse), - "else" => self.make_token(TokenType::TokenElse), - "if" => self.make_token(TokenType::TokenIf), - "nil" => self.make_token(TokenType::TokenNil), - "no" => self.make_token(TokenType::TokenFalse), - "or" => self.make_token(TokenType::TokenOr), - "print" => self.make_token(TokenType::TokenPrint), - "return" => self.make_token(TokenType::TokenReturn), - "super" => self.make_token(TokenType::TokenSuper), - "var" => self.make_token(TokenType::TokenVar), - "while" => self.make_token(TokenType::TokenWhile), - "false" => self.make_token(TokenType::TokenFalse), - "for" => self.make_token(TokenType::TokenFor), - "fun" => self.make_token(TokenType::TokenFun), - "this" => self.make_token(TokenType::TokenThis), - "true" => self.make_token(TokenType::TokenTrue), - _ => return self.make_token(TokenType::TokenIdentifier), - } - } - fn make_token_if_matches( - &mut self, - expected: char, - on_match: TokenType, - otherwise: TokenType, - ) -> Token { - if self.matches(expected) { - self.make_token(on_match) - } else { - self.make_token(otherwise) - } - } - fn matches(&mut self, expected: char) -> bool { - match self.chars.peek() { - Some(c) => { - if c == &expected { - self.advance(); - true - } else { - false - } - } - None => false, - } - } - fn make_string(&mut self) -> Token { - loop { - match self.chars.peek() { - Some(c) => match c { - '\n' => { - self.line += 1; - continue; - } - '"' => { - self.advance(); - return self.make_token(TokenType::TokenString); - } - _ => { - self.advance(); - continue; - } - }, - None => return self.make_error_token("Unterminated string."), - } - } - } - fn make_number(&mut self) -> Token { - while let Some(c) = self.peek() { - if c.is_digit(10) { - self.advance(); - } else { - break; - } - } - match (self.peek(), self.peek_next()) { - (Some(c), Some(cc)) if c == '.' && cc.is_digit(10) => { - self.advance(); - while let Some(c) = self.peek() { - if c.is_digit(10) { - self.advance(); - } else { - break; - } - } - return self.make_token(TokenType::TokenNumber); - } - _ => { - return self.make_token(TokenType::TokenNumber); - } - } - } - fn peek(&self) -> Option { - self.source.chars().nth(self.current) - } - fn peek_next(&self) -> Option { - if self.is_at_end() { - return None; - } - return self.source.chars().nth(self.current + 1); - } - fn skip_whitespace(&mut self) { - loop { - match self.chars.peek() { - None => return, - Some(c) => match c { - ' ' | '\r' | '\t' => { - self.advance(); - continue; - } - '\n' => { - self.line += 1; - self.advance(); - continue; - } - '/' => match self.chars.peek_nth(1) { - Some(c) if *c == '/' => { - while let Some(c) = self.peek() { - if c == '\n' { - self.line += 1; - self.advance(); - return; - } else { - self.advance(); - } - } - } - _ => return, - }, - _ => return, - }, - } - } - } - fn is_at_end(&self) -> bool { - return self.current == self.source.len(); - } - fn make_token(&self, token_type: TokenType) -> Token { - Token { - token_type, - start: self.start, - lexeme: self.source[self.start..self.current].to_string(), - line: self.line, - } - } - fn advance(&mut self) -> Option { - self.current += 1; - self.chars.next() - } - fn make_error_token(&self, message: &str) -> Token { - Token { - token_type: TokenType::TokenError, - start: self.start, - lexeme: message.to_string(), - line: self.line, - } - } - fn make_eof(&self) -> Token { - Token { - token_type: TokenType::TokenEof, - start: self.start, - lexeme: "".to_string(), - line: self.line, - } - } -} - -pub struct Token { - pub token_type: TokenType, - pub start: usize, - pub lexeme: String, - pub line: u16, -} - -#[derive(Clone, Copy, strum_macros::Display, Debug, PartialEq, Eq)] -pub enum TokenType { - // Single-character tokens. - TokenLeftParen, - TokenRightParen, - TokenLeftBrace, - TokenRightBrace, - TokenComma, - TokenDot, - TokenMinus, - TokenPlus, - TokenSemicolon, - TokenSlash, - TokenStar, - // One or two character tokens. - TokenBang, - TokenBangEqual, - TokenEqual, - TokenEqualEqual, - TokenGreater, - TokenGreaterEqual, - TokenLess, - TokenLessEqual, - // Literals. - TokenIdentifier, - TokenString, - TokenNumber, - // Keywords. - TokenAnd, - TokenClass, - TokenElse, - TokenFalse, - TokenFor, - TokenFun, - TokenIf, - TokenNil, - TokenOr, - TokenPrint, - TokenReturn, - TokenSuper, - TokenThis, - TokenTrue, - TokenVar, - TokenWhile, - - TokenError, - TokenEof, } #[cfg(test)] mod tests { - use crate::scanner; - use crate::scanner::TokenType; + + use crate::token::Token; + use logos::Logos; #[test] fn single_chars() { - assert_token(String::from(""), TokenType::TokenEof); - assert_token(String::from("("), TokenType::TokenLeftParen); - assert_token(String::from("}"), TokenType::TokenRightBrace); - assert_token(String::from("-"), TokenType::TokenMinus); - assert_token(String::from("+"), TokenType::TokenPlus); - assert_token(String::from("/"), TokenType::TokenSlash); + //assert_token(String::from(""), Token::TokenEof); + assert_token(String::from("("), Token::TokenLeftParen); + assert_token(String::from("}"), Token::TokenRightBrace); + assert_token(String::from("-"), Token::TokenMinus); + assert_token(String::from("+"), Token::TokenPlus); + assert_token(String::from("/"), Token::TokenSlash); } #[test] fn double_chars() { - assert_token(String::from("=="), TokenType::TokenEqualEqual); - assert_token(String::from("!="), TokenType::TokenBangEqual); - assert_token(String::from(">"), TokenType::TokenGreater); - assert_token(String::from(">="), TokenType::TokenGreaterEqual); + assert_token(String::from("=="), Token::TokenEqualEqual); + assert_token(String::from("!="), Token::TokenBangEqual); + assert_token(String::from(">"), Token::TokenGreater); + assert_token(String::from(">="), Token::TokenGreaterEqual); } #[test] fn strings() { - assert_token_lexeme(String::from("\"mox\""), TokenType::TokenString, "\"mox\""); - assert_token_lexeme(String::from("\"\""), TokenType::TokenString, "\"\""); + assert_token_lexeme(String::from("\"mox\""), Token::TokenString, "\"mox\""); + assert_token_lexeme(String::from("\"\""), Token::TokenString, "\"\""); } #[test] fn numbers() { - assert_token_lexeme(String::from("0"), TokenType::TokenNumber, "0"); - assert_token_lexeme(String::from("4"), TokenType::TokenNumber, "4"); - assert_token_lexeme(String::from("42"), TokenType::TokenNumber, "42"); - assert_token_lexeme(String::from("13.99"), TokenType::TokenNumber, "13.99"); + assert_token_lexeme(String::from("0"), Token::TokenNumber(0.0), "0"); + assert_token_lexeme(String::from("4"), Token::TokenNumber(4.0), "4"); + assert_token_lexeme(String::from("42"), Token::TokenNumber(42.0), "42"); + assert_token_lexeme(String::from("13.99"), Token::TokenNumber(13.99), "13.99"); } #[test] fn newlines() { assert_tokens( String::from("+\n//comment\n-"), - &vec![TokenType::TokenPlus, TokenType::TokenMinus], + &vec![ + Token::TokenPlus, + Token::TokenNewline, + Token::TokenComment, + Token::TokenNewline, + Token::TokenMinus, + ], ); } #[test] fn identifier() { - assert_token(String::from("class"), TokenType::TokenClass); - assert_token(String::from("if"), TokenType::TokenIf); - assert_token(String::from("while"), TokenType::TokenWhile); - assert_token(String::from("true"), TokenType::TokenTrue); - assert_token(String::from("false"), TokenType::TokenFalse); + assert_token(String::from("class"), Token::TokenClass); + assert_token(String::from("if"), Token::TokenIf); + assert_token(String::from("while"), Token::TokenWhile); + assert_token(String::from("true"), Token::TokenTrue); + assert_token(String::from("false"), Token::TokenFalse); - assert_token(String::from("cap"), TokenType::TokenFalse); + assert_token(String::from("cap"), Token::TokenFalse); - assert_token_lexeme(String::from("mox"), TokenType::TokenIdentifier, "mox"); + assert_token_lexeme(String::from("mox"), Token::TokenIdentifier, "mox"); } - fn assert_token(source: String, expected: scanner::TokenType) { - let mut scanner = scanner::Scanner::new(&source); - let token = scanner.scan_token(); + fn assert_token(source: String, expected: Token) { + let mut lex = Token::lexer(&source); + let token = lex.next(); - assert_eq!(token.token_type, expected); + assert_eq!(token, Some(Ok(expected))); } - fn assert_token_lexeme( - source: String, - expected_type: scanner::TokenType, - expected_lexeme: &str, - ) { - let mut scanner = scanner::Scanner::new(&source); - let token = scanner.scan_token(); + fn assert_token_lexeme(source: String, expected_type: Token, expected_lexeme: &str) { + let mut lex = Token::lexer(&source); + let token = lex.next(); - assert_eq!(token.token_type, expected_type); - assert_eq!(token.lexeme, expected_lexeme); + assert_eq!(token, Some(Ok(expected_type))); + + assert_eq!(lex.slice(), expected_lexeme); } - fn assert_tokens(source: String, expected_tokens: &Vec) { - let mut scanner = scanner::Scanner::new(&source); + fn assert_tokens(source: String, expected_tokens: &Vec) { + let mut lex = Token::lexer(&source); for expected in expected_tokens { - let actual = scanner.scan_token(); - assert_eq!(actual.token_type, *expected); + let actual = lex.next(); + assert_eq!(actual, Some(Ok(*expected))); } - - assert_eq!(scanner.scan_token().token_type, TokenType::TokenEof); } } diff --git a/src/token.rs b/src/token.rs new file mode 100644 index 0000000..c7352ca --- /dev/null +++ b/src/token.rs @@ -0,0 +1,137 @@ +use logos::{Lexer, Logos, Skip}; +use strum_macros::Display; + +// #[derive(Clone, Debug)] +// pub struct Token { +// pub token_type: TokenType, +// pub start: usize, +// pub lexeme: String, +// pub line: u16, +// } +// +// impl Token { +// pub fn empty() -> Self { +// Token { +// token_type: TokenType::TokenError, +// start: 0, +// lexeme: "empty token".to_string(), +// line: 0, +// } +// } +// } +// +#[derive(Default, Debug)] +struct Location { + line: usize, + column: usize, +} + +/// /// Update the line count and the char index. +/// fn newline_callback(lex: &mut Lexer) -> Location { +/// Location { +/// line: lex.extras.0 + 1, +/// column: lex.span().end, +/// } +/// } +/// fn default_callback(lex: &mut Lexer) -> Location { +/// Location { +/// line: lex.extras.0, +/// column: lex.span().start - lex.extras.1, +/// } +/// } + +#[derive(Logos, Display, Debug, PartialEq, Clone, Copy)] +#[logos(skip r"[ \t\f]+")] +//#[logos(extras = (usize, usize))] +pub enum Token { + #[regex(r"//[^\n]*")] + TokenComment, + // Single-character tokens. + #[token("(")] + TokenLeftParen, + #[token(")")] + TokenRightParen, + #[token("{")] + TokenLeftBrace, + #[token("}")] + TokenRightBrace, + #[token(",")] + TokenComma, + #[token(".")] + TokenDot, + #[token("-")] + TokenMinus, + #[token("+")] + TokenPlus, + #[token(";")] + TokenSemicolon, + #[token("/")] + TokenSlash, + #[token("*")] + TokenStar, + // One or two character tokens. + #[token("!")] + TokenBang, + #[token("!=")] + TokenBangEqual, + #[token("=")] + TokenEqual, + #[token("==")] + TokenEqualEqual, + #[token(">")] + TokenGreater, + #[token(">=")] + TokenGreaterEqual, + #[token("<")] + TokenLess, + #[token("<=")] + TokenLessEqual, + + // Literals. + #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")] + TokenIdentifier, + #[regex(r#""([^"\\]*(\\.[^"\\]*)*)""#)] + TokenString, + + #[regex(r"\d+\.\d*|\d*\.\d+|\d+", |lex| lex.slice().parse().ok())] + TokenNumber(f64), + // Keywords. + #[token("and")] + TokenAnd, + #[token("class")] + TokenClass, + #[token("else")] + TokenElse, + #[regex(r"no|false|cap")] + TokenFalse, + #[token("for")] + TokenFor, + #[token("fun")] + TokenFun, + #[token("if")] + TokenIf, + #[token("nil")] + TokenNil, + #[token("of")] + TokenOr, + #[token("print")] + TokenPrint, + #[token("return")] + TokenReturn, + #[token("super")] + TokenSuper, + #[token("this")] + TokenThis, + #[token("true")] + TokenTrue, + #[token("var")] + TokenVar, + #[token("while")] + TokenWhile, + + #[regex(r"\n")] + TokenNewline, + // #[error] + // TokenError, + //TokenEof, +} diff --git a/src/vm.rs b/src/vm.rs index dedea92..be5d8e9 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -1,5 +1,4 @@ use crate::chunk::{Chunk, ConversionError, OptCode}; -use crate::compiler; use crate::debug::{disassemble_instruction, print_value, trace_enabled}; use crate::value::Value; @@ -29,17 +28,10 @@ impl<'a> VM<'a> { self.ip = 0; self.run() } - pub fn interpret(&mut self, source: &str, chunk: &'a mut Chunk) -> InterpretResult { - match compiler::compile(source, &chunk) { - false => { - return InterpretResult::InterpretCompileError; - } - true => { - self.chunk = Some(chunk); - self.ip = 0; - return self.run(); - } - } + pub fn interpret(&mut self, chunk: &'a mut Chunk) -> InterpretResult { + self.chunk = Some(chunk); + self.ip = 0; + return self.run(); } fn read_byte(&mut self) -> u8 { diff --git a/test.mox b/test.mox new file mode 100644 index 0000000..3ec3c5a --- /dev/null +++ b/test.mox @@ -0,0 +1,3 @@ +no cap 2+2 +// comments and newlines work +"moxide" == "jlox/clox but matt" diff --git a/tests/cli.rs b/tests/cli.rs new file mode 100644 index 0000000..b1e0a31 --- /dev/null +++ b/tests/cli.rs @@ -0,0 +1,27 @@ +use assert_cmd::prelude::*; // Add methods on commands +use predicates::prelude::*; // Used for writing assertions +use std::process::Command; // Run programs + +#[test] +fn file_doesnt_exist() -> Result<(), Box> { + let mut cmd = Command::cargo_bin("mox")?; + + cmd.arg("scan").arg("--file").arg("test.moxy"); + cmd.assert() + .failure() + .stderr(predicate::str::contains("No such file or directory")); + + Ok(()) +} + +#[test] +fn scan_file_succeeds() -> Result<(), Box> { + let mut cmd = Command::cargo_bin("mox")?; + + cmd.arg("scan").arg("--file").arg("test.mox"); + cmd.assert().success().stdout(predicate::str::contains( + "1:0 TokenFalse \"no\"", + )); + + Ok(()) +}