diff --git a/src/earley.ts b/src/earley.ts
index 3c42a45..4c3d382 100644
--- a/src/earley.ts
+++ b/src/earley.ts
@@ -43,9 +43,9 @@ function isNonTerminal(tokenClass: TokenClass): tokenClass is NonTerminalTokenCl
     return !tokenClass.terminal;
 }
 
-type TerminalTokenClass = { new(...args: any[]) : Terminal, terminal: true }
-type NonTerminalTokenClass = { new(...args: any[]) : NonTerminal, terminal: false }
-type TokenClass = TerminalTokenClass | NonTerminalTokenClass;
+export type TerminalTokenClass = { new(...args: any[]) : Terminal, terminal: true }
+export type NonTerminalTokenClass = { new(...args: any[]) : NonTerminal, terminal: false }
+export type TokenClass = TerminalTokenClass | NonTerminalTokenClass;
 
 function getTokenClassFromToken(token: Token): TokenClass {
     return token.constructor as TokenClass;
diff --git a/src/tokenizer.ts b/src/tokenizer.ts
index 09cfa74..318ef49 100644
--- a/src/tokenizer.ts
+++ b/src/tokenizer.ts
@@ -82,4 +82,4 @@ export function tokenize(string) {
     }
 
     return tokens;
-}
+}
\ No newline at end of file
diff --git a/src/util/asm/tokenizer.ts b/src/util/asm/tokenizer.ts
new file mode 100644
index 0000000..e69de29
diff --git a/src/util/asm/tokens.ts b/src/util/asm/tokens.ts
new file mode 100644
index 0000000..cce9a7d
--- /dev/null
+++ b/src/util/asm/tokens.ts
@@ -0,0 +1,15 @@
+import { Terminal } from "../../earley";
+
+// Instruction keywords...
+export class $Mov extends Terminal {}
+export class $Push extends Terminal {}
+export class $Pop extends Terminal {}
+export class $Call extends Terminal {}
+export class $Syscall extends Terminal {}
+export class $Ret extends Terminal {}
+export class $Je extends Terminal {}
+export class $Inc extends Terminal {}
+export class $Cmp extends Terminal {}
+export class $Jmp extends Terminal {}
+
+export class $Identifier extends Terminal {}
\ No newline at end of file
diff --git a/src/util/asmLogger.ts b/src/util/asmLogger.ts
new file mode 100644
index 0000000..66c28de
--- /dev/null
+++ b/src/util/asmLogger.ts
@@ -0,0 +1,3 @@
+export function logASM(asm: string) {
+
+}
\ No newline at end of file
diff --git a/src/util/generalTokenizer.ts b/src/util/generalTokenizer.ts
new file mode 100644
index 0000000..7636979
--- /dev/null
+++ b/src/util/generalTokenizer.ts
@@ -0,0 +1,70 @@
+import { TerminalTokenClass } from "../earley";
+
+/** Key of a token map: an exact string, or a regex that must span the whole token. */
+type Matcher = string | RegExp;
+
+/**
+ * Returns a copy of `pattern` that only matches when it spans the entire input.
+ * The `g` and `y` flags are dropped so that `test` never depends on `lastIndex`.
+ */
+function anchor(pattern: RegExp): RegExp {
+    return new RegExp(`^(?:${pattern.source})$`, pattern.flags.replace(/[gy]/g, ''));
+}
+
+/**
+ * Builds a tokenizer from a map of matchers to terminal token classes.
+ *
+ * The returned function reads `str` one character at a time into a buffer and,
+ * after each character, tries the matchers in map order:
+ * - a string matcher fires when the buffer equals it exactly;
+ * - a regex matcher fires when the whole buffer matches and appending the next
+ *   character would break that match (or the input has ended).
+ * The first matcher that fires emits `new tokenClass(line, column, text)` and
+ * clears the buffer. `column` is the 1-based column of the token's last character.
+ *
+ * Known limitations: a matcher that fires wins even if a later matcher could
+ * still have consumed more input, and a buffer that never matches is dropped.
+ *
+ * @param tokenMap matchers in priority order, each mapped to the class to construct
+ * @returns a function turning a source string into an array of token instances
+ */
+export function createTokenizer(tokenMap: Map<Matcher, TerminalTokenClass>) {
+    // Anchor every regex once up front instead of once per character.
+    const rules = Array.from(tokenMap, ([matcher, tokenClass]) => ({
+        matcher: typeof matcher === 'string' ? matcher : anchor(matcher),
+        tokenClass,
+    }));
+
+    return function tokenize(str: string) {
+        const tokens: InstanceType<TerminalTokenClass>[] = [];
+        let token = '';
+        let line = 1, column = 0;
+
+        for(let i = 0; i < str.length; i ++) {
+            const char = str[i];
+            const lookahead = (i < str.length - 1 ? str[i + 1] : null);
+            column++;
+            token += char;
+
+            for(const { matcher, tokenClass } of rules) {
+                const matched = typeof matcher === 'string'
+                    ? matcher === token
+                    : matcher.test(token) && (lookahead === null || !matcher.test(token + lookahead));
+
+                if(matched) {
+                    tokens.push(new tokenClass(line, column, token));
+                    // Start the next token from an empty buffer.
+                    token = '';
+                    break;
+                }
+            }
+
+            if(char === '\n') {
+                line ++;
+                column = 0;
+            }
+        }
+
+        return tokens;
+    }
+}
\ No newline at end of file
diff --git a/todo.md b/todo.md
index 65c22a2..9b84bd8 100644
--- a/todo.md
+++ b/todo.md
@@ -1,6 +1,7 @@
 # Todo List
 
 [ ] colorize the assembly output
+- write a tokenizer & grammar for asm. then colorize it.
 [ ] add number support
 [ ] add comment support
 [ ] add fixed length array support