hahhah o k

canary
Bronwen 2022-03-14 03:30:10 -04:00
parent bda7afbb71
commit 5dacad91b6
7 changed files with 74 additions and 4 deletions

View File

@@ -43,9 +43,9 @@ function isNonTerminal(tokenClass: TokenClass): tokenClass is NonTerminalTokenClass
     return !tokenClass.terminal;
 }
-type TerminalTokenClass = { new(...args: any[]) : Terminal, terminal: true }
-type NonTerminalTokenClass = { new(...args: any[]) : NonTerminal, terminal: false }
-type TokenClass = TerminalTokenClass | NonTerminalTokenClass;
+export type TerminalTokenClass = { new(...args: any[]) : Terminal, terminal: true }
+export type NonTerminalTokenClass = { new(...args: any[]) : NonTerminal, terminal: false }
+export type TokenClass = TerminalTokenClass | NonTerminalTokenClass;
 function getTokenClassFromToken(token: Token): TokenClass {
     return token.constructor as TokenClass;
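
A minimal sketch (not part of this commit) of what exporting the union enables elsewhere; the import path, the $Comma class, and the assumption that the Terminal base class carries the static `terminal: true` discriminant are all illustrative:

import { Terminal, TokenClass } from "./earley";

// Hypothetical terminal token class, mirroring the keyword classes added in this commit.
class $Comma extends Terminal {}

const cls = $Comma as TokenClass;          // same cast getTokenClassFromToken uses
if (cls.terminal) {
    // narrowed to TerminalTokenClass via the literal `terminal: true` discriminant
    const tok = new cls(1, 0, ",");        // args follow the tokenizer's (line, column, token)
}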

View File

@@ -82,4 +82,4 @@ export function tokenize(string) {
}
return tokens;
}
}

View File

View File

@@ -0,0 +1,15 @@
import { Terminal } from "../../earley";
// Instruction keywords...
export class $Mov extends Terminal {}
export class $Push extends Terminal {}
export class $Pop extends Terminal {}
export class $Call extends Terminal {}
export class $Syscall extends Terminal {}
export class $Ret extends Terminal {}
export class $Je extends Terminal {}
export class $Inc extends Terminal {}
export class $Cmp extends Terminal {}
export class $Jmp extends Terminal {}
export class $Identifier extends Terminal {}
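
A sketch of how these keyword classes could feed the createTokenizer added in this commit; the import paths, the keyword spellings, and the identifier regex are assumptions for illustration:

import { TerminalTokenClass } from "../../earley";
import { createTokenizer } from "../createTokenizer";                // assumed path
import { $Mov, $Push, $Pop, $Ret, $Identifier } from "./keywords";   // assumed path

// String keys must equal the accumulated token exactly; the regex key is anchored
// because the tokenizer calls RegExp.test on the whole accumulated token.
const asmTokenMap = new Map<string | RegExp, TerminalTokenClass>([
    ["mov", $Mov],
    ["push", $Push],
    ["pop", $Pop],
    ["ret", $Ret],
    [/^[A-Za-z_][A-Za-z0-9_]*$/, $Identifier],
]);

export const tokenizeAsm = createTokenizer(asmTokenMap);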

View File

@@ -0,0 +1,3 @@
export function logASM(asm: string) {
}

View File

@@ -0,0 +1,51 @@
import { Terminal, TerminalTokenClass } from "../earley";

export function createTokenizer(tokenMap: Map<string | RegExp, TerminalTokenClass>) {
    return function tokenize(str: string) {
        const tokens: Terminal[] = [];
        let token = '';
        let line = 1, column = 0;
        for(let i = 0; i < str.length; i++) {
            const char = str[i];
            const lookahead = (i < str.length - 1 ? str[i + 1] : null);
            column++;
            token += char;
            for(const [matcher, tokenClass] of tokenMap) {
                if(typeof matcher === 'string') {
                    if(matcher === token) {
                        tokens.push(new tokenClass(line, column, token));
                        token = '';
                    } else {
                        // dw about it (no exact match yet; keep accumulating)
                    }
                } else {
                    // matcher is regex...
                    // * note: this only tests if token contains a match, not that it _is_ a match
                    if(matcher.test(token)) {
                        if(lookahead) {
                            if(!matcher.test(token + lookahead)) {
                                // the next character would not match, so this must be the match.
                                // ! PS: it is possible that even though this would no longer
                                // ! match, another matcher could still match more.
                                // ! in those cases, we would want to expand on this logic
                                // ! to only match if there are no matches for any matcher
                                // ! in the lookahead.
                                // ! in practice this means tracking all possible non lookahead
                                // ! matches, then testing them for their lookahead afterwards
                                // ! in another loop, and only tokenizing if you have only one
                                // ! option, and that option will fail on the lookahead.
                                tokens.push(new tokenClass(line, column, token));
                                token = '';
                            }
                        } else {
                            // end of input: the current match is the token
                            tokens.push(new tokenClass(line, column, token));
                            token = '';
                        }
                    }
                }
            }
            if(char === '\n') {
                line++;
                column = 0;
            }
        }
        return tokens;
    }
}
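
A hedged usage sketch of createTokenizer (the file path, the sample classes, and the map contents are assumptions, not part of this commit):

import { Terminal, TerminalTokenClass } from "../earley";
import { createTokenizer } from "./createTokenizer";   // assumed filename

// Hypothetical terminals; assumes Terminal declares the static `terminal: true` flag.
class $Word extends Terminal {}
class $Comma extends Terminal {}

const tokenize = createTokenizer(new Map<string | RegExp, TerminalTokenClass>([
    [",", $Comma],            // exact string match
    [/^[a-z]+$/, $Word],      // anchored, so .test approximates a full match
]));

// "mov" is emitted once the lookahead "," breaks the regex match; "," matches the string key.
const tokens = tokenize("mov,");   // => [$Word("mov"), $Comma(",")]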

View File

@@ -1,6 +1,7 @@
# Todo List
[ ] colorize the assembly output
- write a tokenizer & grammar for asm. then colorize it.
[ ] add number support
[ ] add comment support
[ ] add fixed length array support