colorize assembly

canary
Neon 2022-03-14 07:43:41 -04:00
parent 5dacad91b6
commit df5bc01720
15 changed files with 269 additions and 63 deletions

Binary file not shown.

View File

@ -1,15 +1,15 @@
global _main
section .data
UMRRSQMF db 72,101,108,108,111,32,87,111,114,108,100,0
EFDNYLFZ db 72,101,108,108,111,32,87,111,114,108,100,0
section .text
_main:
global _start
_start:
push rbp
mov rbp, rsp
mov rdi, UMRRSQMF
mov rdi, EFDNYLFZ
call _log
mov rsp, rbp
pop rbp
mov rax, 0x02000001
mov rax, 60
mov rdi, 0
syscall
_log:
@ -24,12 +24,12 @@ _log_loop:
jmp _log_loop
_log_loop_end:
mov rdx, rbx
mov rax, 0x02000004
mov rax, 1
mov rdi, 1
pop rsi
syscall
push 10
mov rax, 0x02000004
mov rax, 1
mov rdi, 1
mov rsi, rsp
mov rdx, 1

View File

@ -1,12 +1,11 @@
#!/usr/bin/env node
// const AST = require('../ast.js');
// const compile = require('../compiler.js');
import { readFileSync } from "fs";
import { compile } from "./compiler";
import grammar, { $Newline } from "./grammar";
import grammar from "./grammar";
import { tokenize } from "./tokenizer";
import colorize from "./util/asm/colorize";
import { printTokens } from "./util/utils";
console.log();
console.log('=== Original ===');
@ -15,15 +14,12 @@ console.log(fileContents)
console.log('=== Tokenization ===');
const tokens = tokenize(fileContents);
for(const token of tokens) {
process.stdout.write(token.toString() + ' ');
if(token instanceof $Newline) console.log();
}
printTokens(tokens);
console.log();
console.log('=== Parsing ===');
const ast = grammar.solveFor(tokens)[0];
const ast = grammar.solveFor(tokens, { silent: false })[0];
console.log();
console.log('=== AST ===');
@ -36,7 +32,7 @@ const asmFile = compile(ast)
try {
console.log();
console.log('=== ASM ===');
console.log(asmFile);
console.log(colorize(asmFile));
require('fs').writeFileSync('disco_test.asm', asmFile);
console.log();

View File

@ -1,15 +1,14 @@
import * as chalk from 'chalk';
const rgb2ansi = (r: number, g: number, b: number) => r * 36 + g * 6 + b + 16
const ansi = (r: number, g = r, b = r) => chalk.ansi256(rgb2ansi(r, g, b));
import { ansi } from './util/utils';
export abstract class Token {
l: number;
c: number;
value: string;
static terminal: boolean;
constructor(l: number, c: number) {
constructor(l: number, c: number, value: string) {
this.l = l;
this.c = c;
this.value = value;
}
static toString() {
if(this.terminal) {
@ -35,6 +34,10 @@ export abstract class Token {
export class NonTerminal extends Token { static terminal: false = false };
export class Terminal extends Token { static terminal: true = true };
// these tokens are special, for formatting and generalization reasons.
export class $Newline extends Terminal { }
export class $Whitespace extends Terminal { }
function isTerminal(tokenClass: TokenClass): tokenClass is TerminalTokenClass {
return tokenClass.terminal;
}
@ -96,7 +99,7 @@ export class Grammar {
this.startingSymbol = startingSymbol;
}
solveFor(tokens: Token[]) {
solveFor(tokens: Token[], options: { silent: boolean } = { silent: true }) {
const state = new TimeMachine<SingleEarleyState>(() => new SingleEarleyState());
const possibleStartingProductions = getProductionsForTokenClass(this.productions, this.startingSymbol)
@ -132,12 +135,12 @@ export class Grammar {
// expand all non terminals here
console.log(ansi(3, 3, 0)('s') + ansi(4, 4, 0)(state.currentIndex) + ': ' + this.startingSymbol.toString());
console.log(state.current.toString(), '\n')
if(!options.silent) console.log(ansi(3, 3, 0)('s') + ansi(4, 4, 0)(state.currentIndex) + ': ' + this.startingSymbol.toString());
if(!options.silent) console.log(state.current.toString(), '\n')
for(const token of tokens) {
state.newState();
console.log(ansi(3, 3, 0)('s') + ansi(4, 4, 0)(state.currentIndex) + ': ' + token.toString());
if(!options.silent) console.log(ansi(3, 3, 0)('s') + ansi(4, 4, 0)(state.currentIndex) + ': ' + token.toString());
for(const partialMatch of state.previousState.partialMatches) {
if(partialMatch.complete) continue;
@ -148,11 +151,15 @@ export class Grammar {
}
console.assert(state.current.partialMatches.length !== 0, ansi(4, 1, 1)('unexpected token ' + token.toString()))
if(state.current.partialMatches.length === 0) {
if(!options.silent) console.log();
process.exit(1);
}
state.current.partialMatches.forEach(expand);
state.current.deduplicate()
console.log(state.current.toString(), '\n')
if(!options.silent) console.log(state.current.toString(), '\n')
}
const completedResolutions = [];

View File

@ -7,21 +7,8 @@ export class $KeywordLParen extends Terminal { }
export class $KeywordRParen extends Terminal { }
export class $KeywordConst extends Terminal { }
export class $String extends Terminal {
value: string;
constructor(l: number, c: number, value: string) {
super(l, c);
this.value = value;
}
}
export class $Identifier extends Terminal {
value: string;
constructor(l: number, c: number, value: string) {
super(l, c);
this.value = value;
}
}
export class $String extends Terminal {}
export class $Identifier extends Terminal {}
export class $Newline extends Terminal { }

View File

@ -28,11 +28,11 @@ export function tokenize(string) {
if(token.trim() !== '') {
if(keywords.has(token)) {
const kwTokenClass = keywords.get(token);
tokens.push(new kwTokenClass(0, 0));
tokens.push(new kwTokenClass(0, 0, token));
} else if (isStringDelim(token[0]))
tokens.push(new $String(0, 0, token.substring(1, token.length - 1)));
else if (token === 'NEWLINE')
tokens.push(new $Newline(0, 0))
tokens.push(new $Newline(0, 0, token))
else
tokens.push(new $Identifier(0, 0, token));
resetToken();

View File

@ -0,0 +1,6 @@
import grammar from './grammar';
import tokenize from './tokenizer';
/**
 * Colorize a generated assembly listing with ANSI escape codes.
 *
 * Tokenizes the input and solves the asm grammar for it; each grammar
 * production's resolver emits the colorized text for its span, so the
 * first resolution is the entire program, colorized.
 *
 * @param str - plain assembly source text
 * @returns the ANSI-colorized listing, or `str` unchanged when the
 *          grammar yields no resolution (previously `solveFor(...)[0]`
 *          returned `undefined` in that case, violating the declared
 *          `string` return type)
 */
export default function colorize(str: string): string {
  const resolutions = grammar.solveFor(tokenize(str));
  // Fall back to the uncolored source rather than returning undefined.
  return resolutions.length > 0 ? resolutions[0] : str;
}

View File

@ -0,0 +1,56 @@
import { Grammar, Production, $Newline } from "../../earley";
import { ansi } from "../utils";
import * as t from "./tokens";
// TODO: add an EOF token to the shared token base, and always emit it from the tokenizer.
// const grammar = new Grammar(ps, );
// A 6-level RGB triple for the xterm 256-color cube (consumed by ansi() in ../utils).
type ansiRGB = [number, number, number];
// Palette: one color per syntactic category of the assembly listing.
const registerColor: ansiRGB = [5, 3, 0];
const numberColor: ansiRGB = [4, 4, 0];
const keywordColor: ansiRGB = [2, 4, 0];
const instructionColor: ansiRGB = [5, 1, 4];
const syscallColor: ansiRGB = [5, 1, 5];
const identifierColor: ansiRGB = [0, 4, 5];
const pointerColor: ansiRGB = [3, 0, 5];
// Grammar over the asm token stream. Each production's resolver returns the
// ANSI-colorized text for the matched span, so solving the grammar for a
// token stream yields the colorized program as a single string.
export default new Grammar([
// A program is newline-separated lines; empty lines resolve to ''.
{ left: t.$Program, right: [t.$Line], resolver: (s) => !!s ? s : '' },
{ left: t.$Program, right: [t.$Line, $Newline, t.$Program], resolver: (s, _, ss) => !!s ? s + '\n' + ss : ss},
// Non-instruction lines: section headers, data definitions, globals, labels.
{ left: t.$Line, right: [t.$Section, t.$Identifier],
resolver: (_, identifier) => `${ansi(...keywordColor).bold('section')} ${ansi(...identifierColor)(identifier.value)}` },
{ left: t.$Line, right: [t.$Identifier, t.$Db, t.$CompoundString],
resolver: (identifier, _, ns) => ` ${ansi(...identifierColor)(identifier.value)} ${ansi(...keywordColor).bold('db')} ${ns}` },
{ left: t.$Line, right: [t.$Global, t.$Identifier],
resolver: (_, {value}) => ` ${ansi(...keywordColor).bold('global')} ${ansi(...identifierColor)(value)}` },
{ left: t.$Line, right: [t.$Identifier, t.$Colon], resolver: ({value}) => `${ansi(...identifierColor)(value)}:` },
// actual instructions
{ left: t.$Line, right: [t.$Push, t.$Value], resolver: (_, v) => ` ${ansi(...instructionColor)('push')} ${v}` },
{ left: t.$Line, right: [t.$Pop, t.$Value], resolver: (_, v) => ` ${ansi(...instructionColor)('pop')} ${v}` },
{ left: t.$Line, right: [t.$Cmp, t.$Register, t.$Comma, t.$Value],
resolver: (_, register, __, value) => ` ${ansi(...instructionColor)('cmp')} ${ansi(...registerColor)(register.value)}, ${value}`},
{ left: t.$Line, right: [t.$Je, t.$Identifier], resolver: (_, {value}) => ` ${ansi(...instructionColor)('je')} ${ansi(...identifierColor)(value)}` },
{ left: t.$Line, right: [t.$Jmp, t.$Identifier], resolver: (_, {value}) => ` ${ansi(...instructionColor)('jmp')} ${ansi(...identifierColor)(value)}` },
{ left: t.$Line, right: [t.$Ret], resolver: () => ` ${ansi(...keywordColor).bold('ret')}`},
{ left: t.$Line, right: [t.$Inc, t.$Register], resolver: (_, register) => ` ${ansi(...instructionColor)('inc')} ${ansi(...registerColor)(register.value)}` },
{ left: t.$Line, right: [t.$Syscall], resolver: () => ` ${ansi(...syscallColor).bold('syscall')}` },
// mov takes either a plain value or a pointer dereference as its source.
{ left: t.$Line, right: [t.$Mov, t.$Register, t.$Comma, t.$Value],
resolver: (_, register, __, value) => ` ${ansi(...instructionColor)('mov')} ${ansi(...registerColor)(register.value)}, ${value}` },
{ left: t.$Line, right: [t.$Mov, t.$Register, t.$Comma, t.$PointerDereference],
resolver: (_, register, __, value) => ` ${ansi(...instructionColor)('mov')} ${ansi(...registerColor)(register.value)}, ${value}` },
{ left: t.$Line, right: [t.$Call, t.$Identifier], resolver: (_, {value}) => ` ${ansi(...keywordColor).bold('call')} ${ansi(...identifierColor)(value)}` },
// Memory operands: [base - offset] and [base].
{ left: t.$PointerDereference, right: [t.$LBracket, t.$Value, t.$Minus, t.$Number, t.$RBracket],
resolver: (_, v, __, n) => `${ansi(...pointerColor)('[')}${v}-${ansi(...numberColor)(n.value)}${ansi(...pointerColor)(']')}` },
{ left: t.$PointerDereference, right: [t.$LBracket, t.$Value, t.$RBracket], resolver: (_, v) => `${ansi(...pointerColor)('[')}${v}${ansi(...pointerColor)(']')}` },
// A $Value is a number, register, or identifier, colorized accordingly.
{ left: t.$Value, right: [t.$Number], resolver: (v) => ansi(...numberColor)(v.value) },
{ left: t.$Value, right: [t.$Register], resolver: (v) => ansi(...registerColor)(v.value) },
{ left: t.$Value, right: [t.$Identifier], resolver: (v) => ansi(...identifierColor)(v.value) },
// Comma-separated byte lists following `db`.
{ left: t.$CompoundString, right: [t.$Number], resolver: (n) => ansi(...numberColor)(n.value) },
{ left: t.$CompoundString, right: [t.$Number, t.$Comma, t.$CompoundString], resolver: (n, _, ns) => ansi(...numberColor)(n.value) + ',' + ns }
], t.$Program);

View File

@ -0,0 +1,53 @@
// Sample compiler-output assembly used to exercise the asm tokenizer/grammar
// colorizer. NOTE(review): this is runtime data (a template literal) — do not
// reformat or re-indent its contents; the tokenizer consumes it verbatim.
export default `section .data
QVGWSIUM db 84,104,105,115,32,83,116,114,105,110,103,32,105,115,32,67,111,110,116,97,105,110,101,100,32,105,110,32,97,32,118,97,114,105,97,98,108,101,0
ZYXGJUBF db 84,104,105,115,32,105,115,32,97,32,115,101,99,111,110,100,32,115,116,114,105,110,103,32,105,110,32,97,32,118,97,114,105,97,98,108,101,0
GPBLFTCX db 104,101,108,108,111,0
GXMDWCDF db 119,111,114,108,100,0
section .text
global _start
_start:
push rbp
mov rbp, rsp
push QVGWSIUM
push ZYXGJUBF
mov rdi, GPBLFTCX
call _log
mov rdi, GXMDWCDF
call _log
mov rdi, [rbp - 8]
call _log
mov rdi, [rbp - 16]
call _log
mov rdi, [rbp - 8]
call _log
mov rdi, [rbp - 16]
call _log
mov rsp, rbp
pop rbp
mov rax, 60
mov rdi, 0
syscall
_log:
push rdi
mov rbx, 0
_log_loop:
mov cl, [rdi]
cmp cl, 0
je _log_loop_end
inc rdi
inc rbx
jmp _log_loop
_log_loop_end:
mov rdx, rbx
mov rax, 1
mov rdi, 1
pop rsi
syscall
push 10
mov rax, 1
mov rdi, 1
mov rsi, rsp
mov rdx, 1
syscall
pop rdi
ret`;

View File

@ -0,0 +1,31 @@
import { createTokenizer } from "../generalTokenizer";
import * as tokens from "./tokens";
import {
$Newline,
} from "./../../earley";
// Tokenizer for the generated x86-64 assembly. Matchers are tried in the
// order listed here (generalTokenizer iterates the array in order), so the
// fixed keyword/instruction strings must appear before the catch-all
// identifier regex.
export default createTokenizer([
// `token: null` means "match and discard": whitespace is skipped, but
// newlines are kept as real tokens because the grammar is line-oriented.
{ match: /^[\r\t ]{1,}$/, token: null },
{ match: 'section', token: tokens.$Section },
{ match: 'db', token: tokens.$Db },
{ match: 'global', token: tokens.$Global },
{ match: '\n', token: $Newline },
// punctuation
{ match: ':', token: tokens.$Colon },
{ match: ',', token: tokens.$Comma },
{ match: '[', token: tokens.$LBracket },
{ match: ']', token: tokens.$RBracket },
{ match: '-', token: tokens.$Minus },
// instructions
{ match: 'mov', token: tokens.$Mov },
{ match: 'push', token: tokens.$Push },
{ match: 'pop', token: tokens.$Pop },
{ match: 'call', token: tokens.$Call },
{ match: 'syscall', token: tokens.$Syscall },
{ match: 'ret', token: tokens.$Ret },
{ match: 'je', token: tokens.$Je },
{ match: 'jmp', token: tokens.$Jmp },
{ match: 'cmp', token: tokens.$Cmp },
{ match: 'inc', token: tokens.$Inc },
// varying tokens: numbers, known registers, then everything else as an
// identifier (leading [A-Za-z._], no digits in the tail — TODO confirm
// that generated labels never contain digits)
{ match: /^[0-9]{1,}$/, token: tokens.$Number },
{ match: /^(rbp|rsp|rax|rcx|rbx|rdx|rdi|rsi|al|bl|cl|dl|ah|bh|ch|dh|ax|bx|cx|dx|eax|ebx|ecx|edx)$/, token: tokens.$Register },
{ match: /^[A-Za-z._][A-Za-z_]{0,}$/, token: tokens.$Identifier },
])

View File

@ -1,15 +1,36 @@
import { Terminal } from "../../earley";
import { Terminal, NonTerminal } from "../../earley";
// Instruction keywords...
export class $Mov extends Terminal {}
export class $Push extends Terminal {}
export class $Pop extends Terminal {}
export class $Call extends Terminal {}
export class $Syscall extends Terminal {}
export class $Ret extends Terminal {}
export class $Je extends Terminal {}
export class $Inc extends Terminal {}
export class $Cmp extends Terminal {}
export class $Jmp extends Terminal {}
export class $Mov extends Terminal { }
export class $Push extends Terminal { }
export class $Pop extends Terminal { }
export class $Call extends Terminal { }
export class $Syscall extends Terminal { }
export class $Ret extends Terminal { }
export class $Je extends Terminal { }
export class $Inc extends Terminal { }
export class $Cmp extends Terminal { }
export class $Jmp extends Terminal { }
export class $Identifier extends Terminal {}
// keywords
export class $Section extends Terminal { }
export class $Global extends Terminal { }
export class $Db extends Terminal { }
export class $LBracket extends Terminal { }
export class $RBracket extends Terminal { }
export class $Comma extends Terminal { }
export class $Colon extends Terminal { }
export class $Minus extends Terminal { }
// varying tokens
export class $Identifier extends Terminal { }
export class $String extends Terminal { }
export class $Number extends Terminal { }
export class $Register extends Terminal { }
// non terminals
export class $Line extends NonTerminal { }
export class $PointerDereference extends NonTerminal { }
export class $Program extends NonTerminal { }
export class $CompoundString extends NonTerminal { }
export class $Value extends NonTerminal { }

View File

@ -1,6 +1,12 @@
import { TerminalTokenClass } from "../earley";
import { inspect } from 'util';
export function createTokenizer(tokenMap: Map<string | RegExp, TerminalTokenClass>) {
interface TokenMatcher {
match: RegExp | string,
token: TerminalTokenClass
}
export function createTokenizer(tokenMap: TokenMatcher[]) {
return function tokenize(str: string) {
let tokens = [];
let token = '';
@ -11,10 +17,13 @@ export function createTokenizer(tokenMap: Map<string | RegExp, TerminalTokenClas
column++;
token += char;
for(const [matcher, tokenClass] of tokenMap) {
for(const {match: matcher, token: tokenClass} of tokenMap) {
if(typeof matcher === 'string') {
if(matcher === token) {
tokens.push(new tokenClass(line, column, token));
if(tokenClass !== null) {
tokens.push(new tokenClass(line, column - token.length + 1, token));
}
token = '';
} else {
// dw about it
}
@ -34,9 +43,19 @@ export function createTokenizer(tokenMap: Map<string | RegExp, TerminalTokenClas
// ! matches, then testing them for their lookahead afterwards
// ! in another loop, and only tokenizing if you have only one
// ! option, and that option will fail on the lookahead.
if(tokenClass !== null) {
tokens.push(new tokenClass(line, column - token.length + 1, token));
}
token = '';
} else {
// the lookahead matches this too, so we should probably hold off
// on tokenizing it...
}
} else {
tokens.push(new tokenClass(line, column, token));
if(tokenClass !== null) {
tokens.push(new tokenClass(line, column - token.length + 1, token));
}
token = '';
}
}
}
@ -47,5 +66,7 @@ export function createTokenizer(tokenMap: Map<string | RegExp, TerminalTokenClas
column = 0;
}
}
return tokens;
}
}

View File

@ -0,0 +1,15 @@
import { Terminal } from '../earley';
import { createTokenizer } from './generalTokenizer';
// Smoke test for createTokenizer: defines a tiny arithmetic token set and
// prints the resulting token stream for a small input.
class $Number extends Terminal { }
class $Plus extends Terminal { }
class $Newline extends Terminal { }
const tokenizer = createTokenizer([
{ match: /^[0-9]{1,}$/, token: $Number },
// non-newline whitespace is matched but discarded (token: null)
{ match: /^[\r\t ]{1,}$/, token: null },
{ match: '\n', token: $Newline },
{ match: '+', token: $Plus },
])
// Expected stream: number, plus, newline, number (trailing spaces dropped).
console.log(tokenizer("5 + \n 6 ").map(v => v.toString()).join(' '));

13
src/util/utils.ts 100644
View File

@ -0,0 +1,13 @@
import * as chalk from 'chalk';
import { Token, $Newline } from '../earley';
/**
 * Dump a token stream to stdout: tokens are written space-separated, and a
 * line break is emitted after every $Newline token so the printed stream
 * mirrors the shape of the original source. Always ends with a newline.
 */
export function printTokens(tokens: Token[]) {
  tokens.forEach((token) => {
    process.stdout.write(token.toString() + ' ');
    if (token instanceof $Newline) console.log();
  });
  console.log();
}
// Map a 0-5 RGB triple onto the xterm 256-color cube (indices 16..231).
const rgb2ansi = (r: number, g: number, b: number) => 16 + 36 * r + 6 * g + b;
// chalk styler for a 6-level RGB color; g and b default to r (grey shorthand).
export const ansi = (r: number, g: number = r, b: number = r) => chalk.ansi256(rgb2ansi(r, g, b));

View File

@ -1,7 +1,7 @@
# Todo List
[ ] colorize the assembly output
- write a tokenizer & grammar for asm. then colorize it.
[x] colorize the assembly output
[ ] rewrite disco tokenizer to the new generalTokenizer
[ ] add number support
[ ] add comment support
[ ] add fixed length array support
@ -10,4 +10,4 @@
[ ] optionally artifically slow down compilation (for fun)
[ ] implement some basic maths operations
[ ] implement multi-argument invocations
[ ] implement return values
[ ] implement return values