From 417827bb7beb7588f64c32ac2c7e3eef459b4992 Mon Sep 17 00:00:00 2001 From: Val Date: Sat, 24 Jun 2023 13:32:53 -0500 Subject: [PATCH 1/5] Fix inconsistent whitespace --- assemble.py | 14 +++++++------- msprobe.py | 10 +++++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/assemble.py b/assemble.py index 291f870..1532230 100644 --- a/assemble.py +++ b/assemble.py @@ -6,11 +6,11 @@ oneOpOpcodes = ['rrc', 'swpb', 'rra', 'sxt', 'push', 'call', 'reti'] emulatedOpcodes = { 'ret' : 'mov @sp+, pc', -'clrc' : 'bic #1, sr', -'setc' : 'bis #1, sr', -'clrz' : 'bic #2, sr', -'setz' : 'bis #2, sr', -'clrn' : 'bic #4, sr', +'clrc' : 'bic #1, sr', +'setc' : 'bis #1, sr', +'clrz' : 'bic #2, sr', +'setz' : 'bis #2, sr', +'clrn' : 'bic #4, sr', 'setn' : 'bis #4, sr', 'dint' : 'bic #8, sr', 'eint' : 'bis #8, sr', @@ -226,7 +226,7 @@ def assembleTwoOpInstruction(ins): opcode, byteMode = getOpcode(ins) out[0:4] = bitrep(twoOpOpcodes.index(opcode), 4) out[9] = bitrep(byteMode, 1) - + #Find the location of the first operand start = ins.find(' ') + 1 end = ins.find(',') @@ -398,4 +398,4 @@ def assembleRegister(reg, opcode=None, isDestReg = False): adrmode = 0 regID = getRegister(reg) - return extensionWord, adrmode, regID \ No newline at end of file + return extensionWord, adrmode, regID diff --git a/msprobe.py b/msprobe.py index be8df24..df15ac8 100755 --- a/msprobe.py +++ b/msprobe.py @@ -159,7 +159,7 @@ def disassemble(instruction): if ins[0:3] == '001': return disassembleJumpInstruction(ins) elif ins[0:6] == '000100': - return disassembleOneOpInstruction(ins) + return disassembleOneOpInstruction(ins) else: return disassembleTwoOpInstruction(ins) @@ -346,18 +346,18 @@ def disassembleAddressingMode(reg, adrmode): elif adrmode == 1: regOutput = adrModes[adrmode].format(register=registerNames[reg], index=hex(asm[PC + 1])) extensionWord = True - + elif adrmode == 2: regOutput = adrModes[adrmode].format(register=registerNames[reg]) - + elif adrmode == 3 and reg == 0: 
#PC was incremented for a constant regOutput = '#' + hex(asm[PC + 1]) extensionWord = True - + elif adrmode == 3: regOutput = adrModes[adrmode].format(register=registerNames[reg]) return (regOutput, extensionWord) if __name__ == '__main__': - main() \ No newline at end of file + main() From 9186babb9857f3b57c45ddadec62c9ac743ce550 Mon Sep 17 00:00:00 2001 From: Val Date: Sat, 24 Jun 2023 14:05:51 -0500 Subject: [PATCH 2/5] Bug fixes msprobe.py: - Default loadaddr was left undefined unless `-mc` passed assemble.py: - Off-by-one in OneOp assembly produced incorrect machine code - Off-by-one in Indirect mode assembly produced invalid register name - Out-of-range relative jumps produced garbage output --- assemble.py | 6 ++++-- msprobe.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/assemble.py b/assemble.py index 1532230..e1f7a4c 100644 --- a/assemble.py +++ b/assemble.py @@ -213,7 +213,7 @@ def assembleOneOpInstruction(ins): #We need to provide the opcode here to detect the push bug; see the function itself extensionWord, adrmode, regID = assembleRegister(reg, opcode=opcode) - out[11:12] = bitrep(adrmode, 2) + out[10:12] = bitrep(adrmode, 2) out[12:] = bitrep(regID, 4) appendWord(int(''.join(str(e) for e in out), 2)) if extensionWord: @@ -288,6 +288,8 @@ def assembleJumpInstruction(ins): offset = int(dest, 16) if offset % 2 != 0: raise IllegalOffsetException(offset) + if offset <= -0x3fe or offset >= 0x400: + raise IllegalOffsetException(offset) #Jump offsets are multiplied by two, added by two (PC increment), and sign extended out[6:] = bitrep((offset - 2) // 2, 10) else: @@ -358,7 +360,7 @@ def assembleRegister(reg, opcode=None, isDestReg = False): extensionWord = 0 else: adrmode = 2 - regID = getRegister(reg[reg.find('@') : ]) + regID = getRegister(reg[reg.find('@') + 1 : ]) elif '#' in reg: #Use PC to specify an immediate constant if isDestReg: raise IllegalAddressingModeException(0, reg) diff --git a/msprobe.py b/msprobe.py index 
df15ac8..89fdf23 100755 --- a/msprobe.py +++ b/msprobe.py @@ -57,7 +57,7 @@ def main(): disasmMode = False if disasmMode: - if args.loadaddr == '' and args.microcorruptionparse: #We might have read loadaddr from -mc instead + if args.loadaddr == '' or args.microcorruptionparse: #We might have read loadaddr from -mc instead pcBase = 0 else: pcBase = int(args.loadaddr, 16) From 3be3b2d04e49976330a83cb9fd98fdb203c44b06 Mon Sep 17 00:00:00 2001 From: Val Date: Sat, 24 Jun 2023 14:14:22 -0500 Subject: [PATCH 3/5] msprobe.py: UX Improvements - Replace invalid instruction marker '!!!' with the invalid opcode - Exit with a cancellation message instead of unwinding stack on SIGINT --- msprobe.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/msprobe.py b/msprobe.py index 89fdf23..452d7f7 100755 --- a/msprobe.py +++ b/msprobe.py @@ -11,6 +11,7 @@ import sys import pdb +from signal import signal, SIGINT from assemble import asmMain PC = 0 #Incremented by each disassembled instruction, incremented in words NOT bytes @@ -303,6 +304,8 @@ def disassembleTwoOpInstruction(ins): if reassembleins: finalins = opcode + bytemode + ' ' + (regOutputDst if usesDest else regOutputSrc) + if '!!!' 
in finalins: + finalins = finalins.replace('!!!', f'!{int(ins,2):04x}!') return finalins @@ -360,4 +363,5 @@ def disassembleAddressingMode(reg, adrmode): return (regOutput, extensionWord) if __name__ == '__main__': + signal(SIGINT, lambda *args: print('\nAction cancelled by user.') + exit(0)) main() From 3b7ae614b7069917ea8e8d8630e1f1081c790e99 Mon Sep 17 00:00:00 2001 From: Val Date: Sat, 24 Jun 2023 14:21:10 -0500 Subject: [PATCH 4/5] assemble.py: Alternate mnemonics - `jnc` = `jlo` - `jnz` = `jne` - `jc` = `jhs` - `jz` = `jeq` --- assemble.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/assemble.py b/assemble.py index e1f7a4c..6736c35 100644 --- a/assemble.py +++ b/assemble.py @@ -1,7 +1,7 @@ import sys import pdb -jumpOpcodes = ['jne', 'jeq', 'jlo', 'jhs', 'jn ', 'jge', 'jl ', 'jmp'] +jumpOpcodes = ['jne', 'jeq', 'jlo', 'jhs', 'jn', 'jge', 'jl', 'jmp'] twoOpOpcodes = ['!!!', '!!!', '!!!', '!!!', 'mov', 'add', 'addc', 'subc', 'sub', 'cmp', 'dadd', 'bit', 'bic', 'bis', 'xor', 'and'] oneOpOpcodes = ['rrc', 'swpb', 'rra', 'sxt', 'push', 'call', 'reti'] emulatedOpcodes = { @@ -29,6 +29,10 @@ 'adc' : 'addc #0, {reg}', 'dadc' : 'dadd #0, {reg}', 'sbc' : 'subc #0, {reg}', +'jnc' : 'jlo {reg}', #jlo, jhs are aliases of jnc, jc +'jnz' : 'jne {reg}', #jnz, jz are aliases of jne, jeq +'jc' : 'jhs {reg}', +'jz' : 'jeq {reg}', } def bitrep(number, bits = 16): From f614fe497cf867919422f00266b9f4076e94ea66 Mon Sep 17 00:00:00 2001 From: Val Date: Tue, 4 Jul 2023 20:33:03 -0500 Subject: [PATCH 5/5] assemble.py: fix input parsing limitations with regex - Remove comments from lines before processing - Remove comment-stripping logic from each function - Skip empty/comment-only lines - Use regex to identify relative jump offset number - This fixes a bug where jump offset always needed + or - - Use regex to split opcode --- assemble.py | 72 +++++++++++++++++++---------------------------------- 1 file changed, 26 insertions(+), 46 deletions(-) diff --git 
a/assemble.py b/assemble.py index 6736c35..9070659 100644 --- a/assemble.py +++ b/assemble.py @@ -1,5 +1,6 @@ import sys import pdb +import re jumpOpcodes = ['jne', 'jeq', 'jlo', 'jhs', 'jn', 'jge', 'jl', 'jmp'] twoOpOpcodes = ['!!!', '!!!', '!!!', '!!!', 'mov', 'add', 'addc', 'subc', 'sub', 'cmp', 'dadd', 'bit', 'bic', 'bis', 'xor', 'and'] @@ -108,6 +109,14 @@ def asmMain(assembly, outfile=None, silent=False): for ins in instructions.splitlines(): + #Strip leading and trailing whitespace + ins = ins.strip() + ins = re.split(r'\s*[/;]', ins)[0] #Remove comments + #Skip empty lines or lines beginning with a comment + if len(ins) == 0: + continue + + #Handle label registration if ':' in ins: try: registerLabel(ins) @@ -206,13 +215,7 @@ def assembleOneOpInstruction(ins): #Figure out where the comment is start = ins.find(' ') + 1 - if ';' in ins: - end = ins.find(';') - elif '//' in ins: - end = ins.find('//') - else: - end = len(ins) - reg = ins[start : end] + reg = ''.join(ins[start :].split()) #Remove whitespace #We need to provide the opcode here to detect the push bug; see the function itself extensionWord, adrmode, regID = assembleRegister(reg, opcode=opcode) @@ -227,30 +230,21 @@ def assembleTwoOpInstruction(ins): """Assembles a two-operand (format III) instruction.""" out = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - opcode, byteMode = getOpcode(ins) + #Separate instruction into parts without spaces or commas + operands = re.match(r'([\w.]+)\s+([^,]+)[,\s]+([^,]+)', ins) + if not operands: + raise IllegalOpcodeException(ins) + opcodeRaw, regSrc, regDest = operands.groups() + + opcode, byteMode = getOpcode(opcodeRaw) out[0:4] = bitrep(twoOpOpcodes.index(opcode), 4) out[9] = bitrep(byteMode, 1) - #Find the location of the first operand - start = ins.find(' ') + 1 - end = ins.find(',') - regSrc = ins[start : end] - extensionWordSrc, adrmodeSrc, regIDSrc = assembleRegister(regSrc) out[10:12] = bitrep(adrmodeSrc, 2) out[4:8] = bitrep(regIDSrc, 4) - #Figure 
out where the comment is - start = end + 2 #Right after the comma, and the space after the comma - if ';' in ins: - end = ins.find(';') - elif '//' in ins: - end = ins.find('//') - else: - end = len(ins) - regDest = ins[start : end] - extensionWordDest, adrmodeDest, regIDDest = assembleRegister(regDest, isDestReg = True) out[8] = bitrep(adrmodeDest, 1) @@ -275,20 +269,14 @@ def assembleJumpInstruction(ins): out[3:6] = bitrep(jumpOpcodes.index(opcode), 3) - #Figure out where the comment is + #Figure out where the operand is start = ins.find(' ') + 1 - if ';' in ins: - end = ins.find(';') - elif '//' in ins: - end = ins.find('//') - else: - end = len(ins) - dest = ''.join(ins[start : end].split()) #Remove whitespace + dest = ''.join(ins[start :].split()) #Remove whitespace #Immediate offset char1 = dest[0] #Is this a number? - if char1 == '+' or char1 == '-' or char1 in [i for i in range(10)]: + if re.match(r'[+\-]?[0x|0b]?[0-9A-Fa-f]+', dest): offset = int(dest, 16) if offset % 2 != 0: raise IllegalOffsetException(offset) @@ -305,6 +293,7 @@ def assembleJumpInstruction(ins): def getRegister(registerName): """Decodes special register names (or normal register names).""" + registerName = registerName.strip().lower() #Strip leading and trailing whitespace, and convert to lowercase specialRegisterNames = ['pc', 'sp', 'sr', 'cg'] if registerName.lower() in specialRegisterNames: return specialRegisterNames.index(registerName) @@ -313,21 +302,12 @@ def getRegister(registerName): def getOpcode(ins): """Returns the opcode and whether byte mode is being used.""" - if ' ' in ins: - end = ins.find(' ') #Regular instruction with operands - elif ';' in ins: - end = ins.find(';') #No-operand with comment - elif '//' in ins: - end = ins.find('//') #No-operand with comment - else: - end = len(ins) #No-operand - opcode = ins[0 : end] #Opcode name will be before the first space + #Split the opcode on characters that can't be used in an identifier + #Example: [mov].b r15, r15 + opcode 
= re.match(r'[\w]+', ins)[0] byteMode = False - if '.b' in opcode: - opcode = opcode[0 : opcode.find('.b')] + if '.b' in ins: byteMode = True - elif '.w' in opcode: - opcode = opcode[0 : opcode.find('.w')] return opcode, byteMode def appendWord(word): @@ -361,7 +341,7 @@ def assembleRegister(reg, opcode=None, isDestReg = False): #Indirect can be faked with an index of 0. What a waste. if isDestReg: adrmode = 1 - extensionWord = 0 + extensionWord = "0" else: adrmode = 2 regID = getRegister(reg[reg.find('@') + 1 : ])