diff --git a/assemble.py b/assemble.py index 291f870..9070659 100644 --- a/assemble.py +++ b/assemble.py @@ -1,16 +1,17 @@ import sys import pdb +import re -jumpOpcodes = ['jne', 'jeq', 'jlo', 'jhs', 'jn ', 'jge', 'jl ', 'jmp'] +jumpOpcodes = ['jne', 'jeq', 'jlo', 'jhs', 'jn', 'jge', 'jl', 'jmp'] twoOpOpcodes = ['!!!', '!!!', '!!!', '!!!', 'mov', 'add', 'addc', 'subc', 'sub', 'cmp', 'dadd', 'bit', 'bic', 'bis', 'xor', 'and'] oneOpOpcodes = ['rrc', 'swpb', 'rra', 'sxt', 'push', 'call', 'reti'] emulatedOpcodes = { 'ret' : 'mov @sp+, pc', -'clrc' : 'bic #1, sr', -'setc' : 'bis #1, sr', -'clrz' : 'bic #2, sr', -'setz' : 'bis #2, sr', -'clrn' : 'bic #4, sr', +'clrc' : 'bic #1, sr', +'setc' : 'bis #1, sr', +'clrz' : 'bic #2, sr', +'setz' : 'bis #2, sr', +'clrn' : 'bic #4, sr', 'setn' : 'bis #4, sr', 'dint' : 'bic #8, sr', 'eint' : 'bis #8, sr', @@ -29,6 +30,10 @@ 'adc' : 'addc #0, {reg}', 'dadc' : 'dadd #0, {reg}', 'sbc' : 'subc #0, {reg}', +'jnc' : 'jlo {reg}', #jlo, jhs are aliases of jnc, jc +'jnz' : 'jne {reg}', #jnz, jz are aliases of jne, jeq +'jc' : 'jhs {reg}', +'jz' : 'jeq {reg}', } def bitrep(number, bits = 16): @@ -104,6 +109,14 @@ def asmMain(assembly, outfile=None, silent=False): for ins in instructions.splitlines(): + #Strip leading and trailing whitespace + ins = ins.strip() + ins = re.split(r'\s*[/;]', ins)[0] #Remove comments + #Skip empty lines or lines beginning with a comment + if len(ins) == 0: + continue + + #Handle label registraation if ':' in ins: try: registerLabel(ins) @@ -202,18 +215,12 @@ def assembleOneOpInstruction(ins): #Figure out where the comment is start = ins.find(' ') + 1 - if ';' in ins: - end = ins.find(';') - elif '//' in ins: - end = ins.find('//') - else: - end = len(ins) - reg = ins[start : end] + reg = ''.join(ins[start :].split()) #Remove whitespace #We need to provide the opcode here to detect the push bug; see the function itself extensionWord, adrmode, regID = assembleRegister(reg, opcode=opcode) - out[11:12] = bitrep(adrmode, 2) + out[10:12] = bitrep(adrmode, 2) out[12:] = bitrep(regID, 4) appendWord(int(''.join(str(e) for e in out), 2)) if extensionWord: @@ -223,30 +230,21 @@ def assembleTwoOpInstruction(ins): """Assembles a two-operand (format III) instruction.""" out = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - opcode, byteMode = getOpcode(ins) + #Separate instruction into parts without spaces or commas + operands = re.match(r'([\w.]+)\s+([^,]+)[,\s]+([^,]+)', ins) + if not operands: + raise IllegalOpcodeException(ins) + opcodeRaw, regSrc, regDest = operands.groups() + + opcode, byteMode = getOpcode(opcodeRaw) out[0:4] = bitrep(twoOpOpcodes.index(opcode), 4) out[9] = bitrep(byteMode, 1) - - #Find the location of the first operand - start = ins.find(' ') + 1 - end = ins.find(',') - regSrc = ins[start : end] extensionWordSrc, adrmodeSrc, regIDSrc = assembleRegister(regSrc) out[10:12] = bitrep(adrmodeSrc, 2) out[4:8] = bitrep(regIDSrc, 4) - #Figure out where the comment is - start = end + 2 #Right after the comma, and the space after the comma - if ';' in ins: - end = ins.find(';') - elif '//' in ins: - end = ins.find('//') - else: - end = len(ins) - regDest = ins[start : end] - extensionWordDest, adrmodeDest, regIDDest = assembleRegister(regDest, isDestReg = True) out[8] = bitrep(adrmodeDest, 1) @@ -271,23 +269,19 @@ def assembleJumpInstruction(ins): out[3:6] = bitrep(jumpOpcodes.index(opcode), 3) - #Figure out where the comment is + #Figure out where the operand is start = ins.find(' ') + 1 - if ';' in ins: - end = ins.find(';') - elif '//' in ins: - end = ins.find('//') - else: - end = len(ins) - dest = ''.join(ins[start : end].split()) #Remove whitespace + dest = ''.join(ins[start :].split()) #Remove whitespace #Immediate offset char1 = dest[0] #Is this a number? - if char1 == '+' or char1 == '-' or char1 in [i for i in range(10)]: + if re.match(r'[+\-]?[0x|0b]?[0-9A-Fa-f]+', dest): offset = int(dest, 16) if offset % 2 != 0: raise IllegalOffsetException(offset) + if offset <= -0x3fe or offset >= 0x400: + raise IllegalOffsetException(offset) #Jump offsets are multiplied by two, added by two (PC increment), and sign extended out[6:] = bitrep((offset - 2) // 2, 10) else: @@ -299,6 +293,7 @@ def assembleJumpInstruction(ins): def getRegister(registerName): """Decodes special register names (or normal register names).""" + registerName = registerName.strip().lower() #Strip leading and trailing whitespace, and convert to lowercase specialRegisterNames = ['pc', 'sp', 'sr', 'cg'] if registerName.lower() in specialRegisterNames: return specialRegisterNames.index(registerName) @@ -307,21 +302,12 @@ def getRegister(registerName): def getOpcode(ins): """Returns the opcode and whether byte mode is being used.""" - if ' ' in ins: - end = ins.find(' ') #Regular instruction with operands - elif ';' in ins: - end = ins.find(';') #No-operand with comment - elif '//' in ins: - end = ins.find('//') #No-operand with comment - else: - end = len(ins) #No-operand - opcode = ins[0 : end] #Opcode name will be before the first space + #Split the opcode on characters that can't be used in an identifier + #Example: [mov].b r15, r15 + opcode = re.match(r'[\w]+', ins)[0] byteMode = False - if '.b' in opcode: - opcode = opcode[0 : opcode.find('.b')] + if '.b' in ins: byteMode = True - elif '.w' in opcode: - opcode = opcode[0 : opcode.find('.w')] return opcode, byteMode def appendWord(word): @@ -355,10 +341,10 @@ def assembleRegister(reg, opcode=None, isDestReg = False): #Indirect can be faked with an index of 0. What a waste. if isDestReg: adrmode = 1 - extensionWord = 0 + extensionWord = "0" else: adrmode = 2 - regID = getRegister(reg[reg.find('@') : ]) + regID = getRegister(reg[reg.find('@') + 1 : ]) elif '#' in reg: #Use PC to specify an immediate constant if isDestReg: raise IllegalAddressingModeException(0, reg) @@ -398,4 +384,4 @@ def assembleRegister(reg, opcode=None, isDestReg = False): adrmode = 0 regID = getRegister(reg) - return extensionWord, adrmode, regID \ No newline at end of file + return extensionWord, adrmode, regID diff --git a/msprobe.py b/msprobe.py index be8df24..452d7f7 100755 --- a/msprobe.py +++ b/msprobe.py @@ -11,6 +11,7 @@ import sys import pdb +from signal import signal, SIGINT from assemble import asmMain PC = 0 #Incremented by each disassembled instruction, incremented in words NOT bytes @@ -57,7 +58,7 @@ def main(): disasmMode = False if disasmMode: - if args.loadaddr == '' and args.microcorruptionparse: #We might have read loadaddr from -mc instead + if args.loadaddr == '' or args.microcorruptionparse: #We might have read loadaddr from -mc instead pcBase = 0 else: pcBase = int(args.loadaddr, 16) @@ -159,7 +160,7 @@ def disassemble(instruction): if ins[0:3] == '001': return disassembleJumpInstruction(ins) elif ins[0:6] == '000100': - return disassembleOneOpInstruction(ins) + return disassembleOneOpInstruction(ins) else: return disassembleTwoOpInstruction(ins) @@ -303,6 +304,8 @@ def disassembleTwoOpInstruction(ins): if reassembleins: finalins = opcode + bytemode + ' ' + (regOutputDst if usesDest else regOutputSrc) + if '!!!' in finalins: + finalins = finalins.replace('!!!', f'!{int(ins,2):04x}!') return finalins @@ -346,18 +349,19 @@ def disassembleAddressingMode(reg, adrmode): elif adrmode == 1: regOutput = adrModes[adrmode].format(register=registerNames[reg], index=hex(asm[PC + 1])) extensionWord = True - + elif adrmode == 2: regOutput = adrModes[adrmode].format(register=registerNames[reg]) - + elif adrmode == 3 and reg == 0: #PC was incremented for a constant regOutput = '#' + hex(asm[PC + 1]) extensionWord = True - + elif adrmode == 3: regOutput = adrModes[adrmode].format(register=registerNames[reg]) return (regOutput, extensionWord) if __name__ == '__main__': - main() \ No newline at end of file + signal(SIGINT, lambda *args: print('\nAction cancelled by user.') + exit(0)) + main()