More assembler progress

author: Test_User <hax@andrewyu.org> 2023-06-15 21:54:09 -0400
committer: Test_User <hax@andrewyu.org> 2023-06-15 21:54:09 -0400
commit: 572325d28f93282b16df49a5c7146781ee6d49f0 (patch)
tree: 19730e9ca83bb3ccb45edda4f446fd15a8588607
parent: 4602f3cb8b2f00abdef7c739847828b0025a492a (diff)
download: development-572325d28f93282b16df49a5c7146781ee6d49f0.tar.gz
development-572325d28f93282b16df49a5c7146781ee6d49f0.zip
4 files changed, 275 insertions, 23 deletions
diff --git a/assembler/arbitrary_constants.py b/assembler/arbitrary_constants.py
index 058f00a..4518f10 100644
--- a/assembler/arbitrary_constants.py
+++ b/assembler/arbitrary_constants.py
@@ -1,8 +1,16 @@
+# This value will eventually be a part of the assembly language itself and not a constant here
+
+current_mode = 128
+
+print("Assembling for mode:", current_mode)
+
 # These numbers don't change behavior much and aren't specified yet, but there being a number is required
 
-max_immediate_value_size = 64
+max_immediate_value_size = current_mode * 16
 immediate_bit_size = math.ceil(math.log2(max_immediate_value_size))
 
+print("Bits required to specify immediate references' size or an index into it:", immediate_bit_size)
+
 num_general_registers = 16
 num_segment_registers = 8
 
@@ -13,13 +21,9 @@ num_registers = num_general_registers + num_segment_registers + num_special_regi
 bits_for_register_selection = math.ceil(math.log2(num_registers))
 
 print("Number of different registers (total):", num_registers)
-print("Bits required to specify which of them:", bits_for_register_selection)
-
-# this value will eventually be a part of the assembly language itself and not a constant here
+print("Bits required to specify which register to use:", bits_for_register_selection)
 
-current_mode = 128
 
-print("Assembling for mode:", current_mode)
 
 bits_for_current_mode = math.log2(current_mode)
 if bits_for_current_mode//1 != bits_for_current_mode:
diff --git a/assembler/hasm.py b/assembler/hasm.py
index d9f33b2..6bbcf53 100755
--- a/assembler/hasm.py
+++ b/assembler/hasm.py
@@ -15,17 +15,29 @@ paths = glob.glob(datadir+"/instructions/*/*")
 
 paths.sort()
 
+def my_int(size):
+	# Parse decimal, 0x-prefixed hex, h-suffixed hex or 0b-prefixed binary text; raises ValueError if invalid
+	base = 10
+	if size[:2] == "0x":
+		base = 16 # int() will remove the 0x
+	elif size[-1:] == "h": # Was size[:-1], which compared everything except the last character
+		if size[:2].lower() == "0x": # We don't want int() to silently remove this
+			raise ValueError("Not a valid number: "+size) # ValueError is what the callers catch and report
+		size, base = size[:-1], 16 # int() would reject the h suffix, so strip it
+	elif size[:2] == "0b":
+		base = 2 # As with 0x, int() will remove this
+	return int(size, base)
+
 instructions = {}
 
 num_instructions = 0
 for file in paths:
 	if os.path.isfile(file) and not os.path.basename(file).endswith("notes.txt"):
 		name = os.path.splitext(os.path.basename(file))[0]
-		try:
-			_ = instructions[name]
+
+		if name in instructions:
 			raise Exception("Instruction name conflict")
-		except KeyError:
-			pass
+
 		instructions[name] = {"path": file, "id": num_instructions}
 		num_instructions += 1
 
@@ -54,6 +66,16 @@ f = open(thisdir+"/arbitrary_constants.py");
 exec(f.read())
 f.close()
 
+registers = ["MR", "PR", "FR", "OFR", "RS", "RW", "OIPH", "OIPE", "OIPDE", "OPRH", "OPRE", "OPRDE", "IP", "SP", "BP"] # Fixed-name registers; numbered ones are appended below
+i=0
+while i < num_general_registers: # General registers: R0, R1, ...
+	registers.append("R"+str(i))
+	i += 1
+i=0
+while i < num_segment_registers: # Segment registers: SS0, SS1, ...
+	registers.append("SS"+str(i))
+	i += 1
+
 argc = len(sys.argv)
 print("Number of args given:", argc)
 
@@ -78,12 +100,15 @@ inputf.close()
 
 print(input)
 
-# first pass: evaluate instructions, calculate sizes, validate syntax, evaluate label addresses
-linenum = 0
+labels = {}
+
+# first pass: validate instructions, validate syntax (mostly), evaluate what labels exist and their default sizes, split up input to the relevant data
+lines = [] # For the next pass
+
 in_queue = False
-last_queue_opcode = None
-for line in input.split('\n'):
-	linenum+=1
+linenum = 0
+for line in input.split("\n"):
+	linenum += 1
 	line = line.strip()
 
 	if line == "" or line[0] == ";":
@@ -97,6 +122,224 @@ for line in input.split('\n'):
 			raise Exception("Invalid instruction at line "+str(linenum)+": "+line)
 		i+=1
 
-	print(line[0:i])
+	command = line[:i]
+
+	i += 1
+
+	param_start = i
+
+	params = []
+
+	num_parameters = 0
+	was_space = True
+	escaped = False
+	in_singlequote = False
+	in_doublequote = False
+	in_backtick = False
+	last_space = i-1
+	while i < len(line):
+		if line[i] == "'":
+			raise Exception("Error at line "+str(linenum)+": ' has no defined meaning in this syntax yet!")
+		elif line[i] == '"':
+			if in_doublequote and not escaped:
+				in_doublequote = False
+			elif not in_doublequote:
+				in_doublequote = True
+		elif line[i] == "`":
+			if in_backtick and not escaped:
+				in_backtick = False
+			elif not in_backtick:
+				in_backtick = True
+		elif line[i] == ";" and was_space:
+			break # rest is a comment
+
+		if line[i] == " " or line[i] == "	":
+			old_was_space = was_space
+			was_space = (not in_singlequote and not in_doublequote and not in_backtick)
+			if was_space and not old_was_space:
+				num_parameters = num_parameters + 1
+				if line[i-1] == ",":
+					params.append(line[last_space+1:i-1])
+				else:
+					params.append(line[last_space+1:i])
+
+			last_space = i
+		else:
+			was_space = False
+
+		i += 1
+
+
+	if i == len(line):
+		num_parameters += 1
+		params.append(line[last_space+1:i])
+
+	if command in instructions:
+		if num_parameters != instructions[command]['params']:
+			raise Exception("Error at line "+str(linenum)+": Wrong number of parameters given for this instruction: "+line)
+		# Not actually doing anything with this on this pass
+	elif command == "declare":
+		if in_queue:
+			raise Exception("Error at line "+str(linenum)+": Cannot declare data inside an instruction queue: "+line)
+
+		pass # Again nothing to do on this pass, but important to have these here so they won't be seen as unknown instructions
+	elif command == "{":
+		if num_parameters != 0:
+			raise Exception("Error at line "+str(linenum)+": { does not take any parameters!")
+
+		if in_queue:
+			raise Exception("Error at line "+str(linenum)+": Instruction queues cannot be nested!")
+		in_queue = True
+	elif command == "}":
+		if num_parameters != 0:
+			raise Exception("Error at line "+str(linenum)+": } does not take any parameters!")
+
+		if not in_queue:
+			raise Exception("Error at line "+str(linenum)+": Found } but no matching { before it!")
+		in_queue = False
+	elif command[-1] == ":":
+		if in_queue:
+			raise Exception("Error at line "+str(linenum)+": Cannot declare a label inside an instruction queue: "+line)
+
+		if num_parameters != 0:
+			raise Exception("Error at line "+str(linenum)+": Labels do not take any parameters!")
+
+		label = command[:-1]
+
+		size = current_mode
+		if len(label) > 0 and label[0] == "{":
+			i = 1
+			size = ""
+			while i < len(label):
+				if label[i] == "}":
+					break
+
+				size += label[i]
+
+				i += 1
+			if i == len(label):
+				raise Exception("Error at line "+str(linenum)+": Label starts with { but doesn't contain a }: "+line)
+
+			label = label[i+1:]
+
+			try:
+				size = my_int(size)
+			except ValueError:
+				raise Exception("Error at line "+str(linenum)+": Label size is not a valid number: "+line)
+
+		if label in labels:
+			raise Exception("Error at line "+str(linenum)+": Label already declared at line "+str(labels[label]["linenum"])+": "+line)
+
+		labels[label] = {"linenum": linenum, "bits": size}
+	else:
+		raise Exception("Unknown instruction at line "+str(linenum)+": "+command)
+
+	lines.append({"command": command, "params": params, "linenum": linenum})
+
+if in_queue:
+	raise Exception("Unterminated instruction queue found at <EOF>!")
+
+print("First pass results: labels =", labels, ", lines =", lines)
+
+# second pass: calculate all sizes, calculate label addresses
+def get_size(queue):
+	# Return the encoded size, in bits, of one instruction queue (a list of first-pass line dicts)
+	size = 0
+	size += size_instruction_queue_bits
+	last_opcode = None
+	current_opcodes = None
+	for instruction in queue:
+		if instruction["command"] != last_opcode:
+			if max_instructions_per_type != 1: # < 1 makes no sense so I don't care about handling it
+				last_opcode = instruction["command"]
+				current_opcodes = 1
+
+			size += opcode_bit_size
+			size += num_instructions_bit_size
+		else:
+			current_opcodes += 1
+			if current_opcodes == max_instructions_per_type:
+				last_opcode = None
+		size += bits_per_parameter * instructions[instruction["command"]]["params"]
+	# Presumably the field describing the size of the immediate reference data that follows
+	size += immediate_bit_size
+	# Add the data of every parameter that is too wide to be stored in the parameter field itself
+	for instruction in queue:
+		for parameter in instruction["params"]:
+			if parameter[-1:] == "]":
+				tmp = parameter.split("[", maxsplit=1)
+				if len(tmp) != 2:
+					raise Exception("Error at line "+str(instruction["linenum"])+": Parameter ends with ] but no matching [ was found: "+input.split("\n")[instruction["linenum"]-1])
+				parameter = tmp[1][:-1]
+				if len(parameter) == 0:
+					raise Exception("Error at line "+str(instruction["linenum"])+": Invalid parameter: "+input.split("\n")[instruction["linenum"]-1])
+
+			this_size = current_mode
+			if parameter[:1] == "{":
+				i = 1
+				this_size = ""
+				while i < len(parameter):
+					if parameter[i] == "}":
+						break
+
+					this_size += parameter[i]
+
+					i += 1
+				if i == len(parameter):
+					raise Exception("Error at line "+str(instruction["linenum"])+": Parameter starts with { but doesn't contain a }: "+input.split("\n")[instruction["linenum"]-1])
+
+				parameter = parameter[i+1:]
+
+				try:
+					this_size = my_int(this_size)
+				except ValueError:
+					raise Exception("Error at line "+str(instruction["linenum"])+": Parameter size is not a valid number: "+input.split("\n")[instruction["linenum"]-1])
+			elif parameter[-1:] == ":":
+				this_size = labels[parameter[:-1]]["bits"]
+
+			if this_size <= parameter_data_bits:
+				continue # So you can actually use the 9th or whatever bit even if it doesn't support a full 16
+				# For other things, it'll have to be rounded up
+
+			this_size = 2**math.ceil(math.log2(this_size))
+
+			if parameter[-1:] == ":": # Will stuff this into immediate references, unless the size is declared to be <= max allowed directly in param
+						 # Could get away with it if the higher bits would have been 0 anyways, but not going to bother with that yet
+				size += this_size
+			elif parameter.upper() in registers:
+				continue
+			else:
+				try:
+					my_int(parameter)
+					size += this_size
+				except ValueError:
+					raise Exception("Error at line "+str(instruction["linenum"])+": Unknown parameter: `"+parameter+"': "+input.split("\n")[instruction["linenum"]-1])
+
+	size += bits_for_flag_save_definition_size
+
+	return size
+
+address = 0 # Running total of the bits sized so far
+
+current_queue = []
+in_queue = False
+
+for line in lines:
+	if line["command"] in instructions:
+		if in_queue:
+			current_queue.append(line) # Sized as a whole once the closing } is reached
+		else:
+			address += get_size([line]) # A bare instruction is sized as a queue of one
+	elif line["command"] == "{":
+		in_queue = True
+	elif line["command"] == "}":
+		address += get_size(current_queue)
+
+		current_queue = []
+		in_queue = False
+	elif line["command"] == "declare":
+		pass # TODO: Declare
+	elif line["command"].endswith(":"): # Was [:-1] == ":", which dropped the colon instead of testing for it
+		pass # TODO: Labels
 
-# second pass: evaluate parameter values, create binary output
+# third pass: calculate parameter values, calculate immediate reference values, create binary output
diff --git a/assembler/syntax.txt b/assembler/syntax.txt
index 1eaf91c..399a44d 100644
--- a/assembler/syntax.txt
+++ b/assembler/syntax.txt
@@ -1,6 +1,6 @@
 TODO: possibly simplify this to make it less of a pain to write
 
-Leading and trailing whitespace is ignored
+Leading and trailing spaces and tabs are ignored
 Comments start with ; and must not be inside "", '', or ``
 Comments must be preceeded by a space or tab if not at the start of the line
 
@@ -20,7 +20,7 @@ Instruction queues are surrounded by { and }:
 	add b, a, c
 }
 
-{ and } must be on their own in the line (aside for whitespace and comments)
+{ and } must be on their own in the line for a queue (aside from whitespace and comments)
 
 If a parameter (and just the parameter) is placed between [ and ], it will be considered an indirect reference; the memory pointed to by that will be used
 
@@ -29,6 +29,7 @@ If an instruction is specified without { and }, it is considered to be an instru
 Labels are any combination of non-whitespace non-quotation from the start of the line to a ':', and not followed by any non-comment non-whitespace
 Labels must not be declared inside instruction queues
 If a label includes { or }, it must start with {, then a number, then }, to be used as its default size; all other uses of these characters are invalid
+	Otherwise, the default size will be the current mode
 
 Using the value of the label in a parameter will be '<name of label>:' as the operand, like so:
 	add a, my_data:, b
@@ -36,6 +37,8 @@ Using the value of the label in a parameter will be '<name of label>:' as the op
 
 Constants will have a default size of the minimum amount required to represent it in full, as will labels without a default size explicitly set
 
+If a size is set and it does not fit within the parameter itself, the size will be rounded up to the nearest power of two
+
 The "declare" keyword is to be used like an instruction, but not within an instruction queue
 	It will place the following constant's or label's data into the binary output, for an unlimited number of parameters
 	You may use "<data>" to place the literal value of its contents in the output; \ will only apply to " and itself when within these quotes
diff --git a/assembler/test.asm b/assembler/test.asm
index 3ff5e25..f67b309 100644
--- a/assembler/test.asm
+++ b/assembler/test.asm
@@ -1,9 +1,11 @@
 start:
 { ; comment after the queue specification
-	subtract r0, end:, start: ; here's a nice comment
-	decrement r1
+	subtract r0, {32}[{2}end:], {62}start: ; here's a nice comment
+	decrement {8}r1
 }
-end:
+{8}end: 
 	; another comment
+: ; Zero-length label... why do I support this? Why not
+add_inplace r0, r1
 
-blahblahblah invalid opcode
+;blahblahblah invalid opcode ; currently handled properly, commented out now so it can actually process the rest
author	Test_User <hax@andrewyu.org>	2023-06-15 21:54:09 -0400
committer	Test_User <hax@andrewyu.org>	2023-06-15 21:54:09 -0400
commit	572325d28f93282b16df49a5c7146781ee6d49f0 (patch)
tree	19730e9ca83bb3ccb45edda4f446fd15a8588607
parent	4602f3cb8b2f00abdef7c739847828b0025a492a (diff)
download	development-572325d28f93282b16df49a5c7146781ee6d49f0.tar.gz development-572325d28f93282b16df49a5c7146781ee6d49f0.zip