From afe33e42467a252688cbf28ce0379c93ce76ccff Mon Sep 17 00:00:00 2001 From: Trevor Maze Date: Mon, 26 Jan 2026 21:37:49 -0500 Subject: [PATCH] AST implementation --- Sources/rxcc/rxcc.swift | 182 ++++++++++++++++++++++------ bin/a.out | Bin 0 -> 15680 bytes bin/bitwise.s | 4 - bin/{bitwise_zero.s => no_brace.s} | 0 bin/{not_zero.s => no_semicolon.s} | 0 bin/not_five.s | 4 - c/tests/stage_1/valid/multi_digit.s | 4 - c/tests/stage_1/valid/newlines.s | 4 - c/tests/stage_1/valid/no_newlines.s | 4 - c/tests/stage_1/valid/return_0.s | 4 - c/tests/stage_1/valid/return_2.s | 4 - c/tests/stage_1/valid/spaces.s | 4 - 12 files changed, 147 insertions(+), 67 deletions(-) create mode 100755 bin/a.out delete mode 100644 bin/bitwise.s rename bin/{bitwise_zero.s => no_brace.s} (100%) rename bin/{not_zero.s => no_semicolon.s} (100%) delete mode 100644 bin/not_five.s delete mode 100644 c/tests/stage_1/valid/multi_digit.s delete mode 100644 c/tests/stage_1/valid/newlines.s delete mode 100644 c/tests/stage_1/valid/no_newlines.s delete mode 100644 c/tests/stage_1/valid/return_0.s delete mode 100644 c/tests/stage_1/valid/return_2.s delete mode 100644 c/tests/stage_1/valid/spaces.s diff --git a/Sources/rxcc/rxcc.swift b/Sources/rxcc/rxcc.swift index 43bafa7..7b3bc40 100644 --- a/Sources/rxcc/rxcc.swift +++ b/Sources/rxcc/rxcc.swift @@ -17,8 +17,6 @@ enum TokenType { case UNDEFINED } -let UNARY_OPERATORS: [TokenType] = [.NEGATION, .BITWISE_COMPLIMENT, .LOGICAL_NEGATION] - struct Token { let content: Substring let type: TokenType @@ -28,7 +26,7 @@ typealias Construct = [Element] enum Element { case Construct(type: ConstructDefinitions); - case Token(types: [TokenType]) + case Token(type: TokenType) } enum ConstructType { @@ -38,20 +36,95 @@ enum ConstructType { case Program } +enum ConstructVariant { + // Expression variants + case LiteralInteger + case Negation + case BitwiseCompliment + case LogicalNegation + + // Statement variants + case ReturnInteger + + // Function Variants + case Integer + + // Program Variants + case SingleFunction + + // Misc. + case Root + case Error +} + +class SyntaxTreeNode { + var parent: SyntaxTreeNode? + var children: [SyntaxTreeNode] + + var variant: ConstructVariant + var value: String + + init(_ variant: ConstructVariant) { + self.parent = nil + self.children = [SyntaxTreeNode]() + self.variant = variant + self.value = "" + } + + init(_ variant: ConstructVariant, parent: SyntaxTreeNode) { + self.parent = parent + self.children = [SyntaxTreeNode]() + self.variant = variant + self.value = "" + } + + func addChild(value: ConstructVariant) -> SyntaxTreeNode { + let child = SyntaxTreeNode(value, parent: self) + children.append(child) + return child + } + + func popLastChild() -> SyntaxTreeNode { + if let last: SyntaxTreeNode = children.popLast() { + return last + } + return SyntaxTreeNode(.Error) + } + + func text(_ level: Int = 0) -> String { + var text: String = "\(variant)" + if variant == .LiteralInteger { + text += "(\(value))" + } + for child in children { + text += "\n\(String(repeating: " ", count: level))└───\(child.text(level + 1))" + } + return text + } +} + struct ConstructDefinitions { var type: ConstructType - var variants: Dictionary + var variants: Dictionary } let expression: ConstructDefinitions = ConstructDefinitions( type: .Expression, variants: [ - "LiteralInteger": [ - .Token(types: [.LITERAL_INTEGER]) + .LiteralInteger: [ + .Token(type: .LITERAL_INTEGER) ], - "UnaryOperator": [ - .Token(types: [.NEGATION]), - .Token(types: UNARY_OPERATORS) + .Negation: [ + .Token(type: .NEGATION), + .Token(type: .LITERAL_INTEGER) + ], + .BitwiseCompliment: [ + .Token(type: .BITWISE_COMPLIMENT), + .Token(type: .LITERAL_INTEGER) + ], + .LogicalNegation: [ + .Token(type: .LOGICAL_NEGATION), + .Token(type: .LITERAL_INTEGER) ], ] ) @@ -59,10 +132,10 @@ let expression: ConstructDefinitions = ConstructDefinitions( let statement: ConstructDefinitions = ConstructDefinitions( type: .Statement, variants: [ - "ReturnInteger": [ - .Token(types: [.RETURN]), + .ReturnInteger: [ + .Token(type: .RETURN), .Construct(type: expression), - .Token(types: [.SEMICOLON]) + .Token(type: .SEMICOLON) ] ] ) @@ -70,14 +143,14 @@ let statement: ConstructDefinitions = ConstructDefinitions( let function: ConstructDefinitions = ConstructDefinitions( type: .Function, variants: [ - "Integer": [ - .Token(types: [.INT]), - .Token(types: [.IDENTIFIER]), - .Token(types: [.PARENTHESIS_OPEN]), - .Token(types: [.PARENTHESIS_CLOSE]), - .Token(types: [.BRACE_OPEN]), + .Integer: [ + .Token(type: .INT), + .Token(type: .IDENTIFIER), + .Token(type: .PARENTHESIS_OPEN), + .Token(type: .PARENTHESIS_CLOSE), + .Token(type: .BRACE_OPEN), .Construct(type: statement), - .Token(types: [.BRACE_CLOSE]) + .Token(type: .BRACE_CLOSE) ] ] ) @@ -85,7 +158,7 @@ let function: ConstructDefinitions = ConstructDefinitions( let program: ConstructDefinitions = ConstructDefinitions( type: .Function, variants: [ - "Function": [ + .SingleFunction: [ .Construct(type: function) ] ] @@ -154,7 +227,7 @@ func getTestFiles() -> [TestFile] { var testFiles: [TestFile] = [TestFile]() let fileManager = FileManager.default - let path = "c/tests/stage_2" + let path = "c/tests/stage_1" do { let validItems = try fileManager.contentsOfDirectory(atPath: path + "/valid") @@ -215,33 +288,62 @@ func parse(lexed: [Substring]) -> String { } tokens = tokens.reversed() - var output: String = "" + let abstractSyntaxTree = SyntaxTreeNode(.Root) - if validateConstruct(program.variants["Function"]!, constructType: .Function, tokens: &tokens, output: &output) { + if validateConstruct(program.variants[.SingleFunction]!, tokens: &tokens, node: abstractSyntaxTree) { print("Success") + print(abstractSyntaxTree.text()) print("Assembly:") - print(output) - print() - - return output + let assembly: String = generateOutput(tree: abstractSyntaxTree) + print(assembly) + return assembly } print("Distinct lack of success") return "" } -func validateConstruct(_ construct: Construct, constructType: ConstructType, tokens: inout [Token], output: inout String) -> Bool { +func generateOutput(tree: SyntaxTreeNode) -> String { + var text: String = "" + switch tree.variant { + case .Integer: + text += " .globl \(tree.value)\n\(tree.value):\n" + break + + case .ReturnInteger: + text += " movl $\(tree.children[0].value), %eax\n ret\n" + break + + default: + break + } + + for child in tree.children { + text += generateOutput(tree: child) + } + + return text +} + +func validateConstruct(_ construct: Construct, tokens: inout [Token], node: SyntaxTreeNode) -> Bool { for element in construct { switch element { case .Construct(let type): print("Begin validate subconstruct (type \"\(type.type)\")") var valid: Bool = false - var validVariant: String = "" + var validVariant: ConstructVariant = .Error + let tokenBackup: [Token] = tokens for key in type.variants.keys { - if !validateConstruct(type.variants[key]!, constructType: type.type, tokens: &tokens, output: &output) { + let childNode = node.addChild(value: key) + print("Testing variant \(key)") + if !validateConstruct(type.variants[key]!, tokens: &tokens, node: childNode) { + print("Fail") + tokens = tokenBackup + _ = node.popLastChild() continue } + print("Success") valid = true validVariant = key break @@ -251,16 +353,26 @@ func validateConstruct(_ construct: Construct, constructType: ConstructType, tok return false } print("End validate subconstruct (variant \"\(validVariant)\")") - break + return true case .Token(let type): if let token: Token = tokens.popLast() { - if !type.contains(token.type) { + if type != token.type { print("VALIDATION FAILED FOR TOKEN \"\(token.content)\"") return false } + print("Validated token \"\(token.content)\"") - switch constructType { + if token.type == .LITERAL_INTEGER { + node.value = String(token.content) + } + else if token.type == .IDENTIFIER { + node.value = String(token.content) + } + + continue + + /*switch constructType { case .Function: if token.type == .IDENTIFIER { output.append(" .globl \(token.content)\n\(token.content):\n") @@ -273,7 +385,7 @@ func validateConstruct(_ construct: Construct, constructType: ConstructType, tok break default: break - } + }*/ } else { print("RAN OUT OF TOKENS") @@ -294,7 +406,7 @@ func categorizeToken(token: Substring) -> TokenType { else if token.firstMatch(of: /^int$/) != nil { return .INT } else if token.firstMatch(of: /^return$/) != nil { return .RETURN } else if token.firstMatch(of: /^[a-zA-Z]\w*$/) != nil { return .IDENTIFIER } - else if token.firstMatch(of: /^[0-9]+$/) != nil { return .LITERAL_INTEGER } + else if token.firstMatch(of: /^[0-9]+$/) != nil { return .LITERAL_INTEGER} else if token.firstMatch(of: /^-$/) != nil { return .NEGATION } else if token.firstMatch(of: /^~$/) != nil { return .BITWISE_COMPLIMENT } else if token.firstMatch(of: /^!$/) != nil { return .LOGICAL_NEGATION } diff --git a/bin/a.out b/bin/a.out new file mode 100755 index 0000000000000000000000000000000000000000..ba348701042680de82d36a169a01293e8d914398 GIT binary patch literal 15680 zcmeHOU2Ggz6~4Pp$fil-javsdiO7Jesug%*JBbxaldfZLGDgl1v5SN%dKr7i_DcI> z?am^*2*nV7YL$vm30`<9@Bj}8LDdHkFB=6F@_X3R1$vX@el@}tQ(kSJ7wqj0rrrGWlDNo~J(F3A~jBK}E%5jG%0hOEw+wJ56k<&OR z4FFLdx96}iCC40NToe(QwBxSi-AEySdMm1IBvMvg=NM6?QRvuRu$z88EO9>FKgflsx_kP0N$hq= z#c6y{?0{(RePkz7LNxv{V0v7Z_O=_pbr}bsh{i8N)CAA9a~Gac*;Uo%zMwp#GF#+q zN@iU?2ObW#KloAZJ|BN{!5AV`?_e36X$f{^)`HM8~y>}-_fNWr8S++oNk7GBh*#DTq9;~ z+1K;snqMhD8z?U3a~oCXrY6sf>k)UvJrU=R9anmC=DaQijbNeN41>nZ`SD7<7R>mw zm4ND6sMc#zyN*=5Dk{1lX>8~(x)3rXbAK@(S_jL!c~ssd+$*2^0!^zd-3gl@cq zet*Od$n^`pEfwYpQ8{?X?I$Ci=WKEee~{p_k%~FqO7LAi63r=-y7{|H(AY2NKU@(H6){CBvX7k0kP@b0@| zKUMvVRK2@%yZ6W{E8Y+!{WYV1;^PdDZFp0Q6u<7Hm;UfT#CqzVS5f?VhJ zeD9HoO?&R&>Zu|XtJblnysM|~=B}EvL3J=upY1;upY1;upY1;upY1;upY1;upamy z^8kP6${gx>4}X^8?@fo-J01SUh3}}DXD_SFaz1lt@2=c6QeZv)CV%IRPUjIqHL=Tl z(c*7RsDr;-a$=5uHdzl?4_FUa4_FUa4_FUa4_FUa4_FUa5B&Fe0C9_mQ$!3RVmJ4q z%jT_5sryBCB;N5Mkr5AhNMyu09uygIk%;d``I{S^I;-50U`E6!uF8uf$3=2lm-71} zQ9UQPM{q!}Ul6cMGM-e7cH6>Jkf%wo|63>O50OT7L*xOmr!U2(zwi^iJ{vhF<$n=5 z8Jj6h{6_)tkRvhDJt*hKG)AXBqK){yWs~J83QU zXNLYWfcS7n6gVEZoBk2vvucmJB=bwu)!T(%P4UNt$9#qUdnuDqZ&mq1#9(~hZ}>ii z`4^Qc^-;=jyO{sr*Npyth4~ErYn18M|Cfz^ubOb6xryEM+fPez;187P7Qfv~C+!uWXhwLN_&yphN9Ke1UlbL! zL?7c0tB)Cek9tV%Q{X>EJohK9|03}YloI%L;&Crb(CYid_hspBl$xJvdae-;yLF00 z_h-vG^cNHnvlOddnsNFstT&q4Z!N32dUdH1gh9z2 z>aM{d^JVQf8vcq7YGGqV%{TmNpi8Z4b%jh4k|xb?iz;<0sFk=&TA#fv(2X1%9aA!6@WA1 z?E+o*0BqfO5OX^u2v5NccIpDmm2k@pb@SpH>PG=W!kJ! zE-ANMqa6xM$|x@S%|+#wR%%pfgkd8p`Ebx^mg}`Gf~K-YQ1RJ->nVrI4TEJm@~~4` z-FVLp7Gv5&?-vPYgO_p9YOYPn7|)4^S;P$sIHoT^-!hEUGxV2^bY@Hw#u zr29Wd1-yoVJ=QHitoP7==mD3=p4UW>u`U7*NyF*>qd!+D!(#$_tjmCHQAGRc^N*g- z(VjV@iu5f^{XHC&2WA2Gz4Ir_a-Gi@iKw@SMc@*eKH>J^#Ka z_VKgZ5lz9gK4jom#2)^`dI5;>uGRf=D6MLYDP`{`B zn^dxoelqRjbr$yBrQ3rH{4v?~ChW1U8{iuQCFn)crVRe)RKR18_OTAb{>zu-g2DWN z9p=HW$(B`MkNsQiqeN1AG=lgDd*B;XlC;OV?vB{wI!6Vt1Kv&9tAc3Cz@eXMd&nD< z;kwX&tb_7mk9MFh?trLzh+t0CV;=VD>ksxx#N3UIxjw0INIJ&vjeKdLKk& e#_2=yzVbsfBqb@))Y;8`@I+*=H)W6#)!zZBw^o|~ literal 0 HcmV?d00001 diff --git a/bin/bitwise.s b/bin/bitwise.s deleted file mode 100644 index 8b0e895..0000000 --- a/bin/bitwise.s +++ /dev/null @@ -1,4 +0,0 @@ - .globl main -main: - movl $12, %eax - ret diff --git a/bin/bitwise_zero.s b/bin/no_brace.s similarity index 100% rename from bin/bitwise_zero.s rename to bin/no_brace.s diff --git a/bin/not_zero.s b/bin/no_semicolon.s similarity index 100% rename from bin/not_zero.s rename to bin/no_semicolon.s diff --git a/bin/not_five.s b/bin/not_five.s deleted file mode 100644 index 82c9509..0000000 --- a/bin/not_five.s +++ /dev/null @@ -1,4 +0,0 @@ - .globl main -main: - movl $5, %eax - ret diff --git a/c/tests/stage_1/valid/multi_digit.s b/c/tests/stage_1/valid/multi_digit.s deleted file mode 100644 index bdd010e..0000000 --- a/c/tests/stage_1/valid/multi_digit.s +++ /dev/null @@ -1,4 +0,0 @@ - .globl main -main: - movl $100, %eax - ret diff --git a/c/tests/stage_1/valid/newlines.s b/c/tests/stage_1/valid/newlines.s deleted file mode 100644 index e4eafde..0000000 --- a/c/tests/stage_1/valid/newlines.s +++ /dev/null @@ -1,4 +0,0 @@ - .globl main -main: - movl $0, %eax - ret diff --git a/c/tests/stage_1/valid/no_newlines.s b/c/tests/stage_1/valid/no_newlines.s deleted file mode 100644 index e4eafde..0000000 --- a/c/tests/stage_1/valid/no_newlines.s +++ /dev/null @@ -1,4 +0,0 @@ - .globl main -main: - movl $0, %eax - ret diff --git a/c/tests/stage_1/valid/return_0.s b/c/tests/stage_1/valid/return_0.s deleted file mode 100644 index e4eafde..0000000 --- a/c/tests/stage_1/valid/return_0.s +++ /dev/null @@ -1,4 +0,0 @@ - .globl main -main: - movl $0, %eax - ret diff --git a/c/tests/stage_1/valid/return_2.s b/c/tests/stage_1/valid/return_2.s deleted file mode 100644 index cbfb120..0000000 --- a/c/tests/stage_1/valid/return_2.s +++ /dev/null @@ -1,4 +0,0 @@ - .globl main -main: - movl $2, %eax - ret diff --git a/c/tests/stage_1/valid/spaces.s b/c/tests/stage_1/valid/spaces.s deleted file mode 100644 index e4eafde..0000000 --- a/c/tests/stage_1/valid/spaces.s +++ /dev/null @@ -1,4 +0,0 @@ - .globl main -main: - movl $0, %eax - ret