From dfedf3f2bb434065da3ddbc931e87a4017535f80 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Wed, 30 Apr 2025 20:42:31 -0400 Subject: Update compiler to use randomly generated unique-per-file symbol suffixes instead of needing to rename symbols with objcopy --- docs/libraries.md | 70 ++++++++++++++++++++++++++++++++---------------- docs/namespacing.md | 77 ++++++++++++++++++++++++++++++----------------------- 2 files changed, 90 insertions(+), 57 deletions(-) (limited to 'docs') diff --git a/docs/libraries.md b/docs/libraries.md index 1409bca5..7febb2e1 100644 --- a/docs/libraries.md +++ b/docs/libraries.md @@ -24,7 +24,8 @@ produces the following C header file and C source file: #pragma once #include -extern Text_t foo$my_variable; +extern Text_t my_variable$foo_C3zxCsha; +void $initialize$foo_C3zxCsha(void); ``` ```c @@ -32,14 +33,19 @@ extern Text_t foo$my_variable; #include #include "foo.tm.h" -Text_t foo$my_variable = "hello"; +public Text_t my_variable$foo_C3zxCsha = Text("hello"); +public void $initialize$foo_C3zxCsha(void) { + static bool initialized = false; + if (initialized) return; + initialized = true; +} ``` -Notice that the symbols defined here (`foo$my_variable`) use a file-based -prefix that includes a dollar sign. C compilers support an extension that -allows dollar signs in identifiers, and this allows us to use guaranteed-unique -prefixes so symbols from one file don't have naming collisions with symbols -in another file. +Notice that the symbols defined here (`my_variable$foo_C3zxCsha`) use a +filename-based suffix with a random bit at the end that includes a dollar sign. +C compilers support an extension that allows dollar signs in identifiers, and +this allows us to use guaranteed-unique suffixes so symbols from one file don't +have naming collisions with symbols in another file. 
The C file is compiled by invoking the C compiler with something like: `cc -c foo.tm.c -o foo.tm.o` @@ -65,8 +71,9 @@ If I want to run `baz.tm` with `tomo baz.tm` then this transpiles to: #include #include "./foo.tm.h" -void baz$say_stuff(); -void baz$main(); +void say_stuff$baz_VEDjfzDs(); +void main$baz_VEDjfzDs(); +void $initialize$baz_VEDjfzDs(void); ``` ```c @@ -74,23 +81,44 @@ void baz$main(); #include #include "baz.tm.h" -public void baz$say_stuff() -{ - say(Texts(Text("I got "), foo$my_variable, Text(" from foo"))); +public void say_stuff$baz_VEDjfzDs() { + say(Texts(Text("I got "), my_variable$foo_C3zxCsha, Text(" from foo")), yes); } -public void baz$main() -{ - baz$say_stuff(); +public void main$baz_VEDjfzDs() { + say_stuff$baz_VEDjfzDs(); +} + +public void $initialize$baz_VEDjfzDs(void) { + static bool initialized = false; + if (initialized) return; + initialized = true; + + $initialize$foo_C3zxCsha(); + ... +} + +int main$baz_VEDjfzDs$parse_and_run(int argc, char *argv[]) { + tomo_init(); + $initialize$baz_VEDjfzDs(); + + Text_t usage = Texts(Text("Usage: "), Text$from_str(argv[0]), Text(" [--help]")); + tomo_parse_args(argc, argv, usage, usage); + main$baz_VEDjfzDs(); + return 0; } ``` +The automatically generated function `main$baz_VEDjfzDs$parse_and_run` is in +charge of parsing the command line arguments to `main()` (in this case there +aren't any) and printing out any help/usage errors, then calling `main()`. + Then `baz.tm.o` is compiled to a static object with `cc -c baz.tm.c -o baz.tm.o`. -Next, we need to create an actual executable file that will invoke `baz$main()` -(with any command line arguments). To do that, we create a small wrapper -program: +Next, we need to create an actual executable file that will invoke +`main$baz_VEDjfzDs$parse_and_run()` (with any command line arguments). 
To do +that, we create a small wrapper program: ```c // File: /tmp/program.c @@ -99,11 +127,7 @@ program: int main(int argc, char *argv[]) { - tomo_init(); - if (argc > 1) - errx(1, "This program doesn't take any arguments."); - baz$main(); - return 0; + return main$baz_VEDjfzDs$parse_and_run(argc, argv); } ``` diff --git a/docs/namespacing.md b/docs/namespacing.md index e9dc428d..42bdd984 100644 --- a/docs/namespacing.md +++ b/docs/namespacing.md @@ -1,15 +1,30 @@ # Namespacing In order to work with C's namespace limitations, I've designed the following -system: +system, which makes use of a C language extension `-fdollars-in-identifiers` +that lets you use dollar signs in identifiers. This extension is supported by +GCC, TinyCC, and Clang. + +## Unique File Suffixes + +Each file gets a unique suffix with the format `$<filename>_XXXXXXXX`, where the +Xs are 8 randomly chosen identifier characters and `<filename>` includes only +valid identifier characters up to the first period. + +For example, in a file called `hello-world.tm`, a variable like `foo` would +become `foo$helloworld_VEDjfzDs`. This helps avoid namespace conflicts between +two files that define the same symbol. ## Namespaces -In C, there is a GCC extension (also supported by clang and TCC) to allow for -dollar signs in identifiers. This provides a way to have compiled C code which -segments its imports into different namespaces. For example `Foo$Baz` would be -the identifier `Baz` in the namespace `Foo`, and would be guaranteed to not -collide with a user-chosen name like `FooBaz`. +Dollar signs in identifiers provide a way to have compiled C code which segments +its imports into different namespaces. For example `Foo$Baz` would be the +identifier `Baz` in the namespace `Foo`, and would be guaranteed to not collide +with a user-chosen name like `FooBaz` or `Foo_Baz`. 
+ +## Example + +For this Tomo code: ```tomo // File: foo.tm @@ -19,46 +34,40 @@ struct Baz(x:Int) member := 5 func frob(b:Baz -> Int) return b.x + +func main() pass ``` +The generated C source code will look like this: + ```C -// File: foo.tm.h +// File: .build/foo.tm.h ... -typedef struct foo$Baz_s foo$Baz_t; -struct foo$Baz_s { - Int_t $x; +typedef struct Baz$$struct$foo_VEDjfzDs Baz$$type$foo_VEDjfzDs; +struct Baz$$struct$foo_VEDjfzDs { + Int_t x; }; - -extern Int_t foo$my_var; -extern const TypeInfo_t foo$Baz; - -extern Int_t foo$Baz$member; -Int_t foo$Baz$frob(struct foo$Baz_s $b); -void foo$main(); +DEFINE_OPTIONAL_TYPE(struct Baz$$struct$foo_VEDjfzDs, 8,$OptionalBaz$$type$foo_VEDjfzDs); +extern const TypeInfo_t Baz$$info$foo_VEDjfzDs; +extern Int_t Baz$member$foo_VEDjfzDs; +Int_t Baz$frob$foo_VEDjfzDs(struct Baz$$struct$foo_VEDjfzDs _$b); +extern Int_t my_var$foo_VEDjfzDs; +void main$foo_VEDjfzDs(); ... ``` ```C -// File: foo.tm.c +// File: .build/foo.tm.c ... -Int_t foo$my_var = I_small(123); -Int_t foo$Baz$member = I_small(5); - -static Text_t foo$Baz$as_text(foo$Baz_t *obj, bool use_color) -{ - if (!obj) - return "Baz"; - return Texts(use_color ? Text("\x1b[0;1mBaz\x1b[m(") : Text("Baz("), - Int$as_text(stack(obj->$x), use_color, &Int$info), Text(")")); +public Int_t my_var$foo_VEDjfzDs = I_small(123); +public const TypeInfo_t Baz$$info$foo_VEDjfzDs = {...}; +public Int_t Baz$member$foo_VEDjfzDs = I_small(5); + +public Int_t Baz$frob$foo_VEDjfzDs(struct Baz$$struct$foo_VEDjfzDs _$b) { + return (_$b).x; } -public Int_t foo$Baz$frob(struct foo$Baz_s $b) -{ - return ($b).x; +public void main$foo_VEDjfzDs() { } ... ``` - -And on the usage site, the code `include ./foo.tm` compiles to `#include -"./foo.tm.h"` in the header and `use$foo()` in the code that is executed. - -- cgit v1.2.3