aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2024-07-06 17:23:32 -0400
committerBruce Hill <bruce@bruce-hill.com>2024-07-06 17:23:32 -0400
commitdf765200c41692eed41c0f84a8b9a32e39f7fa34 (patch)
treed36455d83a57001ac1326c39109293a94dca2a13
parenta86dc05d366c0733b645763dd5c3e7396041bd7b (diff)
Switch to parallel compilation
-rw-r--r--compilation.md164
-rw-r--r--tomo.c68
2 files changed, 203 insertions, 29 deletions
diff --git a/compilation.md b/compilation.md
new file mode 100644
index 00000000..27161115
--- /dev/null
+++ b/compilation.md
@@ -0,0 +1,164 @@
+# Compilation Pipeline
+
+For a simple single-file program, the compilation process has a dependency
+graph that looks like this:
+```
+ +----------+ transpile +------------+ generate arg parser
+ | foo.tm.h | <----------- | foo.tm | -------------
+ +----------+ +------------+ |
+ | | | transpile |
+ | | v |
+ | | +------------+ |
+ | | | foo.tm.c | +--------------------+
+ | | +------------+ | main() entry point |
+ | | | +--------------------+
+ | | compile v |
+ | | +------------+ |
+ | +----------------->| foo.tm.o | |
+ | +------------+ |
+ | | link |
+ | v |
+ | compile +------------+ compile |
+ +---------------------> | foo | <------------+
+ +------------+
+```
+
+For a more complicated example, imagine `foo.tm` imports `baz.tm` and both are
+being compiled into a shared library, `libfoo.so`:
+
+```
+ +---------------------------------------+
+ | |
+ +----------+ transpile +------------+ |
+ +- | baz.tm.h | <----------- | baz.tm | -+-------------------------+
+ | +----------+ +------------+ | |
+ | | | | |
+ | | | transpile | |
+ | | v | |
+ | | +------------+ | |
+ | | | baz.tm.c | | |
+ | | +------------+ | |
+ | | | | |
+ | | | compile | |
+ | | v | |
+ | | compile +------------+ | |
+ | +---------------------> | baz.tm.o | | |
+ | +------------+ | |
+ | | | |
+ | | link | compile |
+ | v v |
+ | compile +--------------------------------------+ |
+ | +---------------------> | libfoo.so | |
+ | | +--------------------------------------+ |
+ | | ^ |
+ | | | link |
+ | | | |
+ | +----------+ transpile +------------+ | |
+ | | foo.tm.h | <----------- | foo.tm | | |
+ | +----------+ +------------+ | |
+ | | | | |
+ | | | transpile | |
+ | | v | |
+ | | +------------+ | type info |
+ | | | foo.tm.c | <+-------------------------+
+ | | +------------+ |
+ | | | |
+ | | | compile |
+ | | v |
+ | | compile +------------+ |
+ +----+---------------------> | foo.tm.o | -+
+ | +------------+
+ | compile ^
+ +-------------------------+
+```
+
+These dependency graphs are relatively complicated-looking, but here are some
+rough takeaways:
+
+ 1) Header files are a dependency for many parts of the process, so it's
+ good to transpile them as early as possible.
+ 2) Once all the header files are available,
+ compiled into their object files in parallel. This is by far the
+ slowest part of compilation (invoking the C compiler), so it benefits
+ the most from parallelization.
+ 3) After all object files are compiled, the last step is to link them
+ all together (fast and simple).
+
+To sastisfy these requirements as efficiently as possible, the approach taken
+below is to first transpile all header files sequentially (this could be
+parallelized, but is probably faster than the overhead of forking new
+processes), then fork a new process for each dependency to transpile and
+compile it to an object file. Then, wait for all child processes to finish and
+link the resulting object files together.
+
+## Phase 1 (sequential transpilation):
+
+```
+ +--------+ +--------+
+ | foo.tm | | baz.tm |
+ +--------+ +--------+
+ | | |
+ +--------------+ |
+ | |
+ v v
+ +----------+ +----------+
+ | foo.tm.h | | baz.tm.h |
+ +----------+ +----------+
+```
+
+## Phase 2 (parallel transpilation/compilation):
+
+```
+ ################################ ################################
+ # Process 1 # # Process 2 #
+ # +--------+ +----------+ # # +----------+ +--------+ #
+ # | foo.tm | | foo.tm.h | # # | baz.tm.h | | baz.tm | #
+ # +--------+ | baz.tm.h | # # +----------+ +--------+ #
+ # | +----+-----+ # # | | #
+ # v | # # | v #
+ # +----------+ | # # | +----------+ #
+ # | foo.tm.c | | # # | | baz.tm.c | #
+ # +----------+ | # # | +----------+ #
+ # | | # # | | #
+ # +------+------+ # # +--------+ #
+ # | # # | #
+ # v # # v #
+ # +----------+ # # +----------+ #
+ # | foo.tm.o | # # | baz.tm.o | #
+ # +----------+ # # +----------+ #
+ ################################ ################################
+```
+
+## Phase 3 (linking a shared object file library):
+
+```
+ +----------+ +----------+
+ | foo.tm.o | | baz.tm.o |
+ +----------+ +----------+
+ | |
+ +--------+--------+
+ |
+ v
+ +-----------+
+ | libfoo.so |
+ +-----------+
+```
+
+## Phase 3 (linking an executable):
+
+```
+ +----------+ +----------+ +--------+ +--------+
+ | foo.tm.o | | baz.tm.o | | foo.tm | | baz.tm |
+ +----------+ +----------+ +--------+ +--------+
+ | | | |
+ +--------+--------+ +----+-----+
+ | link | Figure out command line args
+ v v
+ +-----+ compile +-------------------------+
+ | foo |<-------------| main() function for exe |
+ +-----+ +----------+--------------+
+ | foo.tm.h |
+ +----------+
+ | baz.tm.h |
+ +----------+
+```
diff --git a/tomo.c b/tomo.c
index a0052bac..f6030577 100644
--- a/tomo.c
+++ b/tomo.c
@@ -7,6 +7,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
+#include <sys/wait.h>
#include "ast.h"
#include "builtins/array.h"
@@ -124,53 +125,61 @@ int main(int argc, char *argv[])
env_t *env = new_compilation_unit(&compilation_library_name);
table_t dependency_files = {};
table_t to_link = {};
+ table_t argument_files = {};
for (int i = after_flags; i < argc; i++) {
const char *resolved = resolve_path(argv[i], ".", ".");
if (!resolved) errx(1, "Couldn't resolve path: %s", argv[i]);
+ Table$str_set(&argument_files, resolved, argv[i]);
build_file_dependency_graph(resolved, &dependency_files, &to_link);
if (mode == MODE_RUN) break;
}
int status;
- // Non-lazily (re)compile header files for each source file passed to the compiler:
- for (int i = after_flags; i < argc; i++) {
- const char *filename = argv[i];
- status = transpile_header(env, filename, true);
- if (status != 0) return status;
- }
-
- // Lazily (re)compile all the header files:
+ // (Re)compile header files, eagerly for explicitly passed in files, lazily
+ // for downstream dependencies:
for (int64_t i = 0; i < dependency_files.entries.length; i++) {
const char *filename = *(char**)(dependency_files.entries.data + i*dependency_files.entries.stride);
- status = transpile_header(env, filename, false);
+ bool is_argument_file = (Table$str_get(argument_files, filename) != NULL);
+ status = transpile_header(env, filename, is_argument_file);
if (status != 0) return status;
}
env->imports = new(table_t);
- // Non-lazily (re)compile object files for each source file passed to the compiler:
- for (int i = after_flags; i < argc; i++) {
- const char *filename = argv[i];
- status = transpile_code(env, filename, true);
- if (status != 0) return status;
- status = compile_object_file(filename, true);
- if (status != 0) return status;
- if (mode == MODE_RUN) break;
- }
-
- if (mode == MODE_COMPILE_OBJ)
- return 0;
+ struct child_s {
+ struct child_s *next;
+ pid_t pid;
+ } *child_processes = NULL;
- // Lazily (re)compile object files for each dependency:
+ // (Re)transpile and compile object files, eagerly for files explicitly
+ // specified and lazily for downstream dependencies:
for (int64_t i = 0; i < dependency_files.entries.length; i++) {
const char *filename = *(char**)(dependency_files.entries.data + i*dependency_files.entries.stride);
- status = transpile_code(env, filename, false);
- if (status != 0) return status;
- status = compile_object_file(filename, false);
- if (status != 0) return status;
+ bool is_argument_file = (Table$str_get(argument_files, filename) != NULL);
+ if (mode == MODE_COMPILE_OBJ && !is_argument_file)
+ continue;
+
+ pid_t pid = fork();
+ if (pid == 0) {
+ status = transpile_code(env, filename, is_argument_file);
+ if (status != 0)
+ _exit(status);
+ status = compile_object_file(filename, is_argument_file);
+ _exit(status);
+ }
+ child_processes = new(struct child_s, .next=child_processes, .pid=pid);
+ }
+
+ for (; child_processes; child_processes = child_processes->next) {
+ waitpid(child_processes->pid, &status, 0);
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
+ return EXIT_FAILURE;
}
+ if (mode == MODE_COMPILE_OBJ)
+ return 0;
+
CORD object_files = CORD_EMPTY;
for (int64_t i = 0; i < dependency_files.entries.length; i++) {
const char *filename = *(char**)(dependency_files.entries.data + i*dependency_files.entries.stride);
@@ -439,9 +448,10 @@ int compile_object_file(const char *filename, bool force_recompile)
int compile_executable(env_t *base_env, const char *filename, CORD object_files)
{
- const char *name = file_base_name(filename);
- env_t *env = Table$str_get(*base_env->imports, name);
- assert(env);
+ ast_t *ast = parse_file(filename, NULL);
+ if (!ast)
+ errx(1, "Could not parse file %s", filename);
+ env_t *env = load_module_env(base_env, ast);
binding_t *main_binding = get_binding(env, "main");
if (!main_binding || main_binding->type->tag != FunctionType) {
errx(1, "No main() function has been defined for %s, so it can't be run!", filename);