Switch to parallel compilation
This commit is contained in:
parent
a86dc05d36
commit
df765200c4
164
compilation.md
Normal file
164
compilation.md
Normal file
@ -0,0 +1,164 @@
|
||||
# Compilation Pipeline
|
||||
|
||||
For a simple single-file program, the compilation process has a dependency
|
||||
graph that looks like this:
|
||||
```
|
||||
+----------+ transpile +------------+ generate arg parser
|
||||
| foo.tm.h | <----------- | foo.tm | -------------
|
||||
+----------+ +------------+ |
|
||||
| | | transpile |
|
||||
| | v |
|
||||
| | +------------+ |
|
||||
| | | foo.tm.c | +--------------------+
|
||||
| | +------------+ | main() entry point |
|
||||
| | | +--------------------+
|
||||
| | compile v |
|
||||
| | +------------+ |
|
||||
| +----------------->| foo.tm.o | |
|
||||
| +------------+ |
|
||||
| | link |
|
||||
| v |
|
||||
| compile +------------+ compile |
|
||||
+---------------------> | foo | <------------+
|
||||
+------------+
|
||||
```
|
||||
|
||||
For a more complicated example, imagine `foo.tm` imports `baz.tm` and both are
|
||||
being compiled into a shared library, `libfoo.so`:
|
||||
|
||||
```
|
||||
+---------------------------------------+
|
||||
| |
|
||||
+----------+ transpile +------------+ |
|
||||
+- | baz.tm.h | <----------- | baz.tm | -+-------------------------+
|
||||
| +----------+ +------------+ | |
|
||||
| | | | |
|
||||
| | | transpile | |
|
||||
| | v | |
|
||||
| | +------------+ | |
|
||||
| | | baz.tm.c | | |
|
||||
| | +------------+ | |
|
||||
| | | | |
|
||||
| | | compile | |
|
||||
| | v | |
|
||||
| | compile +------------+ | |
|
||||
| +---------------------> | baz.tm.o | | |
|
||||
| +------------+ | |
|
||||
| | | |
|
||||
| | link | compile |
|
||||
| v v |
|
||||
| compile +--------------------------------------+ |
|
||||
| +---------------------> | libfoo.so | |
|
||||
| | +--------------------------------------+ |
|
||||
| | ^ |
|
||||
| | | link |
|
||||
| | | |
|
||||
| +----------+ transpile +------------+ | |
|
||||
| | foo.tm.h | <----------- | foo.tm | | |
|
||||
| +----------+ +------------+ | |
|
||||
| | | | |
|
||||
| | | transpile | |
|
||||
| | v | |
|
||||
| | +------------+ | type info |
|
||||
| | | foo.tm.c | <+-------------------------+
|
||||
| | +------------+ |
|
||||
| | | |
|
||||
| | | compile |
|
||||
| | v |
|
||||
| | compile +------------+ |
|
||||
+----+---------------------> | foo.tm.o | -+
|
||||
| +------------+
|
||||
| compile ^
|
||||
+-------------------------+
|
||||
```
|
||||
|
||||
These dependency graphs are relatively complicated-looking, but here are some
|
||||
rough takeaways:
|
||||
|
||||
1) Header files are a dependency for many parts of the process, so it's
|
||||
good to transpile them as early as possible.
|
||||
2) Once all the header files are available,
|
||||
compiled into their object files in parallel. This is by far the
|
||||
slowest part of compilation (invoking the C compiler), so it benefits
|
||||
the most from parallelization.
|
||||
3) After all object files are compiled, the last step is to link them
|
||||
all together (fast and simple).
|
||||
|
||||
To sastisfy these requirements as efficiently as possible, the approach taken
|
||||
below is to first transpile all header files sequentially (this could be
|
||||
parallelized, but is probably faster than the overhead of forking new
|
||||
processes), then fork a new process for each dependency to transpile and
|
||||
compile it to an object file. Then, wait for all child processes to finish and
|
||||
link the resulting object files together.
|
||||
|
||||
## Phase 1 (sequential transpilation):
|
||||
|
||||
```
|
||||
+--------+ +--------+
|
||||
| foo.tm | | baz.tm |
|
||||
+--------+ +--------+
|
||||
| | |
|
||||
+--------------+ |
|
||||
| |
|
||||
v v
|
||||
+----------+ +----------+
|
||||
| foo.tm.h | | baz.tm.h |
|
||||
+----------+ +----------+
|
||||
```
|
||||
|
||||
## Phase 2 (parallel transpilation/compilation):
|
||||
|
||||
```
|
||||
################################ ################################
|
||||
# Process 1 # # Process 2 #
|
||||
# +--------+ +----------+ # # +----------+ +--------+ #
|
||||
# | foo.tm | | foo.tm.h | # # | baz.tm.h | | baz.tm | #
|
||||
# +--------+ | baz.tm.h | # # +----------+ +--------+ #
|
||||
# | +----+-----+ # # | | #
|
||||
# v | # # | v #
|
||||
# +----------+ | # # | +----------+ #
|
||||
# | foo.tm.c | | # # | | baz.tm.c | #
|
||||
# +----------+ | # # | +----------+ #
|
||||
# | | # # | | #
|
||||
# +------+------+ # # +--------+ #
|
||||
# | # # | #
|
||||
# v # # v #
|
||||
# +----------+ # # +----------+ #
|
||||
# | foo.tm.o | # # | baz.tm.o | #
|
||||
# +----------+ # # +----------+ #
|
||||
################################ ################################
|
||||
```
|
||||
|
||||
## Phase 3 (linking a shared object file library):
|
||||
|
||||
```
|
||||
+----------+ +----------+
|
||||
| foo.tm.o | | baz.tm.o |
|
||||
+----------+ +----------+
|
||||
| |
|
||||
+--------+--------+
|
||||
|
|
||||
v
|
||||
+-----------+
|
||||
| libfoo.so |
|
||||
+-----------+
|
||||
```
|
||||
|
||||
## Phase 3 (linking an executable):
|
||||
|
||||
```
|
||||
+----------+ +----------+ +--------+ +--------+
|
||||
| foo.tm.o | | baz.tm.o | | foo.tm | | baz.tm |
|
||||
+----------+ +----------+ +--------+ +--------+
|
||||
| | | |
|
||||
+--------+--------+ +----+-----+
|
||||
| link | Figure out command line args
|
||||
v v
|
||||
+-----+ compile +-------------------------+
|
||||
| foo |<-------------| main() function for exe |
|
||||
+-----+ +----------+--------------+
|
||||
| foo.tm.h |
|
||||
+----------+
|
||||
| baz.tm.h |
|
||||
+----------+
|
||||
```
|
68
tomo.c
68
tomo.c
@ -7,6 +7,7 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
#include "ast.h"
|
||||
#include "builtins/array.h"
|
||||
@ -124,53 +125,61 @@ int main(int argc, char *argv[])
|
||||
env_t *env = new_compilation_unit(&compilation_library_name);
|
||||
table_t dependency_files = {};
|
||||
table_t to_link = {};
|
||||
table_t argument_files = {};
|
||||
|
||||
for (int i = after_flags; i < argc; i++) {
|
||||
const char *resolved = resolve_path(argv[i], ".", ".");
|
||||
if (!resolved) errx(1, "Couldn't resolve path: %s", argv[i]);
|
||||
Table$str_set(&argument_files, resolved, argv[i]);
|
||||
build_file_dependency_graph(resolved, &dependency_files, &to_link);
|
||||
if (mode == MODE_RUN) break;
|
||||
}
|
||||
|
||||
int status;
|
||||
// Non-lazily (re)compile header files for each source file passed to the compiler:
|
||||
for (int i = after_flags; i < argc; i++) {
|
||||
const char *filename = argv[i];
|
||||
status = transpile_header(env, filename, true);
|
||||
if (status != 0) return status;
|
||||
}
|
||||
|
||||
// Lazily (re)compile all the header files:
|
||||
// (Re)compile header files, eagerly for explicitly passed in files, lazily
|
||||
// for downstream dependencies:
|
||||
for (int64_t i = 0; i < dependency_files.entries.length; i++) {
|
||||
const char *filename = *(char**)(dependency_files.entries.data + i*dependency_files.entries.stride);
|
||||
status = transpile_header(env, filename, false);
|
||||
bool is_argument_file = (Table$str_get(argument_files, filename) != NULL);
|
||||
status = transpile_header(env, filename, is_argument_file);
|
||||
if (status != 0) return status;
|
||||
}
|
||||
|
||||
env->imports = new(table_t);
|
||||
|
||||
// Non-lazily (re)compile object files for each source file passed to the compiler:
|
||||
for (int i = after_flags; i < argc; i++) {
|
||||
const char *filename = argv[i];
|
||||
status = transpile_code(env, filename, true);
|
||||
if (status != 0) return status;
|
||||
status = compile_object_file(filename, true);
|
||||
if (status != 0) return status;
|
||||
if (mode == MODE_RUN) break;
|
||||
struct child_s {
|
||||
struct child_s *next;
|
||||
pid_t pid;
|
||||
} *child_processes = NULL;
|
||||
|
||||
// (Re)transpile and compile object files, eagerly for files explicitly
|
||||
// specified and lazily for downstream dependencies:
|
||||
for (int64_t i = 0; i < dependency_files.entries.length; i++) {
|
||||
const char *filename = *(char**)(dependency_files.entries.data + i*dependency_files.entries.stride);
|
||||
bool is_argument_file = (Table$str_get(argument_files, filename) != NULL);
|
||||
if (mode == MODE_COMPILE_OBJ && !is_argument_file)
|
||||
continue;
|
||||
|
||||
pid_t pid = fork();
|
||||
if (pid == 0) {
|
||||
status = transpile_code(env, filename, is_argument_file);
|
||||
if (status != 0)
|
||||
_exit(status);
|
||||
status = compile_object_file(filename, is_argument_file);
|
||||
_exit(status);
|
||||
}
|
||||
child_processes = new(struct child_s, .next=child_processes, .pid=pid);
|
||||
}
|
||||
|
||||
for (; child_processes; child_processes = child_processes->next) {
|
||||
waitpid(child_processes->pid, &status, 0);
|
||||
if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (mode == MODE_COMPILE_OBJ)
|
||||
return 0;
|
||||
|
||||
// Lazily (re)compile object files for each dependency:
|
||||
for (int64_t i = 0; i < dependency_files.entries.length; i++) {
|
||||
const char *filename = *(char**)(dependency_files.entries.data + i*dependency_files.entries.stride);
|
||||
status = transpile_code(env, filename, false);
|
||||
if (status != 0) return status;
|
||||
status = compile_object_file(filename, false);
|
||||
if (status != 0) return status;
|
||||
}
|
||||
|
||||
CORD object_files = CORD_EMPTY;
|
||||
for (int64_t i = 0; i < dependency_files.entries.length; i++) {
|
||||
const char *filename = *(char**)(dependency_files.entries.data + i*dependency_files.entries.stride);
|
||||
@ -439,9 +448,10 @@ int compile_object_file(const char *filename, bool force_recompile)
|
||||
|
||||
int compile_executable(env_t *base_env, const char *filename, CORD object_files)
|
||||
{
|
||||
const char *name = file_base_name(filename);
|
||||
env_t *env = Table$str_get(*base_env->imports, name);
|
||||
assert(env);
|
||||
ast_t *ast = parse_file(filename, NULL);
|
||||
if (!ast)
|
||||
errx(1, "Could not parse file %s", filename);
|
||||
env_t *env = load_module_env(base_env, ast);
|
||||
binding_t *main_binding = get_binding(env, "main");
|
||||
if (!main_binding || main_binding->type->tag != FunctionType) {
|
||||
errx(1, "No main() function has been defined for %s, so it can't be run!", filename);
|
||||
|
Loading…
Reference in New Issue
Block a user