aboutsummaryrefslogtreecommitdiff
path: root/stdlib/paths.c
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2024-09-13 20:18:08 -0400
committerBruce Hill <bruce@bruce-hill.com>2024-09-13 20:18:08 -0400
commitc455e7b67d2e55e6ed03e3449203d4e307f5a7dd (patch)
tree27d9d4c77193f7aa1fe3a3c6fe5631d0ccfd59e2 /stdlib/paths.c
parent816aa29b799132acb8c71d4968df6c4619fb2b1d (diff)
Rename builtins/ -> stdlib/
Diffstat (limited to 'stdlib/paths.c')
-rw-r--r--stdlib/paths.c481
1 files changed, 481 insertions, 0 deletions
diff --git a/stdlib/paths.c b/stdlib/paths.c
new file mode 100644
index 00000000..231a7c23
--- /dev/null
+++ b/stdlib/paths.c
@@ -0,0 +1,481 @@
+// A lang for filesystem paths
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <gc.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <unistr.h>
+
+#include "arrays.h"
+#include "files.h"
+#include "integers.h"
+#include "optionals.h"
+#include "paths.h"
+#include "patterns.h"
+#include "text.h"
+#include "types.h"
+#include "util.h"
+
+// Validate a text value that is being interpolated into a path.
+// Fails if the text contains a slash or a semicolon, or if it is exactly
+// "." or ".."; otherwise returns the same text reinterpreted as a Path_t.
+PUREFUNC public Path_t Path$escape_text(Text_t text)
+{
+    if (Text$has(text, Pattern("/"))) {
+        fail("Path interpolations cannot contain slashes: %k", &text);
+    } else if (Text$has(text, Pattern(";"))) {
+        fail("Path interpolations cannot contain semicolons: %k", &text);
+    } else {
+        // A bare "." or ".." component is rejected outright:
+        bool is_dot_component = Text$equal_values(text, Path("."))
+            || Text$equal_values(text, Path(".."));
+        if (is_dot_component) {
+            fail("Path interpolation is \"%k\" which is disallowed to prevent security vulnerabilities", &text);
+        }
+    }
+    return (Path_t)text;
+}
+
+// Validate a path that is being interpolated into another path.
+// Fails if it begins with "~/" or "/"; otherwise returns it unchanged.
+PUREFUNC public Path_t Path$escape_path(Path_t path)
+{
+    bool is_anchored = Text$starts_with(path, Path("~/"))
+        || Text$starts_with(path, Path("/"));
+    if (is_anchored) {
+        fail("Invalid path component: %k", &path);
+    }
+    return path;
+}
+
+// Normalize the text of a path.
+// - A path that does not start with "/", "./", "../", or "~/" gets "./" prepended.
+// - Empty components ("//") and "." components are dropped.
+// - A ".." component cancels the component before it, unless that component
+//   is itself "..".
+// - A ".." directly after the root is handled per root: dropped for "/",
+//   turns "." into "..", and for "~" the root is replaced by the components
+//   of $HOME minus its last one (fails if $HOME is unset).
+// - A trailing slash on the input is kept on the output.
+// NOTE(review): the indices passed to Array$remove_at()/Array$insert() look
+// 1-based (hence `I(i+1)` for the 0-based cursor `i`) -- confirm in arrays.h.
+public Path_t Path$cleanup(Path_t path)
+{
+ // Make sure the path begins with one of the recognized roots:
+ if (!Text$starts_with(path, Path("/")) && !Text$starts_with(path, Path("./"))
+ && !Text$starts_with(path, Path("../")) && !Text$starts_with(path, Path("~/")))
+ path = Text$concat(Text("./"), path);
+
+ // Not fully resolved, but at least get rid of some of the cruft like "/./"
+ // and "/foo/../" and "//"
+ bool trailing_slash = Text$ends_with(path, Path("/"));
+ Array_t components = Text$split(path, Pattern("/"));
+ if (components.length == 0) return Path("/");
+ // The first split piece is the root (the code below compares it against
+ // empty, ".", and "~"); it is taken out of the component list and
+ // re-attached when the result is assembled.
+ Path_t root = *(Path_t*)components.data;
+ Array$remove_at(&components, I(1), I(1), sizeof(Path_t));
+
+ // `i` is a 0-based cursor into `components`; it only advances past
+ // components that are kept.
+ for (int64_t i = 0; i < components.length; ) {
+ Path_t component = *(Path_t*)(components.data + i*components.stride);
+ if (component.length == 0 || Text$equal_values(component, Path("."))) { // Skip (//) and (/./)
+ Array$remove_at(&components, I(i+1), I(1), sizeof(Path_t));
+ } else if (Text$equal_values(component, Path(".."))) {
+ if (i == 0) {
+ // ".." is the first component, so it applies to the root itself:
+ if (root.length == 0) { // (/..) -> (/)
+ Array$remove_at(&components, I(i+1), I(1), sizeof(Path_t));
+ i += 1;
+ } else if (Text$equal_values(root, Path("."))) { // (./..) -> (..)
+ root = Path("..");
+ Array$remove_at(&components, I(i+1), I(1), sizeof(Path_t));
+ i += 1;
+ } else if (Text$equal_values(root, Path("~"))) {
+ root = Path(""); // Convert $HOME to absolute path:
+
+ Array$remove_at(&components, I(i+1), I(1), sizeof(Path_t));
+ // `i` is pointing to where the `..` lived
+
+ const char *home = getenv("HOME");
+ if (!home) fail("Could not get $HOME directory!");
+
+ // Insert all but the last component:
+ // (`home + 1` skips the first character of $HOME, presumably its leading '/')
+ for (const char *p = home + 1; *p; ) {
+ const char *next_slash = strchr(p, '/');
+ if (!next_slash) break; // Skip last component
+ Path_t home_component = Text$format("%.*s", (int)(next_slash - p), p);
+ Array$insert(&components, &home_component, I(i+1), sizeof(Path_t));
+ i += 1;
+ p = next_slash + 1;
+ }
+ } else { // (../..) -> (../..)
+ i += 1;
+ }
+ // The previous component is also "..", so this one cannot cancel anything:
+ } else if (Text$equal(&component, (Path_t*)(components.data + (i-1)*components.stride))) { // (___/../..) -> (____/../..)
+ i += 1;
+ } else { // (___/foo/..) -> (___)
+ // Remove two entries starting at the previous component (the
+ // component before ".." and the ".." itself), then step the cursor back.
+ Array$remove_at(&components, I(i), I(2), sizeof(Path_t));
+ i -= 1;
+ }
+ } else { // (___/foo/baz) -> (___/foo/baz)
+ i++;
+ }
+ }
+
+ // Reassemble as root + "/" + components joined by "/", restoring the
+ // trailing slash if the input had one and the result lacks it.
+ Text_t cleaned_up = Text$concat(root, Text("/"), Text$join(Text("/"), components));
+ if (trailing_slash && !Text$ends_with(cleaned_up, Text("/")))
+ cleaned_up = Text$concat(cleaned_up, Text("/"));
+ return cleaned_up;
+}
+
+// Replace a leading "~" in a path that starts with "~/" by the value of the
+// HOME environment variable. Paths that do not start with "~/" are returned
+// unchanged. Fails if HOME is not set (passing a NULL pointer to "%s" would
+// be undefined behavior).
+static inline Path_t Path$_expand_home(Path_t path)
+{
+    if (!Text$starts_with(path, Path("~/")))
+        return path;
+
+    const char *home = getenv("HOME");
+    if (!home) fail("Could not get $HOME directory!");
+
+    // Keep everything after the "~" (from the second character to the end):
+    Path_t after_tilde = Text$slice(path, I(2), I(-1));
+    return Text$format("%s%k", home, &after_tilde);
+}
+
+// Concatenate `n` path pieces as text and clean up the result with
+// Path$cleanup(). Fails if the cleaned-up path's length exceeds PATH_MAX.
+public Path_t Path$_concat(int n, Path_t items[n])
+{
+    Text_t joined = Text$_concat(n, items);
+    Path_t result = Path$cleanup(joined);
+    if (result.length > PATH_MAX) {
+        fail("Path exceeds the maximum path length: %k", &result);
+    }
+    return result;
+}
+
+// Resolve `path` against `relative_to`.
+// The path is cleaned up first and handed to resolve_path(). If that yields
+// a result, it is returned. Otherwise the fallbacks are:
+//   - a path starting with "/" is returned as-is;
+//   - a path starting with "~/" has the "~" replaced by $HOME (fails if
+//     $HOME is unset, instead of passing NULL to "%s");
+//   - anything else is appended to the resolved form of `relative_to`.
+public Text_t Path$resolved(Path_t path, Path_t relative_to)
+{
+    path = Path$cleanup(path);
+
+    const char *path_str = Text$as_c_string(path);
+    const char *relative_to_str = Text$as_c_string(relative_to);
+    const char *resolved_path = resolve_path(path_str, relative_to_str, relative_to_str);
+    if (resolved_path)
+        return (Path_t)(Text$from_str(resolved_path));
+
+    if (path_str[0] == '/')
+        return path;
+
+    if (path_str[0] == '~' && path_str[1] == '/') {
+        const char *home = getenv("HOME");
+        if (!home) fail("Could not get $HOME directory!");
+        // `path_str + 1` skips the "~" but keeps the following "/"
+        return (Path_t)Text$format("%s%s", home, path_str + 1);
+    }
+
+    return Text$concat(Path$resolved(relative_to, Path(".")), Path("/"), path);
+}
+
+// Express `path` relative to `relative_to`.
+// Both paths are resolved first. If the resolved path matches the resolved
+// base as a prefix, the part after the base (and one more character,
+// presumably the separating slash) is returned; otherwise the resolved path
+// is returned whole.
+public Text_t Path$relative(Path_t path, Path_t relative_to)
+{
+    path = Path$resolved(path, relative_to);
+    relative_to = Path$resolved(relative_to, Path("."));
+
+    bool is_under_base = Text$matches(path, Patterns(Pattern("{start}"), relative_to, Pattern("{0+..}")));
+    if (!is_under_base) {
+        return path;
+    }
+    return Text$slice(path, I(relative_to.length + 2), I(-1));
+}
+
+// Report whether stat() succeeds on the path (after "~/" expansion).
+public bool Path$exists(Path_t path)
+{
+    Path_t expanded = Path$_expand_home(path);
+    struct stat info;
+    int status = stat(Text$as_c_string(expanded), &info);
+    return status == 0;
+}
+
+// Report whether the path (after "~/" expansion) is a regular file.
+// With `follow_symlinks` the check uses stat(), otherwise lstat().
+// Returns false if the path cannot be stat'ed.
+public bool Path$is_file(Path_t path, bool follow_symlinks)
+{
+    const char *c_path = Text$as_c_string(Path$_expand_home(path));
+    struct stat info;
+    int status;
+    if (follow_symlinks)
+        status = stat(c_path, &info);
+    else
+        status = lstat(c_path, &info);
+    return status == 0 && S_ISREG(info.st_mode);
+}
+
+// Report whether the path (after "~/" expansion) is a directory.
+// With `follow_symlinks` the check uses stat(), otherwise lstat().
+// Returns false if the path cannot be stat'ed.
+public bool Path$is_directory(Path_t path, bool follow_symlinks)
+{
+    const char *c_path = Text$as_c_string(Path$_expand_home(path));
+    struct stat info;
+    int status;
+    if (follow_symlinks)
+        status = stat(c_path, &info);
+    else
+        status = lstat(c_path, &info);
+    return status == 0 && S_ISDIR(info.st_mode);
+}
+
+// Report whether the path (after "~/" expansion) is a FIFO (named pipe).
+// With `follow_symlinks` the check uses stat(), otherwise lstat().
+// Returns false if the path cannot be stat'ed.
+public bool Path$is_pipe(Path_t path, bool follow_symlinks)
+{
+    const char *c_path = Text$as_c_string(Path$_expand_home(path));
+    struct stat info;
+    int status;
+    if (follow_symlinks)
+        status = stat(c_path, &info);
+    else
+        status = lstat(c_path, &info);
+    return status == 0 && S_ISFIFO(info.st_mode);
+}
+
+// Report whether the path (after "~/" expansion) is a socket.
+// With `follow_symlinks` the check uses stat(), otherwise lstat().
+// Returns false if the path cannot be stat'ed.
+public bool Path$is_socket(Path_t path, bool follow_symlinks)
+{
+    const char *c_path = Text$as_c_string(Path$_expand_home(path));
+    struct stat info;
+    int status;
+    if (follow_symlinks)
+        status = stat(c_path, &info);
+    else
+        status = lstat(c_path, &info);
+    if (status != 0) {
+        return false;
+    }
+    mode_t file_type = info.st_mode & S_IFMT;
+    return file_type == S_IFSOCK;
+}
+
+// Report whether the path (after "~/" expansion) is itself a symbolic link.
+// Returns false if the path cannot be examined.
+public bool Path$is_symlink(Path_t path)
+{
+    path = Path$_expand_home(path);
+    struct stat sb;
+    const char *path_str = Text$as_c_string(path);
+    // lstat() is required here: stat() follows symbolic links and reports on
+    // the link's target, so it never yields S_IFLNK.
+    int status = lstat(path_str, &sb);
+    if (status != 0) return false;
+    return (sb.st_mode & S_IFMT) == S_IFLNK;
+}
+
+// Open the path (after "~/" expansion) with the given open() flags (`mode`)
+// and creation permissions, write the text's C-string form to it, and close
+// the file again. Fails if the file cannot be opened, if fewer bytes than
+// requested were written, or if closing the descriptor reports an error.
+static void _write(Path_t path, Text_t text, int mode, int permissions)
+{
+    path = Path$_expand_home(path);
+    const char *path_str = Text$as_c_string(path);
+    int fd = open(path_str, mode, permissions);
+    if (fd == -1)
+        fail("Could not write to file: %s\n%s", path_str, strerror(errno));
+
+    const char *str = Text$as_c_string(text);
+    size_t len = strlen(str);
+    ssize_t written = write(fd, str, len);
+    if (written != (ssize_t)len) {
+        // Save errno before close() can overwrite it, and don't leak the fd:
+        int saved_errno = errno;
+        close(fd);
+        fail("Could not write to file: %s\n%s", path_str, strerror(saved_errno));
+    }
+
+    // Release the descriptor (previously it was leaked on every call):
+    if (close(fd) != 0)
+        fail("Could not write to file: %s\n%s", path_str, strerror(errno));
+}
+
+// Write `text` to the file at `path`, creating it with `permissions` if it
+// does not exist and replacing any previous contents.
+public void Path$write(Path_t path, Text_t text, int permissions)
+{
+    // O_TRUNC discards the old contents; without it, writing text shorter
+    // than the existing file would leave the old tail bytes in place.
+    _write(path, text, O_WRONLY | O_CREAT | O_TRUNC, permissions);
+}
+
+// Append `text` to the file at `path`, creating it with `permissions` if it
+// does not exist.
+public void Path$append(Path_t path, Text_t text, int permissions)
+{
+    const int open_flags = O_WRONLY | O_APPEND | O_CREAT;
+    _write(path, text, open_flags, permissions);
+}
+
+// Read the entire contents of the file at `path` (after "~/" expansion) and
+// return it as text.
+//   - Regular files are copied out of a temporary memory mapping.
+//   - Anything else (pipes, devices, ...) is read in 256-byte chunks until
+//     end-of-file, and the result must be valid UTF8.
+// Fails if the file cannot be opened, stat'ed, mapped, or read.
+public Text_t Path$read(Path_t path)
+{
+    path = Path$_expand_home(path);
+    int fd = open(Text$as_c_string(path), O_RDONLY);
+    if (fd == -1)
+        fail("Could not read file: %k (%s)", &path, strerror(errno));
+
+    struct stat sb;
+    if (fstat(fd, &sb) != 0) {
+        int saved_errno = errno;
+        close(fd);
+        fail("Could not read file: %k (%s)", &path, strerror(saved_errno));
+    }
+
+    if ((sb.st_mode & S_IFMT) == S_IFREG) { // Use memory mapping if it's a real file:
+        size_t size = (size_t)sb.st_size;
+        char *gc_mem = GC_MALLOC_ATOMIC(size + 1);
+        if (size > 0) { // mmap() rejects zero-length mappings, so skip it for empty files
+            void *mem = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+            if (mem == MAP_FAILED) {
+                int saved_errno = errno;
+                close(fd);
+                fail("Could not read file: %k (%s)", &path, strerror(saved_errno));
+            }
+            memcpy(gc_mem, mem, size);
+            munmap(mem, size); // The copy is all we need; don't leak the mapping
+        }
+        gc_mem[size] = '\0';
+        close(fd);
+        return Text$from_strn(gc_mem, size);
+    } else {
+        size_t capacity = 256, len = 0;
+        char *content = GC_MALLOC_ATOMIC(capacity);
+        for (;;) {
+            char chunk[256];
+            ssize_t just_read = read(fd, chunk, sizeof(chunk));
+            if (just_read < 0) {
+                if (errno == EINTR)
+                    continue; // Interrupted by a signal before reading anything: retry
+                int saved_errno = errno;
+                close(fd);
+                fail("Failed while reading file: %k (%s)", &path, strerror(saved_errno));
+            } else if (just_read == 0) {
+                // read() returning 0 means end-of-file. errno is not set in
+                // that case, so it must not be consulted here.
+                break;
+            }
+
+            // A chunk is at most 256 bytes and capacity is at least 256, so
+            // doubling once always makes enough room:
+            if (len + (size_t)just_read >= capacity) {
+                content = GC_REALLOC(content, (capacity *= 2));
+            }
+
+            memcpy(&content[len], chunk, (size_t)just_read);
+            len += (size_t)just_read;
+            // NOTE: a short read is not treated as end-of-file. Pipes and
+            // FIFOs routinely return fewer bytes than requested while more
+            // data is still on the way.
+        }
+        close(fd);
+
+        if (u8_check((uint8_t*)content, len) != NULL)
+            fail("File does not contain valid UTF8 data!");
+
+        return Text$from_strn(content, len);
+    }
+}
+
+// Remove the file, symbolic link, or (empty) directory at `path` (after "~/"
+// expansion). With `ignore_missing`, failures of lstat()/unlink()/rmdir()
+// are silently ignored. Fails if the path is some other kind of filesystem
+// object.
+public void Path$remove(Path_t path, bool ignore_missing)
+{
+    path = Path$_expand_home(path);
+    const char *path_str = Text$as_c_string(path);
+    struct stat sb;
+    if (lstat(path_str, &sb) != 0) {
+        if (!ignore_missing)
+            fail("Could not remove file: %s (%s)", path_str, strerror(errno));
+        // Nothing to inspect: `sb` was never filled in, so stop here instead
+        // of reading it uninitialized.
+        return;
+    }
+
+    mode_t file_type = sb.st_mode & S_IFMT;
+    if (file_type == S_IFREG || file_type == S_IFLNK) {
+        if (unlink(path_str) != 0 && !ignore_missing)
+            fail("Could not remove file: %s (%s)", path_str, strerror(errno));
+    } else if (file_type == S_IFDIR) {
+        if (rmdir(path_str) != 0 && !ignore_missing)
+            fail("Could not remove directory: %s (%s)", path_str, strerror(errno));
+    } else {
+        fail("Could not remove path: %s (not a file or directory)", path_str);
+    }
+}
+
+// Create a directory at `path` (after "~/" expansion) with the given
+// permissions. Fails if mkdir() reports an error.
+public void Path$create_directory(Path_t path, int permissions)
+{
+    path = Path$_expand_home(path);
+    const char *dir_str = Text$as_c_string(path);
+    int status = mkdir(dir_str, (mode_t)permissions);
+    if (status != 0) {
+        fail("Could not create directory: %k (%s)", &path, strerror(errno));
+    }
+}
+
+// List the entries of the directory at `path` (after "~/" expansion).
+//   include_hidden: when false, entries whose name starts with '.' are skipped
+//                   ("." and ".." are always skipped).
+//   filter:         either (mode_t)-1 to accept every file type, or a single
+//                   S_IFMT file type (e.g. S_IFREG, S_IFDIR) that an entry's
+//                   type must equal.
+// Each result is "<path>/<name>", with a trailing slash added for
+// directories. Entries that cannot be stat'ed are skipped. Fails if the
+// directory cannot be opened.
+static Array_t _filtered_children(Path_t path, bool include_hidden, mode_t filter)
+{
+    path = Path$_expand_home(path);
+    struct dirent *dir;
+    Array_t children = {};
+    const char *path_str = Text$as_c_string(path);
+    size_t path_len = strlen(path_str);
+    DIR *d = opendir(path_str);
+    if (!d)
+        fail("Could not open directory: %k (%s)", &path, strerror(errno));
+
+    // Drop one trailing slash so the "%s/%s" join below doesn't double it:
+    if (path_len > 0 && path_str[path_len-1] == '/')
+        --path_len;
+
+    while ((dir = readdir(d)) != NULL) {
+        if (!include_hidden && dir->d_name[0] == '.')
+            continue;
+        if (streq(dir->d_name, ".") || streq(dir->d_name, ".."))
+            continue;
+
+        // The precision argument of "%.*s" must be an int, not a size_t:
+        const char *child_str = heap_strf("%.*s/%s", (int)path_len, path_str, dir->d_name);
+        struct stat sb;
+        if (stat(child_str, &sb) != 0)
+            continue;
+
+        // File types are enumerated values under the S_IFMT mask, not
+        // independent bit flags (e.g. S_IFSOCK shares bits with both S_IFREG
+        // and S_IFDIR), so they must be compared for equality.
+        mode_t file_type = sb.st_mode & S_IFMT;
+        if (filter != (mode_t)-1 && file_type != filter)
+            continue;
+
+        Path_t child = Text$format("%s%s", child_str, (file_type == S_IFDIR) ? "/" : ""); // Trailing slash for dirs
+        Array$insert(&children, &child, I(0), sizeof(Path_t));
+    }
+    closedir(d);
+    return children;
+}
+
+// List every entry of the directory at `path`, whatever its file type.
+public Array_t Path$children(Path_t path, bool include_hidden)
+{
+    const mode_t any_type = (mode_t)-1;
+    return _filtered_children(path, include_hidden, any_type);
+}
+
+// List the entries of the directory at `path`, filtered with S_IFREG.
+public Array_t Path$files(Path_t path, bool include_hidden)
+{
+    const mode_t wanted_type = S_IFREG;
+    return _filtered_children(path, include_hidden, wanted_type);
+}
+
+// List the entries of the directory at `path`, filtered with S_IFDIR.
+public Array_t Path$subdirectories(Path_t path, bool include_hidden)
+{
+    const mode_t wanted_type = S_IFDIR;
+    return _filtered_children(path, include_hidden, wanted_type);
+}
+
+// Create a uniquely named directory with mkdtemp(), using `path` (after "~/"
+// expansion and with one trailing slash removed) as the name template.
+// Returns the created directory's path with a trailing slash. Fails if the
+// path is too long or mkdtemp() reports an error.
+public Path_t Path$unique_directory(Path_t path)
+{
+    path = Path$_expand_home(path);
+    const char *path_str = Text$as_c_string(path);
+    size_t len = strlen(path_str);
+    if (len >= PATH_MAX) fail("Path is too long: %s", path_str);
+    char buf[PATH_MAX] = {};
+    strcpy(buf, path_str); // Fits: len < PATH_MAX was checked above
+    // Guard against an empty path, where buf[len-1] would index before the buffer:
+    if (len > 0 && buf[len-1] == '/')
+        buf[--len] = '\0';
+    char *created = mkdtemp(buf);
+    if (!created) fail("Failed to create temporary directory: %s (%s)", path_str, strerror(errno));
+    return Text$format("%s/", created);
+}
+
+// Create a uniquely named file with mkstemps(), using `path` (after "~/"
+// expansion) as the name template, write `text` to it, and return the name
+// of the file that was created. The suffix length given to mkstemps() is the
+// length of the match of the "{0+!X}{end}" pattern on the path (presumably
+// the run of non-'X' characters at the end -- confirm against patterns.h).
+// Fails if the path is too long, the file cannot be created, or the write
+// is incomplete.
+public Text_t Path$write_unique(Path_t path, Text_t text)
+{
+    path = Path$_expand_home(path);
+    const char *path_str = Text$as_c_string(path);
+    size_t len = strlen(path_str);
+    if (len >= PATH_MAX) fail("Path is too long: %s", path_str);
+    char buf[PATH_MAX] = {};
+    strcpy(buf, path_str); // Fits: len < PATH_MAX was checked above
+
+    int64_t suffixlen = 0;
+    (void)Text$find(path, Pattern("{0+!X}{end}"), I(1), &suffixlen);
+    if (suffixlen < 0) suffixlen = 0;
+
+    // mkstemps() takes an int; the value is bounded by the path length,
+    // which is below PATH_MAX.
+    int fd = mkstemps(buf, (int)suffixlen);
+    if (fd == -1)
+        fail("Could not write to unique file: %s\n%s", buf, strerror(errno));
+
+    const char *str = Text$as_c_string(text);
+    size_t write_len = strlen(str);
+    ssize_t written = write(fd, str, write_len);
+    if (written != (ssize_t)write_len) {
+        // Save errno before close() can overwrite it, and don't leak the fd:
+        int saved_errno = errno;
+        close(fd);
+        fail("Could not write to file: %s\n%s", buf, strerror(saved_errno));
+    }
+    close(fd); // Previously the descriptor was leaked on every call
+    return Text$format("%s", buf);
+}
+
+// Return the parent of `path`: append "/../" and let Path$cleanup()
+// normalize the result.
+public Path_t Path$parent(Path_t path)
+{
+    Path_t with_dotdot = Text$concat(path, Path("/../"));
+    return Path$cleanup(with_dotdot);
+}
+
+// Return the last component of the cleaned-up path. A different pattern is
+// used when the cleaned-up path ends with a slash, so that the trailing
+// slash is part of the match.
+public Text_t Path$base_name(Path_t path)
+{
+    path = Path$cleanup(path);
+    bool has_trailing_slash = Text$ends_with(path, Path("/"));
+    return Text$replace(
+        path,
+        has_trailing_slash ? Pattern("{0+..}/{!/}/{end}") : Pattern("{0+..}/{!/}{end}"),
+        Text("@2"), Text("@"), false);
+}
+
+// Return the extension of the path's base name, or empty text if none of
+// the patterns below match. `full` selects which replacement pattern is
+// used (presumably everything after the first dot vs. only the part after
+// the last dot -- confirm against the pattern syntax in patterns.h).
+public Text_t Path$extension(Path_t path, bool full)
+{
+    Text_t base = Path$base_name(path);
+
+    // Base names matching a pattern that begins with a literal dot:
+    if (Text$matches(base, Pattern(".{!.}.{..}"))) {
+        if (full)
+            return Text$replace(base, Pattern(".{!.}.{..}"), Text("@2"), Text("@"), false);
+        return Text$replace(base, Pattern(".{..}.{!.}{end}"), Text("@2"), Text("@"), false);
+    }
+
+    // Base names matching the same pattern without the leading dot:
+    if (Text$matches(base, Pattern("{!.}.{..}"))) {
+        if (full)
+            return Text$replace(base, Pattern("{!.}.{..}"), Text("@2"), Text("@"), false);
+        return Text$replace(base, Pattern("{..}.{!.}{end}"), Text("@2"), Text("@"), false);
+    }
+
+    return Text("");
+}
+
+// Close the stream held in `*f` (if any) and reset the slot to NULL so the
+// stream cannot be closed twice. Does nothing when `f` or `*f` is NULL.
+static void _line_reader_cleanup(FILE **f)
+{
+    if (f == NULL || *f == NULL)
+        return;
+
+    fclose(*f);
+    *f = NULL;
+}
+
+// Read the next line from the stream held in `*f` and return it as text with
+// trailing '\r' and '\n' characters removed. When no line can be read, the
+// stream is closed via _line_reader_cleanup() and NULL_TEXT is returned;
+// NULL_TEXT is also returned when `f` or `*f` is NULL. Fails if the line is
+// not valid UTF8.
+static Text_t _next_line(FILE **f)
+{
+    if (!f || !*f) return NULL_TEXT;
+
+    char *line = NULL;
+    size_t size = 0;
+    ssize_t len = getline(&line, &size, *f);
+    if (len <= 0) {
+        // getline() may have allocated a buffer even though it failed, and
+        // the caller owns it either way:
+        free(line);
+        _line_reader_cleanup(f);
+        return NULL_TEXT;
+    }
+
+    while (len > 0 && (line[len-1] == '\r' || line[len-1] == '\n'))
+        --len;
+
+    if (u8_check((uint8_t*)line, (size_t)len) != NULL) {
+        free(line);
+        fail("Invalid UTF8!");
+    }
+
+    // The precision argument of "%.*s" must be an int, not an ssize_t:
+    Text_t line_text = Text$format("%.*s", (int)len, line);
+    free(line);
+    return line_text;
+}
+
+// Open the file at `path` (after "~/" expansion) for reading and return a
+// closure that calls _next_line() on it. The stream lives in a GC-allocated
+// slot with _line_reader_cleanup() registered as its finalizer. Fails if
+// the file cannot be opened.
+public Closure_t Path$by_line(Path_t path)
+{
+    path = Path$_expand_home(path);
+
+    FILE *stream = fopen(Text$as_c_string(path), "r");
+    if (!stream) {
+        fail("Could not read file: %k (%s)", &path, strerror(errno));
+    }
+
+    FILE **slot = GC_MALLOC(sizeof(FILE*));
+    *slot = stream;
+    GC_register_finalizer(slot, (void*)_line_reader_cleanup, NULL, NULL, NULL);
+
+    Closure_t line_iterator = {.fn=(void*)_next_line, .userdata=slot};
+    return line_iterator;
+}
+
+// Type metadata for Path_t: tagged as TextInfo with the lang name "Path".
+public const TypeInfo Path$info = {
+    .size = sizeof(Path_t),
+    .align = __alignof__(Path_t),
+    .tag = TextInfo,
+    .TextInfo = {
+        .lang = "Path",
+    },
+};
+
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0