Add langs to the language

This commit is contained in:
Bruce Hill 2024-03-09 18:22:12 -05:00
parent 1b8f7307a9
commit 2b83ab279d
13 changed files with 157 additions and 50 deletions

3
ast.c
View File

@ -98,7 +98,7 @@ CORD ast_to_cord(ast_t *ast)
T(Int, "(\x1b[35m%ld\x1b[m, bits=\x1b[35m%ld\x1b[m)", data.i, data.bits)
T(Num, "(\x1b[35m%ld\x1b[m, bits=\x1b[35m%ld\x1b[m)", data.n, data.bits)
T(TextLiteral, "%r", Text__quoted(data.cord, true))
T(TextJoin, "(%r)", ast_list_to_cord(data.children))
T(TextJoin, "(%s, %r)", data.lang ? data.lang : "Text", ast_list_to_cord(data.children))
T(Declare, "(var=%s, value=%r)", ast_to_cord(data.var), ast_to_cord(data.value))
T(Assign, "(targets=%r, values=%r)", ast_list_to_cord(data.targets), ast_list_to_cord(data.values))
T(BinaryOp, "(%r, %s, %r)", ast_to_cord(data.lhs), OP_NAMES[data.op], ast_to_cord(data.rhs))
@ -135,6 +135,7 @@ CORD ast_to_cord(ast_t *ast)
T(Extern, "(name=%s, type=%r)", data.name, type_ast_to_cord(data.type))
T(StructDef, "(%s, fields=%r, namespace=%r)", data.name, arg_list_to_cord(data.fields), ast_to_cord(data.namespace))
T(EnumDef, "(%s, tags=%r, namespace=%r)", data.name, tags_to_cord(data.tags), ast_to_cord(data.namespace))
T(LangDef, "(%s, secret=%s, namespace=%r)", data.name, data.secret ? "yes" : "no", ast_to_cord(data.namespace))
T(Index, "(indexed=%r, index=%r)", ast_to_cord(data.indexed), ast_to_cord(data.index))
T(FieldAccess, "(fielded=%r, field=%s)", ast_to_cord(data.fielded), data.field)
T(DocTest, "(expr=%r, output=%r)", ast_to_cord(data.expr), Text__quoted(data.output, true))

9
ast.h
View File

@ -105,8 +105,7 @@ typedef enum {
Skip, Stop, Pass,
Return,
Extern,
StructDef,
EnumDef,
StructDef, EnumDef, LangDef,
Index, FieldAccess,
DocTest,
Use,
@ -141,6 +140,7 @@ struct ast_s {
CORD cord;
} TextLiteral;
struct {
// Name of the lang this text literal was written in. The text parser leaves
// this NULL unless an identifier follows the `$` opener; ast_to_cord() prints
// "Text" for the NULL case.
const char *lang;
// The chunks that are joined together to form the text value.
ast_list_t *children;
} TextJoin;
struct {
@ -238,6 +238,11 @@ struct ast_s {
tag_ast_t *tags;
ast_t *namespace;
} EnumDef;
struct {
// Identifier that follows the `lang` keyword.
const char *name;
// Body of the definition. parse_lang_def() stores an empty Block here when
// no more-deeply-indented namespace follows the definition line.
ast_t *namespace;
// Set when the definition is written with a `(secret)` option list.
bool secret:1;
} LangDef;
struct {
ast_t *indexed, *index;
bool unchecked;

View File

@ -57,6 +57,7 @@ public uint32_t generic_hash(const void *obj, const TypeInfo *type)
{
switch (type->tag) {
case PointerInfo: case FunctionInfo: return Pointer__hash(obj, type);
case TextInfo: return Text__hash(obj);
case ArrayInfo: return Array__hash(obj, type);
case TableInfo: return Table_hash(obj, type);
case CustomInfo:
@ -76,6 +77,7 @@ public int32_t generic_compare(const void *x, const void *y, const TypeInfo *typ
{
switch (type->tag) {
case PointerInfo: case FunctionInfo: return Pointer__compare(x, y, type);
case TextInfo: return Text__compare(x, y);
case ArrayInfo: return Array__compare(x, y, type);
case TableInfo: return Table_compare(x, y, type);
case CustomInfo:
@ -84,10 +86,6 @@ public int32_t generic_compare(const void *x, const void *y, const TypeInfo *typ
return type->CustomInfo.compare(x, y, type);
default:
compare_data:
{
int diff = memcmp((void*)x, (void*)y, type->size);
printf("GOT DIFF: %d\n", diff);
}
return (int32_t)memcmp((void*)x, (void*)y, type->size);
}
}
@ -96,6 +94,7 @@ public bool generic_equal(const void *x, const void *y, const TypeInfo *type)
{
switch (type->tag) {
case PointerInfo: case FunctionInfo: return Pointer__equal(x, y, type);
case TextInfo: return Text__equal(x, y);
case ArrayInfo: return Array__equal(x, y, type);
case TableInfo: return Table_equal(x, y, type);
case CustomInfo:
@ -113,6 +112,7 @@ public CORD generic_as_text(const void *obj, bool colorize, const TypeInfo *type
switch (type->tag) {
case PointerInfo: return Pointer__as_text(obj, colorize, type);
case FunctionInfo: return Func__as_text(obj, colorize, type);
case TextInfo: return obj ? Text__quoted(*(CORD*)obj, colorize) :type->TextInfo.lang;
case ArrayInfo: return Array__as_text(obj, colorize, type);
case TableInfo: return Table_as_text(obj, colorize, type);
case TypeInfoInfo: return Type__as_text(obj, colorize, type);

View File

@ -88,7 +88,7 @@ public CORD Text__quoted(CORD str, bool colorize)
}
}
public int Text__compare(CORD *x, CORD *y)
public int Text__compare(const CORD *x, const CORD *y)
{
uint8_t *xx = (uint8_t*)CORD_to_const_char_star(*x);
uint8_t *yy = (uint8_t*)CORD_to_const_char_star(*y);
@ -98,12 +98,12 @@ public int Text__compare(CORD *x, CORD *y)
return result;
}
public bool Text__equal(CORD *x, CORD *y)
public bool Text__equal(const CORD *x, const CORD *y)
{
return Text__compare(x, y) == 0;
}
public uint32_t Text__hash(CORD *cord)
public uint32_t Text__hash(const CORD *cord)
{
if (!*cord) return 0;
@ -384,13 +384,8 @@ public array_t Text__character_names(CORD text)
public const TypeInfo Text = {
.size=sizeof(CORD),
.align=__alignof__(CORD),
.tag=CustomInfo,
.CustomInfo={
.as_text=(void*)Text__as_text,
.compare=(void*)Text__compare,
.equal=(void*)Text__equal,
.hash=(void*)Text__hash,
},
.tag=TextInfo,
.TextInfo={.lang="Text"},
};
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0

View File

@ -16,9 +16,9 @@ typedef struct {
CORD Text__as_text(const void *str, bool colorize, const TypeInfo *info);
CORD Text__quoted(CORD str, bool colorize);
int Text__compare(CORD *x, CORD *y);
bool Text__equal(CORD *x, CORD *y);
uint32_t Text__hash(CORD *cord);
int Text__compare(const CORD *x, const CORD *y);
bool Text__equal(const CORD *x, const CORD *y);
uint32_t Text__hash(const CORD *cord);
CORD Text__upper(CORD str);
CORD Text__lower(CORD str);
CORD Text__title(CORD str);

View File

@ -15,7 +15,7 @@ typedef CORD (*str_fn_t)(const void*, bool, const struct TypeInfo*);
typedef struct TypeInfo {
int64_t size, align;
struct { // Anonymous tagged union for convenience
enum { CustomInfo, PointerInfo, ArrayInfo, TableInfo, FunctionInfo, TypeInfoInfo, OpaqueInfo, } tag;
enum { CustomInfo, PointerInfo, TextInfo, ArrayInfo, TableInfo, FunctionInfo, TypeInfoInfo, OpaqueInfo, } tag;
union {
struct {
equal_fn_t equal;
@ -27,6 +27,9 @@ typedef struct TypeInfo {
const char *sigil;
const struct TypeInfo *pointed;
} PointerInfo;
struct {
// Name of the text language. The builtin Text type sets this to "Text";
// generic_as_text() returns it when asked to describe the type (obj == NULL).
const char *lang;
} TextInfo;
struct {
const struct TypeInfo *item;
} ArrayInfo;

View File

@ -82,7 +82,7 @@ CORD compile_type(type_t *t)
case IntType: return Match(t, IntType)->bits == 64 ? "Int_t" : CORD_asprintf("Int%ld_t", Match(t, IntType)->bits);
case NumType: return Match(t, NumType)->bits == 64 ? "Num_t" : CORD_asprintf("Num%ld_t", Match(t, NumType)->bits);
case TextType: {
const char *dsl = Match(t, TextType)->dsl;
const char *dsl = Match(t, TextType)->lang;
return dsl ? CORD_cat(dsl, "_t") : "Text_t";
}
case ArrayType: return "array_t";
@ -109,7 +109,7 @@ CORD compile_statement(env_t *env, ast_t *ast)
{
CORD stmt;
switch (ast->tag) {
case If: case When: case For: case While: case FunctionDef: case Return: case StructDef: case EnumDef:
case If: case When: case For: case While: case FunctionDef: case Return: case StructDef: case EnumDef: case LangDef:
case Declare: case Assign: case UpdateAssign: case DocTest: case Block:
stmt = compile(env, ast);
break;
@ -578,21 +578,45 @@ CORD compile(env_t *env, ast_t *ast)
return CORD_cat_char(code, '"');
}
case TextJoin: {
const char *lang = Match(ast, TextJoin)->lang;
type_t *text_t = Type(TextType, .lang=lang);
table_t *lang_ns = lang ? Table_str_get(*env->type_namespaces, lang) : NULL;
ast_list_t *chunks = Match(ast, TextJoin)->children;
if (!chunks) {
return "(CORD)CORD_EMPTY";
} else if (!chunks->next) {
type_t *t = get_type(env, chunks->ast);
if (t->tag == TextType)
return compile(env, chunks->ast);
return compile_string(env, chunks->ast, "no");
} else if (!chunks->next && chunks->ast->tag == TextLiteral) {
return compile(env, chunks->ast);
} else {
CORD code = "CORD_all(";
for (ast_list_t *chunk = chunks; chunk; chunk = chunk->next) {
CORD chunk_code;
type_t *chunk_t = get_type(env, chunk->ast);
CORD chunk_str = (chunk_t->tag == TextType) ?
compile(env, chunk->ast) : compile_string(env, chunk->ast, "no");
code = CORD_cat(code, chunk_str);
if (chunk->ast->tag == TextLiteral) {
chunk_code = compile(env, chunk->ast);
} else if (chunk_t->tag == TextType && streq(Match(chunk_t, TextType)->lang, lang)) {
chunk_code = compile(env, chunk->ast);
} else if (lang && lang_ns) {
// Get conversion function:
chunk_code = compile(env, chunk->ast);
for (int64_t i = 1; i <= Table_length(*lang_ns); i++) {
struct {const char *name; binding_t *b; } *entry = Table_entry(*lang_ns, i);
if (entry->b->type->tag != FunctionType) continue;
if (strncmp(entry->name, "escape_", strlen("escape_")) != 0) continue;
auto fn = Match(entry->b->type, FunctionType);
if (!fn->args || fn->args->next) continue;
if (fn->ret->tag != TextType || !streq(Match(fn->ret, TextType)->lang, lang))
continue;
if (!promote(env, &chunk_code, chunk_t, fn->args->type))
continue;
chunk_code = CORD_all(entry->b->code, "(", chunk_code, ")");
goto found_conversion;
}
code_err(chunk->ast, "I don't know how to convert a %T to a %T", chunk_t, text_t);
found_conversion:;
} else {
chunk_code = compile_string(env, chunk->ast, "no");
}
code = CORD_cat(code, chunk_code);
if (chunk->next) code = CORD_cat(code, ", ");
}
return CORD_cat(code, ")");
@ -767,7 +791,7 @@ CORD compile(env_t *env, ast_t *ast)
CORD body = compile(body_scope, fndef->body);
if (CORD_fetch(body, 0) != '{')
body = CORD_asprintf("{\n%r\n}", body);
env->code->funcs = CORD_all(env->code->funcs, code, " ", body);
env->code->funcs = CORD_all(env->code->funcs, code, " ", body, "\n");
if (fndef->cache && fndef->cache->tag == Int && Match(fndef->cache, Int)->i > 0) {
const char *arg_type_name = heap_strf("%s$args", CORD_to_const_char_star(name));
@ -798,7 +822,7 @@ CORD compile(env_t *env, ast_t *ast)
pop_code,
"Table_set(&$cache, &$args, &$ret, $table_type);\n"
"return $ret;\n"
"}");
"}\n");
env->code->funcs = CORD_cat(env->code->funcs, wrapper);
}
@ -1197,7 +1221,6 @@ CORD compile(env_t *env, ast_t *ast)
return "return;";
}
}
// Extern,
case StructDef: {
compile_struct_def(env, ast);
return CORD_EMPTY;
@ -1206,6 +1229,17 @@ CORD compile(env_t *env, ast_t *ast)
compile_enum_def(env, ast);
return CORD_EMPTY;
}
case LangDef: {
    // Emit the C-level pieces of a `lang` definition:
    //   - a `<name>_t` typedef (a lang value is represented as a CORD),
    //   - an extern declaration plus the definition of its TypeInfo (tagged TextInfo),
    //   - the code for everything in the lang's namespace.
    // TODO: implement the rest of lang support
    auto def = Match(ast, LangDef);
    CORD_appendf(&env->code->typedefs, "typedef CORD %s_t;\n", def->name);
    CORD_appendf(&env->code->typedefs, "extern const TypeInfo %s;\n", def->name);
    // Text__quoted() returns a CORD, so it must be formatted with %r (as done elsewhere in
    // this codebase) rather than %s, which expects a flat C string. The argument list
    // matches the four conversions in the format string exactly.
    CORD_appendf(&env->code->typeinfos, "public const TypeInfo %s = {%zu, %zu, {.tag=TextInfo, .TextInfo={%r}}};\n",
                 def->name, sizeof(CORD), __alignof__(CORD),
                 Text__quoted(def->name, false));
    compile_namespace(env, def->name, def->namespace);
    // A definition produces no inline code of its own.
    return CORD_EMPTY;
}
case DocTest: {
auto test = Match(ast, DocTest);
CORD src = heap_strn(test->expr->start, (size_t)(test->expr->end - test->expr->start));
@ -1444,7 +1478,7 @@ CORD compile_type_info(env_t *env, type_t *t)
{
switch (t->tag) {
case BoolType: case IntType: case NumType: return CORD_asprintf("&%r", type_to_cord(t));
case TextType: return CORD_all("(&", Match(t, TextType)->dsl ? Match(t, TextType)->dsl : "Text", ")");
case TextType: return CORD_all("(&", Match(t, TextType)->lang ? Match(t, TextType)->lang : "Text", ")");
case StructType: return CORD_all("(&", Match(t, StructType)->name, ")");
case EnumType: return CORD_all("(&", Match(t, EnumType)->name, ")");
case ArrayType: {

56
parse.c
View File

@ -44,7 +44,7 @@ int op_tightness[] = {
static const char *keywords[] = {
"yes", "xor", "while", "when", "use", "then", "struct", "stop", "skip", "return",
"or", "not", "no", "mod1", "mod", "in", "if", "func", "for", "extern",
"or", "not", "no", "mod1", "mod", "lang", "in", "if", "func", "for", "extern",
"enum", "else", "do", "and", "_mix_", "_min_", "_max_",
NULL,
};
@ -84,7 +84,8 @@ static PARSER(parse_opt_indented_block);
static PARSER(parse_var);
static PARSER(parse_enum_def);
static PARSER(parse_struct_def);
static PARSER(parse_string);
static PARSER(parse_lang_def);
static PARSER(parse_text);
static PARSER(parse_func_def);
static PARSER(parse_extern);
static PARSER(parse_declaration);
@ -944,9 +945,10 @@ PARSER(parse_bool) {
return NULL;
}
PARSER(parse_string) {
// ["$" [interp-char [closing-interp-char]]] ('"' ... '"' / "'" ... "'")
PARSER(parse_text) {
// ["$" [name] [interp-char [closing-interp-char]]] ('"' ... '"' / "'" ... "'")
const char *start = pos;
const char *lang = NULL;
// Escape sequence, e.g. \r\n
if (*pos == '\\') {
@ -965,7 +967,8 @@ PARSER(parse_string) {
} else if (match(&pos, "'")) {
open_quote = '\'', close_quote = '\'';
} else if (match(&pos, "$")) {
if (strspn(pos, (char[]){*pos, 0}) >= 2) {
lang = get_id(&pos);
if (pos[1] == pos[0]) {
// Disable interp using a double opener: $;;...; or $``text`
open_quote = *pos;
pos += 2;
@ -1061,7 +1064,7 @@ PARSER(parse_string) {
REVERSE_LIST(chunks);
expect_closing(ctx, &pos, (char[]){close_quote, 0}, "I was expecting a '%c' to finish this string", close_quote);
return NewAST(ctx->file, start, pos, TextJoin, .children=chunks);
return NewAST(ctx->file, start, pos, TextJoin, .lang=lang, .children=chunks);
}
PARSER(parse_skip) {
@ -1139,7 +1142,7 @@ PARSER(parse_term_no_suffix) {
|| (term=parse_heap_alloc(ctx, pos))
|| (term=parse_stack_reference(ctx, pos))
|| (term=parse_bool(ctx, pos))
|| (term=parse_string(ctx, pos))
|| (term=parse_text(ctx, pos))
|| (term=parse_lambda(ctx, pos))
|| (term=parse_parens(ctx, pos))
|| (term=parse_table(ctx, pos))
@ -1496,6 +1499,7 @@ PARSER(parse_namespace) {
ast_t *stmt;
if ((stmt=optional(ctx, &pos, parse_struct_def))
||(stmt=optional(ctx, &pos, parse_enum_def))
||(stmt=optional(ctx, &pos, parse_lang_def))
||(stmt=optional(ctx, &pos, parse_func_def))
||(stmt=optional(ctx, &pos, parse_use))
||(stmt=optional(ctx, &pos, parse_linker))
@ -1639,6 +1643,44 @@ ast_t *parse_enum_def(parse_ctx_t *ctx, const char *pos) {
return NewAST(ctx->file, start, pos, EnumDef, .name=name, .tags=tags, .namespace=namespace);
}
PARSER(parse_lang_def) {
    // Accepted forms:
    //   lang Name
    //   lang Name(secret)
    // either of which may be followed by a more-deeply-indented namespace.
    const char *def_start = pos;
    if (!match_word(&pos, "lang"))
        return NULL;

    // Indentation of the definition line; a namespace only counts if it is indented further.
    int64_t base_indent = get_indent(ctx->file, pos);

    spaces(&pos);
    const char *lang_name = get_id(&pos);
    if (lang_name == NULL)
        parser_err(ctx, def_start, pos, "I expected a name for this lang");
    spaces(&pos);

    // Optional parenthesized options; `secret` is the only one recognized.
    bool is_secret = false;
    if (match(&pos, "(")) {
        whitespace(&pos);
        if (match_word(&pos, "secret")) {
            is_secret = true;
            whitespace(&pos);
            match(&pos, ","); // a trailing comma is allowed but not required
        }
        expect_closing(ctx, &pos, ")", "I wasn't able to parse the rest of this lang definition");
    }

    // Peek past whitespace without committing, then only consume it if what
    // follows is indented more deeply than the definition itself.
    ast_t *body = NULL;
    const char *body_start = pos;
    whitespace(&body_start);
    int64_t body_indent = get_indent(ctx->file, body_start);
    if (body_indent > base_indent) {
        pos = body_start;
        body = optional(ctx, &pos, parse_namespace);
    }

    // Always hand back a namespace, even if it is just an empty block.
    if (body == NULL)
        body = NewAST(ctx->file, pos, pos, Block, .statements=NULL);

    return NewAST(ctx->file, def_start, pos, LangDef, .name=lang_name, .secret=is_secret, .namespace=body);
}
arg_ast_t *parse_args(parse_ctx_t *ctx, const char **pos, bool allow_unnamed)
{
arg_ast_t *args = NULL;

View File

@ -51,3 +51,6 @@
= ["LATIN CAPITAL LETTER A", "LATIN SMALL LETTER M", "LATIN SMALL LETTER E", "COMBINING ACUTE ACCENT", "LATIN SMALL LETTER L", "LATIN SMALL LETTER I", "LATIN SMALL LETTER E"]
>> amelie2:character_names()
= ["LATIN CAPITAL LETTER A", "LATIN SMALL LETTER M", "LATIN SMALL LETTER E", "COMBINING ACUTE ACCENT", "LATIN SMALL LETTER L", "LATIN SMALL LETTER I", "LATIN SMALL LETTER E"]
>> "Hello":replace("e", "X")
= "HXllo"

7
tomo.c
View File

@ -66,7 +66,7 @@ int main(int argc, char *argv[])
module_code_t module = compile_file(ast);
if (verbose) {
if (verbose && mode != MODE_RUN) {
FILE *out = popen(heap_strf("%s | bat -P --file-name=%s.h", autofmt, f->filename), "w");
CORD_put(module.header, out);
pclose(out);
@ -130,6 +130,11 @@ int main(int argc, char *argv[])
"return 0;\n"
"}\n"
);
if (verbose) {
    // Verbose mode: show the generated C program by piping it through the
    // `autofmt` command into `bat`, labelled with the source filename.
    FILE *out = popen(heap_strf("%s | bat -P --file-name=%s.c", autofmt, f->filename), "w");
    // popen() returns NULL on failure; skip the preview rather than handing a
    // NULL stream to CORD_put()/pclose().
    if (out) {
        CORD_put(program, out);
        pclose(out);
    }
}
CORD_put(program, runner);
int status = pclose(runner);

View File

@ -179,6 +179,24 @@ void bind_statement(env_t *env, ast_t *statement)
break;
}
case LangDef: {
// Register a `lang` definition: its type, its namespace bindings, and a
// global binding for the lang's own name.
auto def = Match(statement, LangDef);
// The lang's type is a TextType tagged with the lang's name.
type_t *type = Type(TextType, .lang=def->name);
Table_str_set(env->types, def->name, type);
env_t *ns_env = namespace_env(env, def->name);
// Built-in `from_unsafe_text`: takes a plain Text argument and returns this
// lang's type. Its generated code is only a cast to `<name>_t`.
set_binding(ns_env, "from_unsafe_text",
new(binding_t, .type=Type(FunctionType, .args=new(arg_t, .name="text", .type=Type(TextType)), .ret=type),
.code=CORD_all("(", def->name, "_t)")));
// Bind each statement of the lang's namespace block (if there is one)
// inside the namespace environment.
for (ast_list_t *stmt = def->namespace ? Match(def->namespace, Block)->statements : NULL; stmt; stmt = stmt->next)
bind_statement(ns_env, stmt->ast);
// Bind the lang's name as a global whose type is a TypeInfoType wrapping the lang type.
type_t *typeinfo_type = Type(TypeInfoType, .name=def->name, .type=type);
Table_str_set(env->globals, def->name, new(binding_t, .type=typeinfo_type));
break;
}
default: break;
}
}
@ -269,8 +287,9 @@ type_t *get_type(env_t *env, ast_t *ast)
code_err(ast, "'&' stack references can only be used on variables or fields of variables");
}
case TextJoin: case TextLiteral: {
return Type(TextType);
case TextLiteral: return Type(TextType);
case TextJoin: {
return Type(TextType, .lang=Match(ast, TextJoin)->lang);
}
case Var: {
auto var = Match(ast, Var);
@ -448,7 +467,7 @@ type_t *get_type(env_t *env, ast_t *ast)
// Early out if the type is knowable without any context from the block:
switch (last->ast->tag) {
case UpdateAssign: case Assign: case Declare: case FunctionDef: case StructDef: case EnumDef:
case UpdateAssign: case Assign: case Declare: case FunctionDef: case StructDef: case EnumDef: case LangDef:
return Type(VoidType);
default: break;
}
@ -624,7 +643,7 @@ type_t *get_type(env_t *env, ast_t *ast)
return Type(ClosureType, Type(FunctionType, .args=args, .ret=ret));
}
case FunctionDef: case StructDef: case EnumDef: {
case FunctionDef: case StructDef: case EnumDef: case LangDef: {
return Type(VoidType);
}
@ -749,7 +768,7 @@ type_t *get_type(env_t *env, ast_t *ast)
bool is_discardable(env_t *env, ast_t *ast)
{
switch (ast->tag) {
case UpdateAssign: case Assign: case Declare: case FunctionDef: case StructDef: case EnumDef: case Use:
case UpdateAssign: case Assign: case Declare: case FunctionDef: case StructDef: case EnumDef: case LangDef: case Use:
return true;
default: break;
}

View File

@ -16,7 +16,7 @@ CORD type_to_cord(type_t *t) {
case VoidType: return "Void";
case MemoryType: return "Memory";
case BoolType: return "Bool";
case TextType: return "Text";
case TextType: return Match(t, TextType)->lang ? Match(t, TextType)->lang : "Text";
case IntType: return Match(t, IntType)->bits == 64 ? "Int" : CORD_asprintf("Int%ld", Match(t, IntType)->bits);
case NumType: return Match(t, NumType)->bits == 64 ? "Num" : CORD_asprintf("Num%ld", Match(t, NumType)->bits);
case ArrayType: {

View File

@ -64,7 +64,7 @@ struct type_s {
int64_t bits;
} NumType;
struct {
const char *dsl;
const char *lang;
} TextType;
struct {
type_t *item_type;