Split pattern code into its own file

This commit is contained in:
Bruce Hill 2024-09-13 13:34:04 -04:00
parent ed55fc2c7a
commit 9447ba8c4a
10 changed files with 1157 additions and 1114 deletions

View File

@ -31,7 +31,7 @@ LDLIBS=-lgc -lcord -lm -lunistring -lgmp -ldl
BUILTIN_OBJS=builtins/siphash.o builtins/array.o builtins/bool.o builtins/channel.o builtins/nums.o builtins/functions.o builtins/integers.o \
builtins/pointer.o builtins/memory.o builtins/text.o builtins/thread.o builtins/c_string.o builtins/table.o \
builtins/types.o builtins/util.o builtins/files.o builtins/range.o builtins/shell.o builtins/path.o \
builtins/optionals.o
builtins/optionals.o builtins/pattern.o
TESTS=$(patsubst %.tm,%.tm.testresult,$(wildcard test/*.tm))
all: libtomo.so tomo

View File

@ -18,6 +18,7 @@
#include "functions.h"
#include "integers.h"
#include "optionals.h"
#include "pattern.h"
#include "pointer.h"
#include "siphash.h"
#include "string.h"

View File

@ -18,6 +18,7 @@
#include "integers.h"
#include "optionals.h"
#include "path.h"
#include "pattern.h"
#include "text.h"
#include "types.h"
#include "util.h"

1065
builtins/pattern.c Normal file

File diff suppressed because it is too large Load Diff

33
builtins/pattern.h Normal file
View File

@ -0,0 +1,33 @@
#pragma once
// The type representing text patterns for pattern matching.
#include <stdbool.h>
#include <printf.h>
#include <stdint.h>
#include "datatypes.h"
#include "integers.h"
#include "types.h"
// Build a Pattern_t by casting the result of Text(text) to the pattern type.
#define Pattern(text) ((Pattern_t)Text(text))
// Variadic counterpart: cast the result of Texts(...) to the pattern type.
#define Patterns(...) ((Pattern_t)Texts(__VA_ARGS__))
// Pattern-based text operations. Only the prototypes are visible in this
// header, so the notes below are inferred from names and parameter lists.
// NOTE(review): confirm each against the implementation before relying on it.
// Presumably replaces matches of `pat` in `str` with `replacement`; the exact
// roles of `backref_pat` and `recursive` are not visible here -- TODO confirm.
Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat, bool recursive);
// Presumably produces a pattern that matches `text` literally -- TODO confirm.
Pattern_t Pattern$escape_text(Text_t text);
// Like Text$replace, but takes a table of replacements instead of one pair.
// NOTE(review): key/value types of `replacements` are not visible -- confirm.
Text_t Text$replace_all(Text_t text, Table_t replacements, Pattern_t backref_pat, bool recursive);
// Presumably splits `text` at matches of `pattern` -- TODO confirm element type.
Array_t Text$split(Text_t text, Pattern_t pattern);
// Presumably strips matches of `pattern` from the side(s) selected by
// `trim_left` / `trim_right` -- TODO confirm.
Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right);
// Takes an Int_t position `i` and an out-pointer `match_length`.
// NOTE(review): presumably searches from `i`, returns the match position and
// stores the match length through the pointer; not-found value unknown here.
Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length);
// Presumably collects every match of `pattern` in `text` -- TODO confirm.
Array_t Text$find_all(Text_t text, Pattern_t pattern);
// Boolean predicates, both marked PUREFUNC (macro defined elsewhere).
// NOTE(review): presumably "contains a match" vs. "matches as a whole" -- confirm.
PUREFUNC bool Text$has(Text_t text, Pattern_t pattern);
PUREFUNC bool Text$matches(Text_t text, Pattern_t pattern);
// Takes a closure `fn` alongside the pattern; presumably applies it to each
// match to build the result -- TODO confirm the closure's expected signature.
Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn);
// Hashing, ordering and equality for patterns are plain aliases of the Text
// implementations.
#define Pattern$hash Text$hash
#define Pattern$compare Text$compare
#define Pattern$equal Text$equal
// TypeInfo object for the Pattern type; declared here, defined elsewhere.
extern const TypeInfo Pattern$info;
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0

View File

@ -5,6 +5,7 @@
#include "array.h"
#include "functions.h"
#include "integers.h"
#include "pattern.h"
#include "shell.h"
#include "text.h"
#include "types.h"

File diff suppressed because it is too large Load Diff

View File

@ -6,11 +6,16 @@
#include <stdbool.h>
#include <printf.h>
#include <stdint.h>
#include <unistr.h>
#include "datatypes.h"
#include "integers.h"
#include "types.h"
// Iteration state handed by pointer to the Text$get_grapheme*_fast() lookups.
// NOTE(review): field meanings below are inferred from their names -- confirm.
typedef struct {
    int64_t subtext;                  // presumably which subtext is currently being read
    int64_t sum_of_previous_subtexts; // presumably combined length of the subtexts before it
} TextIter_t;
int printf_text(FILE *stream, const struct printf_info *info, const void *const args[]);
int printf_text_size(const struct printf_info *info, size_t n, int argtypes[n], int sizes[n]);
@ -34,16 +39,8 @@ Text_t Text$lower(Text_t text);
Text_t Text$title(Text_t text);
Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info);
Text_t Text$quoted(Text_t str, bool colorize);
Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat, bool recursive);
Text_t Text$replace_all(Text_t text, Table_t replacements, Pattern_t backref_pat, bool recursive);
Array_t Text$split(Text_t text, Pattern_t pattern);
Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right);
Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length);
Array_t Text$find_all(Text_t text, Pattern_t pattern);
PUREFUNC bool Text$has(Text_t text, Pattern_t pattern);
PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix);
PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix);
PUREFUNC bool Text$matches(Text_t text, Pattern_t pattern);
char *Text$as_c_string(Text_t text);
__attribute__((format(printf, 1, 2)))
public Text_t Text$format(const char *fmt, ...);
@ -56,19 +53,16 @@ Text_t Text$from_codepoint_names(Array_t codepoint_names);
Text_t Text$from_bytes(Array_t bytes);
Array_t Text$lines(Text_t text);
Text_t Text$join(Text_t glue, Array_t pieces);
Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn);
Text_t Text$repeat(Text_t text, Int_t count);
int32_t Text$get_grapheme_fast(Text_t text, TextIter_t *state, int64_t index);
ucs4_t Text$get_main_grapheme_fast(Text_t text, TextIter_t *state, int64_t index);
// Convenience wrapper around Text$get_grapheme_fast(): performs a single
// lookup at `index` using a throwaway, zero-initialized iterator state, so
// callers that do not iterate need not manage a TextIter_t themselves.
static inline int32_t Text$get_grapheme(Text_t text, int64_t index)
{
    TextIter_t fresh_iter = {.subtext = 0, .sum_of_previous_subtexts = 0};
    return Text$get_grapheme_fast(text, &fresh_iter, index);
}
extern const TypeInfo Text$info;
#define Pattern(text) ((Pattern_t)Text(text))
#define Patterns(...) ((Pattern_t)Texts(__VA_ARGS__))
Pattern_t Pattern$escape_text(Text_t text);
#define Pattern$hash Text$hash
#define Pattern$compare Text$compare
#define Pattern$equal Text$equal
extern const TypeInfo Pattern$info;
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0

View File

@ -22,6 +22,7 @@
#include "nums.h"
#include "optionals.h"
#include "path.h"
#include "pattern.h"
#include "pointer.h"
#include "range.h"
#include "shell.h"

View File

@ -255,9 +255,11 @@ finding the value because the two texts are equivalent under normalization.
# Patterns
As an alternative to full regular expressions, Tomo provides a limited string
matching pattern syntax that is intended to solve 80% of use cases in 2% of the
code size (PCRE's codebase is roughly 150k lines of code, and Tomo's entire
Text codebase is around 1.8K lines of code).
matching pattern syntax that is intended to solve 80% of use cases in under 1%
of the code size (PCRE's codebase is roughly 150k lines of code, and Tomo's
pattern matching code is a bit under 1k lines of code). Tomo's pattern matching
syntax is highly readable and works well for matching literal text without
getting [leaning toothpick syndrome](https://en.wikipedia.org/wiki/Leaning_toothpick_syndrome).
For more advanced use cases, consider linking against a C library for regular
expressions or pattern matching.