Split pattern code into its own file
This commit is contained in:
parent
ed55fc2c7a
commit
9447ba8c4a
2
Makefile
2
Makefile
@ -31,7 +31,7 @@ LDLIBS=-lgc -lcord -lm -lunistring -lgmp -ldl
|
||||
BUILTIN_OBJS=builtins/siphash.o builtins/array.o builtins/bool.o builtins/channel.o builtins/nums.o builtins/functions.o builtins/integers.o \
|
||||
builtins/pointer.o builtins/memory.o builtins/text.o builtins/thread.o builtins/c_string.o builtins/table.o \
|
||||
builtins/types.o builtins/util.o builtins/files.o builtins/range.o builtins/shell.o builtins/path.o \
|
||||
builtins/optionals.o
|
||||
builtins/optionals.o builtins/pattern.o
|
||||
TESTS=$(patsubst %.tm,%.tm.testresult,$(wildcard test/*.tm))
|
||||
|
||||
all: libtomo.so tomo
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "functions.h"
|
||||
#include "integers.h"
|
||||
#include "optionals.h"
|
||||
#include "pattern.h"
|
||||
#include "pointer.h"
|
||||
#include "siphash.h"
|
||||
#include "string.h"
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "integers.h"
|
||||
#include "optionals.h"
|
||||
#include "path.h"
|
||||
#include "pattern.h"
|
||||
#include "text.h"
|
||||
#include "types.h"
|
||||
#include "util.h"
|
||||
|
1065
builtins/pattern.c
Normal file
1065
builtins/pattern.c
Normal file
File diff suppressed because it is too large
Load Diff
33
builtins/pattern.h
Normal file
33
builtins/pattern.h
Normal file
@ -0,0 +1,33 @@
|
||||
#pragma once
|
||||
|
||||
// The type representing text patterns for pattern matching.
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <printf.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "datatypes.h"
|
||||
#include "integers.h"
|
||||
#include "types.h"
|
||||
|
||||
#define Pattern(text) ((Pattern_t)Text(text))
|
||||
#define Patterns(...) ((Pattern_t)Texts(__VA_ARGS__))
|
||||
|
||||
Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat, bool recursive);
|
||||
Pattern_t Pattern$escape_text(Text_t text);
|
||||
Text_t Text$replace_all(Text_t text, Table_t replacements, Pattern_t backref_pat, bool recursive);
|
||||
Array_t Text$split(Text_t text, Pattern_t pattern);
|
||||
Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right);
|
||||
Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length);
|
||||
Array_t Text$find_all(Text_t text, Pattern_t pattern);
|
||||
PUREFUNC bool Text$has(Text_t text, Pattern_t pattern);
|
||||
PUREFUNC bool Text$matches(Text_t text, Pattern_t pattern);
|
||||
Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn);
|
||||
|
||||
#define Pattern$hash Text$hash
|
||||
#define Pattern$compare Text$compare
|
||||
#define Pattern$equal Text$equal
|
||||
|
||||
extern const TypeInfo Pattern$info;
|
||||
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
@ -5,6 +5,7 @@
|
||||
#include "array.h"
|
||||
#include "functions.h"
|
||||
#include "integers.h"
|
||||
#include "pattern.h"
|
||||
#include "shell.h"
|
||||
#include "text.h"
|
||||
#include "types.h"
|
||||
|
1127
builtins/text.c
1127
builtins/text.c
File diff suppressed because it is too large
Load Diff
@ -6,11 +6,16 @@
|
||||
#include <stdbool.h>
|
||||
#include <printf.h>
|
||||
#include <stdint.h>
|
||||
#include <unistr.h>
|
||||
|
||||
#include "datatypes.h"
|
||||
#include "integers.h"
|
||||
#include "types.h"
|
||||
|
||||
// Iteration state that is passed by pointer as the `state` argument of
// Text$get_grapheme_fast() and Text$get_main_grapheme_fast().
// NOTE(review): the field meanings below are inferred from the names only --
// `subtext` looks like the index of the current subtext, and
// `sum_of_previous_subtexts` like the combined length of the subtexts that
// precede it; confirm against the implementation in text.c.
typedef struct {
|
||||
int64_t subtext, sum_of_previous_subtexts;
|
||||
} TextIter_t;
|
||||
|
||||
int printf_text(FILE *stream, const struct printf_info *info, const void *const args[]);
|
||||
int printf_text_size(const struct printf_info *info, size_t n, int argtypes[n], int sizes[n]);
|
||||
|
||||
@ -34,16 +39,8 @@ Text_t Text$lower(Text_t text);
|
||||
Text_t Text$title(Text_t text);
|
||||
Text_t Text$as_text(const void *text, bool colorize, const TypeInfo *info);
|
||||
Text_t Text$quoted(Text_t str, bool colorize);
|
||||
Text_t Text$replace(Text_t str, Pattern_t pat, Text_t replacement, Pattern_t backref_pat, bool recursive);
|
||||
Text_t Text$replace_all(Text_t text, Table_t replacements, Pattern_t backref_pat, bool recursive);
|
||||
Array_t Text$split(Text_t text, Pattern_t pattern);
|
||||
Text_t Text$trim(Text_t text, Pattern_t pattern, bool trim_left, bool trim_right);
|
||||
Int_t Text$find(Text_t text, Pattern_t pattern, Int_t i, int64_t *match_length);
|
||||
Array_t Text$find_all(Text_t text, Pattern_t pattern);
|
||||
PUREFUNC bool Text$has(Text_t text, Pattern_t pattern);
|
||||
PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix);
|
||||
PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix);
|
||||
PUREFUNC bool Text$matches(Text_t text, Pattern_t pattern);
|
||||
char *Text$as_c_string(Text_t text);
|
||||
__attribute__((format(printf, 1, 2)))
|
||||
public Text_t Text$format(const char *fmt, ...);
|
||||
@ -56,19 +53,16 @@ Text_t Text$from_codepoint_names(Array_t codepoint_names);
|
||||
Text_t Text$from_bytes(Array_t bytes);
|
||||
Array_t Text$lines(Text_t text);
|
||||
Text_t Text$join(Text_t glue, Array_t pieces);
|
||||
Text_t Text$map(Text_t text, Pattern_t pattern, Closure_t fn);
|
||||
Text_t Text$repeat(Text_t text, Int_t count);
|
||||
int32_t Text$get_grapheme_fast(Text_t text, TextIter_t *state, int64_t index);
|
||||
ucs4_t Text$get_main_grapheme_fast(Text_t text, TextIter_t *state, int64_t index);
|
||||
|
||||
// Convenience wrapper around Text$get_grapheme_fast(): looks up the grapheme
// at `index` in `text` using a fresh, zero-initialized TextIter_t, so callers
// that only need a single lookup do not have to manage iterator state.
// Returns whatever Text$get_grapheme_fast() returns for that index.
static inline int32_t Text$get_grapheme(Text_t text, int64_t index)
|
||||
{
|
||||
TextIter_t state = {0, 0};
|
||||
return Text$get_grapheme_fast(text, &state, index);
|
||||
}
|
||||
|
||||
extern const TypeInfo Text$info;
|
||||
|
||||
#define Pattern(text) ((Pattern_t)Text(text))
|
||||
#define Patterns(...) ((Pattern_t)Texts(__VA_ARGS__))
|
||||
Pattern_t Pattern$escape_text(Text_t text);
|
||||
|
||||
#define Pattern$hash Text$hash
|
||||
#define Pattern$compare Text$compare
|
||||
#define Pattern$equal Text$equal
|
||||
|
||||
extern const TypeInfo Pattern$info;
|
||||
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "nums.h"
|
||||
#include "optionals.h"
|
||||
#include "path.h"
|
||||
#include "pattern.h"
|
||||
#include "pointer.h"
|
||||
#include "range.h"
|
||||
#include "shell.h"
|
||||
|
@ -255,9 +255,11 @@ finding the value because the two texts are equivalent under normalization.
|
||||
# Patterns
|
||||
|
||||
As an alternative to full regular expressions, Tomo provides a limited string
|
||||
matching pattern syntax that is intended to solve 80% of use cases in 2% of the
|
||||
code size (PCRE's codebase is roughly 150k lines of code, and Tomo's entire
|
||||
Text codebase is around 1.8K lines of code).
|
||||
matching pattern syntax that is intended to solve 80% of use cases in under 1%
|
||||
of the code size (PCRE's codebase is roughly 150k lines of code, and Tomo's
|
||||
pattern matching code is a bit under 1k lines of code). Tomo's pattern matching
|
||||
syntax is highly readable and works well for matching literal text without
|
||||
getting [leaning toothpick syndrome](https://en.wikipedia.org/wiki/Leaning_toothpick_syndrome).
|
||||
|
||||
For more advanced use cases, consider linking against a C library for regular
|
||||
expressions or pattern matching.
|
||||
|
Loading…
Reference in New Issue
Block a user