diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2026-02-08 22:47:02 -0500 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2026-02-08 22:47:02 -0500 |
| commit | 2b7e96835e75e0d153e7f993d1c4fc2add452ddd (patch) | |
| tree | ed1104f60ed35af2bf3c9d8cd66d17f45683f07c /api | |
| parent | 2371542adb017afc87ecc572901107bf493e214f (diff) | |
Added Text.distance(a,b) for text similarity comparisons.
Diffstat (limited to 'api')
| -rw-r--r-- | api/api.md | 27 | ||||
| -rw-r--r-- | api/text.md | 27 | ||||
| -rw-r--r-- | api/text.yaml | 33 |
3 files changed, 87 insertions, 0 deletions
````diff
@@ -3966,6 +3966,33 @@ assert "Amélie".codepoint_names() == [
 ]
 ```
 
+## Text.distance
+
+```tomo
+Text.distance : func(a: Text, b: Text, language: Text = "C" -> Num)
+```
+
+Get an approximate distance between two texts, such that when the distance is small, the texts are similar and when the distance is large, the texts are dissimilar.
+
+The exact distance algorithm is not specified and may be subject to change over time.
+
+Argument | Type | Description | Default
+---------|------|-------------|---------
+a | `Text` | The first text to compare. | -
+b | `Text` | The second text to compare. | -
+language | `Text` | The ISO 639 language code for which character width to use. | `"C"`
+
+**Return:** The distance between the two texts (larger means more dissimilar).
+
+
+**Example:**
+```tomo
+assert "hello".distance("hello") == 0
+texts := &["goodbye", "hello", "hallo"]
+texts.sort(func(a,b:&Text) a.distance("hello") <> b.distance("hello"))
+assert texts == ["hello", "hallo", "goodbye"]
+
+```
 ## Text.ends_with
 
 ```tomo
diff --git a/api/text.md b/api/text.md
index 928cb6ec..2536ff21 100644
--- a/api/text.md
+++ b/api/text.md
@@ -180,6 +180,33 @@ assert "Amélie".codepoint_names() == [
 ]
 ```
 
+## Text.distance
+
+```tomo
+Text.distance : func(a: Text, b: Text, language: Text = "C" -> Num)
+```
+
+Get an approximate distance between two texts, such that when the distance is small, the texts are similar and when the distance is large, the texts are dissimilar.
+
+The exact distance algorithm is not specified and may be subject to change over time.
+
+Argument | Type | Description | Default
+---------|------|-------------|---------
+a | `Text` | The first text to compare. | -
+b | `Text` | The second text to compare. | -
+language | `Text` | The ISO 639 language code for which character width to use. | `"C"`
+
+**Return:** The distance between the two texts (larger means more dissimilar).
+
+
+**Example:**
+```tomo
+assert "hello".distance("hello") == 0
+texts := &["goodbye", "hello", "hallo"]
+texts.sort(func(a,b:&Text) a.distance("hello") <> b.distance("hello"))
+assert texts == ["hello", "hallo", "goodbye"]
+
+```
 ## Text.ends_with
 
 ```tomo
diff --git a/api/text.yaml b/api/text.yaml
index 6874bfc8..2af7cae4 100644
--- a/api/text.yaml
+++ b/api/text.yaml
@@ -225,6 +225,39 @@ Text.ends_with:
     assert "hello world".ends_with("world", &remainder) == yes
     assert remainder == "hello "
 
+Text.distance:
+  short: distance between two texts
+  description: >
+    Get an approximate distance between two texts, such that when the distance
+    is small, the texts are similar and when the distance is large, the texts
+    are dissimilar.
+  note: >
+    The exact distance algorithm is not specified and may be subject to change
+    over time.
+  return:
+    type: 'Num'
+    description: >
+      The distance between the two texts (larger means more dissimilar).
+  args:
+    a:
+      type: 'Text'
+      description: >
+        The first text to compare.
+    b:
+      type: 'Text'
+      description: >
+        The second text to compare.
+    language:
+      type: 'Text'
+      default: '"C"'
+      description: >
+        The ISO 639 language code for which character width to use.
+  example: |
+    assert "hello".distance("hello") == 0
+    texts := &["goodbye", "hello", "hallo"]
+    texts.sort(func(a,b:&Text) a.distance("hello") <> b.distance("hello"))
+    assert texts == ["hello", "hallo", "goodbye"]
+
 Text.find:
   short: find a substring
   description: >
````
