Added tests/tutorial

2021-08-28 14:26:32 -07:00 · 2021-08-28 14:26:32 -07:00 · 16bf40bd64
commit 16bf40bd64
parent 9db5e91781
78 changed files with 313 additions and 3 deletions
--- a/13
+++ b/13
@ -43,8 +43,15 @@ clean:
 	rm -f $(NAME) $(OBJFILES)

 test: $(NAME)
-	./$(NAME) Comment -r '[@0]'
-	./$(NAME) -g ./grammars/bp.bp -p Grammar ./grammars/bp.bp
+	./$(NAME) Comment -r '[@0]' >/dev/null
+	./$(NAME) -g ./grammars/bp.bp -p Grammar ./grammars/bp.bp >/dev/null
+# Run each tests/*.sh on its .in file and compare with its .out file; any mismatch fails the target.
+	fail=0; for test in tests/*.sh; do base="$${test%.sh}"; \
+		sh "$$test" <"$$base.in" | diff -q - "$$base.out" && continue; fail=1; \
+		sh "$$test" <"$$base.in" | diff -y --color=always - "$$base.out"; done; [ "$$fail" -eq 0 ]
+
+tutorial: # Walk through the interactive pattern-matching tutorial (runs ./tutorial.sh)
+	./tutorial.sh

 leaktest: bp
 	valgrind --leak-check=full ./bp -l -g ./grammars/bp.bp -p Grammar ./grammars/bp.bp
@ -74,4 +81,4 @@ uninstall:
 	  [ "$$confirm" != n ] && rm -rf ~/.config/$(NAME); \
 	fi

-.PHONY: all clean install uninstall leaktest splint test
+.PHONY: all clean install uninstall leaktest splint test tutorial
--- a/README.md
+++ b/README.md
@ -79,6 +79,11 @@ Pattern            | Meaning
 See `man ./bp.1` for more details.


+## Tutorial
+
+Run `make tutorial` to run through the tutorial. It walks through some basic pattern matching.
+
+
 ## Grammar Files

 BP comes packaged with some pattern definitions that can be useful when parsing
--- a/tests/1-literal.in
+++ b/tests/1-literal.in
@ -0,0 +1,5 @@
+xxx
+foo
+baz
+xxfooxx
+zzz
--- a/tests/1-literal.out
+++ b/tests/1-literal.out
@ -0,0 +1,2 @@
+foo
+xxfooxx
--- a/tests/1-literal.sh
+++ b/tests/1-literal.sh
@ -0,0 +1,3 @@
+# Use double quotation marks to match literal strings
+# Example: bp -p '"baz"'
+bp -p '"foo"'
--- a/tests/10-words.in
+++ b/tests/10-words.in
@ -0,0 +1,5 @@
+foobar
+foo
+bazfoo
+xxfooxx
+one foo two
--- a/tests/10-words.out
+++ b/tests/10-words.out
@ -0,0 +1,2 @@
+foo
+one foo two
--- a/tests/10-words.sh
+++ b/tests/10-words.sh
@ -0,0 +1,3 @@
+# The | operator matches word edges
+# Example: bp -p '|"baz"|' matches the word "baz"
+bp '\|foo\|'
--- a/tests/11-ordered-choice.in
+++ b/tests/11-ordered-choice.in
@ -0,0 +1,4 @@
+foo
+bar
+xxx
+yyy
--- a/tests/11-ordered-choice.out
+++ b/tests/11-ordered-choice.out
@ -0,0 +1,2 @@
+foo
+bar
--- a/tests/11-ordered-choice.sh
+++ b/tests/11-ordered-choice.sh
@ -0,0 +1,5 @@
+# The ordered choice operator (/) picks the first choice that matches
+# Example: bp -p '"cabaret"/"cab"' matches either "cabaret" or "cab"
+# Note: if a match occurs, the options to the right will *never* be attempted,
+# so bp -p '"cab"/"cabaret"' will always match "cab" instead of "cabaret"
+bp -p '"foo" / "bar"'
--- a/tests/12-star.in
+++ b/tests/12-star.in
@ -0,0 +1,5 @@
+()
+(x)
+(xxxxxx)
+(y)
+nope
--- a/tests/12-star.out
+++ b/tests/12-star.out
@ -0,0 +1,3 @@
+()
+(x)
+(xxxxxx)
--- a/tests/12-star.sh
+++ b/tests/12-star.sh
@ -0,0 +1,3 @@
+# The star (*) prefix operator matches zero or more repetitions
+# Example: bp -p '"Ha" *"ha"' will match "Ha", "Haha", "Hahaha", etc.
+bp -p '`( *`x `)'
--- a/tests/13-plus.in
+++ b/tests/13-plus.in
@ -0,0 +1,5 @@
+(x)
+(xxxxxx)
+(y)
+()
+nope
--- a/tests/13-plus.out
+++ b/tests/13-plus.out
@ -0,0 +1,2 @@
+(x)
+(xxxxxx)
--- a/tests/13-plus.sh
+++ b/tests/13-plus.sh
@ -0,0 +1,3 @@
+# The plus (+) prefix operator matches one or more of a pattern
+# Example: bp -p '"l" +"ol"' will match "lol", "lolol", "lololol", etc.
+bp -p '`( +`x `)'
--- a/tests/14-repeat-sep.in
+++ b/tests/14-repeat-sep.in
@ -0,0 +1,8 @@
+foo(1)
+foo(1,2,3,4)
+foo(23,34,56)
+foo(xxx)
+foo(,,1,,)
+xxx
+,,,
+1,2,3
--- a/tests/14-repeat-sep.out
+++ b/tests/14-repeat-sep.out
@ -0,0 +1,3 @@
+foo(1)
+foo(1,2,3,4)
+foo(23,34,56)
--- a/tests/14-repeat-sep.sh
+++ b/tests/14-repeat-sep.sh
@ -0,0 +1,3 @@
+# The '%' operator modifies repeating patterns, allowing you to give a separator between matches
+# Example: bp -p '+"x" % ":"' will match "x", "x:x", "x:x:x", etc.
+bp -p '`( +int % `, `)'
--- a/tests/15-repeating.in
+++ b/tests/15-repeating.in
@ -0,0 +1,8 @@
+(1234)
+(abcd)
+(abcd)
+(;;;;)
+(12)
+(ab)
+(x)
+(1234567)
--- a/tests/15-repeating.out
+++ b/tests/15-repeating.out
@ -0,0 +1,4 @@
+(1234)
+(abcd)
+(abcd)
+(;;;;)
--- a/tests/15-repeating.sh
+++ b/tests/15-repeating.sh
@ -0,0 +1,3 @@
+# Numbers allow you to specify repetitions of a pattern
+# Example: bp -p '3 "x"' matches "xxx"
+bp -p '`( 4 . `)'
--- a/tests/16-lookahead.in
+++ b/tests/16-lookahead.in
@ -0,0 +1,4 @@
+one
+two
+three
+four
--- a/tests/16-lookahead.out
+++ b/tests/16-lookahead.out
@ -0,0 +1,2 @@
+two
+three
--- a/tests/16-lookahead.sh
+++ b/tests/16-lookahead.sh
@ -0,0 +1,3 @@
+# >pat is a lookahead
+# Example: bp -p '"foo" >`(' will match "foo" only when it is followed by a parenthesis
+bp -p '>`t word'
--- a/tests/17-lookbehind.in
+++ b/tests/17-lookbehind.in
@ -0,0 +1,4 @@
+1234
+xxxx23yy
+3
+xxx3xx
--- a/tests/17-lookbehind.out
+++ b/tests/17-lookbehind.out
@ -0,0 +1,2 @@
+1234
+xxxx23yy
--- a/tests/17-lookbehind.sh
+++ b/tests/17-lookbehind.sh
@ -0,0 +1,3 @@
+# <pat is a lookbehind
+# Example: bp -p '<`: word' will match words that come after a colon
+bp -p '<`2 `3'
--- a/tests/18-lookbehind-variable-length.in
+++ b/tests/18-lookbehind-variable-length.in
@ -0,0 +1,4 @@
+Uh...ok
+Uhhhhh...ok
+ok
+xxxxokxxx
--- a/tests/18-lookbehind-variable-length.out
+++ b/tests/18-lookbehind-variable-length.out
@ -0,0 +1,2 @@
+Uh...ok
+Uhhhhh...ok
--- a/tests/18-lookbehind-variable-length.sh
+++ b/tests/18-lookbehind-variable-length.sh
@ -0,0 +1,3 @@
+# Lookbehinds can have variable length.
+# Example: bp -p '<(^ +`# _) "foo"' matches lines starting with "# foo", "## foo", "### foo", etc.
+bp -p '<(`U +`h "...") "ok"'
--- a/tests/19-negation.in
+++ b/tests/19-negation.in
@ -0,0 +1,4 @@
+foo
+food
+foobar
+xxx
--- a/tests/19-negation.out
+++ b/tests/19-negation.out
@ -0,0 +1,2 @@
+foo
+food
--- a/tests/19-negation.sh
+++ b/tests/19-negation.sh
@ -0,0 +1,3 @@
+# !pat matches only if pat doesn't match
+# Example: bp -p '"cat" !"aclysm"' matches the "cat" in "catatonic", but not "cataclysm"
+bp -p '"foo" !"bar"'
--- a/tests/2-char.in
+++ b/tests/2-char.in
@ -0,0 +1,5 @@
+nope
+xylophone
+not this line
+hexagonal
+oxen
--- a/tests/2-char.out
+++ b/tests/2-char.out
@ -0,0 +1,3 @@
+xylophone
+hexagonal
+oxen
--- a/tests/2-char.sh
+++ b/tests/2-char.sh
@ -0,0 +1,3 @@
+# Match a single character with backtick:
+# Example: bp -p '`A' matches the letter "A"
+bp -p '`x'
--- a/tests/20-submatch.in
+++ b/tests/20-submatch.in
@ -0,0 +1,5 @@
+one
+two
+three
+four
+five
--- a/tests/20-submatch.out
+++ b/tests/20-submatch.out
@ -0,0 +1,3 @@
+one
+three
+five
--- a/tests/20-submatch.sh
+++ b/tests/20-submatch.sh
@ -0,0 +1,3 @@
+# pat1 ~ pat2 matches if pat2 can be found within pat1, like words containing "e"
+# Example: bp -p '+`0-9 ~ `5' matches "12345" and "72581", but not "789"
+bp -p 'word ~ `e'
--- a/tests/21-no-submatch.in
+++ b/tests/21-no-submatch.in
@ -0,0 +1,5 @@
+one
+two
+three
+four
+five
--- a/tests/21-no-submatch.out
+++ b/tests/21-no-submatch.out
@ -0,0 +1,2 @@
+two
+four
--- a/tests/21-no-submatch.sh
+++ b/tests/21-no-submatch.sh
@ -0,0 +1,3 @@
+# pat1 !~ pat2 matches if pat2 cannot be found within pat1, like words *not* containing "e"
+# Example: bp -p '(|+`0-9|) !~ `5' matches "123" and "678", but not "456"
+bp -p 'word !~ `e'
--- a/tests/22-replace.in
+++ b/tests/22-replace.in
@ -0,0 +1,2 @@
+Microsoft
+Windows
--- a/tests/22-replace.out
+++ b/tests/22-replace.out
@ -0,0 +1,2 @@
+Micro$oft
+Window$
--- a/tests/22-replace.sh
+++ b/tests/22-replace.sh
@ -0,0 +1,3 @@
+# Replacements can be done with pat => "replacement"
+# Example: bp -p '"foo" => "baz"' matches the "foo" in "foobar" and replaces it, giving "bazbar"
+bp -p '"s" => "$"'
--- a/tests/23-nested-parens.in
+++ b/tests/23-nested-parens.in
@ -0,0 +1,5 @@
+foo(one(), two(three()));
+foo();
+xx()))));
+xx(((();
+yy)()();
--- a/tests/23-nested-parens.out
+++ b/tests/23-nested-parens.out
@ -0,0 +1,2 @@
+foo(one(), two(three()));
+foo();
--- a/tests/23-nested-parens.sh
+++ b/tests/23-nested-parens.sh
@ -0,0 +1,3 @@
+# parens is a pattern matching nested parentheses
+# Example: bp -p '"foo" parens' matches "foo()" or "foo(baz(), qux())", but not "foo(()"
+bp -p 'id parens `;'
--- a/tests/24-backref.in
+++ b/tests/24-backref.in
@ -0,0 +1,5 @@
+foo baz foo
+xx yy xx
+x x x
+foo baz xx
+a b c
--- a/tests/24-backref.out
+++ b/tests/24-backref.out
@ -0,0 +1,3 @@
+foo baz foo
+xx yy xx
+x x x
--- a/tests/24-backref.sh
+++ b/tests/24-backref.sh
@ -0,0 +1,3 @@
+# With @-capturing, you can reference previous captures
+# Example: bp -p '@first=`a-z .. first' matches "aba" and "xyzx", but not "abc"
+bp -p '@first=+Abc _ +Abc _ first'
--- a/tests/25-replace-capture.in
+++ b/tests/25-replace-capture.in
@ -0,0 +1,5 @@
+one
+two
+three
+four
+five
--- a/tests/25-replace-capture.out
+++ b/tests/25-replace-capture.out
@ -0,0 +1,5 @@
+{o}n{e}
+tw{o}
+thr{e}{e}
+f{o}{u}r
+f{i}v{e}
--- a/tests/25-replace-capture.sh
+++ b/tests/25-replace-capture.sh
@ -0,0 +1,4 @@
+# Captures with @ can be referenced in a replacement by @1, @2, etc.
+# Example: bp -p '"=" _ @+`0-9 => "= -@1"' replaces "x = 5" with "x = -5"
+# Note: @0 refers to the entire match, e.g. bp -p '"foo" => "xx@0xx"' replaces "foo" with "xxfooxx"
+bp -p '@`a,e,i,o,u => "{@1}" / .'
--- a/tests/3-char-range.in
+++ b/tests/3-char-range.in
@ -0,0 +1,9 @@
+0
+1
+X
+2
+!
+3
+;
+a
+f
--- a/tests/3-char-range.out
+++ b/tests/3-char-range.out
@ -0,0 +1,6 @@
+0
+1
+2
+3
+a
+f
--- a/tests/3-char-range.sh
+++ b/tests/3-char-range.sh
@ -0,0 +1,3 @@
+# Character sets and ranges work with backticks
+# Example: bp -p '`a-z,A-Z' matches all lowercase and uppercase letters
+bp -p '`0-9,a-f'
--- a/tests/4-sequence.in
+++ b/tests/4-sequence.in
@ -0,0 +1,4 @@
+onetwo
+one
+two
+xxxx
--- a/tests/4-sequence.out
+++ b/tests/4-sequence.out
@ -0,0 +1 @@
+onetwo
--- a/tests/4-sequence.sh
+++ b/tests/4-sequence.sh
@ -0,0 +1,5 @@
+# Multiple patterns in a row represent a sequence.
+# bp pattern syntax mostly doesn't care about whitespace, so you can have
+# spaces between patterns if you want, but it's not required.
+# Example: bp -p '"foo" `0-9' matches "foo1", "foo2", etc.
+bp -p '"one" "two"'
--- a/tests/5-dot.in
+++ b/tests/5-dot.in
@ -0,0 +1,6 @@
+aX
+aY
+a
+Xa
+y
+zz
--- a/tests/5-dot.out
+++ b/tests/5-dot.out
@ -0,0 +1,2 @@
+aX
+aY
--- a/tests/5-dot.sh
+++ b/tests/5-dot.sh
@ -0,0 +1,3 @@
+# The dot matches a single character
+# Example: bp -p '.'
+bp -p '`a .'
--- a/tests/6-start-of-line.in
+++ b/tests/6-start-of-line.in
@ -0,0 +1,5 @@
+xxxxx
+foo
+foobar
+barfoo
+xxfooxx
--- a/tests/6-start-of-line.out
+++ b/tests/6-start-of-line.out
@ -0,0 +1,2 @@
+foo
+foobar
--- a/tests/6-start-of-line.sh
+++ b/tests/6-start-of-line.sh
@ -0,0 +1,3 @@
+# ^ matches start of a line
+# Example: bp -p '^ "x"' matches lines starting with "x"
+bp -p '^ "foo"'
--- a/tests/7-end-of-line.in
+++ b/tests/7-end-of-line.in
@ -0,0 +1,4 @@
+xxxx
+foobar
+foo
+barfoo
--- a/tests/7-end-of-line.out
+++ b/tests/7-end-of-line.out
@ -0,0 +1,2 @@
+foo
+barfoo
--- a/tests/7-end-of-line.sh
+++ b/tests/7-end-of-line.sh
@ -0,0 +1,3 @@
+# $ matches end of line
+# Example: bp -p '"x" $' matches lines ending with "x"
+bp -p '"foo" $'
--- a/tests/8-spaces.in
+++ b/tests/8-spaces.in
@ -0,0 +1,6 @@
+onetwo
+one two
+one    two
+one	two
+xxx
+one;;;two
--- a/tests/8-spaces.out
+++ b/tests/8-spaces.out
@ -0,0 +1,4 @@
+onetwo
+one two
+one    two
+one	two
--- a/tests/8-spaces.sh
+++ b/tests/8-spaces.sh
@ -0,0 +1,3 @@
+# The _ pattern matches zero or more spaces/tabs
+# Example: bp -p '`= _ "foo"' matches "=foo", "= foo", "=  foo", etc.
+bp -p '"one" _ "two"'
--- a/tests/9-ellipsis.in
+++ b/tests/9-ellipsis.in
@ -0,0 +1,6 @@
+helloworld
+hello;;;world
+hello1234world
+goodbye
+hello
+helloworxx
--- a/tests/9-ellipsis.out
+++ b/tests/9-ellipsis.out
@ -0,0 +1,3 @@
+helloworld
+hello;;;world
+hello1234world
--- a/tests/9-ellipsis.sh
+++ b/tests/9-ellipsis.sh
@ -0,0 +1,3 @@
+# The ellipsis matches text up to the following pattern, not counting newlines
+# Example: bp -p '"/*" .. "*/"' matches "/* blah blah */" or "/**/"
+bp -p '"hello" .. "world"'
--- a/tutorial.sh
+++ b/tutorial.sh
@ -0,0 +1,24 @@
+#!/bin/sh
+# Run a small tutorial on basic bp functionality
+
+tmpfile="$(mktemp /tmp/bp-tutorial.XXXXXX)"
+trap 'rm "$tmpfile"' EXIT
+
+for t in $([ $# -gt 0 ] && echo "$@" || ls -v tests/*.sh); do
+    echo
+    printf "\033[1m"
+    sed -n 's/^# //p' "$t"
+    printf "\033[0m"
+    printf "\033[33;1mGiven these lines:              Give this output:\033[0m\n"
+    diff -y -W60 --color=always "${t/.sh/.in}" "${t/.sh/.out}"
+    while true; do
+        printf "\n\033[1mbp pattern: \033[0m"
+        read -r pat
+        printf "\033[0;2mRunning: \033[32m%s\033[0m\n\n" "bp -p '$pat'"
+        printf "\033[33;1mExpected output:                Your pattern's output:\033[0m\n"
+        bp -p "$pat" < "${t/.sh/.in}" 2>"$tmpfile" | diff -y -W60 --color=always "${t/.sh/.out}" - && break
+        cat "$tmpfile"
+        printf "\n\033[0;1;31mSorry, try again!\033[0m\n"
+    done
+    printf "\n\033[0;1;32mCorrect!\033[0m\n"
+done