diff --git a/Makefile b/Makefile
index f5eb960..71d9b80 100644
--- a/Makefile
+++ b/Makefile
@@ -43,8 +43,24 @@ clean:
 	rm -f $(NAME) $(OBJFILES)
 
+# Run two smoke tests, then every tests/*.sh script with its .in file on stdin,
+# comparing the output with the matching .out file. The build directory is put
+# first on PATH because the scripts invoke plain `bp`. Any mismatch fails the target.
 test: $(NAME)
-	./$(NAME) Comment -r '[@0]'
-	./$(NAME) -g ./grammars/bp.bp -p Grammar ./grammars/bp.bp
+	./$(NAME) Comment -r '[@0]' >/dev/null
+	./$(NAME) -g ./grammars/bp.bp -p Grammar ./grammars/bp.bp >/dev/null
+	failed=0; \
+	for test in tests/*.sh; do \
+		base="$${test%.sh}"; \
+		PATH="$(CURDIR):$$PATH" sh "$$test" <"$$base.in" | diff -q - "$$base.out" || { \
+			failed=1; \
+			PATH="$(CURDIR):$$PATH" sh "$$test" <"$$base.in" | diff -y --color=always - "$$base.out"; \
+		}; \
+	done; \
+	exit $$failed
+
+# Interactive walkthrough of the lessons in tests/, using the freshly built binary.
+tutorial: $(NAME)
+	PATH="$(CURDIR):$$PATH" ./tutorial.sh
 
 leaktest: bp
 	valgrind --leak-check=full ./bp -l -g ./grammars/bp.bp -p Grammar ./grammars/bp.bp
@@ -74,4 +90,4 @@ uninstall:
 		[ "$$confirm" != n ] && rm -rf ~/.config/$(NAME); \
 	fi
 
-.PHONY: all clean install uninstall leaktest splint test
+.PHONY: all clean install uninstall leaktest splint test tutorial
diff --git a/README.md b/README.md
index d690199..8704687 100644
--- a/README.md
+++ b/README.md
@@ -79,6 +79,11 @@ Pattern | Meaning
 
 See `man ./bp.1` for more details.
 
+## Tutorial
+
+Run `make tutorial` to run through the tutorial. It walks through some basic pattern matching.
+ + ## Grammar Files BP comes packaged with some pattern definitions that can be useful when parsing diff --git a/tests/1-literal.in b/tests/1-literal.in new file mode 100644 index 0000000..80a7210 --- /dev/null +++ b/tests/1-literal.in @@ -0,0 +1,5 @@ +xxx +foo +baz +xxfooxx +zzz diff --git a/tests/1-literal.out b/tests/1-literal.out new file mode 100644 index 0000000..99f2a64 --- /dev/null +++ b/tests/1-literal.out @@ -0,0 +1,2 @@ +foo +xxfooxx diff --git a/tests/1-literal.sh b/tests/1-literal.sh new file mode 100644 index 0000000..618dc4b --- /dev/null +++ b/tests/1-literal.sh @@ -0,0 +1,3 @@ +# Use double quotation marks to match literal strings +# Example: bp -p '"baz"' +bp -p '"foo"' diff --git a/tests/10-words.in b/tests/10-words.in new file mode 100644 index 0000000..b2f655a --- /dev/null +++ b/tests/10-words.in @@ -0,0 +1,5 @@ +foobar +foo +bazfoo +xxfooxx +one foo two diff --git a/tests/10-words.out b/tests/10-words.out new file mode 100644 index 0000000..c10a99e --- /dev/null +++ b/tests/10-words.out @@ -0,0 +1,2 @@ +foo +one foo two diff --git a/tests/10-words.sh b/tests/10-words.sh new file mode 100644 index 0000000..c5f5193 --- /dev/null +++ b/tests/10-words.sh @@ -0,0 +1,3 @@ +# The | operator matches word edges +# Example: bp -p '|"baz"|' matches the word "baz" +bp '\|foo\|' diff --git a/tests/11-ordered-choice.in b/tests/11-ordered-choice.in new file mode 100644 index 0000000..b81279a --- /dev/null +++ b/tests/11-ordered-choice.in @@ -0,0 +1,4 @@ +foo +bar +xxx +yyy diff --git a/tests/11-ordered-choice.out b/tests/11-ordered-choice.out new file mode 100644 index 0000000..3bd1f0e --- /dev/null +++ b/tests/11-ordered-choice.out @@ -0,0 +1,2 @@ +foo +bar diff --git a/tests/11-ordered-choice.sh b/tests/11-ordered-choice.sh new file mode 100644 index 0000000..277741d --- /dev/null +++ b/tests/11-ordered-choice.sh @@ -0,0 +1,5 @@ +# The ordered choice operator (/) picks the first choice that matches +# Example: bp -p '"cabaret"/"cab"' matches either 
"cabaret" or "cab" +# Note: if a match occurs, the options to the right will *never* be attempted, +# so bp -p '"cab"/"cabaret"' will always match "cab" instead of "cabaret" +bp -p '"foo" / "bar"' diff --git a/tests/12-star.in b/tests/12-star.in new file mode 100644 index 0000000..5fa70c2 --- /dev/null +++ b/tests/12-star.in @@ -0,0 +1,5 @@ +() +(x) +(xxxxxx) +(y) +nope diff --git a/tests/12-star.out b/tests/12-star.out new file mode 100644 index 0000000..0117a27 --- /dev/null +++ b/tests/12-star.out @@ -0,0 +1,3 @@ +() +(x) +(xxxxxx) diff --git a/tests/12-star.sh b/tests/12-star.sh new file mode 100644 index 0000000..bd2d578 --- /dev/null +++ b/tests/12-star.sh @@ -0,0 +1,3 @@ +# The star (*) prefix operator matches zero or more repetitions +# Example: bp -p '"Ha" *"ha"' will match "Ha", "Haha", "Hahaha", etc. +bp -p '`( *`x `)' diff --git a/tests/13-plus.in b/tests/13-plus.in new file mode 100644 index 0000000..d2f79f2 --- /dev/null +++ b/tests/13-plus.in @@ -0,0 +1,5 @@ +(x) +(xxxxxx) +(y) +() +nope diff --git a/tests/13-plus.out b/tests/13-plus.out new file mode 100644 index 0000000..a593f9c --- /dev/null +++ b/tests/13-plus.out @@ -0,0 +1,2 @@ +(x) +(xxxxxx) diff --git a/tests/13-plus.sh b/tests/13-plus.sh new file mode 100644 index 0000000..c2aa7cc --- /dev/null +++ b/tests/13-plus.sh @@ -0,0 +1,3 @@ +# The plus (+) prefix operator matches one or more of a pattern +# Example: bp -p '"l" +"ol"' will match "lol", "lolol", "lololol", etc. 
+bp -p '`( +`x `)' diff --git a/tests/14-repeat-sep.in b/tests/14-repeat-sep.in new file mode 100644 index 0000000..cac9acb --- /dev/null +++ b/tests/14-repeat-sep.in @@ -0,0 +1,8 @@ +foo(1) +foo(1,2,3,4) +foo(23,34,56) +foo(xxx) +foo(,,1,,) +xxx +,,, +1,2,3 diff --git a/tests/14-repeat-sep.out b/tests/14-repeat-sep.out new file mode 100644 index 0000000..88984ce --- /dev/null +++ b/tests/14-repeat-sep.out @@ -0,0 +1,3 @@ +foo(1) +foo(1,2,3,4) +foo(23,34,56) diff --git a/tests/14-repeat-sep.sh b/tests/14-repeat-sep.sh new file mode 100644 index 0000000..879ec4b --- /dev/null +++ b/tests/14-repeat-sep.sh @@ -0,0 +1,3 @@ +# The '%' operator modifies repeating patterns, allowing you to give a separator between matches +# Example: bp -p '+"x" % ":"' will match "x", "x:x", "x:x:x", etc. +bp -p '`( +int % `, `)' diff --git a/tests/15-repeating.in b/tests/15-repeating.in new file mode 100644 index 0000000..2d3c3ca --- /dev/null +++ b/tests/15-repeating.in @@ -0,0 +1,8 @@ +(1234) +(abcd) +(abcd) +(;;;;) +(12) +(ab) +(x) +(1234567) diff --git a/tests/15-repeating.out b/tests/15-repeating.out new file mode 100644 index 0000000..dbfe575 --- /dev/null +++ b/tests/15-repeating.out @@ -0,0 +1,4 @@ +(1234) +(abcd) +(abcd) +(;;;;) diff --git a/tests/15-repeating.sh b/tests/15-repeating.sh new file mode 100644 index 0000000..115e791 --- /dev/null +++ b/tests/15-repeating.sh @@ -0,0 +1,3 @@ +# Numbers allow you to specify repetitions of a pattern +# Example: bp -p '3 "x"' matches "xxx" +bp -p '`( 4 . 
`)' diff --git a/tests/16-lookahead.in b/tests/16-lookahead.in new file mode 100644 index 0000000..f384549 --- /dev/null +++ b/tests/16-lookahead.in @@ -0,0 +1,4 @@ +one +two +three +four diff --git a/tests/16-lookahead.out b/tests/16-lookahead.out new file mode 100644 index 0000000..1946f04 --- /dev/null +++ b/tests/16-lookahead.out @@ -0,0 +1,2 @@ +two +three diff --git a/tests/16-lookahead.sh b/tests/16-lookahead.sh new file mode 100644 index 0000000..18761bd --- /dev/null +++ b/tests/16-lookahead.sh @@ -0,0 +1,3 @@ +# >pat is a lookahead +# Example: bp -p '"foo" >`(' will match "foo" only when it is followed by a parenthesis +bp -p '>`t word' diff --git a/tests/17-lookbehind.in b/tests/17-lookbehind.in new file mode 100644 index 0000000..6e034c0 --- /dev/null +++ b/tests/17-lookbehind.in @@ -0,0 +1,4 @@ +1234 +xxxx23yy +3 +xxx3xx diff --git a/tests/17-lookbehind.out b/tests/17-lookbehind.out new file mode 100644 index 0000000..2efe860 --- /dev/null +++ b/tests/17-lookbehind.out @@ -0,0 +1,2 @@ +1234 +xxxx23yy diff --git a/tests/17-lookbehind.sh b/tests/17-lookbehind.sh new file mode 100644 index 0000000..0b5136a --- /dev/null +++ b/tests/17-lookbehind.sh @@ -0,0 +1,3 @@ +# "replacement" +# Example: bp -p '"foo" => "baz"' matches "foobar" and replaces it with "bazbar" +bp -p '"s" => "$"' diff --git a/tests/23-nested-parens.in b/tests/23-nested-parens.in new file mode 100644 index 0000000..8dadc56 --- /dev/null +++ b/tests/23-nested-parens.in @@ -0,0 +1,5 @@ +foo(one(), two(three())); +foo(); +xx())))); +xx((((); +yy)()(); diff --git a/tests/23-nested-parens.out b/tests/23-nested-parens.out new file mode 100644 index 0000000..f196775 --- /dev/null +++ b/tests/23-nested-parens.out @@ -0,0 +1,2 @@ +foo(one(), two(three())); +foo(); diff --git a/tests/23-nested-parens.sh b/tests/23-nested-parens.sh new file mode 100644 index 0000000..1bf872e --- /dev/null +++ b/tests/23-nested-parens.sh @@ -0,0 +1,3 @@ +# parens is a pattern matching nested parentheses +# Example: 
bp -p '"foo" parens' matches "foo()" or "foo(baz(), qux())", but not "foo(()" +bp -p 'id parens `;' diff --git a/tests/24-backref.in b/tests/24-backref.in new file mode 100644 index 0000000..181e306 --- /dev/null +++ b/tests/24-backref.in @@ -0,0 +1,5 @@ +foo baz foo +xx yy xx +x x x +foo baz xx +a b c diff --git a/tests/24-backref.out b/tests/24-backref.out new file mode 100644 index 0000000..23f5352 --- /dev/null +++ b/tests/24-backref.out @@ -0,0 +1,3 @@ +foo baz foo +xx yy xx +x x x diff --git a/tests/24-backref.sh b/tests/24-backref.sh new file mode 100644 index 0000000..01a6280 --- /dev/null +++ b/tests/24-backref.sh @@ -0,0 +1,3 @@ +# With @-capturing, you can reference previous captures +# Example: bp -p '@first=`a-z .. first' matches "aba" and "xyzx", but not "abc" +bp -p '@first=+Abc _ +Abc _ first' diff --git a/tests/25-replace-capture.in b/tests/25-replace-capture.in new file mode 100644 index 0000000..b2f931a --- /dev/null +++ b/tests/25-replace-capture.in @@ -0,0 +1,5 @@ +one +two +three +four +five diff --git a/tests/25-replace-capture.out b/tests/25-replace-capture.out new file mode 100644 index 0000000..939bd2b --- /dev/null +++ b/tests/25-replace-capture.out @@ -0,0 +1,5 @@ +{o}n{e} +tw{o} +thr{e}{e} +f{o}{u}r +f{i}v{e} diff --git a/tests/25-replace-capture.sh b/tests/25-replace-capture.sh new file mode 100644 index 0000000..f401605 --- /dev/null +++ b/tests/25-replace-capture.sh @@ -0,0 +1,4 @@ +# Captures with @ can be referenced in a replacement by @1, @2, etc. +# Example: bp -p '"=" _ @+`0-9 => "= -@1"' replaces "x = 5" with "x = -5" +# Note: @0 refers to the entire match, e.g. bp -p '"foo" => "xx@0xx"' replaces "foo" with "xxfooxx" +bp -p '@`a,e,i,o,u => "{@1}" / .' diff --git a/tests/3-char-range.in b/tests/3-char-range.in new file mode 100644 index 0000000..ad062db --- /dev/null +++ b/tests/3-char-range.in @@ -0,0 +1,9 @@ +0 +1 +X +2 +! 
+3 +; +a +f diff --git a/tests/3-char-range.out b/tests/3-char-range.out new file mode 100644 index 0000000..008986c --- /dev/null +++ b/tests/3-char-range.out @@ -0,0 +1,6 @@ +0 +1 +2 +3 +a +f diff --git a/tests/3-char-range.sh b/tests/3-char-range.sh new file mode 100644 index 0000000..7904466 --- /dev/null +++ b/tests/3-char-range.sh @@ -0,0 +1,3 @@ +# Character sets and ranges work with backticks +# Example: bp -p '`a-z,A-Z' matches all lowercase and uppercase letters +bp -p '`0-9,a-f' diff --git a/tests/4-sequence.in b/tests/4-sequence.in new file mode 100644 index 0000000..c39976c --- /dev/null +++ b/tests/4-sequence.in @@ -0,0 +1,4 @@ +onetwo +one +two +xxxx diff --git a/tests/4-sequence.out b/tests/4-sequence.out new file mode 100644 index 0000000..1d202ca --- /dev/null +++ b/tests/4-sequence.out @@ -0,0 +1 @@ +onetwo diff --git a/tests/4-sequence.sh b/tests/4-sequence.sh new file mode 100644 index 0000000..dc61eeb --- /dev/null +++ b/tests/4-sequence.sh @@ -0,0 +1,5 @@ +# Multiple patterns in a row represent a sequence. +# bp pattern syntax mostly doesn't care about whitespace, so you can have +# spaces between patterns if you want, but it's not required. +# Example: bp -p '"foo" `0-9' matches "foo1", "foo2", etc. +bp -p '"one" "two"' diff --git a/tests/5-dot.in b/tests/5-dot.in new file mode 100644 index 0000000..3e16ece --- /dev/null +++ b/tests/5-dot.in @@ -0,0 +1,6 @@ +aX +aY +a +Xa +y +zz diff --git a/tests/5-dot.out b/tests/5-dot.out new file mode 100644 index 0000000..2e1c61d --- /dev/null +++ b/tests/5-dot.out @@ -0,0 +1,2 @@ +aX +aY diff --git a/tests/5-dot.sh b/tests/5-dot.sh new file mode 100644 index 0000000..ec5c9e7 --- /dev/null +++ b/tests/5-dot.sh @@ -0,0 +1,3 @@ +# The dot matches a single character +# Example: bp -p '.' +bp -p '`a .' 
diff --git a/tests/6-start-of-line.in b/tests/6-start-of-line.in new file mode 100644 index 0000000..fc48028 --- /dev/null +++ b/tests/6-start-of-line.in @@ -0,0 +1,5 @@ +xxxxx +foo +foobar +barfoo +xxfooxx diff --git a/tests/6-start-of-line.out b/tests/6-start-of-line.out new file mode 100644 index 0000000..4f6c252 --- /dev/null +++ b/tests/6-start-of-line.out @@ -0,0 +1,2 @@ +foo +foobar diff --git a/tests/6-start-of-line.sh b/tests/6-start-of-line.sh new file mode 100644 index 0000000..c44000e --- /dev/null +++ b/tests/6-start-of-line.sh @@ -0,0 +1,3 @@ +# ^ matches start of a line +# Example: bp -p '^ "x"' matches lines starting with "x" +bp -p '^ "foo"' diff --git a/tests/7-end-of-line.in b/tests/7-end-of-line.in new file mode 100644 index 0000000..328e2de --- /dev/null +++ b/tests/7-end-of-line.in @@ -0,0 +1,4 @@ +xxxx +foobar +foo +barfoo diff --git a/tests/7-end-of-line.out b/tests/7-end-of-line.out new file mode 100644 index 0000000..419534f --- /dev/null +++ b/tests/7-end-of-line.out @@ -0,0 +1,2 @@ +foo +barfoo diff --git a/tests/7-end-of-line.sh b/tests/7-end-of-line.sh new file mode 100644 index 0000000..4a1e05d --- /dev/null +++ b/tests/7-end-of-line.sh @@ -0,0 +1,3 @@ +# $ matches end of line +# Example: bp -p '"x" $' matches lines ending with "x" +bp -p '"foo" $' diff --git a/tests/8-spaces.in b/tests/8-spaces.in new file mode 100644 index 0000000..8b72b23 --- /dev/null +++ b/tests/8-spaces.in @@ -0,0 +1,6 @@ +onetwo +one two +one two +one two +xxx +one;;;two diff --git a/tests/8-spaces.out b/tests/8-spaces.out new file mode 100644 index 0000000..75dc1e4 --- /dev/null +++ b/tests/8-spaces.out @@ -0,0 +1,4 @@ +onetwo +one two +one two +one two diff --git a/tests/8-spaces.sh b/tests/8-spaces.sh new file mode 100644 index 0000000..cc16d96 --- /dev/null +++ b/tests/8-spaces.sh @@ -0,0 +1,3 @@ +# The _ pattern matches zero or more spaces/tabs +# Example: bp -p '`= _ "foo"' matches "=foo", "= foo", "= foo", etc. 
+bp -p '"one" _ "two"'
diff --git a/tests/9-ellipsis.in b/tests/9-ellipsis.in
new file mode 100644
index 0000000..7668caa
--- /dev/null
+++ b/tests/9-ellipsis.in
@@ -0,0 +1,6 @@
+helloworld
+hello;;;world
+hello1234world
+goodbye
+hello
+helloworxx
diff --git a/tests/9-ellipsis.out b/tests/9-ellipsis.out
new file mode 100644
index 0000000..f64e0e2
--- /dev/null
+++ b/tests/9-ellipsis.out
@@ -0,0 +1,3 @@
+helloworld
+hello;;;world
+hello1234world
diff --git a/tests/9-ellipsis.sh b/tests/9-ellipsis.sh
new file mode 100644
index 0000000..0763fd1
--- /dev/null
+++ b/tests/9-ellipsis.sh
@@ -0,0 +1,3 @@
+# The ellipsis matches text up to the following pattern, not counting newlines
+# Example: bp -p '"/*" .. "*/"' matches "/* blah blah */" or "/**/"
+bp -p '"hello" .. "world"'
diff --git a/tutorial.sh b/tutorial.sh
new file mode 100755
index 0000000..e598612
--- /dev/null
+++ b/tutorial.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+# Run a small tutorial on basic bp functionality.
+#
+# Usage: ./tutorial.sh [tests/LESSON.sh ...]
+# With no arguments, every tests/*.sh lesson is used, ordered by "ls -v".
+# Each lesson prints the "# " comment lines of its script, shows its .in and
+# .out files side by side, then asks for a bp pattern until running it on the
+# .in file reproduces the .out file.
+
+tmpfile="$(mktemp /tmp/bp-tutorial.XXXXXX)" || exit 1
+trap 'rm -f "$tmpfile"' EXIT
+# Exit normally on these signals so that the EXIT trap still removes the temp file
+trap 'exit 1' HUP INT TERM
+
+# Explicit lesson arguments are used as given; otherwise default to all lessons
+[ $# -gt 0 ] || set -- $(ls -v tests/*.sh)
+
+for t in "$@"; do
+    # Strip the suffix the POSIX way; ${t/.sh/.in} is not supported by every /bin/sh
+    base="${t%.sh}"
+    echo
+    printf "\033[1m"
+    sed -n 's/^# //p' "$t"
+    printf "\033[0m"
+    printf "\033[33;1mGiven these lines: Give this output:\033[0m\n"
+    diff -y -W60 --color=always "$base.in" "$base.out"
+    while true; do
+        printf "\n\033[1mbp pattern: \033[0m"
+        # Quit at end of input instead of retrying an empty pattern forever
+        read -r pat || exit 1
+        printf "\033[0;2mRunning: \033[32m%s\033[0m\n\n" "bp -p '$pat'"
+        printf "\033[33;1mExpected output: Your pattern's output:\033[0m\n"
+        bp -p "$pat" < "$base.in" 2>"$tmpfile" | diff -y -W60 --color=always "$base.out" - && break
+        cat "$tmpfile"
+        printf "\n\033[0;1;31mSorry, try again!\033[0m\n"
+    done
+    printf "\n\033[0;1;32mCorrect!\033[0m\n"
+done