From 79d4bd5125de7ff220fbf8a8a5493d437ed16963 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Tue, 18 Sep 2018 19:48:58 -0700 Subject: Got rid of repr() use and replaced with :as_lua() or :as_nomsu() in as many places as possible. --- string2.moon | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'string2.moon') diff --git a/string2.moon b/string2.moon index 2259272..662cd0d 100644 --- a/string2.moon +++ b/string2.moon @@ -45,6 +45,20 @@ string2 = { lines[#lines+1] = line return table.concat(lines, "\n") + as_lua: => + escaped = gsub(@, "\\", "\\\\") + escaped = gsub(escaped, "\n", "\\n") + escaped = gsub(escaped, '"', '\\"') + escaped = gsub(escaped, "[^ %g]", (c)-> format("\\%03d", byte(c, 1))) + return '"'..escaped..'"' + + as_nomsu: => + escaped = gsub(@, "\\", "\\\\") + escaped = gsub(escaped, "\n", "\\n") + escaped = gsub(escaped, '"', '\\"') + escaped = gsub(escaped, "[^ %g]", (c)-> format("\\%03d", byte(c, 1))) + return '"'..escaped..'"' + -- Convert an arbitrary text into a valid Lua identifier. This function is injective, -- but not idempotent. In logic terms: (x != y) => (as_lua_id(x) != as_lua_id(y)), -- but not (as_lua_id(a) == b) => (as_lua_id(b) == b). -- cgit v1.2.3 From f2048235f5cc7ff02db39a0e2fe5c79c7f390e0b Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Fri, 21 Sep 2018 00:30:28 -0700 Subject: Incremental checkin, currently not working, just saving progress. --- string2.moon | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'string2.moon') diff --git a/string2.moon b/string2.moon index 662cd0d..1b0037f 100644 --- a/string2.moon +++ b/string2.moon @@ -13,8 +13,10 @@ isplit = (sep='%s+')=> return step, {str:@, pos:1, :sep}, 0 lua_keywords = { - "and", "break", "do", "else", "elseif", "end", "false", "for", "function", "goto", "if", - "in", "local", "nil", "not", "or", "repeat", "return", "then", "true", "until", "while" + ["and"]=true, ["break"]=true, ["do"]=true, ["else"]=true, ["elseif"]=true, ["end"]=true, + ["false"]=true, ["for"]=true, ["function"]=true, ["goto"]=true, ["if"]=true, + ["in"]=true, ["local"]=true, ["nil"]=true, ["not"]=true, ["or"]=true, ["repeat"]=true, + ["return"]=true, ["then"]=true, ["true"]=true, ["until"]=true, ["while"]=true } string2 = { @@ -76,24 +78,19 @@ string2 = { str = gsub str, "%W", (c)-> if c == ' ' then '_' else format("x%02X", byte(c)) - -- Lua IDs can't start with numbers, so map "1" -> "_1", "_1" -> "__1", etc. - str = gsub str, "^_*%d", "_%1" - -- This pattern is guaranteed to match all keywords, but also matches some other stuff. - if match str, "^_*[abdefgilnortuw][aefhilnoru][acdefiklnoprstu]*$" - for kw in *lua_keywords - if match str, ("^_*"..kw.."$") - str = "_"..str + + unless string2.is_lua_id(str\match("^_*(.*)$")) + str = "_"..str return str + is_lua_id: (str)-> + match(str, "^[_a-zA-Z][_a-zA-Z0-9]*$") and not lua_keywords[str] + -- from_lua_id(as_lua_id(str)) == str, but behavior is unspecified for inputs that -- did not come from as_lua_id() from_lua_id: (str)-> - -- This pattern is guaranteed to match all keywords, but also matches some other stuff. - if match str, "^_+[abdefgilnortuw][aefhilnoru][acdefiklnoprstu]*$" - for kw in *lua_keywords - if match str, ("^_+"..kw.."$") - str = str\sub(2,-1) - str = gsub(str, "^_(_*%d.*)", "%1") + unless string2.is_lua_id("^_+(.*)$") + str = str\sub(2,-1) str = gsub(str, "_", " ") str = gsub(str, "x([0-9A-F][0-9A-F])", (hex)-> char(tonumber(hex, 16))) str = gsub(str, "^ ([ ]*)$", "%1") -- cgit v1.2.3 From 692fae5416ce1f2702b599ffb27b2e3d2235eba7 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Wed, 26 Sep 2018 12:45:08 -0700 Subject: Incremental fixes and more nomnom ports. --- string2.moon | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'string2.moon') diff --git a/string2.moon b/string2.moon index 1b0037f..140c586 100644 --- a/string2.moon +++ b/string2.moon @@ -13,14 +13,16 @@ isplit = (sep='%s+')=> return step, {str:@, pos:1, :sep}, 0 lua_keywords = { - ["and"]=true, ["break"]=true, ["do"]=true, ["else"]=true, ["elseif"]=true, ["end"]=true, - ["false"]=true, ["for"]=true, ["function"]=true, ["goto"]=true, ["if"]=true, - ["in"]=true, ["local"]=true, ["nil"]=true, ["not"]=true, ["or"]=true, ["repeat"]=true, - ["return"]=true, ["then"]=true, ["true"]=true, ["until"]=true, ["while"]=true + ["and"]:true, ["break"]:true, ["do"]:true, ["else"]:true, ["elseif"]:true, ["end"]:true, + ["false"]:true, ["for"]:true, ["function"]:true, ["goto"]:true, ["if"]:true, + ["in"]:true, ["local"]:true, ["nil"]:true, ["not"]:true, ["or"]:true, ["repeat"]:true, + ["return"]:true, ["then"]:true, ["true"]:true, ["until"]:true, ["while"]:true } +is_lua_id = (str)-> + match(str, "^[_a-zA-Z][_a-zA-Z0-9]*$") and not lua_keywords[str] string2 = { - :isplit, uppercase:upper, lowercase:lower, reversed:reverse + :isplit, uppercase:upper, lowercase:lower, reversed:reverse, :is_lua_id capitalized: => gsub(@, '%l', upper, 1) byte: byte, bytes: (i, j)=> {byte(@, i or 1, j or -1)} split: (sep)=> [chunk for i,chunk in isplit(@, sep)] @@ -79,17 +81,14 @@ string2 = { if c == ' ' then '_' else format("x%02X", byte(c)) - unless string2.is_lua_id(str\match("^_*(.*)$")) + unless is_lua_id(str\match("^_*(.*)$")) str = "_"..str return str - is_lua_id: (str)-> - match(str, "^[_a-zA-Z][_a-zA-Z0-9]*$") and not lua_keywords[str] - -- from_lua_id(as_lua_id(str)) == str, but behavior is unspecified for inputs that -- did not come from as_lua_id() from_lua_id: (str)-> - unless string2.is_lua_id("^_+(.*)$") + unless is_lua_id("^_+(.*)$") str = str\sub(2,-1) str = gsub(str, "_", " ") str = gsub(str, "x([0-9A-F][0-9A-F])", (hex)-> char(tonumber(hex, 16))) -- cgit v1.2.3 From b615cb5c8e638cffe77bbe5cb86c9362e2b2fc18 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Wed, 3 Oct 2018 16:14:17 -0700 Subject: Fixed up some edge cases with as_lua_id and from_lua_id that were producing bad results. --- string2.moon | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'string2.moon') diff --git a/string2.moon b/string2.moon index 140c586..8d2f3f9 100644 --- a/string2.moon +++ b/string2.moon @@ -67,10 +67,6 @@ string2 = { -- but not idempotent. In logic terms: (x != y) => (as_lua_id(x) != as_lua_id(y)), -- but not (as_lua_id(a) == b) => (as_lua_id(b) == b). as_lua_id: (str)-> - orig = str - -- Empty strings are not valid lua identifiers, so treat them like " ", - -- and treat " " as " ", etc. to preserve injectivity. - str = gsub str, "^ *$", "%1 " -- Escape 'x' (\x78) when it precedes something that looks like an uppercase hex sequence. -- This way, all Lua IDs can be unambiguously reverse-engineered, but normal usage -- of 'x' won't produce ugly Lua IDs. @@ -88,13 +84,18 @@ string2 = { -- from_lua_id(as_lua_id(str)) == str, but behavior is unspecified for inputs that -- did not come from as_lua_id() from_lua_id: (str)-> - unless is_lua_id("^_+(.*)$") + unless is_lua_id(str\match("^_*(.*)$")) str = str\sub(2,-1) str = gsub(str, "_", " ") str = gsub(str, "x([0-9A-F][0-9A-F])", (hex)-> char(tonumber(hex, 16))) - str = gsub(str, "^ ([ ]*)$", "%1") return str } for k,v in pairs(string) do string2[k] or= v +for test in *{"", "_", " ", "return", "asdf", "one two", "one_two", "Hex2Dec", "He-ec", "\3"} + lua_id = string2.as_lua_id(test) + assert is_lua_id(lua_id), "failed to convert '#{test}' to a valid Lua identifier (got '#{lua_id}')" + roundtrip = string2.from_lua_id(lua_id) + assert roundtrip == test, "Failed lua_id roundtrip: '#{test}' -> #{lua_id} -> #{roundtrip}" + return string2 -- cgit v1.2.3 From 2f68357cb6800e97edd31abfc707d7c7905faa64 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Wed, 3 Oct 2018 16:26:24 -0700 Subject: Some incremental progress. --- string2.moon | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'string2.moon') diff --git a/string2.moon b/string2.moon index 8d2f3f9..d70abe6 100644 --- a/string2.moon +++ b/string2.moon @@ -96,6 +96,6 @@ for test in *{"", "_", " ", "return", "asdf", "one two", "one_two", "Hex2Dec", " lua_id = string2.as_lua_id(test) assert is_lua_id(lua_id), "failed to convert '#{test}' to a valid Lua identifier (got '#{lua_id}')" roundtrip = string2.from_lua_id(lua_id) - assert roundtrip == test, "Failed lua_id roundtrip: '#{test}' -> #{lua_id} -> #{roundtrip}" + assert roundtrip == test, "Failed lua_id roundtrip: '#{test}' -> '#{lua_id}' -> '#{roundtrip}'" return string2 -- cgit v1.2.3 From 307dea18815ba4a06a3098edb170d7ad90708815 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Fri, 2 Nov 2018 14:38:24 -0700 Subject: Changed stub convention to (foo 1 baz 2) -> foo_1_baz instead of foo_1_baz_2, removed "smext", made some cleanup changes. --- string2.moon | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'string2.moon') diff --git a/string2.moon b/string2.moon index d70abe6..e4ee482 100644 --- a/string2.moon +++ b/string2.moon @@ -26,6 +26,7 @@ string2 = { capitalized: => gsub(@, '%l', upper, 1) byte: byte, bytes: (i, j)=> {byte(@, i or 1, j or -1)} split: (sep)=> [chunk for i,chunk in isplit(@, sep)] + starts_with: (s)=> sub(@, 1, #s) == s lines: => [line for i,line in isplit(@, '\n')] line: (line_num)=> for i, line, start in isplit(@, '\n') @@ -41,10 +42,10 @@ string2 = { lines = {} for line in *@lines! while #line > maxlen - chunk = line\sub(1, maxlen) - split = chunk\find(' ', maxlen-buffer, true) or maxlen - chunk = line\sub(1, split) - line = line\sub(split+1, -1) + chunk = sub(line, 1, maxlen) + split = find(chunk, ' ', maxlen-buffer, true) or maxlen + chunk = sub(line, 1, split) + line = sub(line, split+1, -1) lines[#lines+1] = chunk lines[#lines+1] = line return table.concat(lines, "\n") @@ -77,15 +78,15 @@ string2 = { if c == ' ' then '_' else format("x%02X", byte(c)) - unless is_lua_id(str\match("^_*(.*)$")) + unless is_lua_id(match(str, "^_*(.*)$")) str = "_"..str return str -- from_lua_id(as_lua_id(str)) == str, but behavior is unspecified for inputs that -- did not come from as_lua_id() from_lua_id: (str)-> - unless is_lua_id(str\match("^_*(.*)$")) - str = str\sub(2,-1) + unless is_lua_id(match(str, "^_*(.*)$")) + str = sub(str,2,-1) str = gsub(str, "_", " ") str = gsub(str, "x([0-9A-F][0-9A-F])", (hex)-> char(tonumber(hex, 16))) return str -- cgit v1.2.3 From dc41f30c73c9686685e3a4183c1213fb4ba55c90 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Fri, 2 Nov 2018 15:10:17 -0700 Subject: Tweak --- string2.moon | 1 + 1 file changed, 1 insertion(+) (limited to 'string2.moon') diff --git a/string2.moon b/string2.moon index e4ee482..e6db628 100644 --- a/string2.moon +++ b/string2.moon @@ -27,6 +27,7 @@ string2 = { byte: byte, bytes: (i, j)=> {byte(@, i or 1, j or -1)} split: (sep)=> [chunk for i,chunk in isplit(@, sep)] starts_with: (s)=> sub(@, 1, #s) == s + ends_with: (s)=> #@ >= #s and sub(@, #@-#s, -1) == s lines: => [line for i,line in isplit(@, '\n')] line: (line_num)=> for i, line, start in isplit(@, '\n') -- cgit v1.2.3