aboutsummaryrefslogtreecommitdiff
path: root/builtin/common/serialize.lua
diff options
context:
space:
mode:
authorLars Müller <34514239+appgurueu@users.noreply.github.com>2022-06-11 20:00:26 +0200
committerGitHub <noreply@github.com>2022-06-11 20:00:26 +0200
commit3eafcab64ecaf8d00a9264b441e996825a6a31bd (patch)
tree75583a4ee4d4c2c75a43319dfee93224edd34a6a /builtin/common/serialize.lua
parentba65e0ace76dc39dab15b70725cafc629d165e7f (diff)
downloadhax-minetest-server-3eafcab64ecaf8d00a9264b441e996825a6a31bd.tar.gz
hax-minetest-server-3eafcab64ecaf8d00a9264b441e996825a6a31bd.zip
Builtin: Redo serialize.lua (#11427)
Features: * Support for arbitrary references, including self-referencing * Short output, references "long" strings as a bonus * Around the same speed, potentially slower if long, short keys are present * Properly works with NaN and inf
Diffstat (limited to 'builtin/common/serialize.lua')
-rw-r--r--builtin/common/serialize.lua349
1 files changed, 179 insertions, 170 deletions
diff --git a/builtin/common/serialize.lua b/builtin/common/serialize.lua
index 300b394c6..6278e2739 100644
--- a/builtin/common/serialize.lua
+++ b/builtin/common/serialize.lua
@@ -1,205 +1,214 @@
--- Lua module to serialize values as Lua code.
--- From: https://github.com/fab13n/metalua/blob/no-dll/src/lib/serialize.lua
+-- From: https://github.com/appgurueu/modlib/blob/master/luon.lua
-- License: MIT
--- @copyright 2006-2997 Fabien Fleutot <metalua@gmail.com>
--- @author Fabien Fleutot <metalua@gmail.com>
--- @author ShadowNinja <shadowninja@minetest.net>
---------------------------------------------------------------------------------
---- Serialize an object into a source code string. This string, when passed as
--- an argument to deserialize(), returns an object structurally identical to
--- the original one. The following are currently supported:
--- * Booleans, numbers, strings, and nil.
--- * Functions; uses interpreter-dependent (and sometimes platform-dependent) bytecode!
--- * Tables; they can cantain multiple references and can be recursive, but metatables aren't saved.
--- This works in two phases:
--- 1. Recursively find and record multiple references and recursion.
--- 2. Recursively dump the value into a string.
--- @param x Value to serialize (nil is allowed).
--- @return load()able string containing the value.
-function core.serialize(x)
- local local_index = 1 -- Top index of the "_" local table in the dump
- -- table->nil/1/2 set of tables seen.
- -- nil = not seen, 1 = seen once, 2 = seen multiple times.
- local seen = {}
+local next, rawget, pairs, pcall, error, type, setfenv, loadstring
+ = next, rawget, pairs, pcall, error, type, setfenv, loadstring
- -- nest_points are places where a table appears within itself, directly
- -- or not. For instance, all of these chunks create nest points in
- -- table x: "x = {}; x[x] = 1", "x = {}; x[1] = x",
- -- "x = {}; x[1] = {y = {x}}".
- -- To handle those, two tables are used by mark_nest_point:
- -- * nested - Transient set of tables being currently traversed.
- -- Used for detecting nested tables.
- -- * nest_points - parent->{key=value, ...} table cantaining the nested
- -- keys and values in the parent. They're all dumped after all the
- -- other table operations have been performed.
- --
- -- mark_nest_point(p, k, v) fills nest_points with information required
- -- to remember that key/value (k, v) creates a nest point in table
- -- parent. It also marks "parent" and the nested item(s) as occuring
- -- multiple times, since several references to it will be required in
- -- order to patch the nest points.
- local nest_points = {}
- local nested = {}
- local function mark_nest_point(parent, k, v)
- local nk, nv = nested[k], nested[v]
- local np = nest_points[parent]
- if not np then
- np = {}
- nest_points[parent] = np
- end
- np[k] = v
- seen[parent] = 2
- if nk then seen[k] = 2 end
- if nv then seen[v] = 2 end
- end
+local table_concat, string_dump, string_format, string_match, math_huge
+ = table.concat, string.dump, string.format, string.match, math.huge
- -- First phase, list the tables and functions which appear more than
- -- once in x.
- local function mark_multiple_occurences(x)
- local tp = type(x)
- if tp ~= "table" and tp ~= "function" then
- -- No identity (comparison is done by value, not by instance)
+-- Recursively counts occurences of objects (non-primitives including strings) in a table.
+local function count_objects(value)
+ local counts = {}
+ if value == nil then
+ -- Early return for nil; tables can't contain nil
+ return counts
+ end
+ local function count_values(val)
+ local type_ = type(val)
+ if type_ == "boolean" or type_ == "number" then
return
end
- if seen[x] == 1 then
- seen[x] = 2
- elseif seen[x] ~= 2 then
- seen[x] = 1
- end
-
- if tp == "table" then
- nested[x] = true
- for k, v in pairs(x) do
- if nested[k] or nested[v] then
- mark_nest_point(x, k, v)
- else
- mark_multiple_occurences(k)
- mark_multiple_occurences(v)
+ local count = counts[val]
+ counts[val] = (count or 0) + 1
+ if type_ == "table" then
+ if not count then
+ for k, v in pairs(val) do
+ count_values(k)
+ count_values(v)
end
end
- nested[x] = nil
+ elseif type_ ~= "string" and type_ ~= "function" then
+ error("unsupported type: " .. type_)
end
end
+ count_values(value)
+ return counts
+end
- local dumped = {} -- object->varname set
- local local_defs = {} -- Dumped local definitions as source code lines
+-- Build a "set" of Lua keywords. These can't be used as short key names.
+-- See https://www.lua.org/manual/5.1/manual.html#2.1
+local keywords = {}
+for _, keyword in pairs({
+ "and", "break", "do", "else", "elseif",
+ "end", "false", "for", "function", "if",
+ "in", "local", "nil", "not", "or",
+ "repeat", "return", "then", "true", "until", "while",
+ "goto" -- LuaJIT, Lua 5.2+
+}) do
+ keywords[keyword] = true
+end
- -- Mutually recursive local functions:
- local dump_val, dump_or_ref_val
+local function quote(string)
+ return string_format("%q", string)
+end
- -- If x occurs multiple times, dump the local variable rather than
- -- the value. If it's the first time it's dumped, also dump the
- -- content in local_defs.
- function dump_or_ref_val(x)
- if seen[x] ~= 2 then
- return dump_val(x)
- end
- local var = dumped[x]
- if var then -- Already referenced
- return var
+local function dump_func(func)
+ return string_format("loadstring(%q)", string_dump(func))
+end
+
+-- Serializes Lua nil, booleans, numbers, strings, tables and even functions
+-- Tables are referenced by reference, strings are referenced by value. Supports circular tables.
+local function serialize(value, write)
+ local reference, refnum = "r1", 1
+ -- [object] = reference string
+ local references = {}
+ -- Circular tables that must be filled using `table[key] = value` statements
+ local to_fill = {}
+ for object, count in pairs(count_objects(value)) do
+ local type_ = type(object)
+ -- Object must appear more than once. If it is a string, the reference has to be shorter than the string.
+ if count >= 2 and (type_ ~= "string" or #reference + 2 < #object) then
+ write(reference)
+ write("=")
+ if type_ == "table" then
+ write("{}")
+ elseif type_ == "function" then
+ write(dump_func(object))
+ elseif type_ == "string" then
+ write(quote(object))
+ end
+ write(";")
+ references[object] = reference
+ if type_ == "table" then
+ to_fill[object] = reference
+ end
+ refnum = refnum + 1
+ reference = ("r%X"):format(refnum)
end
- -- First occurence, create and register reference
- local val = dump_val(x)
- local i = local_index
- local_index = local_index + 1
- var = "_["..i.."]"
- local_defs[#local_defs + 1] = var.." = "..val
- dumped[x] = var
- return var
end
-
- -- Second phase. Dump the object; subparts occuring multiple times
- -- are dumped in local variables which can be referenced multiple
- -- times. Care is taken to dump local vars in a sensible order.
- function dump_val(x)
- local tp = type(x)
- if x == nil then return "nil"
- elseif tp == "string" then return string.format("%q", x)
- elseif tp == "boolean" then return x and "true" or "false"
- elseif tp == "function" then
- return string.format("loadstring(%q)", string.dump(x))
- elseif tp == "number" then
- -- Serialize numbers reversibly with string.format
- return string.format("%.17g", x)
- elseif tp == "table" then
- local vals = {}
- local idx_dumped = {}
- local np = nest_points[x]
- for i, v in ipairs(x) do
- if not np or not np[i] then
- vals[#vals + 1] = dump_or_ref_val(v)
- end
- idx_dumped[i] = true
+ -- Used to decide whether we should do "key=..."
+ local function use_short_key(key)
+ return not references[key] and type(key) == "string" and (not keywords[key]) and string_match(key, "^[%a_][%a%d_]*$")
+ end
+ local function dump(value)
+ -- Primitive types
+ if value == nil then
+ return write("nil")
+ end
+ if value == true then
+ return write("true")
+ end
+ if value == false then
+ return write("false")
+ end
+ local type_ = type(value)
+ if type_ == "number" then
+ return write(string_format("%.17g", value))
+ end
+ -- Reference types: table, function and string
+ local ref = references[value]
+ if ref then
+ return write(ref)
+ end
+ if type_ == "string" then
+ return write(quote(value))
+ end
+ if type_ == "function" then
+ return write(dump_func(value))
+ end
+ if type_ == "table" then
+ write("{")
+ -- First write list keys:
+ -- Don't use the table length #value here as it may horribly fail
+ -- for tables which use large integers as keys in the hash part;
+ -- stop at the first "hole" (nil value) instead
+ local len = 0
+ local first = true -- whether this is the first entry, which may not have a leading comma
+ while true do
+ local v = rawget(value, len + 1) -- use rawget to avoid metatables like the vector metatable
+ if v == nil then break end
+ if first then first = false else write(",") end
+ dump(v)
+ len = len + 1
end
- for k, v in pairs(x) do
- if (not np or not np[k]) and
- not idx_dumped[k] then
- vals[#vals + 1] = "["..dump_or_ref_val(k).."] = "
- ..dump_or_ref_val(v)
+ -- Now write map keys ([key] = value)
+ for k, v in next, value do
+ -- We have written all non-float keys in [1, len] already
+ if type(k) ~= "number" or k % 1 ~= 0 or k < 1 or k > len then
+ if first then first = false else write(",") end
+ if use_short_key(k) then
+ write(k)
+ else
+ write("[")
+ dump(k)
+ write("]")
+ end
+ write("=")
+ dump(v)
end
end
- return "{"..table.concat(vals, ", ").."}"
- else
- error("Can't serialize data of type "..tp)
+ write("}")
+ return
end
end
-
- local function dump_nest_points()
- for parent, vals in pairs(nest_points) do
- for k, v in pairs(vals) do
- local_defs[#local_defs + 1] = dump_or_ref_val(parent)
- .."["..dump_or_ref_val(k).."] = "
- ..dump_or_ref_val(v)
+ -- Write the statements to fill circular tables
+ for table, ref in pairs(to_fill) do
+ for k, v in pairs(table) do
+ write(ref)
+ if use_short_key(k) then
+ write(".")
+ write(k)
+ else
+ write("[")
+ dump(k)
+ write("]")
end
+ write("=")
+ dump(v)
+ write(";")
end
end
-
- mark_multiple_occurences(x)
- local top_level = dump_or_ref_val(x)
- dump_nest_points()
-
- if next(local_defs) then
- return "local _ = {}\n"
- ..table.concat(local_defs, "\n")
- .."\nreturn "..top_level
- else
- return "return "..top_level
- end
+ write("return ")
+ dump(value)
end
--- Deserialization
-
-local function safe_loadstring(...)
- local func, err = loadstring(...)
- if func then
- setfenv(func, {})
- return func
- end
- return nil, err
+function core.serialize(value)
+ local rope = {}
+ serialize(value, function(text)
+ -- Faster than table.insert(rope, text) on PUC Lua 5.1
+ rope[#rope + 1] = text
+ end)
+ return table_concat(rope)
end
local function dummy_func() end
-function core.deserialize(str, safe)
- if type(str) ~= "string" then
- return nil, "Cannot deserialize type '"..type(str)
- .."'. Argument must be a string."
- end
- if str:byte(1) == 0x1B then
- return nil, "Bytecode prohibited"
- end
- local f, err = loadstring(str)
- if not f then return nil, err end
+local nan = (0/0)^1 -- +nan
- -- The environment is recreated every time so deseralized code cannot
- -- pollute it with permanent references.
- setfenv(f, {loadstring = safe and dummy_func or safe_loadstring})
+function core.deserialize(str, safe)
+ local func, err = loadstring(str)
+ if not func then return nil, err end
- local good, data = pcall(f)
- if good then
- return data
+ -- math.huge is serialized to inf, NaNs are serialized to nan by Lua
+ local env = {inf = math_huge, nan = nan}
+ if safe then
+ env.loadstring = dummy_func
else
- return nil, data
+ env.loadstring = function(str, ...)
+ local func, err = loadstring(str, ...)
+ if func then
+ setfenv(func, env)
+ return func
+ end
+ return nil, err
+ end
+ end
+ setfenv(func, env)
+ local success, value_or_err = pcall(func)
+ if success then
+ return value_or_err
end
+ return nil, value_or_err
end