Module:Sandbox/Aidan9382/CodeAnalysis
Yi palo
Documentation for this module may be created at Module:Sandbox/Aidan9382/CodeAnalysis/doc
--[[
This module is designed to perform analysis on other modules. The majority of
the code here is copied from a seperate project,
https://github.com/9382/Dump/tree/main/LuaObfusactor, which in turn has code
borrowed from https://github.com/stravant/LuaMinify.
--]]
local print = mw and mw.log or print
require("strict")
local function lookupify(tb)
for _, v in pairs(tb) do
tb[v] = true
end
return tb
end
local WhiteChars = lookupify{' ', '\n', '\t', '\r'}
local LowerChars = lookupify{'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
's', 't', 'u', 'v', 'w', 'x', 'y', 'z'}
local UpperChars = lookupify{'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I',
'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'}
local Digits = lookupify{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}
local HexDigits = lookupify{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'A', 'a', 'B', 'b', 'C', 'c', 'D', 'd', 'E', 'e', 'F', 'f'}
local Symbols = lookupify{'+', '-', '*', '/', '^', '%', ',', '{', '}', '[', ']', '(', ')', ';', '#'}
local Keywords = lookupify{
'and', 'break', 'do', 'else', 'elseif',
'end', 'false', 'for', 'function', 'goto', 'if',
'in', 'local', 'nil', 'not', 'or', 'repeat',
'return', 'then', 'true', 'until', 'while',
};
local BackslashEscaping = {
a="\a", b="\b", f="\f", n="\n", r="\r", t="\t", v="\v",
["\\"]="\\", ['"']='"', ["'"]="'", ["["]="[", ["]"]="]"
}
local function LexLua(src)
--token dump
local tokens = {}
local st, err = pcall(function()
--line / char / pointer tracking
local p = 1
local line = 1
local char = 1
--get / peek functions
local function get()
local c = src:sub(p,p)
if c == '\n' then
char = 1
line = line + 1
else
char = char + 1
end
p = p + 1
return c
end
local function peek(n)
n = n or 0
return src:sub(p+n,p+n)
end
local function consume(chars)
local c = peek()
for i = 1, #chars do
if c == chars:sub(i,i) then return get() end
end
end
--shared stuff
local function generateError(err)
return error(">> :"..line..":"..char..": "..err, 0)
end
local function tryGetLongString()
local start = p
if peek() == '[' then
local equalsCount = 0
while peek(equalsCount+1) == '=' do
equalsCount = equalsCount + 1
end
if peek(equalsCount+1) == '[' then
--start parsing the string. Strip the starting bit
for _ = 0, equalsCount+1 do get() end
--get the contents
local contentStart = p
while true do
--check for eof
if peek() == '' then
generateError("Expected `]"..string.rep('=', equalsCount).."]` near <eof>.", 3)
end
--check for the end
local foundEnd = true
if peek() == ']' then
for i = 1, equalsCount do
if peek(i) ~= '=' then foundEnd = false end
end
if peek(equalsCount+1) ~= ']' then
foundEnd = false
end
else
foundEnd = false
end
--
if foundEnd then
break
else
get()
end
end
--get the interior string
local contentString = src:sub(contentStart, p-1)
--found the end. Get rid of the trailing bit
for i = 0, equalsCount+1 do get() end
--get the exterior string
local longString = src:sub(start, p-1)
--return the stuff
return contentString, longString
else
return nil
end
else
return nil
end
end
--main token emitting loop
while true do
--get leading whitespace. The leading whitespace will include any comments
--preceding the token. This prevents the parser needing to deal with comments
--separately.
local leadingWhite = ''
while true do
local c = peek()
if WhiteChars[c] then
--whitespace
leadingWhite = leadingWhite..get()
elseif c == '-' and peek(1) == '-' then
--comment
get();get()
leadingWhite = leadingWhite..'--'
local _, wholeText = tryGetLongString()
if wholeText then
leadingWhite = leadingWhite..wholeText
else
while peek() ~= '\n' and peek() ~= '' do
leadingWhite = leadingWhite..get()
end
end
else
break
end
end
--get the initial char
local thisLine = line
local thisChar = char
local c = peek()
--symbol to emit
local toEmit = nil
--branch on type
if c == '' then
--eof
toEmit = {Type = 'Eof'}
elseif UpperChars[c] or LowerChars[c] or c == '_' then
--ident or keyword
local start = p
repeat
get()
c = peek()
until not (UpperChars[c] or LowerChars[c] or Digits[c] or c == '_')
local dat = src:sub(start, p-1)
if Keywords[dat] then
toEmit = {Type = 'Keyword', Data = dat}
else
toEmit = {Type = 'Ident', Data = dat}
end
elseif Digits[c] or (peek() == '.' and Digits[peek(1)]) then
--number const
local start = p
if c == '0' and peek(1) == 'x' then
get();get()
while HexDigits[peek()] do get() end
if consume('Pp') then
consume('+-')
while Digits[peek()] do get() end
end
else
while Digits[peek()] do get() end
if consume('.') then
while Digits[peek()] do get() end
end
if consume('Ee') then
consume('+-')
while Digits[peek()] do get() end
end
end
toEmit = {Type = 'Number', Data = src:sub(start, p-1)}
elseif c == '\'' or c == '\"' then
--string const
local delim = get()
local content = ""
while true do
local c = get()
if c == '\\' then
local next = get()
local replacement = BackslashEscaping[next]
if replacement then
content = content .. replacement
else
if next == "x" then
local n1 = get()
if n1 == "" or n1 == delim or not HexDigits[n1] then
generateError("invalid escape sequence near '"..delim.."'")
end
local n2 = get()
if n2 == "" or n2 == delim or not HexDigits[n2] then
generateError("invalid escape sequence near '"..delim.."'")
end
content = content .. string.char(tonumber(n1 .. n2, 16))
elseif Digits[next] then
local num = next
while #num < 3 and Digits[peek()] do
num = num .. get()
end
content = content .. string.char(tonumber(num))
else
generateError("invalid escape sequence near '"..delim.."'")
end
end
elseif c == delim then
break
elseif c == '' then
generateError("Unfinished string near <eof>")
else
content = content .. c
end
end
toEmit = {Type = 'String', Data = delim .. content .. delim, Constant = content}
elseif c == '[' then
local content, wholetext = tryGetLongString()
if wholetext then
toEmit = {Type = 'String', Data = wholetext, Constant = content}
else
get()
toEmit = {Type = 'Symbol', Data = '['}
end
elseif consume('>=<') then
if consume('=') then
toEmit = {Type = 'Symbol', Data = c..'='}
else
toEmit = {Type = 'Symbol', Data = c}
end
elseif consume('~') then
if consume('=') then
toEmit = {Type = 'Symbol', Data = '~='}
else
generateError("Unexpected symbol `~` in source.", 2)
end
elseif consume('.') then
if consume('.') then
if consume('.') then
toEmit = {Type = 'Symbol', Data = '...'}
else
toEmit = {Type = 'Symbol', Data = '..'}
end
else
toEmit = {Type = 'Symbol', Data = '.'}
end
elseif consume(':') then
if consume(':') then
toEmit = {Type = 'Symbol', Data = '::'}
else
toEmit = {Type = 'Symbol', Data = ':'}
end
elseif Symbols[c] then
get()
toEmit = {Type = 'Symbol', Data = c}
else
local contents, all = tryGetLongString()
if contents then
toEmit = {Type = 'String', Data = all, Constant = contents}
else
generateError("Unexpected Symbol `"..c.."` in source.", 2)
end
end
--add the emitted symbol, after adding some common data
toEmit.LeadingWhite = leadingWhite
toEmit.Line = thisLine
toEmit.Char = thisChar
toEmit.Print = function()
return "<"..(toEmit.Type..string.rep(' ', 7-#toEmit.Type)).." "..(toEmit.Data or '').." >"
end
tokens[#tokens+1] = toEmit
--halt after eof has been emitted
if toEmit.Type == 'Eof' then break end
end
end)
if not st then
return false, err
end
--public interface:
local tok = {}
local savedP = {}
local p = 1
--getters
function tok:Peek(n)
n = n or 0
return tokens[math.min(#tokens, p+n)]
end
function tok:Get()
local t = tokens[p]
p = math.min(p + 1, #tokens)
return t
end
function tok:Is(t)
return tok:Peek().Type == t
end
--save / restore points in the stream
function tok:Save()
savedP[#savedP+1] = p
end
function tok:Commit()
savedP[#savedP] = nil
end
function tok:Restore()
p = savedP[#savedP]
savedP[#savedP] = nil
end
--either return a symbol if there is one, or return true if the requested
--symbol was gotten.
function tok:ConsumeSymbol(symb)
local t = self:Peek()
if t.Type == 'Symbol' then
if symb then
if t.Data == symb then
self:Get()
return true
else
return nil
end
else
self:Get()
return t
end
else
return nil
end
end
function tok:ConsumeKeyword(kw)
local t = self:Peek()
if t.Type == 'Keyword' and t.Data == kw then
self:Get()
return true
else
return nil
end
end
function tok:IsKeyword(kw)
local t = tok:Peek()
return t.Type == 'Keyword' and t.Data == kw
end
function tok:IsSymbol(s)
local t = tok:Peek()
return t.Type == 'Symbol' and t.Data == s
end
function tok:IsEof()
return tok:Peek().Type == 'Eof'
end
return true, tok
end
local ScopeContainer = {}
local GlobalVarGetMap = {}
local function ParseLua(src)
ScopeContainer = {}
local st, tok = LexLua(src)
if not st then
return false, tok
end
--
local function GenerateError(msg)
local err = ">> :"..tok:Peek().Line..":"..tok:Peek().Char..": "..msg.."\n"
--find the line
local lineNum = 0
for line in src:gmatch("[^\n]*\n?") do
if line:sub(-1,-1) == '\n' then line = line:sub(1,-2) end
lineNum = lineNum+1
if lineNum == tok:Peek().Line then
err = err..">> `"..line:gsub('\t',' ').."`\n"
for i = 1, tok:Peek().Char do
local c = line:sub(i,i)
if c == '\t' then
err = err..' '
else
err = err..' '
end
end
err = err.." ^---"
break
end
end
return err
end
--
GlobalVarGetMap = {}
local function CreateScope(parent)
local scope = {}
scope.Parent = parent
scope.LocalList = {}
scope.LocalMap = {}
scope.AccessedLocals = {}
scope.Line = tok:Peek().Line
function scope:GetLocal(name)
--first, try to get my variable
local my = scope.LocalMap[name]
if my then
scope.AccessedLocals[name] = true
return my
end
--next, try parent
if scope.Parent then
local par = scope.Parent:GetLocal(name)
if par then return par end
end
return nil
end
function scope:CreateLocal(name)
--create my own var
local my = {}
my.Scope = scope
my.Name = name
my.CanRename = true
--
scope.LocalList[#scope.LocalList+1] = my
scope.LocalMap[name] = my
--
return my
end
local r = math.random(1e6,1e7-1)
scope.Print = function() return "<Scope " .. r .. ">" end
ScopeContainer[#ScopeContainer+1] = scope
return scope
end
local ParseExpr;
local ParseStatementList;
local function ParseFunctionArgsAndBody(scope)
local funcScope = CreateScope(scope)
if not tok:ConsumeSymbol('(') then
return false, GenerateError("`(` expected.")
end
--arg list
local argList = {}
local isVarArg = false
while not tok:ConsumeSymbol(')') do
if tok:Is('Ident') then
local arg = funcScope:CreateLocal(tok:Get().Data)
argList[#argList+1] = arg
if not tok:ConsumeSymbol(',') then
if tok:ConsumeSymbol(')') then
break
else
return false, GenerateError("`)` expected.")
end
end
elseif tok:ConsumeSymbol('...') then
isVarArg = true
if not tok:ConsumeSymbol(')') then
return false, GenerateError("`...` must be the last argument of a function.")
end
break
else
return false, GenerateError("Argument name or `...` expected")
end
end
--body
local st, body = ParseStatementList(funcScope)
if not st then return false, body end
--end
if not tok:ConsumeKeyword('end') then
return false, GenerateError("`end` expected after function body")
end
local nodeFunc = {}
nodeFunc.AstType = 'Function'
nodeFunc.Scope = funcScope
nodeFunc.Arguments = argList
nodeFunc.Body = body
nodeFunc.VarArg = isVarArg
--
return true, nodeFunc
end
local function ParsePrimaryExpr(scope)
if tok:ConsumeSymbol('(') then
local st, ex = ParseExpr(scope)
if not st then return false, ex end
if not tok:ConsumeSymbol(')') then
return false, GenerateError("`)` Expected.")
end
--save the information about parenthesized expressions somewhere
ex.ParenCount = (ex.ParenCount or 0) + 1
return true, ex
elseif tok:Is('Ident') then
local id = tok:Get()
local var = scope:GetLocal(id.Data)
if not var then
GlobalVarGetMap[id.Data] = true
end
--
local nodePrimExp = {}
nodePrimExp.AstType = 'VarExpr'
nodePrimExp.Name = id.Data
nodePrimExp.Local = var
--
return true, nodePrimExp
else
return false, GenerateError("primary expression expected")
end
end
local function ParseSuffixedExpr(scope, onlyDotColon)
--base primary expression
local st, prim = ParsePrimaryExpr(scope)
if not st then return false, prim end
--
while true do
if tok:IsSymbol('.') or tok:IsSymbol(':') then
local symb = tok:Get().Data
if symb == ":" then
-- scope:CreateLocal("self")
end
if not tok:Is('Ident') then
return false, GenerateError("<Ident> expected.")
end
local id = tok:Get()
local nodeIndex = {}
nodeIndex.AstType = 'MemberExpr'
nodeIndex.Base = prim
nodeIndex.Indexer = symb
nodeIndex.Ident = id
--
prim = nodeIndex
elseif not onlyDotColon and tok:ConsumeSymbol('[') then
local st, ex = ParseExpr(scope)
if not st then return false, ex end
if not tok:ConsumeSymbol(']') then
return false, GenerateError("`]` expected.")
end
local nodeIndex = {}
nodeIndex.AstType = 'IndexExpr'
nodeIndex.Base = prim
nodeIndex.Index = ex
--
prim = nodeIndex
elseif not onlyDotColon and tok:ConsumeSymbol('(') then
local args = {}
while not tok:ConsumeSymbol(')') do
local st, ex = ParseExpr(scope)
if not st then return false, ex end
args[#args+1] = ex
if not tok:ConsumeSymbol(',') then
if tok:ConsumeSymbol(')') then
break
else
return false, GenerateError("`)` Expected.")
end
end
end
local nodeCall = {}
nodeCall.AstType = 'CallExpr'
nodeCall.Base = prim
nodeCall.Arguments = args
--
prim = nodeCall
elseif not onlyDotColon and tok:Is('String') then
--string call
local nodeCall = {}
nodeCall.AstType = 'StringCallExpr'
nodeCall.Base = prim
nodeCall.Arguments = {tok:Get()}
--
prim = nodeCall
elseif not onlyDotColon and tok:IsSymbol('{') then
--table call
local st, ex = ParseExpr(scope)
if not st then return false, ex end
local nodeCall = {}
nodeCall.AstType = 'TableCallExpr'
nodeCall.Base = prim
nodeCall.Arguments = {ex}
--
prim = nodeCall
else
break
end
end
return true, prim
end
local function ParseSimpleExpr(scope)
if tok:Is('Number') then
local nodeNum = {}
nodeNum.AstType = 'NumberExpr'
nodeNum.Value = tok:Get()
return true, nodeNum
elseif tok:Is('String') then
local nodeStr = {}
nodeStr.AstType = 'StringExpr'
nodeStr.Value = tok:Get()
return true, nodeStr
elseif tok:ConsumeKeyword('nil') then
local nodeNil = {}
nodeNil.AstType = 'NilExpr'
return true, nodeNil
elseif tok:IsKeyword('false') or tok:IsKeyword('true') then
local nodeBoolean = {}
nodeBoolean.AstType = 'BooleanExpr'
nodeBoolean.Value = (tok:Get().Data == 'true')
return true, nodeBoolean
elseif tok:ConsumeSymbol('...') then
local nodeDots = {}
nodeDots.AstType = 'DotsExpr'
return true, nodeDots
elseif tok:ConsumeSymbol('{') then
local v = {}
v.AstType = 'ConstructorExpr'
v.EntryList = {}
--
while true do
if tok:IsSymbol('[') then
--key
tok:Get()
local st, key = ParseExpr(scope)
if not st then
return false, GenerateError("Key Expression Expected")
end
if not tok:ConsumeSymbol(']') then
return false, GenerateError("`]` Expected")
end
if not tok:ConsumeSymbol('=') then
return false, GenerateError("`=` Expected")
end
local st, value = ParseExpr(scope)
if not st then
return false, GenerateError("Value Expression Expected")
end
v.EntryList[#v.EntryList+1] = {
Type = 'Key';
Key = key;
Value = value;
}
elseif tok:Is('Ident') then
--value or key
local lookahead = tok:Peek(1)
if lookahead.Type == 'Symbol' and lookahead.Data == '=' then
--we are a key
local key = tok:Get()
if not tok:ConsumeSymbol('=') then
return false, GenerateError("`=` Expected")
end
local st, value = ParseExpr(scope)
if not st then
return false, GenerateError("Value Expression Expected")
end
v.EntryList[#v.EntryList+1] = {
Type = 'KeyString';
Key = key.Data;
Value = value;
}
else
--we are a value
local st, value = ParseExpr(scope)
if not st then
return false, GenerateError("Value Exected")
end
v.EntryList[#v.EntryList+1] = {
Type = 'Value';
Value = value;
}
end
elseif tok:ConsumeSymbol('}') then
break
else
--value
local st, value = ParseExpr(scope)
v.EntryList[#v.EntryList+1] = {
Type = 'Value';
Value = value;
}
if not st then
return false, GenerateError("Value Expected")
end
end
if tok:ConsumeSymbol(';') or tok:ConsumeSymbol(',') then
--all is good
elseif tok:ConsumeSymbol('}') then
break
else
return false, GenerateError("`}` or table entry Expected")
end
end
return true, v
elseif tok:ConsumeKeyword('function') then
local st, func = ParseFunctionArgsAndBody(scope)
if not st then return false, func end
--
func.IsLocal = true
return true, func
else
return ParseSuffixedExpr(scope)
end
end
local unops = lookupify{'-', 'not', '#'}
local unopprio = 8
local priority = {
['+'] = {6,6};
['-'] = {6,6};
['%'] = {7,7};
['/'] = {7,7};
['*'] = {7,7};
['^'] = {10,9};
['..'] = {5,4};
['=='] = {3,3};
['<'] = {3,3};
['<='] = {3,3};
['~='] = {3,3};
['>'] = {3,3};
['>='] = {3,3};
['and'] = {2,2};
['or'] = {1,1};
}
local function ParseSubExpr(scope, level)
--base item, possibly with unop prefix
local st, exp
if unops[tok:Peek().Data] then
local op = tok:Get().Data
st, exp = ParseSubExpr(scope, unopprio)
if not st then return false, exp end
local nodeEx = {}
nodeEx.AstType = 'UnopExpr'
nodeEx.Rhs = exp
nodeEx.Op = op
exp = nodeEx
else
st, exp = ParseSimpleExpr(scope)
if not st then return false, exp end
end
--next items in chain
while true do
local prio = priority[tok:Peek().Data]
if prio and prio[1] > level then
local op = tok:Get().Data
local st, rhs = ParseSubExpr(scope, prio[2])
if not st then return false, rhs end
local nodeEx = {}
nodeEx.AstType = 'BinopExpr'
nodeEx.Lhs = exp
nodeEx.Op = op
nodeEx.Rhs = rhs
--
exp = nodeEx
else
break
end
end
return true, exp
end
ParseExpr = function(scope)
return ParseSubExpr(scope, 0)
end
local function ParseStatement(scope)
local stat = nil
if tok:ConsumeKeyword('if') then
--setup
local nodeIfStat = {}
nodeIfStat.AstType = 'IfStatement'
nodeIfStat.Clauses = {}
--clauses
repeat
local st, nodeCond = ParseExpr(scope)
if not st then return false, nodeCond end
if not tok:ConsumeKeyword('then') then
return false, GenerateError("`then` expected.")
end
local st, nodeBody = ParseStatementList(scope)
if not st then return false, nodeBody end
nodeIfStat.Clauses[#nodeIfStat.Clauses+1] = {
Condition = nodeCond;
Body = nodeBody;
}
until not tok:ConsumeKeyword('elseif')
--else clause
if tok:ConsumeKeyword('else') then
local st, nodeBody = ParseStatementList(scope)
if not st then return false, nodeBody end
nodeIfStat.Clauses[#nodeIfStat.Clauses+1] = {
Body = nodeBody;
}
end
--end
if not tok:ConsumeKeyword('end') then
return false, GenerateError("`end` expected.")
end
stat = nodeIfStat
elseif tok:ConsumeKeyword('while') then
--setup
local nodeWhileStat = {}
nodeWhileStat.AstType = 'WhileStatement'
--condition
local st, nodeCond = ParseExpr(scope)
if not st then return false, nodeCond end
--do
if not tok:ConsumeKeyword('do') then
return false, GenerateError("`do` expected.")
end
--body
local st, nodeBody = ParseStatementList(scope)
if not st then return false, nodeBody end
--end
if not tok:ConsumeKeyword('end') then
return false, GenerateError("`end` expected.")
end
--return
nodeWhileStat.Condition = nodeCond
nodeWhileStat.Body = nodeBody
stat = nodeWhileStat
elseif tok:ConsumeKeyword('do') then
--do block
local st, nodeBlock = ParseStatementList(scope)
if not st then return false, nodeBlock end
if not tok:ConsumeKeyword('end') then
return false, GenerateError("`end` expected.")
end
local nodeDoStat = {}
nodeDoStat.AstType = 'DoStatement'
nodeDoStat.Body = nodeBlock
stat = nodeDoStat
elseif tok:ConsumeKeyword('for') then
--for block
if not tok:Is('Ident') then
return false, GenerateError("<ident> expected.")
end
local baseVarName = tok:Get()
if tok:ConsumeSymbol('=') then
--numeric for
local forScope = CreateScope(scope)
local forVar = forScope:CreateLocal(baseVarName.Data)
--
local st, startEx = ParseExpr(scope)
if not st then return false, startEx end
if not tok:ConsumeSymbol(',') then
return false, GenerateError("`,` Expected")
end
local st, endEx = ParseExpr(scope)
if not st then return false, endEx end
local st, stepEx;
if tok:ConsumeSymbol(',') then
st, stepEx = ParseExpr(scope)
if not st then return false, stepEx end
end
if not tok:ConsumeKeyword('do') then
return false, GenerateError("`do` expected")
end
--
local st, body = ParseStatementList(forScope)
if not st then return false, body end
if not tok:ConsumeKeyword('end') then
return false, GenerateError("`end` expected")
end
--
local nodeFor = {}
nodeFor.AstType = 'NumericForStatement'
nodeFor.Scope = forScope
nodeFor.Variable = forVar
nodeFor.Start = startEx
nodeFor.End = endEx
nodeFor.Step = stepEx
nodeFor.Body = body
stat = nodeFor
else
--generic for
local forScope = CreateScope(scope)
--
local varList = {forScope:CreateLocal(baseVarName.Data)}
while tok:ConsumeSymbol(',') do
if not tok:Is('Ident') then
return false, GenerateError("for variable expected.")
end
varList[#varList+1] = forScope:CreateLocal(tok:Get().Data)
end
if not tok:ConsumeKeyword('in') then
return false, GenerateError("`in` expected.")
end
local generators = {}
local st, firstGenerator = ParseExpr(scope)
if not st then return false, firstGenerator end
generators[#generators+1] = firstGenerator
while tok:ConsumeSymbol(',') do
local st, gen = ParseExpr(scope)
if not st then return false, gen end
generators[#generators+1] = gen
end
if not tok:ConsumeKeyword('do') then
return false, GenerateError("`do` expected.")
end
local st, body = ParseStatementList(forScope)
if not st then return false, body end
if not tok:ConsumeKeyword('end') then
return false, GenerateError("`end` expected.")
end
--
local nodeFor = {}
nodeFor.AstType = 'GenericForStatement'
nodeFor.Scope = forScope
nodeFor.VariableList = varList
nodeFor.Generators = generators
nodeFor.Body = body
stat = nodeFor
end
elseif tok:ConsumeKeyword('repeat') then
local st, body = ParseStatementList(scope)
if not st then return false, body end
--
if not tok:ConsumeKeyword('until') then
return false, GenerateError("`until` expected.")
end
--
local st, cond = ParseExpr(scope)
if not st then return false, cond end
--
local nodeRepeat = {}
nodeRepeat.AstType = 'RepeatStatement'
nodeRepeat.Condition = cond
nodeRepeat.Body = body
stat = nodeRepeat
elseif tok:ConsumeKeyword('function') then
if not tok:Is('Ident') then
return false, GenerateError("Function name expected")
end
local st, name = ParseSuffixedExpr(scope, true) --true => only dots and colons
if not st then return false, name end
--
local st, func = ParseFunctionArgsAndBody(scope)
if not st then return false, func end
--
func.IsLocal = false
func.Name = name
stat = func
elseif tok:ConsumeKeyword('local') then
if tok:Is('Ident') then
local varList = {tok:Get().Data}
while tok:ConsumeSymbol(',') do
if not tok:Is('Ident') then
return false, GenerateError("local var name expected")
end
varList[#varList+1] = tok:Get().Data
end
local initList = {}
if tok:ConsumeSymbol('=') then
repeat
local st, ex = ParseExpr(scope)
if not st then return false, ex end
initList[#initList+1] = ex
until not tok:ConsumeSymbol(',')
end
--now patch var list
--we can't do this before getting the init list, because the init list does not
--have the locals themselves in scope.
for i, v in pairs(varList) do
varList[i] = scope:CreateLocal(v)
end
local nodeLocal = {}
nodeLocal.AstType = 'LocalStatement'
nodeLocal.LocalList = varList
nodeLocal.InitList = initList
--
stat = nodeLocal
elseif tok:ConsumeKeyword('function') then
if not tok:Is('Ident') then
return false, GenerateError("Function name expected")
end
local name = tok:Get().Data
local localVar = scope:CreateLocal(name)
--
local st, func = ParseFunctionArgsAndBody(scope)
if not st then return false, func end
--
func.Name = localVar
func.IsLocal = true
stat = func
else
return false, GenerateError("local var or function def expected")
end
elseif tok:ConsumeKeyword('return') then
local exList = {}
if not tok:IsKeyword('end') then
local st, firstEx = ParseExpr(scope)
if st then
exList[1] = firstEx
while tok:ConsumeSymbol(',') do
local st, ex = ParseExpr(scope)
if not st then return false, ex end
exList[#exList+1] = ex
end
end
end
local nodeReturn = {}
nodeReturn.AstType = 'ReturnStatement'
nodeReturn.Arguments = exList
stat = nodeReturn
elseif tok:ConsumeKeyword('break') then
local nodeBreak = {}
nodeBreak.AstType = 'BreakStatement'
stat = nodeBreak
else
--statementParseExpr
local st, suffixed = ParseSuffixedExpr(scope)
if not st then return false, suffixed end
--assignment or call?
if tok:IsSymbol(',') or tok:IsSymbol('=') then
--check that it was not parenthesized, making it not an lvalue
if (suffixed.ParenCount or 0) > 0 then
return false, GenerateError("Can not assign to parenthesized expression, is not an lvalue")
end
--more processing needed
local lhs = {suffixed}
while tok:ConsumeSymbol(',') do
local st, lhsPart = ParseSuffixedExpr(scope)
if not st then return false, lhsPart end
lhs[#lhs+1] = lhsPart
end
--equals
if not tok:ConsumeSymbol('=') then
return false, GenerateError("`=` Expected.")
end
--rhs
local rhs = {}
local st, firstRhs = ParseExpr(scope)
if not st then return false, firstRhs end
rhs[1] = firstRhs
while tok:ConsumeSymbol(',') do
local st, rhsPart = ParseExpr(scope)
if not st then return false, rhsPart end
rhs[#rhs+1] = rhsPart
end
--done
local nodeAssign = {}
nodeAssign.AstType = 'AssignmentStatement'
nodeAssign.Lhs = lhs
nodeAssign.Rhs = rhs
stat = nodeAssign
elseif suffixed.AstType == 'CallExpr' or
suffixed.AstType == 'TableCallExpr' or
suffixed.AstType == 'StringCallExpr'
then
--it's a call statement
local nodeCall = {}
nodeCall.AstType = 'CallStatement'
nodeCall.Expression = suffixed
stat = nodeCall
else
return false, GenerateError("Assignment Statement Expected")
end
end
stat.HasSemicolon = tok:ConsumeSymbol(';')
return true, stat
end
local statListCloseKeywords = lookupify{'end', 'else', 'elseif', 'until'}
ParseStatementList = function(scope)
local nodeStatlist = {}
nodeStatlist.Scope = CreateScope(scope)
nodeStatlist.AstType = 'Statlist'
--
local stats = {}
--
while not statListCloseKeywords[tok:Peek().Data] and not tok:IsEof() do
local st, nodeStatement = ParseStatement(nodeStatlist.Scope)
if not st then return false, nodeStatement end
stats[#stats+1] = nodeStatement
end
--
nodeStatlist.Body = stats
return true, nodeStatlist
end
local function mainfunc()
local topScope = CreateScope()
return ParseStatementList(topScope)
end
local st, main = mainfunc()
return st, main
end
-- Analysis code actually begins here, the above is just the parser
local DontIngoreReturns = {require=1}
local function TryFetchPageContent(obj)
local out = ""
if type(obj) == "string" then
local page = mw.title.new(obj)
if not page then
error("Failed to grab the page '"..obj.."'")
end
out = page:getContent() or ""
elseif obj == nil then
out = mw.title.getCurrentTitle().subjectPageTitle:getContent() or ""
else
error("Unrecognised page input type '"..type(obj).."'")
end
return out
end
local function GenerateMessages(ast)
local messages = {}
local function AddMessage(s,m)
messages[#messages+1] = {Scope=s, Message=m}
end
--Non-AST based checks (AKA scope checks on ScopeContainer and GlobalVarGetMap)
local ignoredUnderscores = 0
for _,scope in pairs(ScopeContainer) do
for Local,_ in pairs(scope.LocalMap) do
local scopeName = (scope.Line == 1 and "Main scope") or "Scope starting line "..scope.Line
if not scope.AccessedLocals[Local] then
if Local == "_" then
ignoredUnderscores = ignoredUnderscores + 1
else
AddMessage(scopeName, "<code>"..Local.."</code> is defined but never referenced")
end
elseif Local == "_" then
AddMessage(scopeName, "Variable <code>_</code> is referenced, despite the name implying it is unused")
end
end
end
if ignoredUnderscores > 1 then
AddMessage("Entire script", ignoredUnderscores.." local variables called <code>_</code> were defined but never referenced, likely intentionally")
elseif ignoredUnderscores == 1 then
AddMessage("Entire script", "1 local variable called <code>_</code> was defined but never referenced, likely intentionally")
end
for global,_ in pairs(GlobalVarGetMap) do
if not rawget(_G,global) and global ~= "self" then --self picking up wrongly is an issue in the parser, and generally not an expected global name, so we lazily ignore
AddMessage("Entire script", "Global variable <code>"..global.."</code> was referenced or defined, yet isn't a standard global")
end
end
--AST based checks (E.g. ignored returns of certain calls)
local checked = {}
local function deepscan(t)
checked[t] = true
if t.AstType == "CallStatement" then
local caller = t.Expression.Base
if caller.AstType == "VarExpr" and DontIngoreReturns[caller.Name] then
local args = t.Expression.Arguments
if not(args[1] and args[1].Value and args[1].Value.Constant == "strict") then
local representation = (args[1] and args[1].Value and caller.Name.."("..args[1].Value.Data..")") or caller.Name.."(<?>)"
AddMessage("Unknown", "Return value of <code>"..representation.."</code> was ignored")
end
end
end
for _,v in pairs(t) do
if type(v) == "table" and not checked[v] then
deepscan(v)
end
end
end
deepscan(ast)
return messages
end
--Module entry point
local function run(C)
local C = TryFetchPageContent(C)
if C == "" then
print("No lua to parse from the given input")
return
end
local s,p = ParseLua(C)
if not s then
error("Failed to parse the lua - "..p)
end
local messages = GenerateMessages(p)
if #messages == 0 then
print("No issues found")
else
for _,message in pairs(messages) do
local filtered = string.gsub(message.Message, "(.-)</?%w+>", "%1") --Remove formatting tags
print(message.Scope.." || "..filtered)
end
end
print("Analysis finished")
end
--Template entry point
local function main(frame)
local page = mw.title.getCurrentTitle().subjectPageTitle.prefixedText
page = string.gsub(page, "(.+)/doc$","%1") --strip /doc
if frame.args[1] and frame.args[1] ~= "" then
page = frame.args[1]
end
local C = TryFetchPageContent(page)
if C == "" then
return "<span class='error'>No lua to parse in page '"..page.."'</span>"
end
local s,p = ParseLua(C)
if not s then
error("Failed to parse the lua - "..p)
end
local messages = GenerateMessages(p)
if #messages == 0 then
return '{| class="wikitable"\n! style="text-align:center" | Basic code analysis for [[' .. page .. ']]' ..
'\n|-\n| style="text-align:center"| No issues found\n|}'
else
local tableContent = '{| class="wikitable sortable"\n! style="text-align:center" colspan=2 | Basic code analysis for [[' .. page .. ']]' .. '\n|-\n! Scope !! Message'
for _,message in pairs(messages) do
tableContent = tableContent .. "\n|-\n| " .. message.Scope .. " || " .. message.Message
end
return tableContent .. "\n|}"
end
end
return {
run = run,
main = main,
ParseLua = ParseLua --For debugging or just general curiosity
}