Module:Sensitive IP addresses/list/validate

< Module:Sensitive IP addresses | list
Revision as of 05:56, 16 July 2021 by Zoran (talk | contribs) (Pywikibot 6.4.0)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

This module validates the data in Module:Sensitive IP addresses/list.



-- This module validates the data in [[Module:Sensitive IP addresses/list]].

-- Load modules
local mSIPA_API = require('Module:Sensitive IP addresses/API')
local Subnet = require('Module:IP').Subnet

-- Constants
-- Title of the data module being validated. It is handed to mw.loadData and
-- also interpolated into the error messages produced while loading.
local DATA_MODULE = 'Module:Sensitive IP addresses/list'

-- Export table; the module's public functions are attached to it and it is
-- returned at the end of the file.
local p = {}

local function makeErrorLogger()
	-- Return an object that accumulates validation errors and formats them
	-- into a report.
	--
	-- Fields and methods of the returned object:
	--   errors             array of formatted error message strings
	--   addError           append a message built with string.format
	--   addEntryTypeError  append a "wrong type" message for an entry field
	--   hasErrors          true if at least one error has been logged
	--   makeReport         return the report text for all logged errors
	return {
		errors = {},
		addError = function (self, msg, ...)
			-- msg is a string.format pattern; the varargs are its values.
			table.insert(self.errors, string.format(msg, ...))
		end,
		addEntryTypeError = function (self, entryIdx, field, actual, expected)
			-- Log that `field` of data entry number `entryIdx` had type
			-- `actual` when `expected` (a human-readable description such as
			-- 'table or nil') was required. `expected` falls back to
			-- 'string or nil' so that calls omitting it keep producing the
			-- message they always did.
			self:addError(
				'The %s field in data entry #%d was type %s (should be %s)',
				field, entryIdx, actual, expected or 'string or nil'
			)
		end,
		hasErrors = function (self)
			return #self.errors > 0
		end,
		makeReport = function (self)
			-- Return 'No errors found', or a heading followed by one wikitext
			-- bullet per logged error.
			if #self.errors < 1 then
				return 'No errors found'
			else
				local ret = {'Found the following errors:'}
				for _, msg in ipairs(self.errors) do
					ret[#ret + 1] = string.format('* <strong class="error">%s</strong>', msg)
				end
				return table.concat(ret, '\n')
			end
		end,
	}
end

local function loadData(logger)
	-- Fetch the data module through mw.loadData. Any problem encountered is
	-- recorded on the logger. Returns whatever mw.loadData produced, or nil
	-- if the call itself raised an error.
	local ok, result = pcall(mw.loadData, DATA_MODULE)

	if ok then
		-- The load succeeded; make sure what came back is actually a table.
		local resultType = type(result)
		if resultType ~= 'table' then
			logger:addError('%s returned a %s; table expected', DATA_MODULE, resultType)
		end
		return result
	end

	-- The load itself failed.
	logger:addError('%s could not be parsed by mw.loadData; check for [[mw:LUAREF#mw.loadData|invalid data]]', DATA_MODULE)
	return nil
end

local function checkRangeField(logger, dataIndex, field, ranges)
	-- Check one IP range field of data entry number dataIndex. The field is
	-- optional, but when present it must be an array of non-blank strings.
	-- Only types and blankness are checked here, not CIDR syntax.
	if ranges == nil then
		return
	end
	if type(ranges) ~= 'table' then
		logger:addEntryTypeError(dataIndex, field, type(ranges), 'table or nil')
		return
	end
	for j, range in ipairs(ranges) do
		if type(range) ~= 'string' then
			logger:addError(
				'Range #%d in the %s field of entry #%d was type %s (expected string)',
				j, field, dataIndex, type(range)
			)
		elseif range == '' then
			logger:addError(
				'Range #%d in the %s field of entry #%d was a blank string',
				j, field, dataIndex
			)
		end
	end
end

local function checkDataStructure(logger, data)
	-- Check the structure of the individual entries in the data table.
	-- logger is an error logger object (errors are reported through its
	-- addError and addEntryTypeError methods); data is the array of entries.
	-- Nothing is returned; all findings are recorded on the logger.
	for dataIndex, subtable in ipairs(data) do
		if type(subtable) ~= 'table' then
			-- A non-table entry has no fields to inspect, and indexing a
			-- number or boolean would raise an error, so report it and move
			-- on to the next entry.
			logger:addError('Data entry #%d is not a table', dataIndex)
		else
			-- Check that we have required string fields.
			for _, field in ipairs{'name', 'id', 'description'} do
				if type(subtable[field]) ~= 'string' then
					logger:addError(
						"Missing field '%s' in data entry #%d",
						field,
						dataIndex
					)
				elseif subtable[field] == '' then
					logger:addError(
						"Blank field '%s' in data entry #%d",
						field,
						dataIndex
					)
				end
			end

			-- Check that optional string fields are strings if they are present.
			for _, field in ipairs{'notes'} do
				local val = subtable[field]
				if val ~= nil and type(val) ~= 'string' then
					logger:addEntryTypeError(dataIndex, field, type(val), 'string or nil')
				end
			end

			-- Check that the reason is valid if it is present.
			if subtable.reason ~= nil then
				if type(subtable.reason) ~= 'string' then
					logger:addEntryTypeError(
						dataIndex,
						'reason',
						type(subtable.reason),
						'string or nil'
					)
				elseif not mSIPA_API._isValidSensitivityReason(subtable.reason) then
					logger:addError(
						"The reason field in data entry #%d was invalid (should be '%s')",
						dataIndex,
						mSIPA_API._getSensitivityReasons("', '", "', or '")
					)
				end
			end

			-- Check IP range tables.
			for _, field in ipairs{'ipv4Ranges', 'ipv6Ranges'} do
				checkRangeField(logger, dataIndex, field, subtable[field])
			end
		end
	end
end

local function makeSubnet(cidr)
	-- Try to build a subnet object from the given CIDR string by calling
	-- Subnet.new in protected mode. The new object is returned on success;
	-- if the constructor raises, nothing is returned.
	local ok, subnet = pcall(Subnet.new, cidr)
	if not ok then
		return
	end
	return subnet
end

local function checkDuplicateIds(logger, data)
	-- Report every data entry whose ID was already used by an earlier entry.
	-- Each duplicate is reported against the first entry that used the ID.
	local firstSeenAt = {} -- maps ID -> index of the first entry that used it
	for entryIndex, entry in ipairs(data) do
		local id = entry.id
		local earlierIndex = firstSeenAt[id]
		if not earlierIndex then
			firstSeenAt[id] = entryIndex
		else
			local earlierEntry = data[earlierIndex]
			logger:addError(
				"Data entry #%d (%s) and data entry #%d (%s) have duplicate ID '%s'",
				earlierIndex, earlierEntry.name,
				entryIndex, entry.name,
				id
			)
		end
	end
end

local function checkRanges(logger, data)
	-- Validate every CIDR string in the data table and then look for ranges
	-- that overlap one another. The data table is assumed to have already
	-- passed the structural checks. All findings are recorded on the logger.

	-- Range fields in the order they are examined, paired with the IP
	-- version that each one is supposed to contain.
	local fieldSpecs = {
		{field = 'ipv4Ranges', version = 'IPv4'},
		{field = 'ipv6Ranges', version = 'IPv6'},
	}

	-- Pass 1: parse each CIDR string and collect the valid subnets, grouped
	-- by IP version.
	local collected = {
		ipv4 = {},
		ipv6 = {},
	}
	for entryIndex, entry in ipairs(data) do
		for _, spec in ipairs(fieldSpecs) do
			local fieldName, wantedVersion = spec.field, spec.version
			local cidrList = entry[fieldName]
			if cidrList then
				for rangeIndex, cidr in ipairs(cidrList) do
					local subnet = makeSubnet(cidr)
					if not subnet then
						logger:addError(
							"Invalid CIDR string '%s' in range #%d in the %s field of entry #%d (%s)",
							cidr, rangeIndex, fieldName, entryIndex, entry.name
						)
					elseif subnet:getVersion() ~= wantedVersion then
						-- Parsed fine, but filed under the wrong IP version.
						logger:addError(
							"Found %s CIDR string '%s' in range #%d in the %s field of entry #%d (%s); should be %s",
							subnet:getVersion(), cidr, rangeIndex, fieldName, entryIndex, entry.name, wantedVersion
						)
					else
						local bucket = collected[wantedVersion:lower()]
						bucket[#bucket + 1] = {
							dataIndex = entryIndex,
							field = fieldName,
							rangeIndex = rangeIndex,
							subnet = subnet,
							name = entry.name,
						}
					end
				end
			end
		end
	end

	-- Pass 2: compare every pair of subnets of the same IP version.
	local comparisonCount = 0
	for versionKey, subnets in pairs(collected) do
		local total = #subnets
		for a = 1, total - 1 do
			local first = subnets[a]
			for b = a + 1, total do
				local second = subnets[b]
				comparisonCount = comparisonCount + 1
				if first.subnet:overlapsSubnet(second.subnet) then
					local versionLabel
					if versionKey == 'ipv4' then
						versionLabel = 'IPv4'
					else
						versionLabel = 'IPv6'
					end
					logger:addError(
						"%s range #%d '%s' in data entry #%d (%s) overlaps range #%d '%s' in data entry #%d (%s)",
						versionLabel,
						first.rangeIndex, first.subnet:getCIDR(), first.dataIndex, first.name,
						second.rangeIndex, second.subnet:getCIDR(), second.dataIndex, second.name
					)
				end
			end
		end
	end
	mw.log(comparisonCount .. ' subnet comparisons performed')
end

function p.main()
	-- Run the validation stages in order and return the report text. Each
	-- later stage relies on the earlier ones having passed, so the run stops
	-- at the first stage that logs any error.
	local logger = makeErrorLogger()
	local data = loadData(logger)

	if not logger:hasErrors() then
		checkDataStructure(logger, data)

		-- The duplicate-ID and range checks only run on structurally valid data.
		if not logger:hasErrors() then
			checkDuplicateIds(logger, data)
			checkRanges(logger, data)
		end
	end

	return logger:makeReport()
end

return p