-- Module:Charmap
--
-- MyWikiBiz, Author Your Legacy — Sunday October 26, 2025
-- Revision as of 20:55, 15 July 2021 by Zoran (talk | contribs) (Pywikibot 6.4.0)
-- (diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
-- Jump to navigation | Jump to search
local p = {}
local unicode_convert = require('Module:Unicode convert')
local numcr2namecr = mw.loadData('Module:Numcr2namecr')
local yesno = require('Module:Yesno')
local GB18030_cache = {}

-- Converts a string of whitespace-separated hexadecimal words into the
-- matching decimal words joined by single spaces.
-- Example: "  A  B  FF  " becomes "10 11 255".
-- A nil, empty or all-whitespace input yields the empty string; a word that
-- does not parse as hexadecimal is rendered as 0.
local function hex2dec_words(s)
	local trimmed = mw.text.trim(s or '')
	local words = mw.text.split(trimmed, '%s+')
	if #words == 1 and words[1] == '' then
		return ''
	end
	local decimals = {}
	for index, word in ipairs(words) do
		decimals[index] = tonumber(word, 16) or 0
	end
	return table.concat(decimals, ' ')
end

-- Wrapper to let us replace Template:UTF-8 and Template:UTF-16
--
-- Dispatches on `title`:
--   'UTF-8' / 'UTF-16'     -> handled by Module:Unicode convert, passing `args` through
--   'hex2dec'              -> args[1] parsed as hexadecimal, returned as a decimal string
--   'charmap/numcharref'   -> escaped numeric character reference for args[1];
--                             decimal when args.base == 'dec', otherwise hexadecimal
--   'charmap/showchar'     -> the character itself, as a hexadecimal character reference
--   'GB18030'              -> expansion of Template:GB18030 for args[1], memoised in
--                             GB18030_cache; converted word-by-word to decimal when
--                             args.base == 'dec'
--   anything else          -> the template named `title` is expanded with `args`
--
-- @param title  string  name of the template (or pseudo-template) to evaluate
-- @param args   table   arguments; args[1] is the hexadecimal code, args.base is optional
-- @return string
local function template(title, args)
	if title == 'UTF-8' then
		return unicode_convert.getUTF8{ args = args }
	elseif title == 'UTF-16' then
		return unicode_convert.getUTF16{ args = args }
	elseif title == 'hex2dec' then
		return tostring(tonumber(args[1], 16))
	elseif title == 'charmap/numcharref' then
		local format = args['base'] == 'dec' and '&amp;#%d;<wbr>' or '&amp;#x%02X;<wbr>'
		return format:format(tonumber(args[1], 16))
	elseif title == 'charmap/showchar' then
		return '&#x' .. mw.text.trim(args[1]) .. ';'
	elseif title == 'GB18030' then
		local code = args[1]
		local bytes = GB18030_cache[code]
		if not bytes then
			-- Do not start another template expansion once os.clock() reports
			-- more than 7 seconds of CPU time (presumably to stay inside the
			-- script time limit -- TODO confirm).
			if os.clock() > 7 then
				return "''currently unavailable''"
			end
			bytes = mw.getCurrentFrame():expandTemplate{ title = 'GB18030', args = { code } }
			GB18030_cache[code] = bytes
		end
		-- Cached and freshly expanded values take the same decimal conversion,
		-- so the output for a code does not depend on whether it was seen before.
		if args['base'] == 'dec' then
			return hex2dec_words(bytes)
		end
		return bytes
	else
		return mw.getCurrentFrame():expandTemplate{ title = title, args = args }
	end
end

-- Renders the table cells for one character in one encoding.
--
-- args[1] is a string of whitespace-separated hexadecimal code points and
-- args[2] the encoding / pseudo-template name understood by template()
-- (defaults to 'UTF-8'). At most the first four words are rendered.
--
-- For every encoding except 'charmap/showchar' and 'hex2dec' the result is
-- '|| <decimal forms>|| <default forms>'; for those two only the default
-- forms are produced. Words are followed by a space, except for 'charmap/*'
-- encodings (no separator) and except after the fourth word.
--
-- @param args  table  { code, encoding }
-- @return string
local function _multiUTF(args)
	local code = args[1] or ''
	local encoding = args[2] or 'UTF-8'
	local pstp_sep = encoding:sub(1, 8) == 'charmap/' and '' or ' '
	-- Split on runs of whitespace (as hex2dec_words does) so that repeated
	-- blanks between code points do not produce empty words.
	local words = mw.text.split(mw.text.trim(code), '%s+')
	local count = math.min(#words, 4)
	local buffer = {}

	-- Appends the first `count` words converted with the given base
	-- (nil selects the converter's default form).
	local function emit(base)
		for i = 1, count do
			buffer[#buffer + 1] = template(encoding, { words[i], base = base })
			if i ~= 4 then
				buffer[#buffer + 1] = pstp_sep
			end
		end
	end

	if not (encoding == 'charmap/showchar' or encoding == 'hex2dec') then
		buffer[#buffer + 1] = '|| '
		emit('dec')
		buffer[#buffer + 1] = '|| '
	end
	emit(nil)

	return table.concat(buffer)
end

-- Temporary entry point: exposes _multiUTF to #invoke, feeding it the
-- arguments of the invoking frame.
function p.multiUTF(frame)
	local args = frame.args
	return _multiUTF(args)
end

-- Returns the first of args[arg1], args[arg2] that is neither nil nor the
-- empty string; returns nil when neither qualifies. arg2 is optional.
local function paramCoalesce(args, arg1, arg2)
	local primary = args[arg1]
	if primary ~= nil and primary ~= '' then
		return primary
	end
	if arg2 == nil then
		return nil
	end
	local fallback = args[arg2]
	if fallback ~= nil and fallback ~= '' then
		return fallback
	end
	return nil
end

-- Builds the table row for the n-th alternative mapping.
--
-- Reads the parent frame's arguments: `map<n>` is the row label and
-- `map<n>char<i>` the hexadecimal code for character i. A pair of cells
-- (decimal words, then the upper-cased hexadecimal text) is emitted for every
-- i in 1..10 whose `name<i>` is non-empty.
--
-- @param frame  frame object of the #invoke call
-- @param n      mapping index; defaults to frame.args[1]
-- @return string  wikitext row ending in a newline, or '' when `map<n>` is unset or empty
p.alt = function(frame, n)
	n = n or frame.args[1]
	local args = frame:getParent().args
	local label = args['map' .. n]
	if label == '' or label == nil then return '' end

	local cells = { '|-\n| style="text-align:left;" | ' .. label }
	for i = 1, 10 do
		local name = args['name' .. i]
		if name and name ~= '' then
			local x = args['map' .. n .. 'char' .. i] or ''
			cells[#cells + 1] = ' || ' .. hex2dec_words(x) .. ' || ' .. frame:callParserFunction('uc', x)
		end
	end
	cells[#cells + 1] = '\n'
	return table.concat(cells)
end

-- Builds the header and standard encoding rows of the character table:
-- the name row, the "Encodings / decimal / hex" heading, and the Unicode,
-- UTF-8, (optionally) UTF-16, (optionally) GB 18030 and numeric character
-- reference rows.
--
-- Reads the parent frame's arguments: positional argument i is the
-- hexadecimal code of character i, `name<i>` its name (characters without a
-- non-empty name are skipped), `Info<i>` / `Info` == 'yes' turns the U+ code
-- into an external link, and `IncludeGB` enables the GB 18030 row.
--
-- @param frame  frame object of the #invoke call
-- @return string  wikitext rows (no trailing newline)
p.head = function(frame)
	-- All working tables are local: leaving them global would leak state
	-- out of this function.
	local args = frame:getParent().args
	local codes = {} -- May contain nils if bad input
	local infos = {} -- boolean array
	local names = {} -- string array
	for i = 1, 10 do
		if args['name' .. i] and args['name' .. i] ~= '' then
			local slot = 1 + #names
			-- The parser function uc: preserves strip markers.
			codes[slot] = frame:callParserFunction('uc', mw.text.trim(args[i]))
			infos[slot] = paramCoalesce(args, 'Info' .. i, 'Info') == 'yes'
			names[slot] = frame:callParserFunction('uc', args['name' .. i])
		end
	end

	-- One row whose cells are produced by _multiUTF for every collected code.
	local function encoding_row(label, encoding)
		local row = '\n|-\n| style="text-align:left;" | ' .. label
		for i = 1, #names do
			row = row .. _multiUTF{codes[i], encoding}
		end
		return row
	end

	local output = '|- style="text-align:center;"\n'
	if #names > 0 then
		-- Labelled "Character name" when no code was collected, "Unicode name" otherwise.
		output = output .. "| align=\"center\" | '''" .. frame:preprocess('<templatestyles src="smallcaps/styles.css"/>') ..
			(next(codes) == nil and 'Character' or 'Unicode') .. " name''' "
		for _, name in ipairs(names) do
			-- Display the character name in small caps
			output = output .. '|| colspan=2 | <span class="smallcaps smallcaps-smaller">' .. name .. '</span>'
		end
	end

	output = output .. '\n|-\n! style="text-align:left;" | Encodings' ..
		string.rep('|| decimal || hex', #names) ..
		'\n|-\n| style="text-align:left;" | [[Unicode]]'
	for i = 1, #names do
		local code = codes[i] or ''
		-- padleft, get to form U+XXXX or U+XXXX+XXXX.
		if #code >= 4 then
			code = code:gsub(' ', '+') -- only the first result (the string) is kept
		else
			code = ('0000' .. code):sub(-4)
		end
		local pattern = infos[i] and '[http://www.fileformat.info/info/unicode/char/%s/index.htm U+%s]' or 'U+%s'
		output = output .. ' || ' .. _multiUTF{codes[i] or '0', 'hex2dec'} .. ' || ' ..
			pattern:format(code, code)
	end

	output = output .. encoding_row('[[UTF-8]]', 'UTF-8')

	local outsideBMP = false -- Do we need to show surrogate pairs?
	for i = 1, #names do
		if (tonumber(codes[i] or '', 16) or 0) > 0xFFFF then
			outsideBMP = true
			break
		end
	end

	if outsideBMP then
		output = output .. encoding_row('[[UTF-16]]', 'UTF-16')
	end

	if yesno(args['IncludeGB']) then
		output = output .. encoding_row('[[GB 18030]]', 'GB18030')
	end

	output = output .. encoding_row('[[Numeric character reference]]', 'charmap/numcharref')

	return output
end

-- Builds one row of named character references.
--
-- With n == 'html' the row is labelled "Named character reference" and each
-- cell is looked up in Module:Numcr2namecr by the numeric value of the
-- character's hexadecimal code (positional argument i). For any other n the
-- label comes from `namedref<n>` and the cells from `ref<n>char<i>`.
-- Only characters i in 1..10 with a non-empty `name<i>` get a cell.
--
-- @param frame  frame object of the #invoke call
-- @param n      'html' or a reference-set index; defaults to frame.args[1]
-- @return string  wikitext row ending in a newline, or '' when every cell is empty
p.named = function(frame, n)
	n = n or frame.args[1]
	local args = frame:getParent().args
	local is_html = (n == 'html')
	local namedref
	if is_html then
		namedref = '[[List of XML and HTML character entity references|Named character reference]]'
	else
		namedref = args['namedref' .. n] or ''
	end

	local cells = {}
	local empty = true
	for i = 1, 10 do
		if args['name' .. i] and args['name' .. i] ~= '' then
			local x
			if is_html then
				-- tonumber(nil, 16) raises an error, so a missing code is
				-- treated as an empty string (which simply finds no entity).
				x = numcr2namecr[tonumber(args[i] or '', 16)] or ''
			else
				x = args['ref' .. n .. 'char' .. i] or ''
			end
			empty = empty and #x == 0
			cells[#cells + 1] = ' || colspan="2" | ' .. x
		end
	end
	if empty then return '' end
	return '|- style="text-align:center"\n| style="text-align:left" | ' .. namedref ..
		table.concat(cells) .. '\n'
end

-- Main entry point: assembles the complete "Character information" wikitable
-- from the parent frame's arguments. The table consists of a preview row,
-- the rows from p.head, the HTML named-reference row, up to ten alternative
-- mapping rows (p.alt) and up to five custom named-reference rows (p.named).
function p.main(frame)
	local args = frame:getParent().args
	local parts = {
		'{| class="wikitable template-charmap" style="text-align:right"\n',
		'|+Character information\n',
		-- Centre this row explicitly; otherwise the mobile site tries
		-- aligning the preview cells right.
		'|- style="text-align:center"\n',
		'! scope="row" | Preview',
	}

	-- Preview cells: the symbol in a large font, or an image when supplied.
	for i = 1, 10 do
		local name = args['name' .. i]
		if name and name ~= '' then
			local size = paramCoalesce(args, 'size' .. i, 'size') or '150%'
			local preview = paramCoalesce(args, 'image' .. i) or
				_multiUTF{args[i] or '20', 'charmap/showchar'}
			parts[#parts + 1] = ' || colspan="2" style="font-size:' .. size .. '" | ' .. preview
		end
	end

	parts[#parts + 1] = '\n' .. p.head(frame) .. '\n' .. p.named(frame, 'html')

	for mapIndex = 1, 10 do
		parts[#parts + 1] = p.alt(frame, mapIndex)
	end
	for refIndex = 1, 5 do
		parts[#parts + 1] = p.named(frame, refIndex)
	end

	parts[#parts + 1] = '|}'
	return table.concat(parts)
end

return p