Module:Ancient Greek/typing

< Module:Ancient Greek
Revision as of 19:17, 8 July 2021 by Zoran (talk | contribs) (Moved page from wikipedia:en:Module:Ancient Greek/typing)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

This module makes it easy to type Ancient Greek. It converts a variant of Beta Code into Greek script. Diacritics can be entered in any order and are output in the correct order. Diacritics can also be added to existing Greek text. It implements the {{grc}} template.

All tests passed.

test
Text Expected Actual
Template:Tick a__i ᾱͅ ᾱͅ
Template:Tick a)lhqh/s ἀληθής ἀληθής
Template:Tick a)lhqhs* ἀληθησ ἀληθησ
Template:Tick a)lhqhs- ἀληθησ- ἀληθησ-
Template:Tick a^)nh/r ᾰ̓νήρ ᾰ̓νήρ
Template:Tick Phlhi+a/dhs Πηληϊάδης Πηληϊάδης
Template:Tick Phlhi^+a^/dhs Πηληῐ̈ᾰ́δης Πηληῐ̈ᾰ́δης
Template:Tick Πηληϊ^ά^δης Πηληῐ̈ᾰ́δης Πηληῐ̈ᾰ́δης
Template:Tick e)a_/n ἐᾱ́ν ἐᾱ́ν
Template:Tick ἐά_ν ἐᾱ́ν ἐᾱ́ν
Template:Tick pa=sa^ πᾶσᾰ πᾶσᾰ
Template:Tick u_(mei=s ῡ̔μεῖς ῡ̔μεῖς
Template:Tick a/)^ner ᾰ̓́νερ ᾰ̓́νερ
Template:Tick a/^)ner ᾰ̓́νερ ᾰ̓́νερ
Template:Tick a)/^ner ᾰ̓́νερ ᾰ̓́νερ
Template:Tick a)^/ner ᾰ̓́νερ ᾰ̓́νερ
Template:Tick dai+/frwn δαΐφρων δαΐφρων
Template:Tick dai/+frwn δαΐφρων δαΐφρων



-- Table of exported functions; it is returned at the end of the module.
local p = {}

-- NOTE(review): sparse_concat is not referenced anywhere else in the visible code.
local sparse_concat = require("Module:TableTools").sparseConcat
-- Shorthand used below to build strings from Unicode code points.
local U = mw.ustring.char

-- Byte-level patterns for one UTF-8 encoded character (a lead byte followed by
-- any continuation bytes). They are used with the plain string library
-- (string.gmatch / string.gsub) further down.
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*" -- roughly equivalent to "." in Ustring patterns
local one_UTF8_char_or_none = "[%z\1-\127\194-\244]?[\128-\191]*" -- roughly equivalent to ".?" in Ustring patterns

-- Combining diacritics, spacing equivalents, and punctuation that the
-- shortcut tables and helper functions below refer to.
local subscript = U(0x345) -- iota subscript (ypogegrammeni)
local macron = U(0x304) -- macron
local spacing_macron = U(0xAF)
local modifier_macron = U(0x2C9) -- modifier letter macron
local breve = U(0x306) -- breve
local spacing_breve = "˘" -- spacing breve
local diaeresis = U(0x308) -- diaeresis
local rough = U(0x314) -- rough breathing (reversed comma)
local smooth = U(0x313) -- smooth breathing (comma)
local acute = U(0x301) -- acute
local grave = U(0x300) -- grave
local circumflex = U(0x342) -- Greek circumflex (perispomeni)
local question_mark = U(0x37E) -- Greek question mark
local spacing_rough = "῾" -- spacing rough breathing
local spacing_smooth = "᾿" -- spacing smooth breathing

-- Character class ("[...]") listing every combining diacritic above. The
-- members are multi-byte in UTF-8, so the class is used with mw.ustring
-- functions (see p.reorder_diacritics) rather than the plain string library.
local combining_diacritic = table.concat{
	"[",
	macron, breve,
	rough, smooth, diaeresis,
	acute, grave, circumflex,
	subscript,
	"]",
}

-- The numbers are used to sort series of diacritics.
-- Lower numbers sort first; diacritics that share a number compare as equal.
-- process() also uses this table as a membership test for combining diacritics.
local diacritic_position = {
	[macron] = 1,
	[breve] = 2,
	[rough] = 3,
	[smooth] = 3,
	[diaeresis] = 3,
	[acute] = 4,
	[grave] = 4,
	[circumflex] = 4,
	[subscript] = 5,
}

-- Call func once for every UTF-8 character of str, in order. Each call
-- receives the character as a string; whatever func returns is ignored.
local function for_each(str, func)
	local next_char = string.gmatch(str, UTF8_char)
	local char = next_char()
	while char do
		func(char)
		char = next_char()
	end
end

--[=[	Comparison function for table.sort: returns true when diacritic1 has to
		come before diacritic2. Sorting with it yields the following order:
			1. macron
			2. breve
			3. breathings or diaeresis
			4. acute, circumflex, or grave
			5. iota subscript
		Diacritics of the same category compare as equal; no error is raised
		when a sequence contains more than one diacritic of a category.
		Both arguments have to be keys of diacritic_position.
		NOTE(review): an earlier comment said this ordering is used by
		[[Module:typing-aids]]; that cannot be confirmed from this file.
]=]
local function get_relative_position(diacritic1, diacritic2)
	local rank1 = diacritic_position[diacritic1]
	local rank2 = diacritic_position[diacritic2]
	return rank1 < rank2
end

-- Split a UTF-8 string into a sequence with one character per element.
-- The pattern is the same byte-level pattern as the module-level UTF8_char.
local function chars_to_table(chars)
	local result = {}
	for char in string.gmatch(chars, "[%z\1-\127\194-\244][\128-\191]*") do
		result[#result + 1] = char
	end
	return result
end

-- Take a string made up of combining diacritics and return the same
-- diacritics sorted with get_relative_position as the comparison function.
local function reorder_diacritic_sequence(diacritics)
	local marks = chars_to_table(diacritics)
	table.sort(marks, get_relative_position)
	return table.concat(marks)
end

-- Convert text to NFD, then sort every run of two or more combining
-- diacritics into the order given by diacritic_position. Only the rewritten
-- string is returned (the substitution count from gsub is dropped).
function p.reorder_diacritics(text)
	local decomposed = mw.ustring.toNFD(text)
	local diacritic_run = combining_diacritic .. combining_diacritic .. "+"
	local reordered = mw.ustring.gsub(decomposed, diacritic_run, reorder_diacritic_sequence)
	return reordered
end

-- Shortcuts longer than one character. p.to_Greek applies them with
-- string.gsub before the single-character table, and each key is used as a
-- Lua pattern, so keys must not contain unescaped pattern magic characters.
local multiple = {
	["_i"] = subscript,
}

-- Single-character shortcuts. p.to_Greek looks up every UTF-8 character of the
-- input in this table and replaces it with the value; characters without an
-- entry are left unchanged. p.show_shortcuts lists the table's contents.
local single = {
	["a"] = "α", ["A"] = "Α",
	["b"] = "β", ["B"] = "Β",
	["c"] = "ξ", ["C"] = "Ξ",
	["d"] = "δ", ["D"] = "Δ",
	["e"] = "ε", ["E"] = "Ε",
	["f"] = "φ", ["F"] = "Φ",
	["g"] = "γ", ["G"] = "Γ",
	["h"] = "η", ["H"] = "Η",
	["i"] = "ι", ["I"] = "Ι",
	["k"] = "κ", ["K"] = "Κ",
	["l"] = "λ", ["L"] = "Λ",
	["m"] = "μ", ["M"] = "Μ",
	["n"] = "ν", ["N"] = "Ν",
	["o"] = "ο", ["O"] = "Ο",
	["p"] = "π", ["P"] = "Π",
	["q"] = "θ", ["Q"] = "Θ",
	["r"] = "ρ", ["R"] = "Ρ",
	["s"] = "σ", ["S"] = "Σ",
	["t"] = "τ", ["T"] = "Τ",
	["u"] = "υ", ["U"] = "Υ",
	["v"] = "ϝ", ["V"] = "Ϝ",
	["w"] = "ω", ["W"] = "Ω",
	["x"] = "χ", ["X"] = "Χ",
	["y"] = "ψ", ["Y"] = "Ψ",
	["z"] = "ζ", ["Z"] = "Ζ",
	
	-- vowel length
	["_"] = macron, [spacing_macron] = macron, [modifier_macron] = macron,
	["^"] = breve, [spacing_breve] = breve,
	
	-- diaeresis and breathings
	["+"] = diaeresis, ["("] = rough, [")"] = smooth,
	
	-- accents
	-- The "{{=}}" key is five characters long, so the one-character lookup in
	-- p.to_Greek can never hit it; it only appears in the p.show_shortcuts
	-- listing. NOTE(review): presumably it documents the template-safe way of
	-- typing "=" -- confirm.
	["/"] = acute, ["\\"] = grave,
	["="] = circumflex, ["{{=}}"] = circumflex, ["~"] = circumflex,
	
	-- punctuation
	["'"] = "’",
	["?"] = question_mark,
	[";"] = "·",
	["*"] = "", -- place after s to prevent it from turning into final sigma
	
	-- pipe
	["!"] = "|",
}

--[=[	Replace every Latin "s" in text with a Greek sigma.
		An "s" becomes final sigma (ς) when it is the last character of the
		text, or when the next character is whitespace or punctuation other
		than "*" and "-"; otherwise it becomes medial sigma (σ).
		
		The character after the "s" is only inspected, never consumed, so that
		an "s" directly following another "s" is still examined ("ss" gives
		"σς"). Consuming it as part of the match would skip the second "s" and
		leave it to be turned into a medial sigma later.
		
		Returns the converted string only.
]=]
local function convert_s_to_sigma(text)
	local following_pattern = "^" .. one_UTF8_char_or_none
	
	local function sigma_for(next_pos)
		-- next_pos is the position right after the matched "s"; the anchored
		-- match yields the following character, or "" at the end of the text.
		local following = string.match(text, following_pattern, next_pos)
		if following == "" then
			return "ς"
		end
		if following ~= "*" and following ~= "-"
			and mw.ustring.find(following, "[%s%p]") then
			return "ς"
		end
		return "σ"
	end
	
	-- Extra parentheses drop the substitution count returned by gsub.
	return (string.gsub(text, "s()", sigma_for))
end

-- Replace a combining rough or smooth breathing that has no character in
-- front of it within the match (for example at the very start of the text)
-- with its spacing counterpart. A breathing preceded by a character is kept
-- as it is. Rough breathings are handled first, then smooth breathings.
local function combining_to_spacing(text)
	local function replace_leading(combining, spacing)
		local pattern = "(" .. one_UTF8_char_or_none .. ")" .. combining
		text = string.gsub(text, pattern, function (preceding)
			if preceding ~= "" then
				return preceding .. combining
			end
			return spacing
		end)
	end
	
	replace_leading(rough, spacing_rough)
	replace_leading(smooth, spacing_smooth)
	
	return text
end

--[=[	Convert text typed with this module's shortcuts into Greek.
		Steps, in this order:
			1. "s" is turned into medial or final sigma;
			2. the multi-character shortcuts in `multiple` are replaced
			   (each key is used as a Lua pattern);
			3. every remaining character with an entry in `single` is replaced;
			4. breathings with no preceding character become spacing breathings;
			5. the text is decomposed and its diacritics are put in order.
		
		Raises an error if text is not a string.
		Returns the converted string.
]=]
function p.to_Greek(text)
	if type(text) ~= "string" then
		-- The message names the function as it is actually exported.
		error("first argument to to_Greek should be string, not " .. type(text))
	end
	
	text = convert_s_to_sigma(text)
	for k, v in pairs(multiple) do
		text = string.gsub(text, k, v)
	end
	text = string.gsub(text, UTF8_char, single)
	text = combining_to_spacing(text)
	return p.reorder_diacritics(text)
end

-- Template entry point. Reads parameter 1 from the parent frame, trims it,
-- and converts it with p.to_Greek. When the parameter is missing or empty,
-- the sample text "le/cis" is converted if the current page's nsText is
-- "Template"; on any other page an error is raised.
function p.to_Greek_t(frame)
	local input
	for key, value in pairs(frame:getParent().args) do
		if key == 1 then
			local trimmed = mw.text.trim(value)
			if trimmed ~= "" then
				input = trimmed
			end
		end
	end
	
	if input then
		return p.to_Greek(input)
	end
	
	if mw.title.getCurrentTitle().nsText ~= "Template" then
		error("Parameter 1 is required.")
	end
	return p.to_Greek("le/cis")
end

-- Turn a character into a hexadecimal HTML character reference. Characters
-- that are keys of diacritic_position are prefixed with a dotted circle.
-- The empty string is returned unchanged.
local function process(char)
	if char == "" then
		return char
	end
	local entity = string.format("&#x%X;", mw.ustring.codepoint(char))
	local prefix = diacritic_position[char] and "◌" or ""
	return prefix .. entity
end

-- Build wikitext for a table listing every entry of `single`: the shortcut
-- in <code> tags next to its replacement rendered by process(). Two entries
-- are placed in each table row. The frame argument is not used.
function p.show_shortcuts(frame)
	-- Sort order for the shortcuts: single ASCII letters first, then
	-- everything else; case-insensitive, with a capital before its lowercase.
	local function comp(item1, item2)
		local letter1, letter2 = item1:find("^%a$"), item2:find("^%a$")
		if letter1 ~= letter2 then
			-- non-letters after letters
			return letter1
		end
		
		local folded1, folded2 = item1:lower(), item2:lower()
		if folded1 ~= folded2 then
			-- case-insensitive sorting
			return folded1 < folded2
		end
		-- capitals before lowercase
		return item1 < item2
	end
	
	local output = { '{| class="wikitable"' }
	local entries = 0
	for shortcut, replacement in require("Module:TableTools").sortedPairs(single, comp) do
		output[#output + 1] = '| <code>' .. shortcut .. '</code> || <span lang="grc">' .. process(replacement) .. '</span>'
		entries = entries + 1
		if entries % 2 == 0 then -- close the row after every second entry
			output[#output + 1] = '|-'
		end
	end
	output[#output + 1] = '|}'
	
	return table.concat(output, '\n')
end

-- Export the module table with the public functions defined above.
return p