Module:Signpost/index

< Module:Signpost
Revision as of 06:02, 16 July 2021 by Zoran (talk | contribs) (Pywikibot 6.4.0)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

The tags are generated partially by data-mining the Signpost series templates, and partially by text-matching the subpage name and the long title (converted to lower case) against the following Python dictionary:

<source lang="python">

#!/usr/bin/python
# -*- coding: utf-8 -*-

keywordTags = {

   r"\babuse filter": [u"abusefilter"],
   r"\baccessibility": [u"accessibility"],
   r"\badmin": [u"administrators"],
   r"\badmins": [u"administrators"],
   r"\badministrator": [u"administrators"],
   r"\badminship": [u"requestsforadminship"],
   r"\badrianne wadewitz": [u"adriannewadewitz"],
   r"\bafc": [u"articlesforcreation"],
   r"\bafd": [u"articlesfordeletion", u"deletion"],
   r"\baffiliates": [u"wikimediamovementaffiliates"],
   r"\bannual plan": [u"annualplan"],
   r"\bapril fool'?s": [u"aprilfools"],
   r"\barbcom election": [u"arbitrationcommitteeelections"],
   r"\barbcom": [u"arbitrationcommittee"],
   r"\barbitration report": [u"arbitrationreport"], # standard section name
   r"\barticle alerts": [u"articlealerts"],
   r"\barticle assessment": [u"articleassessment"],
   r"\baudit": [u"finance", u"audits"],
   r"\bautoconfirmed": [u"autoconfirmation"],
   r"\baward": [u"awards"],
   r"\bbacklog": [u"backlogs"],
   r"\bblocked": [u"blocking"],
   r"\bbanned": [u"banning"],
   r"\bbias": [u"systematicbias", u"bias"],
   r"\bbiograph": [u"biographies"],
   r"\bblp": [u"biographiesoflivingpeople"],
   r"\bboard election": [u"boardelections"],
   r"\bboard": [u"wikimediafoundationboard"],
   r"\bbook review": [u"bookreview"], # Irregular section.
   r"\bbureaucrats?\b": [u"bureaucrats"],
   r"\bcategor": [u"categorization"],
   r"\bchapter": [u"chapters"],
   r"\bcheckuser": [u"checkuser"],
   r"\bcitation": [u"citations"],
   r"\bcitizendium": [u"citizendium"],
   r"\bcoi\b": [u"coi"],
   r"\bcommons": [u"commons"],
   r"\bcompromised": [u"compromisedaccounts"],
   r"\bconference": [u"conferences"],
   r"\bconflict of interest": [u"coi"],
   r"\bcontest": [u"contests", u"contest"],
   r"\bcopyright": [u"copyright"],
   r"\bdeath": [u"deaths"],
   r"\bdelet": [u"deletion"],
   r"\bdesysop": [u"desysopping"],
   r"\bdiscussion report": [u"discussionreport"],
   r"\bdonat": [u"donations"],
   r"\be-?mail": [u"email"],
   r"\bedit( |-)counter": [u"editcounter", u"editcountitis"],
   r"\bedit( |-)war": [u"editwars"],
   r"\beducation": [u"educationprogram", u"education"],
   r"\belection": [u"elections"],
   r"\bemployee": [u"staff", u"employees"],
   r"\besperanza": [u"esperanza"],
   r"\bessay": [u"essays"],
   r"\bessjay": [u"essjay"],
   r"\bevent": [u"events"],
   r"\bfair use": [u"fairuse"],
   r"\bfdc": [u"fundsdisseminationcommittee"],
   r"\bfinanc": [u"finance"],
   r"\bflagged revisions": [u"flaggedrevisions"],
   r"\bflow\b": [u"flow"],
   r"\bforum": [u"forum"],
   r"\bfoundation": [u"wikimediafoundation"],
   r"\bfreedom of pan(?:ora|aro)ma": [u"freedomofpanaroma"],
   r"\bfromtheeditor": [u"fromtheeditors"],
   r"\bfromtheeditors": [u"fromtheeditors"],
   r"\bfrom the editor": [u"fromtheeditors"],
   r"\bfrom the editors": [u"fromtheeditors"],
   r"\bfrom-the-editor": [u"fromtheeditors"],
   r"\bfrom-the-editors": [u"fromtheeditors"],
   r"\bfundrais": [u"fundraising"],
   r"\bgamergate": [u"gamergate"],
   r"\bgardner": [u"suegardner"],
   r"\bgender gap": [u"gendergap"],
   r"\bgeneral council": [u"generalcouncil"],
   r"\bgeodata": [u"geodata"],
   r"\bgerman wikipedia": [u"germanwikipedia"],
   r"\bglam\b": [u"glam"],
   r"\bglobal south": [u"globalsouth"],
   r"\bgoogle": [u"google"],
   r"\bgrantmaking": [u"grantmaking"],
   r"\bgrowth": [u"growth"],
   r"\bguild of copy editors": [u"guildofcopyeditors"],
   r"\bhhvm": [u"hhvm"],
   r"\bhoax": [u"hoaxes"],
   r"\bhuggle": [u"huggle"],
   r"\bhumor": [u"humour"],
   r"\bhumour": [u"humour"],
   r"\bieg\b": [u"individualengagementgrants"],
   r"\bimage filter": [u"imagefilter"],
   r"\bimage": [u"images"],
   r"\bincubator": [u"incubator"],
   r"\binfobox": [u"infoboxes"],
   r"\binfocus": [u"infocus"],
   r"\bin focus": [u"infocus"],
   r"\binterview": [u"interviews"],
   r"\bipv6": [u"ipv6"],
   r"\bwales": [u"jimmywales"],
   r"\bjournal": [u"journals"],
   r"\blawsuit": [u"legal"],
   r"\blgbt": [u"lgbt"],
   r"\bpride": [u"lgbt"],
   r"\blibel": [u"legal"],
   r"\blicens": [u"licensing"],
   r"\blink rot": [u"linkrot"],
   r"\bliquid threads": [u"liquidthreads"],
   r"\blog( |-)?in": [u"login"],
   r"\blua\b": [u"lua"],
   r"\bmailing list": [u"mailinglists"],
   r"\bmain page redesign": [u"mainpageredesign"],
   r"\bmain page": [u"mainpage"],
   r"\bmanning": [u"manning"],
   r"\bmanual of style": [u"manualofstyle"],
   r"\bmedia viewer": [u"mediaviewer"],
   r"\bmediation": [u"mediation", u"disputeresolution"],
   r"\bmediawiki": [u"mediawiki"],
   r"\bmedic": [u"medical"],
   r"\bmeta\b": [u"metawiki"],
   r"\bmusic": [u"music"],
   r"\bnew page patrol": [u"newpagepatrol"],
   r"\bnews and notes": [u"newsandnotes"],
   r"\bnofollow": [u"nofollow"],
   r"\bnoticeboard": [u"noticeboards"],
   r"\boauth": [u"oauth"],
   r"\bobama": [u"obama"],
   r"\boffice action": [u"officeaction"],
   r"\boped": [u"opinion", u"oped"],
   r"\bop ed": [u"opinion", u"oped"],
   r"\bop-ed": [u"opinion", u"oped"],
   r"\bopinion essay": [u"opinion", u"oped"],
   r"\borphan": [u"orphans"],
   r"\boutage": [u"servers", u"tech"],
   r"\boversighter": [u"oversight"],
   r"\bpage creation": [u"pagecreation"],
   r"\bpaid advocacy": [u"paidadvocacy", u"paidediting", u"coi"],
   r"\bpaid editing": [u"paidediting", u"coi"],
   r"\bparsoid": [u"parsoid", u"parser"],
   r"\bparser function": [u"parserfunctions"],
   r"\bpediapress": [u"pediapress"],
   r"\bpending changes": [u"pendingchanges"],
   r"\bpicture of the year": [u"pictureoftheyear"],
   r"\bplagiari": [u"plagiarism"],
   r"\bpodcast": [u"podcasts"],
   r"\bpolicy": [u"policy"],
   r"\bpolish wikipedia": [u"polishwikipedia"],
   r"\bpolitician": [u"politicians"],
   r"\bpoll": [u"polls"],
   r"\bporn": [u"porn"],
   r"\bportal": [u"portals"],
   r"\bportals": [u"portals"],
   r"\bprivacy": [u"privacy"],
   r"\bprotection": [u"protection"],
   r"\bpublic policy": [u"publicpolicypilot"],
   r"\bqrpedia": [u"qrpedia"],
   r"\brecent research": [u"recentresearch", u"research"],
   r"\breference desk": [u"referencedesk"],
   r"\bresearch": [u"research"],
   r"\bresidence": [u"wikimediansinresidence"],
   r"\bresign": [u"resignations"],
   r"\bresysop": [u"admins"],
   r"\bsanger": [u"larrysanger"],
   r"\bschool": [u"schools"],
   r"\bscientology": [u"scientology"],
   r"\bsearch": [u"search"],
   r"\bsecurity": [u"security"],
   r"\bseigenthaler": [u"seigenthaler"],
   r"\bserver": [u"servers"],
   r"\bsignpost": [u"signpost"],
   r"\bsimilpedia": [u"similpedia"],
   r"\benglish wikipedia": [u"englishwikipedia"],
   r"\bsimple english wikipedia": [u"simpleenglishwikipedia"],
   r"\bsister projects": [u"sisterprojects"],
   r"\bsnowden": [u"edwardsnowden"],
   r"\bsockpuppet": [u"sockpuppetry"],
   r"\bsopa\b": [u"sopa"],
   r"\bspam\b": [u"spam"],
   r"\bspecial report": [u"specialreport"],
   r"\bspecial story": [u"specialstory"],
   r"\bspoiler": [u"spoilers"],
   r"\bspoken": [u"spokenwikipedia"],
   r"\bstaff": [u"staff"],
   r"\bstatistics": [u"statistics"],
   r"\bstats\b": [u"statistics"],
   r"\bsteve jobs": [u"stevejobs"],
   r"\bsteward": [u"stewards"],
   r"\bstrategic plan": [u"strategicplan"],
   r"\bstub": [u"stubs"],
   r"\bsummer of research": [u"summerofresearch"],
   r"\bsystemic bias": [u"systematicbias"],
   r"\bteahouse": [u"teahouse"],
   r"\btechnology report": [u"techreport", u"tech"],
   r"\btemplate": [u"templates"],
   r"\bterms of use": [u"termsofuse", u"tou"],
   r"\bthree-revert rule": [u"threerevertrule", u"3rr"],
   r"\btools": [u"toolserver"],
   r"\btrademark": [u"trademarks"],
   r"\btraffic report": [u"trafficreport", u"traffic", u"statistics"],
   r"\btraffic statistics": [u"traffic", u"statistics"],
   r"\btraffic": [u"traffic"],
   r"\btransparency report": [u"transparency", u"transparencyreport"],
   r"\btron": [u"tron"],
   r"\btropenmuseum": [u"tropenmuseum"],
   r"\btrustee": [u"board"],
   r"\btutorial": [u"tutorial"],
   r"\btypography": [u"typography"],
   r"\bupload wizard": [u"uploadwizard"],
   r"\busability": [u"usability"],
   r"\buserbox": [u"userboxes"],
   r"\bvandal": [u"vandalism"],
   r"\bvfd": [u"votesfordeletion"],
   r"\bvideo": [u"video"],
   r"\bvisual ?editor": [u"visualeditor"],
   r"\bweasel words": [u"weaselwords"],
   r"\bwebcite": [u"webcite"],
   r"\bwomen": [u"women"],
   r"\bwiki loves monuments": [u"wikilovesmonuments"],
   r"\bwiki-pr": [u"wikipr"],
   r"\bwikia\b": [u"wikia"],
   r"\bwikicup": [u"wikicup", u"contests"],
   r"\bwikidashboard": [u"wikidashboard"],
   r"\bwikidata": [u"wikidata"],
   r"\bwikijunior": [u"wikijunior"],
   r"\bwikileaks": [u"wikileaks"],
   r"\bwikilove": [u"wikilove"],
   r"\bwikimania": [u"wikimania", u"events"],
   r"\bwikimedia conference": [u"wikimediaconference"],
   r"\bwikimedia germany": [u"wikimediagermany"],
   r"\bwikimedia israel": [u"wikimediaisrael"],
   r"\bwikimedia labs": [u"wikimedialabs"],
   r"\bwikimedia russia": [u"wikimediarussia"],
   r"\bwikimedia uk": [u"wikimediauk"],
   r"\bwikinews": [u"wikinews"],
   r"\bwikipedia library": [u"wikipedialibrary"],
   r"\bwikipedia loves art": [u"wikipedialovesart"],
   r"\bwikipedia offline": [u"wikipediaoffline"],
   r"\bwikipedia zero": [u"wikipediazero"],
   r"\bwikipedian in residence": [u"wikimediansinresidence"],
   r"\bwikiproject report": [u"wikiprojectreport", u"wikiprojects"],
   r"\bwikiproject": [u"wikiprojects"],
   r"\bwikiscanner": [u"wikiscanner"],
   r"\bwikisource": [u"wikisource"],
   r"\bwikispecies": [u"wikispecies"],
   r"\bwikibooks": [u"wikibooks"],
   r"\bwikisym": [u"wikisym"],
   r"\bwikiversity": [u"wikiversity"],
   r"\bwikivoyage": [u"wikivoyage"],
   r"\bwiktionary": [u"wikitionary"],
   r"\bwikiquote": [u"wikiquote"],
   r"\bwikiworld": [u"wikiworld"],
   r"\byahoo": [u"yahoo"],
   r"\bswedish wikipedia": [u"swedishwikipedia"],
   r"\bdutch wikipedia": [u"dutchwikipedia"],
   r"\bfrench wikipedia": [u"frenchwikipedia"],
   r"\bwaray-waray wikipedia": [u"waraywaraywikipedia"],
   r"\brussian wikipedia": [u"russianwikipedia"],
   r"\bcebuano wikipedia": [u"cebuanowikipedia"],
   r"\bitalian wikipedia": [u"italianwikipedia"],
   r"\bspanish wikipedia": [u"spanishwikipedia"],
   r"\bvietnamese wikipedia": [u"vietnamesewikipedia"],
   r"\bjapanese wikipedia": [u"japanesewikipedia"],
   r"\bportugu?ese wikipedia": [u"portugesewikipedia"],
   r"\bchinese wikipedia": [u"chinesewikipedia"],
   r"\bukrainian wikipedia": [u"ukrainianwikipedia"],
   r"\bcatalan wikipedia": [u"catalanwikipedia"],
   r"\bpersian wikipedia": [u"persianwikipedia"],
   r"\bnorwegian wikipedia": [u"norwegianwikipedia"],
   r"\bserbo-croatian wikipedia": [u"serbocroatianwikipedia"],
   r"\bfinnish wikipedia": [u"finnishwikipedia"],
   r"\bindonesian wikipedia": [u"indonesianwikipedia"],
   r"\barabic wikipedia": [u"arabicwikipedia"],
   r"\bczech wikipedia": [u"czechwikipedia"],
   r"\bserbian wikipedia": [u"serbianwikipedia"],
   r"\bkorean wikipedia": [u"koreanwikipedia"],
   r"\bmobile": [u"mobile"],
   r"\btwinkle": [u"twinkle"],
   r"\bsul\b": [u"singleuserlogin"],
   r"\bmagic word": [u"magicwords"],
   r"\bscribunto": [u"scribunto"],
   r"\bfoundation wiki": [u"foundationwiki"],
   r"\bnupedia": [u"nupedia"],

} </source>



-- This module processes data from [[Module:Signpost/index]], to be loaded from
-- [[Module:Signpost]] with mw.loadData.

-- Format string for an article's page name; makePageName fills the two %s
-- slots with the article's date and subpage, in that order.
local PAGE_FORMAT = 'Wikipedia:Wikipedia Signpost/%s/%s'
-- First year for which getYearIndexes tries to load a yearly index module.
local INDEX_START_YEAR = 2005
-- Prefix of the yearly index module names; the year number is appended to it.
local INDEX_BASE = 'Module:Signpost/index/'
-- Module required by makeAliases to obtain the alias lists.
local ALIASES_MODULE = 'Module:Signpost/aliases'
-- Local shortcuts for the standard library functions used below.
local insert = table.insert
local format = string.format

-- Builds the full page name of an article by substituting the given date and
-- subpage into PAGE_FORMAT.
-- @param date value placed in the first %s slot of PAGE_FORMAT
-- @param subpage value placed in the second %s slot of PAGE_FORMAT
-- @return the formatted page name
local function makePageName(date, subpage)
	local pageName = format(PAGE_FORMAT, date, subpage)
	return pageName
end

-- Appends tIn to the list stored at tOut[key], creating that list first if
-- the slot is still empty.
-- @param tIn value to append
-- @param tOut table of lists, indexed by key
-- @param key index of the list inside tOut that receives tIn
local function addSubtable(tIn, tOut, key)
	local bucket = tOut[key]
	if not bucket then
		-- First value seen for this key: start a new list.
		bucket = {}
	end
	tOut[key] = bucket
	insert(bucket, tIn)
end

-- Tries to require the given module inside a protected call.
-- @param page module name passed to require
-- @return the value returned by require on success; nothing at all when the
-- call raised an error (the error itself is discarded)
local function maybeRequire(page)
	local ok, result = pcall(require, page)
	if not ok then
		return
	end
	return result
end

-- Loads the yearly index modules, from INDEX_START_YEAR up to and including
-- the year after the current one, in ascending year order.
-- @return a list of the values returned by the index modules that loaded
local function getYearIndexes()
	local indexes = {}
	local lastYear = os.date('*t').year + 1
	for year = INDEX_START_YEAR, lastYear do
		local yearIndex = maybeRequire(INDEX_BASE .. tostring(year))
		-- When the module could not be loaded yearIndex is nil, and appending
		-- nil leaves the list unchanged, so that year is simply skipped.
		insert(indexes, yearIndex)
	end
	return indexes
end

-- Builds a reverse lookup from the alias data module: every alias listed
-- under a key maps back to that key.
-- @return table indexed by alias, holding the key the alias was listed under
local function makeAliases()
	local lookup = {}
	for canonical, names in pairs(require(ALIASES_MODULE)) do
		for _, name in ipairs(names) do
			lookup[name] = canonical
		end
	end
	return lookup
end

-- Walks every entry of every yearly index and builds the lookup tables
-- exported by this module.
-- Each entry is modified in place: it receives a running sortKey and the
-- page name produced by makePageName.
-- @return a table with the fields:
--   list    - all entries in the order they were visited
--   dates   - lists of entries, indexed by the entry's date field
--   tags    - lists of entries, indexed by each value in the entry's tags
--   pages   - single entries, indexed by page name
--   aliases - the table produced by makeAliases
local function main()
	local data = {
		list = {},
		dates = {},
		tags = {},
		pages = {}
	}
	local count = 0
	for _, yearIndex in ipairs(getYearIndexes()) do
		for _, entry in ipairs(yearIndex) do
			count = count + 1
			entry.sortKey = count
			entry.page = makePageName(entry.date, entry.subpage)
			data.list[count] = entry
			addSubtable(entry, data.dates, entry.date)
			for _, tag in ipairs(entry.tags) do
				addSubtable(entry, data.tags, tag)
			end
			data.pages[entry.page] = entry
		end
	end
	-- The aliases are gathered only after all index entries were processed.
	data.aliases = makeAliases()
	return data
end

-- The module's value is the finished data table itself.
return main()