Module:Naval Vessel Register URL/data extraction tool

< Module:Naval Vessel Register URL
Revision as of 05:04, 16 July 2021 by Zoran (talk | contribs) (Pywikibot 6.4.0)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

This tool reads a local copy of an NVR web page to extract information required by Module:Naval Vessel Register URL.

To use this tool:

  1. open a blank sandbox page for editing – any page will do, and you do not need to save it unless you want to
  2. copy and paste this line into the sandbox:
    {{#invoke:Naval Vessel Register URL/data extraction tool|nvr_extract}}
  3. in another browser window, open the NVR hull classification page for ships or service craft
  4. choose a hull classification symbol
  5. right-click and choose 'View page source'
  6. highlight and copy the entire html source to the clipboard and then paste the source into the sandbox below the line added at step 2
  7. click Show preview to run the tool

local p = {};																	-- module table returned to Scribunto; local so that no global is created

--[[-------------------------< N A M E _ C A S E >--------------------------------------------------------------

Convert an uppercase name to mixed case: HARRY L GLUCKSMAN becomes Harry L Glucksman.

The name is trimmed and split at single space characters; every piece is lowercased and then given an uppercase
first character.  The rejoined name is split again at apostrophes and the first character of every piece is
uppercased (O'BRIEN becomes O'Brien).  The pieces are finally rejoined with a backslash-escaped apostrophe
between them, because nvr_extract() places the result between single quotes in the text it generates.

A nil name is treated as the empty string.

]]

local function name_case (name)
	local lang = mw.getContentLanguage();										-- content-language object; supplies ucfirst()
	local parts = mw.text.split (mw.text.trim (name or ''), ' ');				-- trimmed name broken into space-separated pieces

	for idx = 1, #parts do
		parts[idx] = lang:ucfirst (string.lower (parts[idx]));					-- all lowercase, then capitalize the first character
	end

	parts = mw.text.split (table.concat (parts, ' '), '\'');					-- reassemble, then break at apostrophes if there are any
	for idx = 1, #parts do
		parts[idx] = lang:ucfirst (parts[idx]);									-- capitalize what follows each apostrophe
	end

	return table.concat (parts, '\\\'');										-- reassemble with escaped apostrophes and done
end


--[[--------------------------< N V R _ E X T R A C T >--------------------------------------------------------

Entry point: {{#invoke:Naval Vessel Register URL/data extraction tool|nvr_extract}}

Reads the unparsed content of the current page and searches it for NVR ship-detail links together with their
hull letters, hull number, and ship name.  Each match becomes one Lua table entry of the form:
	['<letters>-<number>'] = {'<id>', '<Name>'}
The entries are returned inside a <pre> block under a group key taken from the hull letters of the last match,
with any 'T-' prefix removed.

frame is supplied by Scribunto and is not used.

Returns an error message, rather than raising a script error, when the page content cannot be read or when it
holds no recognizable NVR entries.

]]

function p.nvr_extract (frame)
	local page = mw.title.getCurrentTitle();									-- get a page object for this page
	local content = page:getContent();											-- get unparsed content; may be nil when there is nothing to read
	if not content then
		return '<span class="error">nvr_extract: unable to read the content of this page</span>';
	end

	local out_table = {};														-- output goes here
	local group;																-- identify this collection of hull numbers; stays nil when nothing matches
																				-- search for nvr links and associated hull numbers

	for id, ltr, num, name in mw.ustring.gmatch (content, 'SHIPSDETAIL_([^%.]+)%.HTML\">([%a%- %(%)]+)%s+(%d+).-_NameLink_%d\">([^<]+)') do
		table.insert (out_table, "[\'" .. ltr .. "-" .. num .."\'] = {\'" .. id .. "\', \'" .. name_case (name) .. "\'}");	-- make table entries in wp hull number format
		group = ltr:gsub ('T%-(.+)', '%1');										-- because ltr is local to the for loop; also strip off the 'T-' prefix
	end

	if not group then															-- no matches: concatenating a nil group below would raise a script error
		return '<span class="error">nvr_extract: no NVR hull number entries found in the content of this page</span>';
	end
																				-- make pretty output
	return "<br /><pre>&#9;[\'" .. group .. "\'] = <br />&#9;&#9;{<br />&#9;&#9;" .. table.concat (out_table, ',<br />&#9;&#9;') .. "<br />&#9;&#9;},<br /></pre>";
end

return p;																		-- export the module table so that nvr_extract can be reached through #invoke