Module:Clade/converter
Documentation for this module may be created at Module:Clade/converter/doc
--require('Module:No globals') -- comment out until clade also uses noglobals
local p = {}

-- Temporary stand-in for commas that sit inside nested parentheses while the
-- current level is split on its own commas. A control character is used so
-- that no realistic taxon name can collide with it.
local COMMA_PLACEHOLDER = "\1"

--[[ Return the argument table of the parent frame (the template or page that
     issued the #invoke), or an empty table when no parent frame is available,
     so that callers can index the result unconditionally.
]]
local function getParentArgs()
	local frame = mw.getCurrentFrame()
	local parent = frame and frame:getParent()
	return parent and parent.args or {}
end

-- Parent-frame arguments captured once when the module is loaded.
local pargs = getParentArgs()

--[[ =================== parser for conversion to clade structure =============================

     Function p.newickConverter() converts Newick strings to clade format
     Usage: {{#invoke:Module:Sandbox/Jts1882/CladeN|newickConverter|newickstring={{{NEWICK_STRING}}} }}

     Function p.listConverter() converts wikitext-like lists to clade format
         use @ instead of * in wikitext to avoid processing
     Usage: {{#invoke:Module:Clade/converter|listConverter|list={{{LIST_STRING}}} }}

     p.cladeConverter(frame) dispatches on the arguments that are present:
       * 'newickstring' (invoke args) or 'newick'/'newickstring' (parent args)
         selects the Newick converter;
       * otherwise 'list' (invoke or parent args) selects the list converter;
       * with neither present nothing is returned.
]]
function p.cladeConverter(frame)
	if frame.args['newickstring'] or pargs['newick'] or pargs['newickstring'] then
		return p.newickConverter(frame)
	elseif frame.args['list'] or pargs['list'] then
		return p.listConverter(frame)
	end
end

--[[ =================== Newick to clade parser function =============================

     Convert a Newick string to clade template format.
     Usage: {{#invoke:Module:Sandbox/Jts1882/CladeN|newickConverter|newickstring={{{NEWICK_STRING}}} }}

     The Newick string is read from 'newickstring' (invoke args) or from
     'newick'/'newickstring' (parent args) and is first passed through
     processNewickString() of Module:Clade.
     Returns the bare clade wikitext when the parent argument option=tree is
     set; otherwise returns the processed Newick string and the generated
     clade wikitext, each wrapped in <pre> for display.
]]
function p.newickConverter(frame)
	local newickString = frame.args['newickstring'] or pargs['newick'] or pargs['newickstring']
	--if newickString == '{{{newickstring}}}' then return newickString end
	-- NOTE(review): behaviour for a missing (nil) string depends on Module:Clade; confirm there.
	newickString = require('Module:Clade').processNewickString(newickString, "") -- "childNumber")

	local levelNumber = 1 -- for depth of iteration
	local childNumber = 1 -- number of sister elements on node (always one for root)

	-- convert the Newick string to the clade structure
	local cladeString = '{{clade'
		.. p.newickParseLevel(newickString, levelNumber, childNumber)
		.. '\r}}'

	local option = getParentArgs()['option'] or ''
	if option == 'tree' then
		-- show the transcluded clade diagram
		return cladeString
	end

	-- show the Newick string followed by the converted clade structure
	--resultString = frame:expandTemplate{ title = 'clade', frame:preprocess(cladeString) }
	return '<div>Modified Newick string:' .. '<pre>' .. newickString .. '</pre>'
		.. 'Output of clade template structure:' .. '<pre>' .. cladeString .. '</pre></div>'
end

--[[ Parse one level of a Newick string.

     This function receives a Newick string, which has two components:
       1. the right-hand term is a clade label:   |labelN=labelname
       2. the left-hand term in parentheses holds comma-delimited child nodes,
          each of which can be
            i.  a taxon name, which just needs:   |N=leafname
            ii. a Newick string, which is processed by recursion

     Parameters:
       newickString  the (sub)string to parse
       levelNumber   nesting depth, used only for indentation
       childNumber   position of this node among its siblings
     Returns the clade wikitext for this node. A string that contains no
     parenthesised term is emitted as a single leaf |childNumber=string.
]]
function p.newickParseLevel(newickString, levelNumber, childNumber)
	local indent = p.getIndent(levelNumber)

	-- find location of outer parenthesised term
	local openPos, closePos = string.find(newickString, '%(.*%)')
	if not openPos then
		-- nothing to descend into: treat the whole string as a leaf rather
		-- than failing on arithmetic with a nil position
		return indent .. '|' .. childNumber .. '=' .. newickString
	end

	local innerTerm = string.sub(newickString, openPos + 1, closePos - 1) -- content in parentheses
	local outerTerm = (string.gsub(newickString, "%b()", ""))             -- label: parenthetic term deleted

	local pieces = {
		indent, '|label', childNumber, '=', outerTerm,
		indent, '|', childNumber, '=', '{{clade',
	}

	levelNumber = levelNumber + 1
	indent = p.getIndent(levelNumber)

	-- protect commas in inner parentheses from the split by temporarily
	-- replacing them; spaces after those commas are stripped here as well
	local protected = string.gsub(innerTerm, "%b()", function (group)
		return (string.gsub(group, ",%s*", COMMA_PLACEHOLDER))
	end)

	for i, part in ipairs(mw.text.split(protected, ",")) do
		local restored = (string.gsub(part, COMMA_PLACEHOLDER, ",")) -- convert back to commas
		if string.find(restored, '%(.*%)') then
			-- child is itself a subtree
			pieces[#pieces + 1] = p.newickParseLevel(restored, levelNumber, i)
		else
			-- child is a simple leaf
			pieces[#pieces + 1] = indent .. '|' .. i .. '=' .. restored
		end
	end

	pieces[#pieces + 1] = indent .. '}}'
	return table.concat(pieces)
end

--[[ Build the line prefix used before each emitted clade parameter.

     The prefix is a carriage return, followed by one space per unit of the
     optional 'indent' argument (parent args first, then the current frame's
     args), followed by one space per nesting level above the first.
     A missing, empty or non-numeric 'indent' counts as 0.
]]
function p.getIndent(levelNumber)
	local frame = mw.getCurrentFrame()
	local rawExtra = pargs['indent'] or (frame and frame.args['indent'])
	local extraIndent = tonumber(rawExtra) or 0

	-- math.ceil reproduces the count of a "while n > 0 do ... n = n - 1" loop
	-- for fractional values; math.max keeps negative values at zero
	local extraSpaces = math.max(0, math.ceil(extraIndent)) -- extra indent to make aligning compound trees easier
	local levelSpaces = math.max(0, math.ceil(levelNumber - 1))

	return "\r" .. string.rep(" ", extraSpaces) .. string.rep(" ", levelSpaces)
end

--[[ =================== experimental list to clade parser function =============================

     Convert wikitext-like lists to clade format
       - use @ instead of * in wikitext to avoid processing
     Usage: {{#invoke:Module:Clade/converter|listConverter|list={{{LIST_STRING}}} }}

     The list is read from 'list' in the invoke args or, failing that, in the
     parent args. Returns the bare clade wikitext when the parent argument
     option=tree is set; otherwise the wikitext wrapped in <pre>.
]]
function p.listConverter(frame)
	local listString = frame.args['list'] or getParentArgs()['list']

	local levelNumber = 1 -- for depth of iteration
	local childNumber = 1 -- number of sister elements on node (always one for root)
	local indent = p.getIndent(levelNumber)

	-- convert the list to the clade structure; the closing braces of this
	-- outer template are emitted by listParseLevel itself
	local cladeString = indent .. '{{clade' .. p.listParseLevel(listString, levelNumber, childNumber)

	local option = getParentArgs()['option'] or ''
	if option == 'tree' then
		-- show the transcluded clade diagram
		return cladeString
	end

	-- show the converted clade structure
	--resultString = frame:expandTemplate{ title = 'clade', frame:preprocess(cladeString) }
	return '<pre>' .. cladeString .. '</pre>'
end

--[[ Parse one level of an @-marked list.

     Each line loses one leading @. A line that is followed by lines still
     starting with @ is the label of a subtree: those following lines are
     rejoined with newlines and parsed recursively as its children. Any other
     line is a simple leaf. The closing braces of the enclosing clade template
     are appended at the end.

     Parameters:
       listString   newline-separated list text for this level
       levelNumber  nesting depth, used only for indentation
       childNumber  accepted for symmetry with newickParseLevel; not used
     Returns the clade wikitext for the children at this level plus the
     closing braces.
]]
function p.listParseLevel(listString, levelNumber, childNumber)
	local indent = p.getIndent(levelNumber)
	levelNumber = levelNumber + 1

	local list = mw.text.split(listString, "\n")
	for idx = 1, #list do
		list[idx] = (string.gsub(list[idx], "^@", "")) -- strip the first @
	end

	local pieces = {}
	local i = 1
	local child = 1
	while list[i] do
		if list[i + 1] and string.match(list[i + 1], "^@") then
			-- the next line begins with @: this line labels a subtree whose
			-- lines are recombined and passed on for recursive parsing
			local label = list[i]
			i = i + 1
			local subtree = { list[i] }
			while list[i + 1] and string.match(list[i + 1], "^@") do
				i = i + 1
				subtree[#subtree + 1] = list[i]
			end
			pieces[#pieces + 1] = indent .. '|label' .. child .. '=' .. label
			pieces[#pieces + 1] = indent .. '|' .. child .. '=' .. '{{clade'
				.. p.listParseLevel(table.concat(subtree, "\n"), levelNumber, child)
		else
			-- a simple leaf
			pieces[#pieces + 1] = indent .. '|' .. child .. '=' .. list[i]
		end
		i = i + 1
		child = child + 1
	end

	pieces[#pieces + 1] = indent .. '}}'
	return table.concat(pieces)
end

return p