Module:Sandbox/AbstractWikipedia/Wikidata
Appearance
This is the Wikidata module of the Abstract Wikipedia template-renderer prototype.
It defines various helper functions which access Wikidata items and lexemes, and return certain properties of them in a convenient format (e.g. a boolean, a string or a table, depending on the required data).
local p = {}
local gf = require("Module:Sandbox/AbstractWikipedia/GrammaticalFeatures")
-- Helper function to safely explore a path in a table, returning nil upon
-- missing fields instead of raising an "attempt to index" error.
-- @param data  the value to start from (normally a table)
-- @param ...   the keys to follow, in order
-- @return the value found at the end of the path; nil as soon as a non-table
--         value is met while keys remain (the offending key is written to the
--         debug log through mw.log)
local function explorePath(data, ...)
    -- Capture the varargs explicitly: the implicit `arg` table is a deprecated
    -- Lua 5.0 compatibility feature and does not exist in Lua 5.2+.
    local path = { ... }
    for _, key in ipairs(path) do
        if type(data) ~= "table" then
            -- tostring keeps the log call from failing on non-string keys
            mw.log("Cannot access path element "..tostring(key))
            return nil
        end
        data = data[key]
    end
    return data
end
-- Returns the list of demonyms (together with their features) for a q_id in a
-- given language.
-- @param q_id  id of the item whose "P1549" best statements are read
-- @param lang  language code to filter on; defaults to the global `language`
-- @return a list of tables of the form
--         { label = <text>, features = { <id>, ... }, lexeme = <id or nil> }
--         where `features` holds the ids found in the "P518" qualifiers and
--         `lexeme` is only set when a "P7018" qualifier is present.
function p.getDemonyms (q_id, lang)
    lang = lang or language -- language should be a global variable
    local demonyms = mw.wikibase.getBestStatements( q_id, "P1549" )
    local result = {}
    for _, demonym_table in ipairs(demonyms) do
        local value = explorePath(demonym_table, "mainsnak", "datavalue", "value")
        if (value and explorePath(value, "language") == lang) then
            local label = explorePath(value, "text")
            if label then
                local demonym = { label = label, features = {} }
                -- Declared local so the value does not leak into the global
                -- environment (and does not trip strict-mode checks).
                local feature_tables = explorePath(demonym_table, "qualifiers", "P518")
                if feature_tables then
                    for _, feature_table in ipairs(feature_tables) do
                        local feature = explorePath(feature_table, "datavalue", "value", "id")
                        if feature then
                            table.insert(demonym.features, feature)
                        end
                    end
                end
                -- A demonym can be linked to a lexeme
                local sense_id = explorePath(demonym_table, "qualifiers", "P7018", 1, "datavalue", "value", "id")
                if sense_id then
                    -- Only the first value returned by splitLexemeId is kept
                    demonym.lexeme = mw.wikibase.lexeme.splitLexemeId(sense_id)
                end
                table.insert(result, demonym)
            end
        end
    end
    return result
end
-- Looks up the label of an Item in a given language.
-- When no language is passed, the globally-defined variable `language` is
-- used instead.
-- @param q_id  id of the item
-- @param lang  optional language code
-- @return the label, or the placeholder "<missing label>" (after logging the
--         miss) when the item has no label in that language
function p.getLabel (q_id, lang)
    local target_lang = lang or language -- language should be a global variable
    local found = mw.wikibase.getLabelByLang( q_id, target_lang )
    if found then
        return found
    end
    -- There could be some fallback logic here (i.e. use another language)
    mw.log("Missing label of "..q_id.." in language "..target_lang)
    return "<missing label>"
end
-- Return the value of the property associated with a certain q_id through a
-- p_id. Currently only the first "best" statement is considered.
-- @param q_id           id of the entity to read from
-- @param p_id           id of the property
-- @param expected_type  optional; when given and a datavalue is present, the
--                       datavalue's "type" field must equal it
-- @return the "value" field of the first best statement's datavalue, or nil
--         when it cannot be reached
-- Raises an error when expected_type is given and differs from the actual
-- datavalue type.
function p.getProperty (q_id, p_id, expected_type)
    local statements = mw.wikibase.getBestStatements( q_id, p_id )
    local datavalue = explorePath(statements, 1, "mainsnak", "datavalue")
    if (datavalue and expected_type) then
        local actual_type = explorePath(datavalue, "type")
        if expected_type ~= actual_type then
            -- tostring guards against a missing type: concatenating nil would
            -- replace this message with an unrelated concatenation error.
            error("Property "..p_id.." expected to yield type "..expected_type..". Got "..tostring(actual_type))
        end
    end
    return explorePath(datavalue, "value")
end
-- Resolves the item linked to q_id through property p_id and yields its id.
-- The property value is requested with the expected type
-- "wikibase-entityid"; nil is returned when no id can be reached.
function p.getItemId (q_id, p_id)
    local entity_value = p.getProperty(q_id, p_id, "wikibase-entityid")
    local item_id = explorePath(entity_value, "id")
    return item_id
end
-- For items with different male and female labels, returns both labels,
-- and possible lexemes associated with them. This relies on annotations
-- of the "male form of label" (P3321) "female form of label" (P2521) on items.
-- Additionally, if an item has a "literal translation" (P2441) property linked
-- to a lexeme, it will fetch that as an "unspecified" gender.
-- See discussion in https://phabricator.wikimedia.org/T320263.
-- @param q_id  id of the item
-- @param lang  language code to filter on; defaults to the global `language`
-- @return a table with the keys "male", "female" and "unspecified"; each entry
--         is { label = <text>, lexeme = <id or nil> } for the first statement
--         in the requested language, or an empty table when there is none
function p.getGenderedLabels (q_id, lang)
    lang = lang or language -- language should be a global variable
    local result = {}
    for gender, property in pairs({ male = "P3321", female = "P2521", unspecified = "P2441"}) do
        local statements = mw.wikibase.getBestStatements( q_id, property )
        -- We need to find the label in the right language
        for _, statement in ipairs(statements) do
            if (explorePath(statement, "mainsnak", "datavalue", "value", "language") == lang) then
                local lexeme_id
                -- Declared local so the value does not leak into the global
                -- environment (and does not trip strict-mode checks).
                local sense_id = explorePath(statement, "qualifiers", "P7018", 1, "datavalue", "value", "id")
                if sense_id then
                    -- Only the first value returned by splitLexemeId is kept
                    lexeme_id = mw.wikibase.lexeme.splitLexemeId(sense_id)
                end
                -- Distinct name: no longer shadows the statement being read
                local text = explorePath(statement, "mainsnak", "datavalue", "value", "text")
                result[gender] = { label = text, lexeme = lexeme_id}
                break
            end
        end
        if not result[gender] then
            result[gender] = {}
        end
    end
    return result
end
-- Tells whether an entity carries at least one best statement for the
-- date of death property ('P570').
-- @return true when such a statement exists, false otherwise
function p.isDead ( q_id )
    local death_statements = mw.wikibase.getBestStatements( q_id, 'P570' )
    if #death_statements > 0 then
        return true
    end
    return false
end
-- Check whether a q-id references a human being: asks Wikibase for an entity
-- referenced from q_id via 'P31' among the single candidate "Q5", and reports
-- whether exactly that id came back.
function p.isHuman ( q_id )
    local human_id = "Q5"
    local matched = mw.wikibase.getReferencedEntityId( q_id, 'P31', { human_id } )
    return matched == human_id
end
-- Lookup from the item id found in the "sex or gender" property (P21) to the
-- category reported by p.getHumanGender.
local human_gender_by_item = {
    Q6581097 = "masculine",
    Q2449503 = "masculine",
    Q44148 = "masculine",
    Q6581072 = "feminine",
    Q1052281 = "feminine",
    Q43445 = "feminine",
    Q1097630 = "other",
    Q48270 = "other",
}

-- Returns "masculine", "feminine", "other" or "unknown"
-- according to "sex or gender" property.
-- Handling of non-binary gender is language dependent and would have to
-- be done in a language-specific implementation.
function p.getHumanGender ( q_id )
    local gender_item = p.getItemId( q_id, "P21" )
    -- Any id absent from the lookup (including a missing property) is unknown
    return human_gender_by_item[gender_item] or "unknown"
end
-- Yields the grammatical gender attached to q_id through property "P5185".
-- The gender item is translated via gf.features_map when it is listed there;
-- otherwise its English label is used. Returns nil when the property is
-- absent.
function p.getGrammaticalGender ( q_id )
    local gender_item = p.getItemId(q_id, "P5185")
    if not gender_item then
        return nil
    end
    local known_feature = gf.features_map[gender_item]
    if known_feature then
        return known_feature.gender
    end
    -- fallback to English label
    return p.getLabel(gender_item, "en")
end
-- Expands a wikidata feature (a q-id) into a table of category-feature
-- pairings.
-- Features listed in gf.features_map are returned straight from that map.
-- Otherwise a single pairing is built: the value is the English label of
-- q_id, and the key is the English label of its "instance of" (P31) item, or
-- q_id itself when there is no such item.
function p.expandFeature ( q_id )
    local known_feature = gf.features_map[q_id]
    if known_feature then
        return known_feature
    end
    -- Fallback: find category by means of the "instance of" property
    local instance_of = p.getItemId( q_id, "P31")
    local category = q_id
    if instance_of then
        category = p.getLabel(instance_of, "en")
    end
    return { [category] = p.getLabel(q_id, "en") }
end
-- Gets the lemma in the rendering language (the global `language`) or falls
-- back to the first lemma returned by the lexeme.
-- @param wdLexeme   lexeme object providing getLemma and getLemmas
-- @param lexeme_id  id of the lexeme, only used in the log message
-- @return the lemma and the language it is in
function p.getLemma (wdLexeme, lexeme_id)
    local lemma, used_language = wdLexeme:getLemma(language)
    if not lemma then
        -- Declared local so the table does not leak into the global
        -- environment (and does not trip strict-mode checks).
        local lemmas = wdLexeme:getLemmas()
        lemma = lemmas[1][1]
        used_language = lemmas[1][2]
        -- tostring keeps the log call from failing when lexeme_id is omitted
        mw.log("Lexeme "..tostring(lexeme_id).." has no lemma for language "..language..". Using instead language "..used_language..".")
    end
    return lemma, used_language
end
-- Returns the part of speech of a lexeme: its lexical category translated
-- through gf.categories_map when listed there, otherwise the category's
-- English label.
function p.getPOS (wdLexeme)
    local category_id = wdLexeme:getLexicalCategory()
    local mapped = gf.categories_map[category_id]
    if mapped then
        return mapped
    end
    -- fallback: Use English label
    return p.getLabel( category_id, "en" )
end
return p