-- Module:Infobox television/sandbox
--
-- < Module:Infobox television
--
-- Documentation for this module may be created at Module:Infobox television/sandbox/doc

require("strict")

--- @module
local p = {}

-- Wikitext of the maintenance (tracking) categories this module can emit.
-- Entries ending in "|%s]]" are string.format templates: the functions in this
-- module fill the placeholder with the name of the offending infobox parameter,
-- which lands after the pipe of the category link.
-- Note: alt_name and incorrectly_formatted currently hold the same category string.
local maintenance_categories = {
	alt_name = "[[Category:Pages using infobox television with incorrectly formatted values|%s]]",
	dates_incorrectly_formatted = "[[Category:Pages using infobox television with nonstandard dates]]",
	dates_missing = "[[Category:Pages using infobox television with missing dates]]",
	flag_icon = "[[Category:Pages using infobox television with flag icon]]",
	image_values_without_an_image = "[[Category:Pages using infobox television with image-related values without an image]]",
	incorrectly_formatted = "[[Category:Pages using infobox television with incorrectly formatted values|%s]]",
	manual_display_title = "[[Category:Pages using infobox television with unnecessary manual displaytitle]]",
	manual_display_title_temp_tracking = "[[Category:Pages using infobox television with manual displaytitle]]",
	non_matching_title = "[[Category:Pages using infobox television with non-matching title]]",
	unnecessary_title_parameter = "[[Category:Pages using infobox television with unnecessary name parameter]]",
}

--- Splits a page title into its base title and its trailing parenthetical disambiguation.
---
--- Fields of the returned table:
--- - title - the text before the final " (...)" group, or the whole input when there is no such group.
--- - disambiguation - the content of that group without its outer parentheses, or nil when absent.
---
--- Note: generic enough to be moved to a shared module if other templates need it.
---
--- @param text string
--- @return table<string, string | nil>
local function get_title_parts(text)
	-- %b() matches one balanced parenthesised group, so a name that itself contains
	-- parentheses, such as "episode (Randall and Hopkirk (Deceased))", is captured whole.
	local base, parenthetical = string.match(text, "^(.+) (%b())$")

	-- No " (...)" suffix: the whole text is the title.
	if type(base) ~= "string" then
		base = text
	end

	---@type table<string, string | nil>
	local parts = {title = base}

	if type(parenthetical) == "string" then
		-- Drop only the outermost "(" and ")"; inner parentheses stay untouched.
		parts.disambiguation = string.sub(parenthetical, 2, -2)
	end

	return parts
end

--- Flags any |italic_title= value other than "no".
---
--- Infobox parameters checked:
--- - |italic_title=
---
--- @param args table
--- @return string The "incorrectly formatted" maintenance category keyed on "italic_title",
--- or an empty string when the parameter is unset or equals "no".
local function is_italic_title_valid_value(args)
	local italic_title = args.italic_title

	-- Unset and "no" are the only accepted states.
	if not italic_title or italic_title == "no" then
		return ""
	end

	return string.format(maintenance_categories.incorrectly_formatted, "italic_title")
end

--- Validates one set of release dates and returns a maintenance category when the
--- dates are missing or are not produced by the {{Start date}} / {{End date}} templates.
---
--- Infobox parameters checked:
--- - |first_aired[1-6]=
--- - |released[1-6]=
--- - |last_aired[1-6]=
---
--- Note: all_tests is meant only for /testcases testing.
---
--- @param all_tests string Testing conditional value; "no" disables this check.
--- @param released string The start date value.
--- @param first_aired string The start date value.
--- @param last_aired string The end date value.
--- @return string A maintenance category, or an empty string when everything is valid.
local function are_dates_formatted_correctly(all_tests, released, first_aired, last_aired)
	-- Lets /testcases switch this check off so that only what is being tested is shown.
	if all_tests == "no" then
		return ""
	end

	-- Config parameters
	local upcoming_keyword = "Upcoming"
	local ongoing_keyword = "present"
	local start_class_pattern = "itvstart"
	local end_class_pattern = "itvend"
	local film_class_pattern = "film%-date"

	-- Counts how many times the pattern occurs in the value.
	local function occurrences(value, pattern)
		local _, count = string.gsub(value, pattern, "")
		return count
	end

	-- |released= must not be combined with |first_aired= or |last_aired=.
	if released and (first_aired or last_aired) then
		return maintenance_categories.dates_incorrectly_formatted
	end

	-- A start date should always be set.
	local start_date = released or first_aired
	if not start_date then
		return maintenance_categories.dates_missing
	end

	-- The start date must come from {{Start date}}; {{Film date}} output is rejected outright.
	if string.find(start_date, film_class_pattern) then
		return maintenance_categories.dates_incorrectly_formatted
	end

	-- The only accepted non-template start value is the word "Upcoming".
	if start_date ~= upcoming_keyword and not string.find(start_date, start_class_pattern) then
		return maintenance_categories.dates_incorrectly_formatted
	end

	-- An end date is required once a real first-aired date is given.
	if first_aired and first_aired ~= upcoming_keyword and not last_aired then
		return maintenance_categories.dates_missing
	end

	-- The end date must come from {{End date}}, or be the word "present".
	if last_aired then
		if last_aired ~= ongoing_keyword and not string.find(last_aired, end_class_pattern) then
			return maintenance_categories.dates_incorrectly_formatted
		end
	end

	-- Only one date should be used per field.
	if occurrences(start_date, start_class_pattern) > 1 then
		return maintenance_categories.dates_incorrectly_formatted
	end
	if last_aired and occurrences(last_aired, end_class_pattern) > 1 then
		return maintenance_categories.dates_incorrectly_formatted
	end

	return ""
end

--- Flags release information that carries extra wording or formatting: the words
--- "original" or "revival" (case-insensitive) or wiki italics markup ('').
--- A value containing "aboriginal" is not flagged for the word "original".
---
--- Infobox parameters checked:
--- - |first_aired[1-6]=
--- - |released[1-6]=
--- - |last_aired[1-6]=
--- - |network[1-6]=
--- - |channel[1-6]=
---
--- @param args table The parameter name/value pairs to inspect.
--- @return string The "incorrectly formatted" category keyed on the first offending
--- parameter that is visited, or an empty string.
local function does_release_information_have_extraneous_text(args)
	for parameter, value in pairs(args) do
		local lowered = string.lower(value)

		-- "aboriginal" contains "original", so it is excluded explicitly.
		local mentions_original = string.find(lowered, "original") and not string.find(lowered, "aboriginal")

		if mentions_original or string.find(lowered, "revival") or string.find(value, "''") then
			return string.format(maintenance_categories.incorrectly_formatted, parameter)
		end
	end

	return ""
end

--- Runs the release-information checks on each of the six parameter sets and returns
--- the first maintenance category found. A set is checked for:
---- Extraneous text
---- Dates that are missing or don't use the correct mark up
---
--- Infobox parameters checked:
--- - |first_aired[1-6]=
--- - |released[1-6]=
--- - |last_aired[1-6]=
--- - |network[1-6]=
--- - |channel[1-6]=
---
--- @param args table
--- @return string
local function is_release_information_formatted_correctly(args)
	local checked_parameters = {"first_aired", "released", "last_aired", "network", "channel"}
	local highest_index = 6

	for index = 1, highest_index do
		-- The first set is unnumbered; the following sets carry the suffix 2 to 6.
		local suffix = ""
		if index > 1 then
			suffix = tostring(index)
		end

		-- Gather only the values that belong to the current set.
		local group = {}
		for _, parameter in ipairs(checked_parameters) do
			local key = parameter .. suffix
			group[key] = args[key]
		end

		-- A set with no values at all is skipped.
		if next(group) then
			local extraneous_text_category = does_release_information_have_extraneous_text(group)
			if extraneous_text_category ~= "" then
				return extraneous_text_category
			end

			local dates_category = are_dates_formatted_correctly(
				args.all_tests,
				args["released" .. suffix],
				args["first_aired" .. suffix],
				args["last_aired" .. suffix]
			)
			if dates_category ~= "" then
				return dates_category
			end
		end
	end

	return ""
end

--- Returns a maintenance category if a {{Italic title}} or {{DISPLAYTITLE}} template is used.
--- Checks also for the following {{Italic title}} redirects:
---- Italic
---- Italics
---- Italictitle
---- Italics title
---
--- Testing parameters:
--- - |page_test= - a real Wikipedia page to read the content of the page.
---
--- Infobox parameters checked:
--- - |italic_title=
---
--- @param args table
--- @return string A maintenance category, or an empty string when the page cannot be
--- read or no manual title modification is found.
local function has_display_title(args)
	--TODO: when testing below is done uncomment code
	--if args.italic_title then
	--	return ""
	--end

	local article
	if args.page_test then
		article = mw.title.new(args.page_test)
	else
		article = mw.title.getCurrentTitle()
	end

	-- mw.title.new returns nil when |page_test= is not a valid title.
	if not article then
		return ""
	end

	local page_text = article:getContent()
	if not page_text then
		return ""
	end

	-- A parameterless {{Italic title}} (or one of its redirects) is unnecessary,
	-- unless the page also uses the |all=yes form.
	local uses_italic_title = string.find(page_text, "{{[Ii]talics?%s?title}}") or string.find(page_text, "{{[Ii]talics?}}")
	if uses_italic_title and not string.match(page_text, "{{[Ii]talic title|all=yes}}") then
		return maintenance_categories.manual_display_title
	end

	-- Lazy capture: stop at the first "}}" after DISPLAYTITLE. A greedy ".*" would run
	-- on to the last "}}" of the whole page and capture unrelated wikitext.
	-- Limitation: a DISPLAYTITLE value that itself contains "}}" is cut short.
	local display_title = string.match(page_text, "{{DISPLAYTITLE:(.-)}}")
	if not display_title then
		return ""
	end

	local article_title = article.text
	--TODO: currently does not work
	--local display_title_no_namespace = string.gsub(display_title, article.nsText .. ":", "")
	--local display_title_no_italics = string.sub(display_title_no_namespace, 3, string.len(display_title_no_namespace) - 2)

	-- The second comparison drops the first and last two characters, i.e. a
	-- surrounding pair of '' italics markers.
	-- if article_title == display_title or article_title == display_title_no_italics then
	if article_title == display_title or article_title == string.sub(display_title, 3, string.len(display_title) - 2) then
		return maintenance_categories.manual_display_title
	elseif string.find(display_title, "<sub>") or string.find(display_title, "<sup>") then
		-- TODO: This is valid. Will remove when done with cleanup.
		return ""
	end

	-- TODO: remove when done checking results.
	return maintenance_categories.manual_display_title_temp_tracking
end

--- Reports whether any infobox value contains the text "flagicon".
---
--- All the infobox values are checked.
---
--- @param args table
--- @return string The flag icon maintenance category, or an empty string.
local function has_flag_icon(args)
	for _, value in pairs(args) do
		local position = string.find(value, "flagicon")
		if position ~= nil then
			return maintenance_categories.flag_icon
		end
	end

	return ""
end

--- Flags a |producer= value that mentions a role which does not belong in the field.
--- The value is searched (case-sensitively) for:
--- executive, associate, co-, line producer
---
--- Infobox parameters checked:
--- - |producer=
---
--- @param producer string
--- @return string The "incorrectly formatted" category keyed on "producer", or an empty string.
local function is_producer_used_correctly(producer)
	if not producer then
		return ""
	end

	-- Lua patterns; the hyphen in "co-" is escaped because "-" is a pattern quantifier.
	local disallowed_roles = {"executive", "associate", "co%-", "line producer"}

	for index = 1, #disallowed_roles do
		if string.find(producer, disallowed_roles[index]) then
			return string.format(maintenance_categories.incorrectly_formatted, "producer")
		end
	end

	return ""
end

--- Returns a maintenance category if the country information contains one of the
--- following abbreviations:
--- U.S.A, USA, U.S., US, UK, U.K.
---
--- Infobox parameters checked:
--- - |country=
---
--- @param country string
--- @return string The "incorrectly formatted" category keyed on "country", or an empty string.
local function is_country_name_valid(country)
	if not country then
		return ""
	end

	-- "U.S" also covers "U.S." and "U.S.A"; "US" also covers "USA".
	local abbreviations = {"U.S", "US", "UK", "U.K."}

	for _, abbreviation in ipairs(abbreviations) do
		-- Plain-text search: in a Lua pattern "." matches any character, so "U.K."
		-- would also match unrelated text such as "URKI".
		if string.find(country, abbreviation, 1, true) then
			return string.format(maintenance_categories.incorrectly_formatted, "country")
		end
	end

	return ""
end

--- Flags values that contain link markup.
---
--- Infobox parameters checked:
--- - |language=
---
--- Fragments currently treated as invalid:
--- - ] - links.
---
--- @param args table
--- @return string The "incorrectly formatted" category keyed on the offending
--- parameter, or an empty string.
local function are_values_linked_or_formatted(args)
	local checked_parameters = {"language"}
	local forbidden_fragments = {"]"}

	for _, parameter in ipairs(checked_parameters) do
		local value = args[parameter]
		if value ~= nil then
			for _, fragment in ipairs(forbidden_fragments) do
				-- Plain-text search, the fragment is not a pattern.
				if string.find(value, fragment, 1, true) then
					return string.format(maintenance_categories.incorrectly_formatted, parameter)
				end
			end
		end
	end

	return ""
end

-- Breaks a string into its non-empty lines.
--
-- @param str string
-- @return table A sequence with one entry per run of characters between newlines.
local function split(str)
	local separator = "\n"
	-- Yields "([^<separator>]+)": one or more characters that are not the separator.
	local line_pattern = string.format("([^%s]+)", separator)

	local lines = {}
	for line in str:gmatch(line_pattern) do
		lines[#lines + 1] = line
	end

	return lines
end

-- Strips list-related markup from a string, leaving one list entry per line.
--
-- @param str string
-- @return string
local function clean_list_syntax(str)
	-- Applied in order; each entry is {pattern, replacement}.
	local substitutions = {
		{"\127[^\127]*UNIQ%-%-(%a+)%-%x+%-QINU[^\127]*\127", ""},	-- Remove all strip-markers.
		{"%<%/? *div[^%>]*%>", ""},								-- Remove div tags.
		{"%<%/? *span[^%>]*%>", ""},							-- Remove span tags.
		{"%<%/? *ul[^%>]*%>", ""},								-- Remove list container tags.
		{"%<%/? *li[^%>]*%>", "\n"},							-- Replace list item tags with a new line.
		{"</? *br */?>", "\n"},									-- Replace <br /> (and variants) with a new line.
		{"\n\n", "\n"},											-- Replace a double new line with a single one.
		{"*", ""},												-- Remove asterisks.
	}

	local cleaned = str
	for _, substitution in ipairs(substitutions) do
		-- Assigning to a single variable discards gsub's substitution count.
		cleaned = string.gsub(cleaned, substitution[1], substitution[2])
	end

	return cleaned
end

--- Checks the italics used in |alt_name= and returns a maintenance category if:
---- The value is a list and at least one entry has no italics markup ('').
---- The value is a single name and it does have italics markup.
----- The template already italicizes a single name, so manual markup adds up to
----- 4 apostrophes, which produce a bold title instead.
---
--- Module:Detect singular decides between the two cases: a result greater than 1
--- is handled as a list.
---
--- Infobox parameters checked:
--- - |alt_name=
---
--- @param alt_name string
--- @return string
local function is_alt_name_in_italics(alt_name)
	if not alt_name then
		return ""
	end

	local detect_singular = require("Module:Detect singular")._main
	local detection_result = detect_singular({alt_name, ["no_and"] = "1", ["no_comma"] = "1"})

	local is_invalid = false
	if detection_result > 1 then
		-- List: every entry must carry its own italics.
		local names = split(clean_list_syntax(alt_name))
		for _, name in ipairs(names) do
			if not string.find(name, "''") then
				is_invalid = true
				break
			end
		end
	elseif string.find(alt_name, "''") then
		-- Single name: manual italics are not wanted.
		is_invalid = true
	end

	if is_invalid then
		return string.format(maintenance_categories.alt_name, "alt_name")
	end

	return ""
end

--- Flags an |image= value that contains a "File:" or "Image:" prefix
--- (the first letter may be lower case).
---
--- Infobox parameters checked:
--- - |image=
---
--- @param image string
--- @return string The "incorrectly formatted" category keyed on "image", or an empty string.
local function is_image_using_incorrect_syntax(image)
	if not image then
		return ""
	end

	local prefix_patterns = {"[Ff]ile:", "[Ii]mage:"}

	for _, prefix_pattern in ipairs(prefix_patterns) do
		if string.find(image, prefix_pattern) then
			return string.format(maintenance_categories.incorrectly_formatted, "image")
		end
	end

	return ""
end

--- Flags an |image_size= value that contains "px".
---
--- Infobox parameters checked:
--- - |image_size=
---
--- @param image_size string
--- @return string The "incorrectly formatted" category keyed on "image_size", or an empty string.
local function is_image_size_using_px(image_size)
	if not image_size then
		return ""
	end

	if not string.find(image_size, "px") then
		return ""
	end

	return string.format(maintenance_categories.incorrectly_formatted, "image_size")
end

--- Flags infoboxes that set image-related values although no image is given.
---
--- Infobox parameters checked:
--- - |image=
--- - |image_size=
--- - |image_upright=
--- - |image_alt=
--- - |alt=
--- - |caption=
---
--- @param args table
--- @return string The "image-related values without an image" category, or an empty string.
local function are_image_auxiliary_values_used_for_no_image(args)
	-- With an image present, auxiliary values are legitimate.
	if args.image then
		return ""
	end

	local auxiliary_parameters = {"image_size", "image_upright", "image_alt", "alt", "caption"}

	for _, parameter in ipairs(auxiliary_parameters) do
		if args[parameter] then
			return maintenance_categories.image_values_without_an_image
		end
	end

	return ""
end

--- Returns the display title text used in either the {{DISPLAYTITLE}} or {{Italic title}} templates.
---
--- - For {{DISPLAYTITLE:...}}: the title part of the value (disambiguation removed),
---   with every apostrophe stripped.
--- - For {{Italic title|all=yes}}: the article title as passed in.
---
--- @param page_text string The wikitext of the page; nil yields nil.
--- @param article_title string
--- @return string | nil Exactly one value; nil when neither template is found.
local function get_display_title_text(page_text, article_title)
	if not page_text then
		return nil
	end

	local display_title = string.match(page_text, "{{DISPLAYTITLE:(.-)}}")
	if display_title then
		local title_parts = get_title_parts(display_title)
		-- string.gsub also returns the substitution count; keep only the text so the
		-- function returns the single value its contract promises.
		local without_apostrophes = string.gsub(title_parts.title, "'", "")
		return without_apostrophes
	end

	if string.match(page_text, "{{[Ii]talic title|all=yes}}") then
		return article_title
	end

	return nil
end

--- Returns the title used in the {{Lowercase title}} template and an optional maintenance category.
---
--- The lowercase title is the article title with a leading uppercase letter lowered.
--- When the template carries |force=, the full article title is used; otherwise
--- only the title part without disambiguation.
---
--- @param page_text string The wikitext of the page; nil yields nil.
--- @param args table Infobox arguments; only args.name is read.
--- @param article_title string
--- @param title_parts table Table with a title field, as returned by get_title_parts().
--- @param return_category boolean When true and args.name is set, return a maintenance
--- category instead of the title.
--- @return string | nil The lowercase title, a maintenance category, or nil when the
--- page does not use the template.
local function get_lowercase_template_status(page_text, args, article_title, title_parts, return_category)
	if not page_text then
		return nil
	end

	local lowercase_template = string.match(page_text, "{{[Ll]owercase title.-}}")
	if not lowercase_template then
		return nil
	end

	local source_title
	if string.find(lowercase_template, "|force=") then
		source_title = article_title
	else
		source_title = title_parts.title
	end

	-- Assigning to a single variable drops gsub's substitution count.
	local lowercase_title = string.gsub(source_title, "^%u", string.lower)

	if return_category and args.name then
		-- A |name= equal to the lowercase title is redundant; anything else is a mismatch.
		if args.name == lowercase_title then
			return maintenance_categories.unnecessary_title_parameter
		end
		return maintenance_categories.non_matching_title
	end

	return lowercase_title
end

--- Returns the title used in the {{Correct title}} template and an optional maintenance category.
---
--- Two forms of the template are recognised, both followed by |reason=:
--- - {{Correct title|title=<title>|reason=...}}
--- - {{Correct title|<title>|reason=...}}
---
--- @param page_text string The wikitext of the page; nil yields nil.
--- @param args table Infobox arguments; only args.name is read.
--- @param return_category boolean When true and args.name is set, return a maintenance
--- category instead of the title.
--- @return string | nil The correct title, a maintenance category, or nil when the
--- page does not use the template.
local function get_correct_title_value(page_text, args, return_category)
	if not page_text then
		return nil
	end

	-- Lazy captures: the title ends at the first "|reason=" of this template. A greedy
	-- ".*" would run on to the last "|reason=" found anywhere on the page.
	local correct_title = string.match(page_text, "{{[Cc]orrect title|title=(.-)|reason=.-}}")
	if not correct_title then
		correct_title = string.match(page_text, "{{[Cc]orrect title|(.-)|reason=.-}}")
	end

	if not correct_title then
		return nil
	end

	if return_category and args.name then
		-- The title without disambiguation is only needed for this comparison.
		local correct_title_title_parts = get_title_parts(correct_title)
		if args.name == correct_title or args.name == correct_title_title_parts.title then
			return maintenance_categories.unnecessary_title_parameter
		end
		return maintenance_categories.non_matching_title
	end

	return correct_title
end

--- Compares the infobox title (|name=) with the article title and returns the matching
--- maintenance category.
---
--- Infobox parameters checked:
--- - |name=
---
--- The comparison ignores styling such as:
--- - Nowrap spans.
--- - Line breaks.
---
--- A return value can be one of three options:
--- - The value of maintenance_categories.non_matching_title - when args.name does not match the article title.
--- - The value of maintenance_categories.unnecessary_title_parameter - when args.name matches the article title.
--- - An empty string - when args.name isn't used or args.name uses an allowed modification
--- (such as a nowrap template) while the rest of args.name matches the article title.
---
--- Pages using {{Correct title}} or {{Lowercase title}} are compared against the
--- title given by those templates instead.
---
--- Testing parameters:
--- - |page_test= - a real Wikipedia page to read the content of the page.
--- - |page_title_test= - the title of the page being checked.
---
--- @param frame table
--- @param args table
--- @return string
local function is_infobox_title_equal_to_article_title(frame, args)
	if not args.name then
		return ""
	end

	local page
	if args.page_test then
		page = mw.title.new(args.page_test)
	else
		page = mw.title.getCurrentTitle()
	end

	-- mw.title.new returns nil for an invalid |page_test= title. In that case the page
	-- text stays nil, which the template helpers below treat as "template not used".
	local page_text
	if page then
		page_text = page:getContent()
	end

	-- Check if the article is using a {{Correct title}} template.
	local correct_title = get_correct_title_value(page_text, args, true)
	if correct_title then
		return correct_title
	end

	local article_title = args.page_title_test
	if not article_title then
		article_title = mw.title.getCurrentTitle().text
	end

	-- Remove disambiguation.
	local title_parts = get_title_parts(article_title)

	-- Check if the article is using a {{Lowercase title}} template.
	local lowercase_title = get_lowercase_template_status(page_text, args, article_title, title_parts, true)
	if lowercase_title then
		return lowercase_title
	end

	-- Remove nowrap span.
	if string.find(args.name, "nowrap") then
		local title = frame:expandTemplate{title = "Strip tags", args = {args.name}}
		if title == article_title or title == title_parts.title then
			return ""
		end
		return maintenance_categories.non_matching_title
	end

	-- Remove line breaks and additional spaces as a result.
	if string.find(args.name, "<br%s?/?>") then
		local title = string.gsub(args.name, "<br%s?/?>", "")
		title = string.gsub(title, "  ", " ")
		if title == article_title or title == title_parts.title then
			return ""
		end
		return maintenance_categories.non_matching_title
	end

	if args.name == article_title or args.name == title_parts.title then
		return maintenance_categories.unnecessary_title_parameter
	end

	-- Article and infobox titles do not match.
	return maintenance_categories.non_matching_title
end

--- Runs every validation on the {{Infobox television}} values and returns the
--- resulting maintenance categories concatenated into one string.
---
--- @param frame table
--- @return string
function p.validate_values(frame)
	local args = require("Module:Arguments").getArgs(frame)

	local categories = {}

	-- Appends one check result; each check returns an empty string when it passes.
	local function add(category)
		categories[#categories + 1] = category
	end

	add(is_infobox_title_equal_to_article_title(frame, args))
	add(has_display_title(args))
	add(are_image_auxiliary_values_used_for_no_image(args))
	add(is_image_using_incorrect_syntax(args.image))
	add(is_image_size_using_px(args.image_size))
	--add(is_alt_name_in_italics(args.alt_name))
	add(are_values_linked_or_formatted(args))
	add(is_country_name_valid(args.country))
	add(has_flag_icon(args))
	add(is_producer_used_correctly(args.producer))
	add(is_release_information_formatted_correctly(args))
	add(is_italic_title_valid_value(args))

	return table.concat(categories, "")
end

--- Returns the text used for the |above= field of the infobox, wrapped in italics markup.
---
--- The first available source is used, in this order:
--- - The title from a {{Correct title}} template.
--- - The title produced for a {{Lowercase title}} template.
--- - When |italic_title= is set: the title from {{DISPLAYTITLE}} or {{Italic title|all=yes}}.
--- - |name=
--- - The article title without disambiguation.
---
--- Infobox parameters checked:
--- - |name=
--- - |italic_title=
---
--- Testing parameters:
--- - |page_test= - a real Wikipedia page to read the content of the page.
--- - |page_title_test= - the title of the page being checked.
---
--- @param frame table
--- @return string
function p.above_title(frame)
	local getArgs = require("Module:Arguments").getArgs
	local args = getArgs(frame)

	local page
	if args.page_test then
		page = mw.title.new(args.page_test)
	else
		page = mw.title.getCurrentTitle()
	end

	-- mw.title.new returns nil for an invalid |page_test= title. The page text then
	-- stays nil (the helpers below accept that) and the title falls back to the
	-- current page instead of raising an error.
	local page_text
	if page then
		page_text = page:getContent()
	end

	local article_title = args.page_title_test
	if not article_title then
		article_title = (page or mw.title.getCurrentTitle()).text
	end

	local title_format = "''%s''"

	local correct_title = get_correct_title_value(page_text, args, false)
	if correct_title then
		return string.format(title_format, correct_title)
	end

	local title_parts = get_title_parts(article_title)

	local lowercase_title = get_lowercase_template_status(page_text, args, article_title, title_parts, false)
	if lowercase_title then
		return string.format(title_format, lowercase_title)
	end

	if args.italic_title then
		local title_modification = get_display_title_text(page_text, article_title)
		if title_modification then
			return string.format(title_format, title_modification)
		end
	end

	if args.name then
		return string.format(title_format, args.name)
	end

	return string.format(title_format, title_parts.title)
end

return p