Changes

m
KS update 1.4
Line 21: Line 21:  
--============================<< H E L P E R  F U N C T I O N S >>============================================
 
--============================<< H E L P E R  F U N C T I O N S >>============================================
    +
--[[--------------------------< W I K I D A T A _ A R T I C L E _ N A M E _ G E T >----------------------------
   −
--[[--------------------------< L I N K _ L A B E L _ M A K E >------------------------------------------------
+
as an aid to internationalizing identifier-label wikilinks, gets identifier article names from Wikidata.
   −
common function to create identifier link label from handler table
+
returns w:<lang code>:<article title> when <q> has an <article title> for <lang code>; nil else. 'w:<lang code>'
 +
ensures that sister project (like wiktionary) will link to the <lang code>.wikipedia article.
   −
returns the first available of
+
for identifiers that do not have <q>, returns nil
 +
 
 +
for wikis that do not have mw.wikibase installed, returns nil
 +
 
 +
]]
 +
 
 +
local function wikidata_article_name_get (q)
	-- nothing to look up when there is no q number, or when there is one but
	-- mw.wikibase is not installed on this wiki
	if not is_set (q) or (q and not mw.wikibase) then
		return nil;
	end

	local wiki_code = cfg.this_wiki_code;										-- Wikipedia subdomain; 'en' for en.wikipedia.org
	local title = mw.wikibase.getSitelink (q, wiki_code .. 'wiki');				-- article title from Wikidata; nil when this wiki has no sitelink

	-- interwiki-style link without brackets; the leading 'w:' is required.
	-- When there is no title, the falsy lookup result is passed through unchanged.
	return title and ('w:' .. wiki_code .. ':' .. title);
end
 +
 
 +
 
 +
--[[--------------------------< L A B E L _ L I N K _ M A K E >------------------------------------------------
 +
 
 +
common function to create a link for an identifier label from handler table or from Wikidata
 +
 
 +
returns the first available of:
 
1. redirect from local wiki's handler table (if enabled)
 
1. redirect from local wiki's handler table (if enabled)
2. label specified in the local wiki's handler table
+
2. Wikidata sitelink to the local language wikipedia article (if there is a Wikidata entry for this identifier in the local language)
 +
3. link to wikipedia article specified in the local wiki's handler table
 
 
 
]]
 
]]
   −
local function label_link_make (handler)
	-- a redirect has priority: when redirects are enabled and one is available,
	-- skip the (expensive) Wikidata fetch altogether
	if cfg.use_identifier_redirects and is_set (handler.redirect) then
		return handler.redirect or handler.link;
	end

	-- otherwise prefer the Wikidata article title for this wiki, then the
	-- link given in the local handler table
	return wikidata_article_name_get (handler.q) or handler.link;
end
   Line 47: Line 84:  
local ext_link;
 
local ext_link;
 
local this_wiki_code = cfg.this_wiki_code; -- Wikipedia subdomain; 'en' for en.wikipedia.org
 
local this_wiki_code = cfg.this_wiki_code; -- Wikipedia subdomain; 'en' for en.wikipedia.org
 
+
local wd_article; -- article title from Wikidata
 +
 
if options.encode == true or options.encode == nil then
 
if options.encode == true or options.encode == nil then
 
url_string = mw.uri.encode (url_string, 'PATH');
 
url_string = mw.uri.encode (url_string, 'PATH');
Line 62: Line 100:     
return table.concat ({
 
return table.concat ({
make_wikilink (link_label_make (options), options.label), -- redirect, or locally specified link (in that order)
+
make_wikilink (label_link_make (options), options.label), -- redirect, Wikidata link, or locally specified link (in that order)
 
options.separator or '&nbsp;',
 
options.separator or '&nbsp;',
 
ext_link
 
ext_link
Line 84: Line 122:  
return table.concat (
 
return table.concat (
 
{
 
{
make_wikilink (link_label_make (options), options.label), -- wiki-link the identifier label
+
make_wikilink (label_link_make (options), options.label), -- wiki-link the identifier label
 
options.separator or '&nbsp;', -- add the separator
 
options.separator or '&nbsp;', -- add the separator
 
make_wikilink (
 
make_wikilink (
Line 127: Line 165:       −
--[=[-------------------------< I S _ V A L I D _ B I O R X I V _ D A T E >------------------------------------
+
--[=[-------------------------< I S _ V A L I D _ R X I V _ D A T E >------------------------------------------
   −
returns true if:
+
for biorxiv, returns true if:
 
2019-12-11T00:00Z <= biorxiv_date < today + 2 days
 
2019-12-11T00:00Z <= biorxiv_date < today + 2 days
 +
for medrxiv, returns true if:
 +
2020-01-01T00:00Z <= medrxiv_date < today + 2 days
 
 
 
The dated form of biorxiv identifier has a start date of 2019-12-11.  The Unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400
 
The dated form of biorxiv identifier has a start date of 2019-12-11.  The Unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400
 +
The medrxiv identifier has a start date of 2020-01-01.  The Unix timestamp for that date is {{#time:U|2020-01-01}} = 1577836800
   −
biorxiv_date is the date provided in those |biorxiv= parameter values that are dated at time 00:00:00 UTC
+
<rxiv_date> is the date provided in those |biorxiv= parameter values that are dated and in |medrxiv= parameter values at time 00:00:00 UTC
today is the current date at time 00:00:00 UTC plus 48 hours
+
<today> is the current date at time 00:00:00 UTC plus 48 hours
if today is 2015-01-01T00:00:00 then
+
if today's date is 2023-01-01T00:00:00 then
adding 24 hours gives 2015-01-02T00:00:00 – one second more than today
+
adding 24 hours gives 2023-01-02T00:00:00 – one second more than today
adding 24 hours gives 2015-01-03T00:00:00 – one second more than tomorrow
+
adding 24 hours gives 2023-01-03T00:00:00 – one second more than tomorrow
   −
This function does not work if it is fed month names for languages other than English.  Wikimedia #time: parser
+
inputs:
apparently doesn't understand non-English date month names. This function will always return false when the date
+
<y>, <m>, <d> – year, month, day parts of the date from the biorxiv or medrxiv identifier
contains a non-English month name because good1 is false after the call to lang_object.formatDate().  To get
+
<select> 'b' for biorxiv, 'm' for medrxiv; defaults to 'b'
around that call this function with date parts and create a YYYY-MM-DD format date.
      
]=]
 
]=]
   −
local function is_valid_biorxiv_date (y, m, d)
+
local function is_valid_rxiv_date (y, m, d, select)
local biorxiv_date = table.concat ({y, m, d}, '-'); -- make ymd date
+
-- Range-check the month and day parts before building the date string.
-- The two range tests must be joined with 'or': a value cannot be both below
-- and above the range, so an 'and' test can never reject anything.
-- A part that is not a number at all is also rejected.
if not tonumber (m) or 1 > tonumber (m) or 12 < tonumber (m) then				-- <m> must be a number 1–12
	return false;
end
if not tonumber (d) or 1 > tonumber (d) or 31 < tonumber (d) then				-- <d> must be a number 1–31; TODO: account for month length and leap year?
	return false;
end
 +
 +
local rxiv_date = table.concat ({y, m, d}, '-'); -- make ymd date string
 
local good1, good2;
 
local good1, good2;
local biorxiv_ts, tomorrow_ts; -- to hold Unix timestamps representing the dates
+
local rxiv_ts, tomorrow_ts; -- to hold Unix timestamps representing the dates
 
local lang_object = mw.getContentLanguage();
 
local lang_object = mw.getContentLanguage();
   −
good1, biorxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', biorxiv_date); -- convert biorxiv_date value to Unix timestamp  
+
good1, rxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', rxiv_date); -- convert rxiv_date value to Unix timestamp  
 
good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow
 
good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow
 
 
 
if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand
 
if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand
biorxiv_ts = tonumber (biorxiv_ts) or lang_object:parseFormattedNumber (biorxiv_ts); -- convert to numbers for the comparison;
+
rxiv_ts = tonumber (rxiv_ts) or lang_object:parseFormattedNumber (rxiv_ts); -- convert to numbers for the comparison;
 
tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);
 
tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);
 
else
 
else
Line 163: Line 210:  
end
 
end
   −
return ((1576022400 <= biorxiv_ts) and (biorxiv_ts < tomorrow_ts)) -- 2012-12-11T00:00Z <= biorxiv_date < tomorrow's date
+
local limit_ts = ((select and ('m' == select)) and 1577836800) or 1576022400; -- choose the appropriate limit timestamp: 1577836800 when <select> is 'm', 1576022400 otherwise
 +
 
 +
return ((limit_ts <= rxiv_ts) and (rxiv_ts < tomorrow_ts)) -- limit_ts <= rxiv_date < tomorrow's date
 
end
 
end
   Line 243: Line 292:  
 
 
return lccn;
 
return lccn;
end
+
end
      Line 330: Line 379:  
if is_set (class) then
 
if is_set (class) then
 
if id:match ('^%d+') then
 
if id:match ('^%d+') then
text = table.concat ({text, ' [[//arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink
+
text = table.concat ({text, ' [[https://arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink
 
else
 
else
 
set_message ('err_class_ignored');
 
set_message ('err_class_ignored');
 +
end
 +
else -- class not set
 +
if id:match ('^%d+') and options.CitationClass == 'arxiv' then -- new (post 2007) format; {{cite arxiv}} only
 +
set_message ('maint_missing_class'); -- add maint cat
 
end
 
end
 
end
 
end
Line 362: Line 415:  
local access = options.access;
 
local access = options.access;
 
local handler = options.handler;
 
local handler = options.handler;
 +
local ignore_invalid = options.accept;
 
local err_type;
 
local err_type;
 
local err_msg = '';
 
local err_msg = '';
Line 384: Line 438:  
if id:find('&%.') then
 
if id:find('&%.') then
 
err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does it's missing a letter)
 
err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does it's missing a letter)
 +
end
 +
if id:match ('.........%.tmp%.') then -- temporary bibcodes when positions 10–14 are '.tmp.'
 +
set_message ('maint_bibcode');
 
end
 
end
 
end
 
end
 
end
 
end
   −
if is_set (err_type) then -- if there was an error detected
+
if is_set (err_type) and not ignore_invalid then -- if there was an error detected and accept-as-written markup not used
 
set_message ('err_bad_bibcode', {err_type});
 
set_message ('err_bad_bibcode', {err_type});
 
options.coins_list_t['BIBCODE'] = nil; -- when error, unset so not included in COinS
 
options.coins_list_t['BIBCODE'] = nil; -- when error, unset so not included in COinS
   
end
 
end
   Line 419: Line 475:  
 
 
local patterns = {
 
local patterns = {
'^10.1101/%d%d%d%d%d%d$', -- simple 6-digit identifier (before 2019-12-11)
+
'^10%.1101/%d%d%d%d%d%d$', -- simple 6-digit identifier (before 2019-12-11)
'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11)
+
'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11)
'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11)
+
'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11)
 
}
 
}
 
 
Line 429: Line 485:     
if m then -- m is nil when id is the six-digit form
 
if m then -- m is nil when id is the six-digit form
if not is_valid_biorxiv_date (y, m, d) then -- validate the encoded date; TODO: don't ignore leap-year and actual month lengths ({{#time:}} is a poor date validator)
+
if not is_valid_rxiv_date (y, m, d, 'b') then -- validate the encoded date; 'b' for biorxiv limit
 
break; -- date fail; break out early so we don't unset the error message
 
break; -- date fail; break out early so we don't unset the error message
 
end
 
end
Line 503: Line 559:  
local handler = options.handler;
 
local handler = options.handler;
 
local err_flag;
 
local err_flag;
 +
 +
-- Local function to check those few registrants that are mixed: only some of
-- their DOIs are free-to-read, identifiable by how the doi suffix begins.
--   <registrant> – key into cfg.extended_registrants_t
--   <suffix>     – doi suffix to test
-- Returns true when <suffix> matches, anchored at its start, any of the
-- registrant's incipits (each incipit is used as a pattern); otherwise
-- returns nothing.
local function is_extended_free (registrant, suffix)
	local incipits_t = cfg.extended_registrants_t[registrant];				-- this registrant's known free-to-read extensions, if any

	if not incipits_t then
		return;																-- registrant has no extensions listed
	end

	for _, incipit in ipairs (incipits_t) do								-- loop through the registrant's incipits
		local anchored = '^' .. incipit;									-- match only at the beginning of the suffix
		if mw.ustring.find (suffix, anchored) then
			return true;													-- found
		end
	end
end
    
local text;
 
local text;
 
if is_set (inactive) then
 
if is_set (inactive) then
local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date
+
local inactive_year = inactive:match("%d%d%d%d"); -- try to get the year portion from the inactive date
 
local inactive_month, good;
 
local inactive_month, good;
   Line 517: Line 583:  
end
 
end
 
end
 
end
else
+
end -- otherwise, |doi-broken-date= has something but it isn't a date
inactive_year = nil; -- |doi-broken-date= has something but it isn't a date
  −
end
   
 
 
if is_set (inactive_year) and is_set (inactive_month) then
 
if is_set (inactive_year) and is_set (inactive_month) then
Line 531: Line 595:  
end
 
end
   −
local registrant = mw.ustring.match (id, '^10%.([^/]+)/[^%s–]-[^%.,]$'); -- registrant set when DOI has the proper basic form
+
local suffix;
 +
local registrant, suffix = mw.ustring.match (id, '^10%.([^/]+)/([^%s–]-[^%.,])$'); -- registrant and suffix set when DOI has the proper basic form
    
local registrant_err_patterns = { -- these patterns are for code ranges that are not supported  
 
local registrant_err_patterns = { -- these patterns are for code ranges that are not supported  
'^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999
+
'^[^1-3]%d%d%d%d%.%d+$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999
'^[^1-5]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 60000+); accepts: 10000–59999
+
'^[^1-7]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 80000+); accepts: 10000–79999; NOTE(review): comment previously said 60000+ / 10000–69999 – confirm the intended upper bound
'^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); accepts: 1000–9999
+
'^[^1-9]%d%d%d%.%d+$', -- 4 digits with subcode (0xxx); accepts: 1000–9999
 
'^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999
 
'^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999
 
'^%d%d%d%d%d%d+', -- 6 or more digits
 
'^%d%d%d%d%d%d+', -- 6 or more digits
Line 562: Line 627:  
if err_flag then
 
if err_flag then
 
options.coins_list_t['DOI'] = nil; -- when error, unset so not included in COinS
 
options.coins_list_t['DOI'] = nil; -- when error, unset so not included in COinS
 +
else
 +
if not access and (cfg.known_free_doi_registrants_t[registrant] or is_extended_free (registrant, suffix)) then -- |doi-access=free not set and <registrant> is known to be free
 +
set_message ('maint_doi_unflagged_free'); -- set a maint cat
 +
end
 
end
 
end
 
 
Line 646: Line 715:  
]]
 
]]
   −
local function isbn (options)
+
local function isbn (options_t)
local isbn_str = options.id;
+
local isbn_str = options_t.id;
local ignore_invalid = options.accept;
+
local ignore_invalid = options_t.accept;
local handler = options.handler;
+
local handler = options_t.handler;
 +
local year = options_t.Year; -- when set, valid anchor_year; may have a disambiguator which must be removed
    
local function return_result (check, err_type) -- local function to handle the various returns
 
local function return_result (check, err_type) -- local function to handle the various returns
Line 658: Line 728:  
else -- here when not ignoring
 
else -- here when not ignoring
 
if not check then -- and there is an error
 
if not check then -- and there is an error
options.coins_list_t['ISBN'] = nil; -- when error, unset so not included in COinS
+
options_t.coins_list_t['ISBN'] = nil; -- when error, unset so not included in COinS
 
set_message ('err_bad_isbn', err_type); -- set an error message
 
set_message ('err_bad_isbn', err_type); -- set an error message
 
return ISBN; -- return id text
 
return ISBN; -- return id text
Line 664: Line 734:  
end
 
end
 
return ISBN; -- return id text
 
return ISBN; -- return id text
 +
end
 +
 +
if year and not ignore_invalid then --
 +
year = year:match ('%d%d%d%d?'); -- strip disambiguator if present
 +
if year and (1965 > tonumber(year)) then -- <year> will be nil here when |year=n.d. or |year=nd
 +
set_message ('err_invalid_isbn_date'); -- set an error message
 +
return internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect,
 +
prefix = handler.prefix, id = isbn_str, separator = handler.separator});
 +
end
 
end
 
end
   Line 796: Line 875:  
text = table.concat ( -- because no place to link to yet
 
text = table.concat ( -- because no place to link to yet
 
{
 
{
make_wikilink (link_label_make (handler), handler.label),
+
make_wikilink (label_link_make (handler), handler.label),
 
handler.separator,
 
handler.separator,
 
id_copy
 
id_copy
Line 975: Line 1,054:  
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
 
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
 
prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode});
 
prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode});
 +
end
 +
 +
 +
--[[--------------------------< M E D R X I V >-----------------------------------------------------------------
 +
 +
Format medRxiv ID and do simple error checking.  Similar to later bioRxiv IDs, medRxiv IDs are prefixed with a
 +
yyyy.mm.dd. date and suffixed with an optional version identifier.  Earliest date accepted is 2020.01.01
 +
 +
The medRxiv ID is a date followed by an eight-digit number followed by an optional version indicator 'v' and one or more digits:
 +
https://www.medrxiv.org/content/10.1101/2020.11.16.20232009v2 -> 10.1101/2020.11.16.20232009v2
 +
 +
]]
 +
 +
local function medrxiv (options)
	local id = options.id;
	local handler = options.handler;
	local err_msg_flag = true;													-- flag; assume that there will be an error

	local patterns = {
		'^%d%d%d%d%d%d%d%d$',													-- simple 8-digit identifier; these should be relatively rare
		'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d%d%dv%d+$',			-- y.m.d. date + 8-digit identifier + version (2020-01-01 and later)
		'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d%d%d$',				-- y.m.d. date + 8-digit identifier (2020-01-01 and later)
		}

	for _, pattern in ipairs (patterns) do										-- spin through the patterns looking for a match
		local y, m, d = id:match (pattern);										-- on a match <y> is set; <m> and <d> are nil for the capture-less 8-digit form

		if y then																-- found a match
			if m then															-- m is nil when id is the 8-digit form
				if not is_valid_rxiv_date (y, m, d, 'm') then					-- validate the encoded date; 'm' selects the medrxiv start-date limit
					break;														-- date fail; break out early so we don't unset the error message
				end
			end
			err_msg_flag = nil;													-- we found a match so unset the error message
			break;																-- and done
		end
	end																			-- <err_msg_flag> remains set here when no match

	if err_msg_flag then
		options.coins_list_t['MEDRXIV'] = nil;									-- when error, unset so not included in COinS
		set_message ('err_bad_medrxiv');										-- and set the error message
	end

	return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
		prefix = handler.prefix, id = id, separator = handler.separator,
		encode = handler.encode, access = handler.access});
end
   Line 1,036: Line 1,161:  
elseif id:match('^%d+$') then -- no prefix
 
elseif id:match('^%d+$') then -- no prefix
 
number = id; -- get the number
 
number = id; -- get the number
if 10 < number:len() then
+
if tonumber (id) > handler.id_limit then
number = nil; -- constrain to 1 to 10 digits; change this when OCLC issues 11-digit numbers
+
number = nil; -- unset when id value exceeds the limit
 
end
 
end
 
end
 
end
Line 1,169: Line 1,294:  
text = table.concat ( -- still embargoed so no external link
 
text = table.concat ( -- still embargoed so no external link
 
{
 
{
make_wikilink (link_label_make (handler), handler.label),
+
make_wikilink (label_link_make (handler), handler.label),
 
handler.separator,
 
handler.separator,
 
id,
 
id,
Line 1,498: Line 1,623:  
['JSTOR'] = jstor,
 
['JSTOR'] = jstor,
 
['LCCN'] = lccn,
 
['LCCN'] = lccn,
 +
['MEDRXIV'] = medrxiv,
 
['MR'] = mr,
 
['MR'] = mr,
 
['OCLC'] = oclc,
 
['OCLC'] = oclc,