Changes

m
KS update 1.4
Line 20: Line 20:     
--============================<< H E L P E R  F U N C T I O N S >>============================================
 
--============================<< H E L P E R  F U N C T I O N S >>============================================
 +
 +
--[[--------------------------< W I K I D A T A _ A R T I C L E _ N A M E _ G E T >----------------------------
 +
 +
as an aid to internationalizing identifier-label wikilinks, gets identifier article names from Wikidata.
 +
 +
returns :<lang code>:<article title> when <q> has an <article title> for <lang code>; nil else
 +
 +
for identifiers that do not have q, returns nil
 +
 +
for wikis that do not have mw.wikibase installed, returns nil
 +
 +
]]
 +
 +
local function wikidata_article_name_get (q)
	-- Look up the local-language article title for Wikidata item <q>.
	-- Returns ':<lang code>:<article title>' when Wikidata has a sitelink for this wiki;
	-- otherwise returns whatever falsy value the lookup produced (normally nil).

	-- bail out when there is no q number, or when there is one but mw.wikibase is not installed on this wiki
	if not is_set (q) or (q and not mw.wikibase) then
		return nil;
	end

	local wiki_code = cfg.this_wiki_code;										-- Wikipedia subdomain; 'en' for en.wikipedia.org
	local title = mw.wikibase.getSitelink (q, wiki_code .. 'wiki');				-- sitelink for '<code>wiki'; nil when Wikidata has no title for this wiki

	-- interwiki-style link without brackets; the leading colon is required
	return title and table.concat ({':', wiki_code, ':', title});
end
       
--[[--------------------------< L I N K _ L A B E L _ M A K E >------------------------------------------------
 
--[[--------------------------< L I N K _ L A B E L _ M A K E >------------------------------------------------
   −
common function to create identifier link label from handler table
+
common function to create identifier link label from handler table or from Wikidata
    
returns the first available of
 
returns the first available of
 
1. redirect from local wiki's handler table (if enabled)
 
1. redirect from local wiki's handler table (if enabled)
2. label specified in the local wiki's handler table
+
2. Wikidata (if there is a Wikidata entry for this identifier in the local wiki's language)
 +
3. label specified in the local wiki's handler table
 
 
 
]]
 
]]
    
local function link_label_make (handler)
	-- Choose the wikilink target for an identifier label.  First available of:
	--	1. handler.redirect, when cfg.use_identifier_redirects is enabled and a redirect is set
	--	2. the article title that wikidata_article_name_get() returns for handler.q
	--	3. handler.link

	-- redirect has priority; return before the Wikidata lookup because that lookup is expensive
	if cfg.use_identifier_redirects and is_set (handler.redirect) then
		return handler.redirect or handler.link;
	end

	-- no usable redirect: prefer the Wikidata title, fall back to the locally specified link
	return wikidata_article_name_get (handler.q) or handler.link;
end
   Line 47: Line 83:  
local ext_link;
 
local ext_link;
 
local this_wiki_code = cfg.this_wiki_code; -- Wikipedia subdomain; 'en' for en.wikipedia.org
 
local this_wiki_code = cfg.this_wiki_code; -- Wikipedia subdomain; 'en' for en.wikipedia.org
 
+
local wd_article; -- article title from Wikidata
 +
 
if options.encode == true or options.encode == nil then
 
if options.encode == true or options.encode == nil then
 
url_string = mw.uri.encode (url_string, 'PATH');
 
url_string = mw.uri.encode (url_string, 'PATH');
Line 62: Line 99:     
return table.concat ({
 
return table.concat ({
make_wikilink (link_label_make (options), options.label), -- redirect, or locally specified link (in that order)
+
make_wikilink (link_label_make (options), options.label), -- redirect, Wikidata link, or locally specified link (in that order)
 
options.separator or '&nbsp;',
 
options.separator or '&nbsp;',
 
ext_link
 
ext_link
Line 127: Line 164:       −
--[=[-------------------------< I S _ V A L I D _ B I O R X I V _ D A T E >------------------------------------
+
--[=[-------------------------< I S _ V A L I D _ R X I V _ D A T E >------------------------------------------
   −
returns true if:
+
for biorxiv, returns true if:
 
2019-12-11T00:00Z <= biorxiv_date < today + 2 days
 
2019-12-11T00:00Z <= biorxiv_date < today + 2 days
 +
for medrxiv, returns true if:
 +
2020-01-01T00:00Z <= medrxiv_date < today + 2 days
 
 
 
The dated form of biorxiv identifier has a start date of 2019-12-11.  The Unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400
 
The dated form of biorxiv identifier has a start date of 2019-12-11.  The Unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400
 +
The medrxiv identifier has a start date of 2020-01-01.  The Unix timestamp for that date is {{#time:U|2020-01-01}} = 1577836800
   −
biorxiv_date is the date provided in those |biorxiv= parameter values that are dated at time 00:00:00 UTC
+
<rxiv_date> is the date provided in those |biorxiv= parameter values that are dated and in |medrxiv= parameter values at time 00:00:00 UTC
today is the current date at time 00:00:00 UTC plus 48 hours
+
<today> is the current date at time 00:00:00 UTC plus 48 hours
if today is 2015-01-01T00:00:00 then
+
if today's date is 2023-01-01T00:00:00 then
adding 24 hours gives 2015-01-02T00:00:00 – one second more than today
+
adding 24 hours gives 2023-01-02T00:00:00 – one second more than today
adding 24 hours gives 2015-01-03T00:00:00 – one second more than tomorrow
+
adding 24 hours gives 2023-01-03T00:00:00 – one second more than tomorrow
   −
This function does not work if it is fed month names for languages other than English.  Wikimedia #time: parser
+
inputs:
apparently doesn't understand non-English date month names. This function will always return false when the date
+
<y>, <m>, <d> – year, month, day parts of the date from the biorxiv or medrxiv identifier
contains a non-English month name because good1 is false after the call to lang_object.formatDate().  To get
+
<select> 'b' for biorxiv, 'm' for medrxiv; defaults to 'b'
around that call this function with date parts and create a YYYY-MM-DD format date.
      
]=]
 
]=]
   −
local function is_valid_rxiv_date (y, m, d, select)
	-- Validate the date that is encoded in a bioRxiv or medRxiv identifier.
	--
	-- inputs:
	--	<y>, <m>, <d> – year, month, day parts of the date from the identifier (digit strings)
	--	<select> – 'm' selects the medRxiv lower limit; anything else (including nil) selects the bioRxiv limit
	--
	-- returns true when: limit <= identifier date < today + 2 days; false otherwise, including when
	-- the month or day is out of range or when either date cannot be converted to a Unix timestamp

	local month = tonumber (m);
	local day = tonumber (d);

	if not month or month < 1 or 12 < month then								-- <m> must be a number 1–12
		return false;
	end
	if not day or day < 1 or 31 < day then										-- <d> must be a number 1–31; TODO: account for month length and leap year?
		return false;
	end

	local rxiv_date = table.concat ({y, m, d}, '-');							-- make ymd date string
	local lang_object = mw.getContentLanguage();

	-- pcall() because formatDate() throws on a date string that it cannot parse
	local good1, rxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', rxiv_date);				-- convert rxiv_date value to Unix timestamp
	local good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days');	-- today midnight + 2 days is one second more than all day tomorrow

	-- NOTE(review): the original failure branch is not visible here; returning false follows the
	-- documented behaviour of this function when the timestamp conversion fails – confirm
	if not (good1 and good2) then
		return false;															-- one or both failed to convert to Unix timestamp
	end

	-- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand
	rxiv_ts = tonumber (rxiv_ts) or lang_object:parseFormattedNumber (rxiv_ts);
	tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);

	if not (rxiv_ts and tomorrow_ts) then
		return false;															-- neither conversion produced a number; comparing nil would raise an error
	end

	-- choose the appropriate limit timestamp: 2020-01-01T00:00Z for medrxiv, 2019-12-11T00:00Z for biorxiv
	local limit_ts = ('m' == select) and 1577836800 or 1576022400;

	return (limit_ts <= rxiv_ts) and (rxiv_ts < tomorrow_ts);					-- limit_ts <= rxiv_date < tomorrow's date
end
   Line 243: Line 291:  
 
 
return lccn;
 
return lccn;
end
+
end
      Line 330: Line 378:  
if is_set (class) then
 
if is_set (class) then
 
if id:match ('^%d+') then
 
if id:match ('^%d+') then
text = table.concat ({text, ' [[//arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink
+
text = table.concat ({text, ' [[https://arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink
 
else
 
else
 
set_message ('err_class_ignored');
 
set_message ('err_class_ignored');
Line 362: Line 410:  
local access = options.access;
 
local access = options.access;
 
local handler = options.handler;
 
local handler = options.handler;
 +
local ignore_invalid = options.accept;
 
local err_type;
 
local err_type;
 
local err_msg = '';
 
local err_msg = '';
Line 384: Line 433:  
if id:find('&%.') then
 
if id:find('&%.') then
 
err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does it's missing a letter)
 
err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does it's missing a letter)
 +
end
 +
if id:match ('.........%.tmp%.') then -- temporary bibcodes when positions 10–14 are '.tmp.'
 +
set_message ('maint_bibcode');
 
end
 
end
 
end
 
end
 
end
 
end
   −
if is_set (err_type) then -- if there was an error detected
+
if is_set (err_type) and not ignore_invalid then -- if there was an error detected and accept-as-written markup not used
 
set_message ('err_bad_bibcode', {err_type});
 
set_message ('err_bad_bibcode', {err_type});
 
options.coins_list_t['BIBCODE'] = nil; -- when error, unset so not included in COinS
 
options.coins_list_t['BIBCODE'] = nil; -- when error, unset so not included in COinS
   
end
 
end
   Line 419: Line 470:  
 
 
local patterns = {
 
local patterns = {
'^10.1101/%d%d%d%d%d%d$', -- simple 6-digit identifier (before 2019-12-11)
+
'^10%.1101/%d%d%d%d%d%d$', -- simple 6-digit identifier (before 2019-12-11)
'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11)
+
'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11)
'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11)
+
'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11)
 
}
 
}
 
 
Line 429: Line 480:     
if m then -- m is nil when id is the six-digit form
 
if m then -- m is nil when id is the six-digit form
if not is_valid_biorxiv_date (y, m, d) then -- validate the encoded date; TODO: don't ignore leap-year and actual month lengths ({{#time:}} is a poor date validator)
+
if not is_valid_rxiv_date (y, m, d, 'b') then -- validate the encoded date; 'b' for biorxiv limit
 
break; -- date fail; break out early so we don't unset the error message
 
break; -- date fail; break out early so we don't unset the error message
 
end
 
end
Line 503: Line 554:  
local handler = options.handler;
 
local handler = options.handler;
 
local err_flag;
 
local err_flag;
 +
 +
local function is_extended_free (registrant, suffix)
	-- Check those few registrants that are mixed, identifiable by the start (<incipit>) of the doi suffix.
	-- Returns true when <suffix> begins with one of the incipits listed for <registrant> in
	-- cfg.extended_registrants_t; returns nothing otherwise.

	local incipits_t = cfg.extended_registrants_t[registrant];

	if not incipits_t then														-- this registrant has no known free-to-read extensions
		return;
	end

	for _, incipit in ipairs (incipits_t) do									-- loop through the registrant's incipits
		local found = mw.ustring.find (suffix, '^' .. incipit);					-- <incipit> is used as a pattern anchored at the start of <suffix>
		if found then
			return true;
		end
	end
end
    
local text;
 
local text;
 
if is_set (inactive) then
 
if is_set (inactive) then
local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date
+
local inactive_year = inactive:match("%d%d%d%d"); -- try to get the year portion from the inactive date
 
local inactive_month, good;
 
local inactive_month, good;
   Line 517: Line 578:  
end
 
end
 
end
 
end
else
+
end -- otherwise, |doi-broken-date= has something but it isn't a date
inactive_year = nil; -- |doi-broken-date= has something but it isn't a date
  −
end
   
 
 
if is_set (inactive_year) and is_set (inactive_month) then
 
if is_set (inactive_year) and is_set (inactive_month) then
Line 531: Line 590:  
end
 
end
   −
local registrant = mw.ustring.match (id, '^10%.([^/]+)/[^%s–]-[^%.,]$'); -- registrant set when DOI has the proper basic form
+
local suffix;
 +
local registrant, suffix = mw.ustring.match (id, '^10%.([^/]+)/([^%s–]-[^%.,])$'); -- registrant and suffix set when DOI has the proper basic form
    
local registrant_err_patterns = { -- these patterns are for code ranges that are not supported  
 
local registrant_err_patterns = { -- these patterns are for code ranges that are not supported  
'^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999
+
'^[^1-3]%d%d%d%d%.%d+$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999
'^[^1-5]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 60000+); accepts: 10000–59999
+
'^[^1-7]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 60000+); accepts: 10000–69999
'^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); accepts: 1000–9999
+
'^[^1-9]%d%d%d%.%d+$', -- 4 digits with subcode (0xxx); accepts: 1000–9999
 
'^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999
 
'^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999
 
'^%d%d%d%d%d%d+', -- 6 or more digits
 
'^%d%d%d%d%d%d+', -- 6 or more digits
Line 562: Line 622:  
if err_flag then
 
if err_flag then
 
options.coins_list_t['DOI'] = nil; -- when error, unset so not included in COinS
 
options.coins_list_t['DOI'] = nil; -- when error, unset so not included in COinS
 +
else
 +
if not access and (cfg.known_free_doi_registrants_t[registrant] or is_extended_free (registrant, suffix)) then -- |doi-access=free not set and <registrant> is known to be free
 +
set_message ('maint_doi_unflagged_free'); -- set a maint cat
 +
end
 
end
 
end
 
 
Line 646: Line 710:  
]]
 
]]
   −
local function isbn (options)
+
local function isbn (options_t)
local isbn_str = options.id;
+
local isbn_str = options_t.id;
local ignore_invalid = options.accept;
+
local ignore_invalid = options_t.accept;
local handler = options.handler;
+
local handler = options_t.handler;
 +
local year = options_t.Year; -- when set, valid anchor_year; may have a disambiguator which must be removed
    
local function return_result (check, err_type) -- local function to handle the various returns
 
local function return_result (check, err_type) -- local function to handle the various returns
Line 658: Line 723:  
else -- here when not ignoring
 
else -- here when not ignoring
 
if not check then -- and there is an error
 
if not check then -- and there is an error
options.coins_list_t['ISBN'] = nil; -- when error, unset so not included in COinS
+
options_t.coins_list_t['ISBN'] = nil; -- when error, unset so not included in COinS
 
set_message ('err_bad_isbn', err_type); -- set an error message
 
set_message ('err_bad_isbn', err_type); -- set an error message
 
return ISBN; -- return id text
 
return ISBN; -- return id text
Line 664: Line 729:  
end
 
end
 
return ISBN; -- return id text
 
return ISBN; -- return id text
 +
end
 +
 +
if year and not ignore_invalid then --
 +
year = year:match ('%d%d%d%d?'); -- strip disambiguator if present
 +
if year and (1965 > tonumber(year)) then
 +
set_message ('err_invalid_isbn_date'); -- set an error message
 +
return internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect,
 +
prefix = handler.prefix, id = isbn_str, separator = handler.separator});
 +
end
 
end
 
end
   Line 975: Line 1,049:  
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
 
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
 
prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode});
 
prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode});
 +
end
 +
 +
 +
--[[--------------------------< M E D R X I V >-----------------------------------------------------------------
 +
 +
Format medRxiv ID and do simple error checking.  Similar to later bioRxiv IDs, medRxiv IDs are prefixed with a
 +
yyyy.mm.dd. date and suffixed with an optional version identifier.  Earliest date accepted is 2020.01.01
 +
 +
The medRxiv ID is a date followed by an eight-digit number followed by an optional version indicator 'v' and one or more digits:
 +
https://www.medrxiv.org/content/10.1101/2020.11.16.20232009v2 -> 10.1101/2020.11.16.20232009v2
 +
 +
]]
 +
 +
local function medrxiv (options)
	-- Format a medRxiv identifier and do simple error checking.
	--
	-- <options> fields read here: id (the identifier text), handler (this identifier's handler table),
	-- coins_list_t (COinS metadata; the 'MEDRXIV' entry is unset when the identifier is invalid).
	--
	-- returns the rendered identifier from external_link_id(); sets the 'err_bad_medrxiv' message when
	-- the identifier matches none of the accepted forms or when its encoded date fails validation

	local id = options.id;
	local handler = options.handler;
	local err_msg_flag = true;													-- flag; assume that there will be an error

	-- The dated forms are tried first so that their encoded dates are always validated.  The loose
	-- 8-digit form matches any id that ends with eight digits, so it would otherwise shadow the
	-- unversioned dated form and skip the date check.
	-- NOTE(review): the 8-digit pattern is not anchored to '^10%.1101/' as the bioRxiv simple form is – confirm whether that is intended
	local patterns = {
		'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d%d%dv%d+$',			-- y.m.d. date + 8-digit identifier + version (2020-01-01 and later)
		'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d%d%d$',				-- y.m.d. date + 8-digit identifier (2020-01-01 and later)
		'%d%d%d%d%d%d%d%d$',													-- simple 8-digit identifier; these should be relatively rare
		}

	for _, pattern in ipairs (patterns) do										-- spin through the patterns looking for a match
		local y, m, d = id:match (pattern);										-- <y> is the whole match and <m> is nil when the pattern has no captures

		if y then
			if m and not is_valid_rxiv_date (y, m, d, 'm') then					-- validate the encoded date; 'm' for medrxiv limit
				break;															-- date fail; break out early so we don't unset the error message
			end
			err_msg_flag = nil;													-- we found a match so unset the error message
			break;																-- and done
		end
	end																			-- <err_msg_flag> remains set here when no match

	if err_msg_flag then
		options.coins_list_t['MEDRXIV'] = nil;									-- when error, unset so not included in COinS
		set_message ('err_bad_medrxiv');										-- and set the error message
	end

	return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
		prefix = handler.prefix, id = id, separator = handler.separator,
		encode = handler.encode, access = handler.access});
end
 
end
   Line 1,036: Line 1,156:  
elseif id:match('^%d+$') then -- no prefix
 
elseif id:match('^%d+$') then -- no prefix
 
number = id; -- get the number
 
number = id; -- get the number
if 10 < number:len() then
+
if tonumber (id) > handler.id_limit then
number = nil; -- constrain to 1 to 10 digits; change this when OCLC issues 11-digit numbers
+
number = nil; -- unset when id value exceeds the limit
 
end
 
end
 
end
 
end
Line 1,498: Line 1,618:  
['JSTOR'] = jstor,
 
['JSTOR'] = jstor,
 
['LCCN'] = lccn,
 
['LCCN'] = lccn,
 +
['MEDRXIV'] = medrxiv,
 
['MR'] = mr,
 
['MR'] = mr,
 
['OCLC'] = oclc,
 
['OCLC'] = oclc,