Changes

no edit summary
Line 20: Line 20:     
--============================<< H E L P E R  F U N C T I O N S >>============================================
 
--============================<< H E L P E R  F U N C T I O N S >>============================================
  −
--[[--------------------------< W I K I D A T A _ A R T I C L E _ N A M E _ G E T >----------------------------
  −
  −
as an aid to internationalizing identifier-label wikilinks, gets identifier article names from Wikidata.
  −
  −
returns :<lang code>:<article title> when <q> has an <article title> for <lang code>; nil else
  −
  −
for identifiers that do not have q, returns nil
  −
  −
for wikis that do not have mw.wikibase installed, returns nil
  −
  −
]]
  −
  −
local function wikidata_article_name_get (q)
	-- Nothing to look up when there is no q number.
	if not is_set (q) then
		return nil;
	end

	-- A q number is of no use when mw.wikibase is not installed on this wiki.
	if q and not mw.wikibase then
		return nil;
	end

	local wiki_code = cfg.this_wiki_code;										-- Wikipedia subdomain; 'en' for en.wikipedia.org
	local title = mw.wikibase.getSitelink (q, wiki_code .. 'wiki');			-- article title from WD; nil when no title available at this wiki

	-- Interwiki-style link without brackets; the leading colon is required.
	-- When there is no title, the falsy lookup result is returned unchanged.
	return title and table.concat ({':', wiki_code, ':', title});
end
         
--[[--------------------------< L I N K _ L A B E L _ M A K E >------------------------------------------------
 
--[[--------------------------< L I N K _ L A B E L _ M A K E >------------------------------------------------
   −
common function to create identifier link label from handler table or from Wikidata
+
common function to create identifier link label from handler table
    
returns the first available of
 
returns the first available of
 
1. redirect from local wiki's handler table (if enabled)
 
1. redirect from local wiki's handler table (if enabled)
2. Wikidata (if there is a Wikidata entry for this identifier in the local wiki's language)
+
2. label specified in the local wiki's handler table
3. label specified in the local wiki's handler table
   
 
 
]]
 
]]
    
local function link_label_make (handler)
 
local function link_label_make (handler)
local wd_article;
+
return (cfg.use_identifier_redirects and is_set (handler.redirect) and handler.redirect) or handler.link;
  −
if not (cfg.use_identifier_redirects and is_set (handler.redirect)) then -- redirect has priority so if enabled and available don't fetch from Wikidata because expensive
  −
wd_article = wikidata_article_name_get (handler.q); -- if Wikidata has an article title for this wiki, get it;
  −
end
  −
  −
return (cfg.use_identifier_redirects and is_set (handler.redirect) and handler.redirect) or wd_article or handler.link;
   
end
 
end
   Line 83: Line 47:  
local ext_link;
 
local ext_link;
 
local this_wiki_code = cfg.this_wiki_code; -- Wikipedia subdomain; 'en' for en.wikipedia.org
 
local this_wiki_code = cfg.this_wiki_code; -- Wikipedia subdomain; 'en' for en.wikipedia.org
local wd_article; -- article title from Wikidata
+
 
   
if options.encode == true or options.encode == nil then
 
if options.encode == true or options.encode == nil then
 
url_string = mw.uri.encode (url_string, 'PATH');
 
url_string = mw.uri.encode (url_string, 'PATH');
Line 99: Line 62:     
return table.concat ({
 
return table.concat ({
make_wikilink (link_label_make (options), options.label), -- redirect, Wikidata link, or locally specified link (in that order)
+
make_wikilink (link_label_make (options), options.label), -- redirect, or locally specified link (in that order)
 
options.separator or '&nbsp;',
 
options.separator or '&nbsp;',
 
ext_link
 
ext_link
Line 164: Line 127:       −
--[=[-------------------------< I S _ V A L I D _ R X I V _ D A T E >------------------------------------------
+
--[=[-------------------------< I S _ V A L I D _ B I O R X I V _ D A T E >------------------------------------
   −
for biorxiv, returns true if:
+
returns true if:
 
2019-12-11T00:00Z <= biorxiv_date < today + 2 days
 
2019-12-11T00:00Z <= biorxiv_date < today + 2 days
for medrxiv, returns true if:
  −
2020-01-01T00:00Z <= medrxiv_date < today + 2 days
   
 
 
The dated form of biorxiv identifier has a start date of 2019-12-11.  The Unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400
 
The dated form of biorxiv identifier has a start date of 2019-12-11.  The Unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400
The medrxiv identifier has a start date of 2020-01-01.  The Unix timestamp for that date is {{#time:U|2020-01-01}} = 1577836800
     −
<rxiv_date> is the date provided in those |biorxiv= parameter values that are dated and in |medrxiv= parameter values at time 00:00:00 UTC
+
biorxiv_date is the date provided in those |biorxiv= parameter values that are dated at time 00:00:00 UTC
<today> is the current date at time 00:00:00 UTC plus 48 hours
+
today is the current date at time 00:00:00 UTC plus 48 hours
if today's date is 2023-01-01T00:00:00 then
+
if today is 2015-01-01T00:00:00 then
adding 24 hours gives 2023-01-02T00:00:00 – one second more than today
+
adding 24 hours gives 2015-01-02T00:00:00 – one second more than today
adding 24 hours gives 2023-01-03T00:00:00 – one second more than tomorrow
+
adding 24 hours gives 2015-01-03T00:00:00 – one second more than tomorrow
   −
inputs:
+
This function does not work if it is fed month names for languages other than English.  Wikimedia #time: parser
<y>, <m>, <d> – year, month, day parts of the date from the biorxiv or medrxiv identifier
+
apparently doesn't understand non-English date month names. This function will always return false when the date
<select> 'b' for biorxiv, 'm' for medrxiv; defaults to 'b'
+
contains a non-English month name because good1 is false after the call to lang_object.formatDate().  To get
 +
around that call this function with date parts and create a YYYY-MM-DD format date.
    
]=]
 
]=]
   −
local function is_valid_rxiv_date (y, m, d, select)
+
local function is_valid_biorxiv_date (y, m, d)
if 0 == tonumber (m) and 12 < tonumber (m) then -- <m> must be a number 1–12
+
local biorxiv_date = table.concat ({y, m, d}, '-'); -- make ymd date
return false;
  −
end
  −
if 0 == tonumber (d) and 31 < tonumber (d) then -- <d> must be a number 1–31; TODO: account for month length and leap yer?
  −
return false;
  −
end
  −
  −
local rxiv_date = table.concat ({y, m, d}, '-'); -- make ymd date string
   
local good1, good2;
 
local good1, good2;
local rxiv_ts, tomorrow_ts; -- to hold Unix timestamps representing the dates
+
local biorxiv_ts, tomorrow_ts; -- to hold Unix timestamps representing the dates
 
local lang_object = mw.getContentLanguage();
 
local lang_object = mw.getContentLanguage();
   −
good1, rxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', rxiv_date); -- convert rxiv_date value to Unix timestamp  
+
good1, biorxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', biorxiv_date); -- convert biorxiv_date value to Unix timestamp  
 
good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow
 
good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow
 
 
 
if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand
 
if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand
rxiv_ts = tonumber (rxiv_ts) or lang_object:parseFormattedNumber (rxiv_ts); -- convert to numbers for the comparison;
+
biorxiv_ts = tonumber (biorxiv_ts) or lang_object:parseFormattedNumber (biorxiv_ts); -- convert to numbers for the comparison;
 
tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);
 
tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);
 
else
 
else
Line 209: Line 163:  
end
 
end
   −
local limit_ts = ((select and ('m' == select)) and 1577836800) or 1576022400; -- choose the appropriate limit timesatmp
+
return ((1576022400 <= biorxiv_ts) and (biorxiv_ts < tomorrow_ts)) -- 2012-12-11T00:00Z <= biorxiv_date < tomorrow's date
 
  −
return ((limit_ts <= rxiv_ts) and (rxiv_ts < tomorrow_ts)) -- limit_ts <= rxiv_date < tomorrow's date
   
end
 
end
   Line 291: Line 243:  
 
 
return lccn;
 
return lccn;
end
+
end
      Line 378: Line 330:  
if is_set (class) then
 
if is_set (class) then
 
if id:match ('^%d+') then
 
if id:match ('^%d+') then
text = table.concat ({text, ' [[https://arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink
+
text = table.concat ({text, ' [[//arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink
 
else
 
else
 
set_message ('err_class_ignored');
 
set_message ('err_class_ignored');
Line 410: Line 362:  
local access = options.access;
 
local access = options.access;
 
local handler = options.handler;
 
local handler = options.handler;
local ignore_invalid = options.accept;
   
local err_type;
 
local err_type;
 
local err_msg = '';
 
local err_msg = '';
Line 433: Line 384:  
if id:find('&%.') then
 
if id:find('&%.') then
 
err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does it's missing a letter)
 
err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does it's missing a letter)
end
  −
if id:match ('.........%.tmp%.') then -- temporary bibcodes when positions 10–14 are '.tmp.'
  −
set_message ('maint_bibcode');
   
end
 
end
 
end
 
end
 
end
 
end
   −
if is_set (err_type) and not ignore_invalid then -- if there was an error detected and accept-as-written markup not used
+
if is_set (err_type) then -- if there was an error detected
 
set_message ('err_bad_bibcode', {err_type});
 
set_message ('err_bad_bibcode', {err_type});
 
options.coins_list_t['BIBCODE'] = nil; -- when error, unset so not included in COinS
 
options.coins_list_t['BIBCODE'] = nil; -- when error, unset so not included in COinS
 +
 
end
 
end
   Line 470: Line 419:  
 
 
local patterns = {
 
local patterns = {
'^10%.1101/%d%d%d%d%d%d$', -- simple 6-digit identifier (before 2019-12-11)
+
'^10.1101/%d%d%d%d%d%d$', -- simple 6-digit identifier (before 2019-12-11)
'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11)
+
'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11)
'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11)
+
'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11)
 
}
 
}
 
 
Line 480: Line 429:     
if m then -- m is nil when id is the six-digit form
 
if m then -- m is nil when id is the six-digit form
if not is_valid_rxiv_date (y, m, d, 'b') then -- validate the encoded date; 'b' for biorxiv limit
+
if not is_valid_biorxiv_date (y, m, d) then -- validate the encoded date; TODO: don't ignore leap-year and actual month lengths ({{#time:}} is a poor date validator)
 
break; -- date fail; break out early so we don't unset the error message
 
break; -- date fail; break out early so we don't unset the error message
 
end
 
end
Line 554: Line 503:  
local handler = options.handler;
 
local handler = options.handler;
 
local err_flag;
 
local err_flag;
  −
local function is_extended_free (registrant, suffix)							-- local helper for the few registrants that are mixed; identified by how the doi suffix begins (<incipit>)
	local incipits_t = cfg.extended_registrants_t[registrant];					-- this registrant's list of known free-to-read incipits; nil when it has none

	if not incipits_t then
		return;																	-- registrant has no known free-to-read extensions
	end

	for _, incipit in ipairs (incipits_t) do									-- try each incipit in turn
		if mw.ustring.find (suffix, '^' .. incipit) then						-- <incipit> is used as a pattern anchored at the start of <suffix>
			return true;
		end
	end
end																				-- no incipit matched: returns nothing
      
local text;
 
local text;
 
if is_set (inactive) then
 
if is_set (inactive) then
local inactive_year = inactive:match("%d%d%d%d"); -- try to get the year portion from the inactive date
+
local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date
 
local inactive_month, good;
 
local inactive_month, good;
   Line 578: Line 517:  
end
 
end
 
end
 
end
end -- otherwise, |doi-broken-date= has something but it isn't a date
+
else
 +
inactive_year = nil; -- |doi-broken-date= has something but it isn't a date
 +
end
 
 
 
if is_set (inactive_year) and is_set (inactive_month) then
 
if is_set (inactive_year) and is_set (inactive_month) then
Line 590: Line 531:  
end
 
end
   −
local suffix;
+
local registrant = mw.ustring.match (id, '^10%.([^/]+)/[^%s–]-[^%.,]$'); -- registrant set when DOI has the proper basic form
local registrant, suffix = mw.ustring.match (id, '^10%.([^/]+)/([^%s–]-[^%.,])$'); -- registrant and suffix set when DOI has the proper basic form
      
local registrant_err_patterns = { -- these patterns are for code ranges that are not supported  
 
local registrant_err_patterns = { -- these patterns are for code ranges that are not supported  
'^[^1-3]%d%d%d%d%.%d+$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999
+
'^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999
'^[^1-7]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 60000+); accepts: 10000–69999
+
'^[^1-5]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 60000+); accepts: 10000–59999
'^[^1-9]%d%d%d%.%d+$', -- 4 digits with subcode (0xxx); accepts: 1000–9999
+
'^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); accepts: 1000–9999
 
'^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999
 
'^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999
 
'^%d%d%d%d%d%d+', -- 6 or more digits
 
'^%d%d%d%d%d%d+', -- 6 or more digits
Line 622: Line 562:  
if err_flag then
 
if err_flag then
 
options.coins_list_t['DOI'] = nil; -- when error, unset so not included in COinS
 
options.coins_list_t['DOI'] = nil; -- when error, unset so not included in COinS
else
  −
if not access and (cfg.known_free_doi_registrants_t[registrant] or is_extended_free (registrant, suffix)) then -- |doi-access=free not set and <registrant> is known to be free
  −
set_message ('maint_doi_unflagged_free'); -- set a maint cat
  −
end
   
end
 
end
 
 
Line 710: Line 646:  
]]
 
]]
   −
local function isbn (options_t)
+
local function isbn (options)
local isbn_str = options_t.id;
+
local isbn_str = options.id;
local ignore_invalid = options_t.accept;
+
local ignore_invalid = options.accept;
local handler = options_t.handler;
+
local handler = options.handler;
local year = options_t.Year; -- when set, valid anchor_year; may have a disambiguator which must be removed
      
local function return_result (check, err_type) -- local function to handle the various returns
 
local function return_result (check, err_type) -- local function to handle the various returns
Line 723: Line 658:  
else -- here when not ignoring
 
else -- here when not ignoring
 
if not check then -- and there is an error
 
if not check then -- and there is an error
options_t.coins_list_t['ISBN'] = nil; -- when error, unset so not included in COinS
+
options.coins_list_t['ISBN'] = nil; -- when error, unset so not included in COinS
 
set_message ('err_bad_isbn', err_type); -- set an error message
 
set_message ('err_bad_isbn', err_type); -- set an error message
 
return ISBN; -- return id text
 
return ISBN; -- return id text
Line 729: Line 664:  
end
 
end
 
return ISBN; -- return id text
 
return ISBN; -- return id text
end
  −
  −
if year and not ignore_invalid then --
  −
year = year:match ('%d%d%d%d?'); -- strip disambiguator if present
  −
if year and (1965 > tonumber(year)) then
  −
set_message ('err_invalid_isbn_date'); -- set an error message
  −
return internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect,
  −
prefix = handler.prefix, id = isbn_str, separator = handler.separator});
  −
end
   
end
 
end
   Line 1,049: Line 975:  
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
 
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
 
prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode});
 
prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode});
end
  −
  −
  −
--[[--------------------------< M E D R X I V >-----------------------------------------------------------------
  −
  −
Format medRxiv ID and do simple error checking.  Similar to later bioRxiv IDs, medRxiv IDs are prefixed with a
  −
yyyy.mm.dd. date and suffixed with an optional version identifier.  Earliest date accepted is 2020.01.01
  −
  −
The medRxiv ID is a date followed by an eight-digit number followed by an optional version indicator 'v' and one or more digits:
  −
https://www.medrxiv.org/content/10.1101/2020.11.16.20232009v2 -> 10.1101/2020.11.16.20232009v2
  −
  −
]]
  −
  −
local function medrxiv (options)
	-- Validate a medRxiv identifier and render it as an external link.
	--   options.id            the identifier text to check
	--   options.handler       handler table supplying link, label, q, redirect, prefix, separator, encode, access
	--   options.coins_list_t  the 'MEDRXIV' entry is unset when the identifier is rejected
	-- Returns whatever external_link_id() returns; sets 'err_bad_medrxiv' when the identifier is rejected.
	local id = options.id;
	local handler = options.handler;
	local err_msg_flag = true;													-- flag; assume that there will be an error

	-- Order matters.  The bare 8-digit pattern is not anchored at the start, so it also matches the tail of an
	-- unversioned dated identifier; because it has no captures, the date would then never be validated.  The
	-- dated patterns are therefore tried first and the bare form is the fallback.
	local patterns = {
		'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d%d%dv%d+$',			-- y.m.d. date + 8-digit identifier + version (2020-01-01 and later)
		'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d%d%d$',				-- y.m.d. date + 8-digit identifier (2020-01-01 and later)
		'%d%d%d%d%d%d%d%d$',													-- simple 8-digit identifier; these should be relatively rare
		}

	for _, pattern in ipairs (patterns) do										-- spin through the patterns looking for a match
		local y, m, d = id:match (pattern);										-- <y> is the whole match (and <m>, <d> nil) for the capture-less 8-digit form

		if y then																-- found a match
			if m and not is_valid_rxiv_date (y, m, d, 'm') then					-- validate the encoded date; 'm' selects the medrxiv limit
				break;															-- date fail; break out early so we don't unset the error message
			end
			err_msg_flag = nil;													-- we found a match so unset the error message
			break;																-- and done
		end
	end																			-- <err_msg_flag> remains set here when no match

	if err_msg_flag then
		options.coins_list_t['MEDRXIV'] = nil;									-- when error, unset so not included in COinS
		set_message ('err_bad_medrxiv');										-- and set the error message
	end

	return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
		prefix = handler.prefix, id = id, separator = handler.separator,
		encode = handler.encode, access = handler.access});
end
 
end
   Line 1,156: Line 1,036:  
elseif id:match('^%d+$') then -- no prefix
 
elseif id:match('^%d+$') then -- no prefix
 
number = id; -- get the number
 
number = id; -- get the number
if tonumber (id) > handler.id_limit then
+
if 10 < number:len() then
number = nil; -- unset when id value exceeds the limit
+
number = nil; -- constrain to 1 to 10 digits; change this when OCLC issues 11-digit numbers
 
end
 
end
 
end
 
end
Line 1,618: Line 1,498:  
['JSTOR'] = jstor,
 
['JSTOR'] = jstor,
 
['LCCN'] = lccn,
 
['LCCN'] = lccn,
['MEDRXIV'] = medrxiv,
   
['MR'] = mr,
 
['MR'] = mr,
 
['OCLC'] = oclc,
 
['OCLC'] = oclc,