| Line 20: |
Line 20: |
| | | | |
| | --============================<< H E L P E R F U N C T I O N S >>============================================ | | --============================<< H E L P E R F U N C T I O N S >>============================================ |
| − |
| |
| − | --[[--------------------------< W I K I D A T A _ A R T I C L E _ N A M E _ G E T >----------------------------
| |
| − |
| |
| − | as an aid to internationalizing identifier-label wikilinks, gets identifier article names from Wikidata.
| |
| − |
| |
| − | returns :<lang code>:<article title> when <q> has an <article title> for <lang code>; nil else
| |
| − |
| |
| − | for identifiers that do not have q, returns nil
| |
| − |
| |
| − | for wikis that do not have mw.wikibase installed, returns nil
| |
| − |
| |
| − | ]]
| |
| − |
| |
| − | local function wikidata_article_name_get (q)
| |
| − | if not is_set (q) or (q and not mw.wikibase) then -- when no q number or when a q number but mw.wikibase not installed on this wiki
| |
| − | return nil; -- abandon
| |
| − | end
| |
| − |
| |
| − | local wd_article;
| |
| − | local this_wiki_code = cfg.this_wiki_code; -- Wikipedia subdomain; 'en' for en.wikipedia.org
| |
| − |
| |
| − | wd_article = mw.wikibase.getSitelink (q, this_wiki_code .. 'wiki'); -- fetch article title from WD; nil when no title available at this wiki
| |
| − |
| |
| − | if wd_article then
| |
| − | wd_article = table.concat ({':', this_wiki_code, ':', wd_article}); -- interwiki-style link without brackets if taken from WD; leading colon required
| |
| − | end
| |
| − |
| |
| − | return wd_article; -- article title from WD; nil else
| |
| − | end
| |
| | | | |
| | | | |
| | --[[--------------------------< L I N K _ L A B E L _ M A K E >------------------------------------------------ | | --[[--------------------------< L I N K _ L A B E L _ M A K E >------------------------------------------------ |
| | | | |
| − | common function to create identifier link label from handler table or from Wikidata | + | common function to create identifier link label from handler table |
| | | | |
| | returns the first available of | | returns the first available of |
| | 1. redirect from local wiki's handler table (if enabled) | | 1. redirect from local wiki's handler table (if enabled) |
| − | 2. Wikidata (if there is a Wikidata entry for this identifier in the local wiki's language) | + | 2. label specified in the local wiki's handler table |
| − | 3. label specified in the local wiki's handler table
| |
| | | | |
| | ]] | | ]] |
| | | | |
| | local function link_label_make (handler) | | local function link_label_make (handler) |
| − | local wd_article;
| + | return (cfg.use_identifier_redirects and is_set (handler.redirect) and handler.redirect) or handler.link; |
| − |
| |
| − | if not (cfg.use_identifier_redirects and is_set (handler.redirect)) then -- redirect has priority so if enabled and available don't fetch from Wikidata because expensive
| |
| − | wd_article = wikidata_article_name_get (handler.q); -- if Wikidata has an article title for this wiki, get it;
| |
| − | end
| |
| − |
| |
| − | return (cfg.use_identifier_redirects and is_set (handler.redirect) and handler.redirect) or wd_article or handler.link; | |
| | end | | end |
| | | | |
| Line 83: |
Line 47: |
| | local ext_link; | | local ext_link; |
| | local this_wiki_code = cfg.this_wiki_code; -- Wikipedia subdomain; 'en' for en.wikipedia.org | | local this_wiki_code = cfg.this_wiki_code; -- Wikipedia subdomain; 'en' for en.wikipedia.org |
| − | local wd_article; -- article title from Wikidata
| + | |
| − |
| |
| | if options.encode == true or options.encode == nil then | | if options.encode == true or options.encode == nil then |
| | url_string = mw.uri.encode (url_string, 'PATH'); | | url_string = mw.uri.encode (url_string, 'PATH'); |
| Line 99: |
Line 62: |
| | | | |
| | return table.concat ({ | | return table.concat ({ |
| − | make_wikilink (link_label_make (options), options.label), -- redirect, Wikidata link, or locally specified link (in that order) | + | make_wikilink (link_label_make (options), options.label), -- redirect, or locally specified link (in that order) |
| | options.separator or ' ', | | options.separator or ' ', |
| | ext_link | | ext_link |
| Line 164: |
Line 127: |
| | | | |
| | | | |
| − | --[=[-------------------------< I S _ V A L I D _ R X I V _ D A T E >------------------------------------------ | + | --[=[-------------------------< I S _ V A L I D _ B I O R X I V _ D A T E >------------------------------------ |
| | | | |
| − | for biorxiv, returns true if:
| + | returns true if: |
| | 2019-12-11T00:00Z <= biorxiv_date < today + 2 days | | 2019-12-11T00:00Z <= biorxiv_date < today + 2 days |
| − | for medrxiv, returns true if:
| |
| − | 2020-01-01T00:00Z <= medrxiv_date < today + 2 days
| |
| | | | |
| | The dated form of biorxiv identifier has a start date of 2019-12-11. The Unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400 | | The dated form of biorxiv identifier has a start date of 2019-12-11. The Unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400 |
| − | The medrxiv identifier has a start date of 2020-01-01. The Unix timestamp for that date is {{#time:U|2020-01-01}} = 1577836800
| |
| | | | |
| − | <rxiv_date> is the date provided in those |biorxiv= parameter values that are dated and in |medrxiv= parameter values at time 00:00:00 UTC
| + | biorxiv_date is the date provided in those |biorxiv= parameter values that are dated at time 00:00:00 UTC |
| − | <today> is the current date at time 00:00:00 UTC plus 48 hours
| + | today is the current date at time 00:00:00 UTC plus 48 hours |
| − | if today's date is 2023-01-01T00:00:00 then | + | if today is 2015-01-01T00:00:00 then |
| − | adding 24 hours gives 2023-01-02T00:00:00 – one second more than today | + | adding 24 hours gives 2015-01-02T00:00:00 – one second more than today |
| − | adding 48 hours gives 2023-01-03T00:00:00 – one second more than tomorrow | + | adding 48 hours gives 2015-01-03T00:00:00 – one second more than tomorrow |
| | | | |
| − | inputs:
| + | This function does not work if it is fed month names for languages other than English. Wikimedia #time: parser |
| − | <y>, <m>, <d> – year, month, day parts of the date from the biorxiv or medrxiv identifier
| + | apparently doesn't understand non-English date month names. This function will always return false when the date |
| − | <select> 'b' for biorxiv, 'm' for medrxiv; defaults to 'b'
| + | contains a non-English month name because good1 is false after the call to lang_object.formatDate(). To get |
| | + | around that call this function with date parts and create a YYYY-MM-DD format date. |
| | | | |
| | ]=] | | ]=] |
| | | | |
| − | local function is_valid_rxiv_date (y, m, d, select) | + | local function is_valid_biorxiv_date (y, m, d) |
| − | if 0 == tonumber (m) and 12 < tonumber (m) then -- <m> must be a number 1–12
| + | local biorxiv_date = table.concat ({y, m, d}, '-'); -- make ymd date |
| − | return false;
| |
| − | end
| |
| − | if 0 == tonumber (d) and 31 < tonumber (d) then -- <d> must be a number 1–31; TODO: account for month length and leap yer?
| |
| − | return false;
| |
| − | end
| |
| − |
| |
| − | local rxiv_date = table.concat ({y, m, d}, '-'); -- make ymd date string | |
| | local good1, good2; | | local good1, good2; |
| − | local rxiv_ts, tomorrow_ts; -- to hold Unix timestamps representing the dates | + | local biorxiv_ts, tomorrow_ts; -- to hold Unix timestamps representing the dates |
| | local lang_object = mw.getContentLanguage(); | | local lang_object = mw.getContentLanguage(); |
| | | | |
| − | good1, rxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', rxiv_date); -- convert rxiv_date value to Unix timestamp | + | good1, biorxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', biorxiv_date); -- convert biorxiv_date value to Unix timestamp |
| | good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow | | good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow |
| | | | |
| | if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand | | if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand |
| − | rxiv_ts = tonumber (rxiv_ts) or lang_object:parseFormattedNumber (rxiv_ts); -- convert to numbers for the comparison; | + | biorxiv_ts = tonumber (biorxiv_ts) or lang_object:parseFormattedNumber (biorxiv_ts); -- convert to numbers for the comparison; |
| | tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts); | | tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts); |
| | else | | else |
| Line 209: |
Line 163: |
| | end | | end |
| | | | |
| − | local limit_ts = ((select and ('m' == select)) and 1577836800) or 1576022400; -- choose the appropriate limit timesatmp | + | return ((1576022400 <= biorxiv_ts) and (biorxiv_ts < tomorrow_ts)) -- 2012-12-11T00:00Z <= biorxiv_date < tomorrow's date |
| − | | |
| − | return ((limit_ts <= rxiv_ts) and (rxiv_ts < tomorrow_ts)) -- limit_ts <= rxiv_date < tomorrow's date
| |
| | end | | end |
| | | | |
| Line 291: |
Line 243: |
| | | | |
| | return lccn; | | return lccn; |
| − | end | + | end |
| | | | |
| | | | |
| Line 378: |
Line 330: |
| | if is_set (class) then | | if is_set (class) then |
| | if id:match ('^%d+') then | | if id:match ('^%d+') then |
| − | text = table.concat ({text, ' [[https://arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink | + | text = table.concat ({text, ' [[//arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink |
| | else | | else |
| | set_message ('err_class_ignored'); | | set_message ('err_class_ignored'); |
| Line 410: |
Line 362: |
| | local access = options.access; | | local access = options.access; |
| | local handler = options.handler; | | local handler = options.handler; |
| − | local ignore_invalid = options.accept;
| |
| | local err_type; | | local err_type; |
| | local err_msg = ''; | | local err_msg = ''; |
| Line 433: |
Line 384: |
| | if id:find('&%.') then | | if id:find('&%.') then |
| | err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does it's missing a letter) | | err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does it's missing a letter) |
| − | end
| |
| − | if id:match ('.........%.tmp%.') then -- temporary bibcodes when positions 10–14 are '.tmp.'
| |
| − | set_message ('maint_bibcode');
| |
| | end | | end |
| | end | | end |
| | end | | end |
| | | | |
| − | if is_set (err_type) and not ignore_invalid then -- if there was an error detected and accept-as-written markup not used | + | if is_set (err_type) then -- if there was an error detected |
| | set_message ('err_bad_bibcode', {err_type}); | | set_message ('err_bad_bibcode', {err_type}); |
| | options.coins_list_t['BIBCODE'] = nil; -- when error, unset so not included in COinS | | options.coins_list_t['BIBCODE'] = nil; -- when error, unset so not included in COinS |
| | + | |
| | end | | end |
| | | | |
| Line 470: |
Line 419: |
| | | | |
| | local patterns = { | | local patterns = { |
| − | '^10%.1101/%d%d%d%d%d%d$', -- simple 6-digit identifier (before 2019-12-11) | + | '^10.1101/%d%d%d%d%d%d$', -- simple 6-digit identifier (before 2019-12-11) |
| − | '^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11) | + | '^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11) |
| − | '^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11) | + | '^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11) |
| | } | | } |
| | | | |
| Line 480: |
Line 429: |
| | | | |
| | if m then -- m is nil when id is the six-digit form | | if m then -- m is nil when id is the six-digit form |
| − | if not is_valid_rxiv_date (y, m, d, 'b') then -- validate the encoded date; 'b' for biorxiv limit | + | if not is_valid_biorxiv_date (y, m, d) then -- validate the encoded date; TODO: don't ignore leap-year and actual month lengths ({{#time:}} is a poor date validator) |
| | break; -- date fail; break out early so we don't unset the error message | | break; -- date fail; break out early so we don't unset the error message |
| | end | | end |
| Line 554: |
Line 503: |
| | local handler = options.handler; | | local handler = options.handler; |
| | local err_flag; | | local err_flag; |
| − |
| |
| − | local function is_extended_free (registrant, suffix) -- local function to check those few registrants that are mixed; identifiable by the doi suffix <incipit>
| |
| − | if cfg.extended_registrants_t[registrant] then -- if this registrant has known free-to-read extentions
| |
| − | for _, incipit in ipairs (cfg.extended_registrants_t[registrant]) do -- loop through the registrant's incipits
| |
| − | if mw.ustring.find (suffix, '^' .. incipit) then -- if found
| |
| − | return true;
| |
| − | end
| |
| − | end
| |
| − | end
| |
| − | end
| |
| | | | |
| | local text; | | local text; |
| | if is_set (inactive) then | | if is_set (inactive) then |
| − | local inactive_year = inactive:match("%d%d%d%d"); -- try to get the year portion from the inactive date | + | local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date |
| | local inactive_month, good; | | local inactive_month, good; |
| | | | |
| Line 578: |
Line 517: |
| | end | | end |
| | end | | end |
| − | end -- otherwise, |doi-broken-date= has something but it isn't a date | + | else |
| | + | inactive_year = nil; -- |doi-broken-date= has something but it isn't a date |
| | + | end |
| | | | |
| | if is_set (inactive_year) and is_set (inactive_month) then | | if is_set (inactive_year) and is_set (inactive_month) then |
| Line 590: |
Line 531: |
| | end | | end |
| | | | |
| − | local suffix;
| + | local registrant = mw.ustring.match (id, '^10%.([^/]+)/[^%s–]-[^%.,]$'); -- registrant set when DOI has the proper basic form |
| − | local registrant, suffix = mw.ustring.match (id, '^10%.([^/]+)/([^%s–]-[^%.,])$'); -- registrant and suffix set when DOI has the proper basic form | |
| | | | |
| | local registrant_err_patterns = { -- these patterns are for code ranges that are not supported | | local registrant_err_patterns = { -- these patterns are for code ranges that are not supported |
| − | '^[^1-3]%d%d%d%d%.%d+$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999 | + | '^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999 |
| − | '^[^1-7]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 60000+); accepts: 10000–69999 | + | '^[^1-5]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 60000+); accepts: 10000–59999 |
| − | '^[^1-9]%d%d%d%.%d+$', -- 4 digits with subcode (0xxx); accepts: 1000–9999 | + | '^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); accepts: 1000–9999 |
| | '^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999 | | '^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999 |
| | '^%d%d%d%d%d%d+', -- 6 or more digits | | '^%d%d%d%d%d%d+', -- 6 or more digits |
| Line 622: |
Line 562: |
| | if err_flag then | | if err_flag then |
| | options.coins_list_t['DOI'] = nil; -- when error, unset so not included in COinS | | options.coins_list_t['DOI'] = nil; -- when error, unset so not included in COinS |
| − | else
| |
| − | if not access and (cfg.known_free_doi_registrants_t[registrant] or is_extended_free (registrant, suffix)) then -- |doi-access=free not set and <registrant> is known to be free
| |
| − | set_message ('maint_doi_unflagged_free'); -- set a maint cat
| |
| − | end
| |
| | end | | end |
| | | | |
| Line 710: |
Line 646: |
| | ]] | | ]] |
| | | | |
| − | local function isbn (options_t) | + | local function isbn (options) |
| − | local isbn_str = options_t.id; | + | local isbn_str = options.id; |
| − | local ignore_invalid = options_t.accept; | + | local ignore_invalid = options.accept; |
| − | local handler = options_t.handler; | + | local handler = options.handler; |
| − | local year = options_t.Year; -- when set, valid anchor_year; may have a disambiguator which must be removed
| |
| | | | |
| | local function return_result (check, err_type) -- local function to handle the various returns | | local function return_result (check, err_type) -- local function to handle the various returns |
| Line 723: |
Line 658: |
| | else -- here when not ignoring | | else -- here when not ignoring |
| | if not check then -- and there is an error | | if not check then -- and there is an error |
| − | options_t.coins_list_t['ISBN'] = nil; -- when error, unset so not included in COinS | + | options.coins_list_t['ISBN'] = nil; -- when error, unset so not included in COinS |
| | set_message ('err_bad_isbn', err_type); -- set an error message | | set_message ('err_bad_isbn', err_type); -- set an error message |
| | return ISBN; -- return id text | | return ISBN; -- return id text |
| Line 729: |
Line 664: |
| | end | | end |
| | return ISBN; -- return id text | | return ISBN; -- return id text |
| − | end
| |
| − |
| |
| − | if year and not ignore_invalid then --
| |
| − | year = year:match ('%d%d%d%d?'); -- strip disambiguator if present
| |
| − | if year and (1965 > tonumber(year)) then
| |
| − | set_message ('err_invalid_isbn_date'); -- set an error message
| |
| − | return internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect,
| |
| − | prefix = handler.prefix, id = isbn_str, separator = handler.separator});
| |
| − | end
| |
| | end | | end |
| | | | |
| Line 1,049: |
Line 975: |
| | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, |
| | prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode}); | | prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode}); |
| − | end
| |
| − |
| |
| − |
| |
| − | --[[--------------------------< M E D R X I V >-----------------------------------------------------------------
| |
| − |
| |
| − | Format medRxiv ID and do simple error checking. Similar to later bioRxiv IDs, medRxiv IDs are prefixed with a
| |
| − | yyyy.mm.dd. date and suffixed with an optional version identifier. Earliest date accepted is 2020.01.01
| |
| − |
| |
| − | The medRxiv ID is a date followed by an eight-digit number followed by an optional version indicator 'v' and one or more digits:
| |
| − | https://www.medrxiv.org/content/10.1101/2020.11.16.20232009v2 -> 10.1101/2020.11.16.20232009v2
| |
| − |
| |
| − | ]]
| |
| − |
| |
| − | local function medrxiv (options)
| |
| − | local id = options.id;
| |
| − | local handler = options.handler;
| |
| − | local err_msg_flag = true; -- flag; assume that there will be an error
| |
| − |
| |
| − | local patterns = {
| |
| − | '%d%d%d%d%d%d%d%d$', -- simple 8-digit identifier; these should be relatively rare
| |
| − | '^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d%d%dv%d+$', -- y.m.d. date + 8-digit identifier + version (2020-01-01 and later)
| |
| − | '^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d%d%d$', -- y.m.d. date + 8-digit identifier (2020-01-01 and later)
| |
| − | }
| |
| − |
| |
| − | for _, pattern in ipairs (patterns) do -- spin through the patterns looking for a match
| |
| − | if id:match (pattern) then
| |
| − | local y, m, d = id:match (pattern); -- found a match, attempt to get year, month and date from the identifier
| |
| − |
| |
| − | if m then -- m is nil when id is the 8-digit form
| |
| − | if not is_valid_rxiv_date (y, m, d, 'b') then -- validate the encoded date; 'b' for medrxiv limit
| |
| − | break; -- date fail; break out early so we don't unset the error message
| |
| − | end
| |
| − | end
| |
| − | err_msg_flag = nil; -- we found a match so unset the error message
| |
| − | break; -- and done
| |
| − | end
| |
| − | end -- <err_msg_flag> remains set here when no match
| |
| − |
| |
| − | if err_msg_flag then
| |
| − | options.coins_list_t['MEDRXIV'] = nil; -- when error, unset so not included in COinS
| |
| − | set_message ('err_bad_medrxiv'); -- and set the error message
| |
| − | end
| |
| − |
| |
| − | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
| |
| − | prefix = handler.prefix, id = id, separator = handler.separator,
| |
| − | encode = handler.encode, access = handler.access});
| |
| | end | | end |
| | | | |
| Line 1,156: |
Line 1,036: |
| | elseif id:match('^%d+$') then -- no prefix | | elseif id:match('^%d+$') then -- no prefix |
| | number = id; -- get the number | | number = id; -- get the number |
| − | if tonumber (id) > handler.id_limit then | + | if 10 < number:len() then |
| − | number = nil; -- unset when id value exceeds the limit | + | number = nil; -- constrain to 1 to 10 digits; change this when OCLC issues 11-digit numbers |
| | end | | end |
| | end | | end |
| Line 1,618: |
Line 1,498: |
| | ['JSTOR'] = jstor, | | ['JSTOR'] = jstor, |
| | ['LCCN'] = lccn, | | ['LCCN'] = lccn, |
| − | ['MEDRXIV'] = medrxiv,
| |
| | ['MR'] = mr, | | ['MR'] = mr, |
| | ['OCLC'] = oclc, | | ['OCLC'] = oclc, |