| Line 20: |
Line 20: |
| | | | |
| | --============================<< H E L P E R F U N C T I O N S >>============================================ | | --============================<< H E L P E R F U N C T I O N S >>============================================ |
| | + | |
| | + | --[[--------------------------< W I K I D A T A _ A R T I C L E _ N A M E _ G E T >---------------------------- |
| | + | |
| | + | as an aid to internationalizing identifier-label wikilinks, gets identifier article names from Wikidata. |
| | + | |
| | + | returns :<lang code>:<article title> when <q> has an <article title> for <lang code>; nil else |
| | + | |
| | + | for identifiers that do not have q, returns nil |
| | + | |
| | + | for wikis that do not have mw.wikibase installed, returns nil |
| | + | |
| | + | ]] |
| | + | |
local function wikidata_article_name_get (q)
	-- Builds an interwiki-style link target (':<lang code>:<article title>') for the
	-- Wikidata item <q>, using the sitelink recorded for this wiki.
	--
	-- Returns nil when <q> is not set or when mw.wikibase is not available on this
	-- wiki; otherwise returns whatever the sitelink lookup yields (nil when Wikidata
	-- has no title for this wiki), with the colon-delimited prefix applied to a title.

	if not is_set (q) then														-- identifier has no q number
		return nil;
	end
	if q and not mw.wikibase then												-- q number present but no Wikibase client on this wiki
		return nil;
	end

	local wiki_code = cfg.this_wiki_code;										-- Wikipedia subdomain; 'en' for en.wikipedia.org
	local title = mw.wikibase.getSitelink (q, wiki_code .. 'wiki');				-- article title from Wikidata for this wiki

	-- leading colon is required so the result is an interwiki-style link (no brackets here)
	return title and (':' .. wiki_code .. ':' .. title);
end
| | | | |
| | | | |
| | --[[--------------------------< L I N K _ L A B E L _ M A K E >------------------------------------------------ | | --[[--------------------------< L I N K _ L A B E L _ M A K E >------------------------------------------------ |
| | | | |
| − | common function to create identifier link label from handler table | + | common function to create identifier link label from handler table or from Wikidata |
| | | | |
| | returns the first available of | | returns the first available of |
| | 1. redirect from local wiki's handler table (if enabled) | | 1. redirect from local wiki's handler table (if enabled) |
| − | 2. label specified in the local wiki's handler table | + | 2. Wikidata (if there is a Wikidata entry for this identifier in the local wiki's language) |
| | + | 3. label specified in the local wiki's handler table |
| | | | |
| | ]] | | ]] |
| | | | |
local function link_label_make (handler)
	-- Chooses the wikilink target for an identifier label. Order of preference:
	--   1. handler.redirect, when redirects are enabled in cfg and a redirect is set
	--   2. the article name fetched from Wikidata for handler.q
	--   3. handler.link
	local redirect_wins = cfg.use_identifier_redirects and is_set (handler.redirect);

	if redirect_wins then														-- redirect has priority; skip the expensive Wikidata fetch
		return handler.redirect or handler.link;
	end

	return wikidata_article_name_get (handler.q) or handler.link;				-- Wikidata title for this wiki if there is one; local link else
end
| | | | |
| Line 47: |
Line 83: |
| | local ext_link; | | local ext_link; |
| | local this_wiki_code = cfg.this_wiki_code; -- Wikipedia subdomain; 'en' for en.wikipedia.org | | local this_wiki_code = cfg.this_wiki_code; -- Wikipedia subdomain; 'en' for en.wikipedia.org |
| − | | + | local wd_article; -- article title from Wikidata |
| | + | |
| | if options.encode == true or options.encode == nil then | | if options.encode == true or options.encode == nil then |
| | url_string = mw.uri.encode (url_string, 'PATH'); | | url_string = mw.uri.encode (url_string, 'PATH'); |
| Line 62: |
Line 99: |
| | | | |
| | return table.concat ({ | | return table.concat ({ |
| − | make_wikilink (link_label_make (options), options.label), -- redirect, or locally specified link (in that order) | + | make_wikilink (link_label_make (options), options.label), -- redirect, Wikidata link, or locally specified link (in that order) |
| | options.separator or ' ', | | options.separator or ' ', |
| | ext_link | | ext_link |
| Line 127: |
Line 164: |
| | | | |
| | | | |
| − | --[=[-------------------------< I S _ V A L I D _ B I O R X I V _ D A T E >------------------------------------ | + | --[=[-------------------------< I S _ V A L I D _ R X I V _ D A T E >------------------------------------------ |
| | | | |
| − | returns true if: | + | for biorxiv, returns true if: |
| | 2019-12-11T00:00Z <= biorxiv_date < today + 2 days | | 2019-12-11T00:00Z <= biorxiv_date < today + 2 days |
| | + | for medrxiv, returns true if: |
| | + | 2020-01-01T00:00Z <= medrxiv_date < today + 2 days |
| | | | |
| | The dated form of biorxiv identifier has a start date of 2019-12-11. The Unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400 | | The dated form of biorxiv identifier has a start date of 2019-12-11. The Unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400 |
| | + | The medrxiv identifier has a start date of 2020-01-01. The Unix timestamp for that date is {{#time:U|2020-01-01}} = 1577836800 |
| | | | |
| − | biorxiv_date is the date provided in those |biorxiv= parameter values that are dated at time 00:00:00 UTC
| + | <rxiv_date> is the date provided in those |biorxiv= parameter values that are dated and in |medrxiv= parameter values at time 00:00:00 UTC |
| − | today is the current date at time 00:00:00 UTC plus 48 hours | + | <today> is the current date at time 00:00:00 UTC plus 48 hours |
| − | if today is 2015-01-01T00:00:00 then | + | if today's date is 2023-01-01T00:00:00 then |
| − | adding 24 hours gives 2015-01-02T00:00:00 – one second more than today | + | adding 24 hours gives 2023-01-02T00:00:00 – one second more than today |
| − | adding 24 hours gives 2015-01-03T00:00:00 – one second more than tomorrow | + | adding 24 hours gives 2023-01-03T00:00:00 – one second more than tomorrow |
| | | | |
| − | This function does not work if it is fed month names for languages other than English. Wikimedia #time: parser
| + | inputs: |
| − | apparently doesn't understand non-English date month names. This function will always return false when the date
| + | <y>, <m>, <d> – year, month, day parts of the date from the biorxiv or medrxiv identifier |
| − | contains a non-English month name because good1 is false after the call to lang_object.formatDate(). To get
| + | <select> 'b' for biorxiv, 'm' for medrxiv; defaults to 'b' |
| − | around that call this function with date parts and create a YYYY-MM-DD format date.
| |
| | | | |
| | ]=] | | ]=] |
| | | | |
| − | local function is_valid_biorxiv_date (y, m, d) | + | local function is_valid_rxiv_date (y, m, d, select) |
| − | local biorxiv_date = table.concat ({y, m, d}, '-'); -- make ymd date | + | if 0 == tonumber (m) and 12 < tonumber (m) then -- <m> must be a number 1–12 |
| | + | return false; |
| | + | end |
| | + | if 0 == tonumber (d) and 31 < tonumber (d) then -- <d> must be a number 1–31; TODO: account for month length and leap yer? |
| | + | return false; |
| | + | end |
| | + | |
| | + | local rxiv_date = table.concat ({y, m, d}, '-'); -- make ymd date string |
| | local good1, good2; | | local good1, good2; |
| − | local biorxiv_ts, tomorrow_ts; -- to hold Unix timestamps representing the dates | + | local rxiv_ts, tomorrow_ts; -- to hold Unix timestamps representing the dates |
| | local lang_object = mw.getContentLanguage(); | | local lang_object = mw.getContentLanguage(); |
| | | | |
| − | good1, biorxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', biorxiv_date); -- convert biorxiv_date value to Unix timestamp | + | good1, rxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', rxiv_date); -- convert rxiv_date value to Unix timestamp |
| | good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow | | good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow |
| | | | |
| | if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand | | if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand |
| − | biorxiv_ts = tonumber (biorxiv_ts) or lang_object:parseFormattedNumber (biorxiv_ts); -- convert to numbers for the comparison; | + | rxiv_ts = tonumber (rxiv_ts) or lang_object:parseFormattedNumber (rxiv_ts); -- convert to numbers for the comparison; |
| | tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts); | | tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts); |
| | else | | else |
| Line 163: |
Line 209: |
| | end | | end |
| | | | |
| − | return ((1576022400 <= biorxiv_ts) and (biorxiv_ts < tomorrow_ts)) -- 2012-12-11T00:00Z <= biorxiv_date < tomorrow's date | + | local limit_ts = ((select and ('m' == select)) and 1577836800) or 1576022400; -- choose the appropriate limit timesatmp |
| | + | |
| | + | return ((limit_ts <= rxiv_ts) and (rxiv_ts < tomorrow_ts)) -- limit_ts <= rxiv_date < tomorrow's date |
| | end | | end |
| | | | |
| Line 243: |
Line 291: |
| | | | |
| | return lccn; | | return lccn; |
| − | end
| + | end |
| | | | |
| | | | |
| Line 330: |
Line 378: |
| | if is_set (class) then | | if is_set (class) then |
| | if id:match ('^%d+') then | | if id:match ('^%d+') then |
| − | text = table.concat ({text, ' [[//arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink | + | text = table.concat ({text, ' [[https://arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink |
| | else | | else |
| | set_message ('err_class_ignored'); | | set_message ('err_class_ignored'); |
| Line 362: |
Line 410: |
| | local access = options.access; | | local access = options.access; |
| | local handler = options.handler; | | local handler = options.handler; |
| | + | local ignore_invalid = options.accept; |
| | local err_type; | | local err_type; |
| | local err_msg = ''; | | local err_msg = ''; |
| Line 384: |
Line 433: |
| | if id:find('&%.') then | | if id:find('&%.') then |
| | err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does it's missing a letter) | | err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does it's missing a letter) |
| | + | end |
| | + | if id:match ('.........%.tmp%.') then -- temporary bibcodes when positions 10–14 are '.tmp.' |
| | + | set_message ('maint_bibcode'); |
| | end | | end |
| | end | | end |
| | end | | end |
| | | | |
| − | if is_set (err_type) then -- if there was an error detected | + | if is_set (err_type) and not ignore_invalid then -- if there was an error detected and accept-as-written markup not used |
| | set_message ('err_bad_bibcode', {err_type}); | | set_message ('err_bad_bibcode', {err_type}); |
| | options.coins_list_t['BIBCODE'] = nil; -- when error, unset so not included in COinS | | options.coins_list_t['BIBCODE'] = nil; -- when error, unset so not included in COinS |
| − |
| |
| | end | | end |
| | | | |
| Line 419: |
Line 470: |
| | | | |
| | local patterns = { | | local patterns = { |
| − | '^10.1101/%d%d%d%d%d%d$', -- simple 6-digit identifier (before 2019-12-11) | + | '^10%.1101/%d%d%d%d%d%d$', -- simple 6-digit identifier (before 2019-12-11) |
| − | '^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11) | + | '^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11) |
| − | '^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11) | + | '^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11) |
| | } | | } |
| | | | |
| Line 429: |
Line 480: |
| | | | |
| | if m then -- m is nil when id is the six-digit form | | if m then -- m is nil when id is the six-digit form |
| − | if not is_valid_biorxiv_date (y, m, d) then -- validate the encoded date; TODO: don't ignore leap-year and actual month lengths ({{#time:}} is a poor date validator) | + | if not is_valid_rxiv_date (y, m, d, 'b') then -- validate the encoded date; 'b' for biorxiv limit |
| | break; -- date fail; break out early so we don't unset the error message | | break; -- date fail; break out early so we don't unset the error message |
| | end | | end |
| Line 503: |
Line 554: |
| | local handler = options.handler; | | local handler = options.handler; |
| | local err_flag; | | local err_flag; |
| | + | |
local function is_extended_free (registrant, suffix)
	-- Checks the few registrants that are mixed (only some of their DOIs are free to
	-- read); the free ones are identified by how the DOI suffix begins (<incipit>).
	-- Returns true when <suffix> starts with one of the incipits listed for
	-- <registrant> in cfg.extended_registrants_t; returns nothing otherwise.
	local incipits_t = cfg.extended_registrants_t[registrant];					-- this registrant's known free-to-read extensions, if any

	if not incipits_t then
		return;																	-- registrant is not listed
	end

	for _, incipit in ipairs (incipits_t) do									-- loop through the registrant's incipits
		if mw.ustring.find (suffix, '^' .. incipit) then						-- suffix begins with this incipit
			return true;
		end
	end
end
| | | | |
| | local text; | | local text; |
| | if is_set (inactive) then | | if is_set (inactive) then |
| − | local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date | + | local inactive_year = inactive:match("%d%d%d%d"); -- try to get the year portion from the inactive date |
| | local inactive_month, good; | | local inactive_month, good; |
| | | | |
| Line 517: |
Line 578: |
| | end | | end |
| | end | | end |
| − | else | + | end -- otherwise, |doi-broken-date= has something but it isn't a date |
| − | inactive_year = nil; -- |doi-broken-date= has something but it isn't a date
| |
| − | end
| |
| | | | |
| | if is_set (inactive_year) and is_set (inactive_month) then | | if is_set (inactive_year) and is_set (inactive_month) then |
| Line 531: |
Line 590: |
| | end | | end |
| | | | |
| − | local registrant = mw.ustring.match (id, '^10%.([^/]+)/[^%s–]-[^%.,]$'); -- registrant set when DOI has the proper basic form | + | local suffix; |
| | + | local registrant, suffix = mw.ustring.match (id, '^10%.([^/]+)/([^%s–]-[^%.,])$'); -- registrant and suffix set when DOI has the proper basic form |
| | | | |
| | local registrant_err_patterns = { -- these patterns are for code ranges that are not supported | | local registrant_err_patterns = { -- these patterns are for code ranges that are not supported |
| − | '^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999 | + | '^[^1-3]%d%d%d%d%.%d+$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999 |
| − | '^[^1-5]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 60000+); accepts: 10000–59999 | + | '^[^1-7]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 60000+); accepts: 10000–69999 |
| − | '^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); accepts: 1000–9999 | + | '^[^1-9]%d%d%d%.%d+$', -- 4 digits with subcode (0xxx); accepts: 1000–9999 |
| | '^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999 | | '^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999 |
| | '^%d%d%d%d%d%d+', -- 6 or more digits | | '^%d%d%d%d%d%d+', -- 6 or more digits |
| Line 562: |
Line 622: |
| | if err_flag then | | if err_flag then |
| | options.coins_list_t['DOI'] = nil; -- when error, unset so not included in COinS | | options.coins_list_t['DOI'] = nil; -- when error, unset so not included in COinS |
| | + | else |
| | + | if not access and (cfg.known_free_doi_registrants_t[registrant] or is_extended_free (registrant, suffix)) then -- |doi-access=free not set and <registrant> is known to be free |
| | + | set_message ('maint_doi_unflagged_free'); -- set a maint cat |
| | + | end |
| | end | | end |
| | | | |
| Line 646: |
Line 710: |
| | ]] | | ]] |
| | | | |
| − | local function isbn (options) | + | local function isbn (options_t) |
| − | local isbn_str = options.id; | + | local isbn_str = options_t.id; |
| − | local ignore_invalid = options.accept; | + | local ignore_invalid = options_t.accept; |
| − | local handler = options.handler; | + | local handler = options_t.handler; |
| | + | local year = options_t.Year; -- when set, valid anchor_year; may have a disambiguator which must be removed |
| | | | |
| | local function return_result (check, err_type) -- local function to handle the various returns | | local function return_result (check, err_type) -- local function to handle the various returns |
| Line 658: |
Line 723: |
| | else -- here when not ignoring | | else -- here when not ignoring |
| | if not check then -- and there is an error | | if not check then -- and there is an error |
| − | options.coins_list_t['ISBN'] = nil; -- when error, unset so not included in COinS | + | options_t.coins_list_t['ISBN'] = nil; -- when error, unset so not included in COinS |
| | set_message ('err_bad_isbn', err_type); -- set an error message | | set_message ('err_bad_isbn', err_type); -- set an error message |
| | return ISBN; -- return id text | | return ISBN; -- return id text |
| Line 664: |
Line 729: |
| | end | | end |
| | return ISBN; -- return id text | | return ISBN; -- return id text |
| | + | end |
| | + | |
| | + | if year and not ignore_invalid then -- |
| | + | year = year:match ('%d%d%d%d?'); -- strip disambiguator if present |
| | + | if year and (1965 > tonumber(year)) then |
| | + | set_message ('err_invalid_isbn_date'); -- set an error message |
| | + | return internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect, |
| | + | prefix = handler.prefix, id = isbn_str, separator = handler.separator}); |
| | + | end |
| | end | | end |
| | | | |
| Line 975: |
Line 1,049: |
| | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, |
| | prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode}); | | prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode}); |
| | + | end |
| | + | |
| | + | |
| | + | --[[--------------------------< M E D R X I V >----------------------------------------------------------------- |
| | + | |
| | + | Format medRxiv ID and do simple error checking. Similar to later bioRxiv IDs, medRxiv IDs are prefixed with a |
| | + | yyyy.mm.dd. date and suffixed with an optional version identifier. Earliest date accepted is 2020.01.01 |
| | + | |
| | + | The medRxiv ID is a date followed by an eight-digit number followed by an optional version indicator 'v' and one or more digits: |
| | + | https://www.medrxiv.org/content/10.1101/2020.11.16.20232009v2 -> 10.1101/2020.11.16.20232009v2 |
| | + | |
| | + | ]] |
| | + | |
local function medrxiv (options)
	-- Formats a medRxiv identifier and does simple error checking.
	--
	-- <options> fields read here:
	--   id           – the identifier text, expected in the form 10.1101/...
	--   handler      – identifier handler table (link, label, q, redirect, prefix,
	--                  separator, encode, access are passed on to external_link_id())
	--   coins_list_t – COinS metadata table; the 'MEDRXIV' entry is unset on error
	--
	-- Returns the rendered external link from external_link_id(). When the identifier
	-- is malformed or carries an invalid date, also sets the err_bad_medrxiv message.
	local id = options.id;
	local handler = options.handler;
	local err_msg_flag = true;													-- flag; assume that there will be an error

	-- Every pattern is anchored at both ends. An unanchored 8-digit pattern would
	-- match any text that merely ends in eight digits, including the dated forms,
	-- which would then skip date validation.
	local patterns = {
		'^10%.1101/%d%d%d%d%d%d%d%d$',											-- simple 8-digit identifier; these should be relatively rare
		'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d%d%dv%d+$',			-- y.m.d. date + 8-digit identifier + version (2020-01-01 and later)
		'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d%d%d$',				-- y.m.d. date + 8-digit identifier (2020-01-01 and later)
	}

	for _, pattern in ipairs (patterns) do										-- spin through the patterns looking for a match
		local y, m, d = id:match (pattern);										-- for the 8-digit form <y> holds the whole match and <m>, <d> are nil

		if y then																-- found a match
			if not m or is_valid_rxiv_date (y, m, d, 'm') then					-- dated forms must have a valid date; 'm' selects the medrxiv limit
				err_msg_flag = nil;												-- good identifier so unset the error flag
			end
			break;																-- matched (valid or date fail); no need to try other patterns
		end
	end																			-- <err_msg_flag> remains set here when no match or date fail

	if err_msg_flag then
		options.coins_list_t['MEDRXIV'] = nil;									-- when error, unset so not included in COinS
		set_message ('err_bad_medrxiv');										-- and set the error message
	end

	return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
		prefix = handler.prefix, id = id, separator = handler.separator,
		encode = handler.encode, access = handler.access});
end
| | | | |
| Line 1,036: |
Line 1,156: |
| | elseif id:match('^%d+$') then -- no prefix | | elseif id:match('^%d+$') then -- no prefix |
| | number = id; -- get the number | | number = id; -- get the number |
| − | if 10 < number:len() then | + | if tonumber (id) > handler.id_limit then |
| − | number = nil; -- constrain to 1 to 10 digits; change this when OCLC issues 11-digit numbers | + | number = nil; -- unset when id value exceeds the limit |
| | end | | end |
| | end | | end |
| Line 1,498: |
Line 1,618: |
| | ['JSTOR'] = jstor, | | ['JSTOR'] = jstor, |
| | ['LCCN'] = lccn, | | ['LCCN'] = lccn, |
| | + | ['MEDRXIV'] = medrxiv, |
| | ['MR'] = mr, | | ['MR'] = mr, |
| | ['OCLC'] = oclc, | | ['OCLC'] = oclc, |