Module:Text: திருத்தங்களுக்கு இடையிலான வேறுபாடு
Jump to navigation
Jump to search
உள்ளடக்கம் நீக்கப்பட்டது உள்ளடக்கம் சேர்க்கப்பட்டது
imported>Sukanthi "local yesNo = require("Module:Yesno") local Text = { serial = "2022-07-21", suite = "Text" } --[=[ Text utilities ]=] -- local globals local PatternCJK = false local PatternCombined = false local PatternLatin = false local PatternTerminated = false local QuoteLang = false local QuoteType = false local RangesLatin..."-இப்பெயரில் புதிய பக்கம் உருவாக்கப்பட்டுள்ளது |
2024-06-05 |
||
| வரிசை 1: | வரிசை 1: | ||
local |
local Text = { serial = "2024-06-05", |
||
suite = "Text", |
|||
item = 29387871 } |
|||
--[=[ |
--[=[ |
||
Text utilities |
Text utilities |
||
]=] |
]=] |
||
local Failsafe = Text |
|||
local GlobalMod = Text |
|||
local Patterns = { } |
|||
local RangesLatin = false |
|||
local SeekQuote = false |
|||
local foreignModule = function ( access, advanced, append, alt, alert ) |
|||
-- local globals |
|||
-- Fetch global module |
|||
local PatternCJK = false |
|||
-- Precondition: |
|||
local PatternCombined = false |
|||
-- access -- string, with name of base module |
|||
local PatternLatin = false |
|||
-- advanced -- true, for require(); else mw.loadData() |
|||
local PatternTerminated = false |
|||
-- append -- string, with subpage part, if any; or false |
|||
local QuoteLang = false |
|||
-- alt -- number, of wikidata item of root; or false |
|||
local QuoteType = false |
|||
-- alert -- true, for throwing error on data problem |
|||
local RangesLatin = false |
|||
-- Postcondition: |
|||
local SeekQuote = false |
|||
-- Returns whatever, probably table |
|||
-- 2019-10-29 |
|||
local function initLatinData() |
|||
local storage = access |
|||
if not RangesLatin then |
|||
local finer = function () |
|||
if append then |
|||
storage = string.format( "%s/%s", |
|||
storage, |
|||
append ) |
|||
end |
|||
end |
|||
local fun, lucky, r, suited |
|||
if advanced then |
|||
fun = require |
|||
else |
|||
fun = mw.loadData |
|||
end |
end |
||
GlobalMod.globalModules = GlobalMod.globalModules or { } |
|||
if not PatternLatin then |
|||
suited = GlobalMod.globalModules[ access ] |
|||
local range |
|||
if not suited then |
|||
PatternLatin = "^[" |
|||
finer() |
|||
lucky, r = pcall( fun, "Module:" .. storage ) |
|||
PatternLatin = PatternLatin .. |
|||
mw.ustring.char( range[ 1 ], 45, range[ 2 ] ) |
|||
end -- for i |
|||
PatternLatin = PatternLatin .. "]*$" |
|||
end |
end |
||
if not lucky then |
|||
end |
|||
if not suited and |
|||
type( alt ) == "number" and |
|||
alt > 0 then |
|||
suited = string.format( "Q%d", alt ) |
|||
suited = mw.wikibase.getSitelink( suited ) |
|||
GlobalMod.globalModules[ access ] = suited or true |
|||
end |
|||
if type( suited ) == "string" then |
|||
storage = suited |
|||
finer() |
|||
lucky, r = pcall( fun, storage ) |
|||
end |
|||
if not lucky and alert then |
|||
error( "Missing or invalid page: " .. storage, 0 ) |
|||
end |
|||
end |
|||
return r |
|||
end -- foreignModule() |
|||
local function initQuoteData() |
|||
local function factoryQuote() |
|||
-- Create quote definitions |
-- Create quote definitions |
||
if not |
if not Text.quoteLang then |
||
local quoting = foreignModule( "Text", |
|||
QuoteLang = |
|||
false, |
|||
"quoting", |
|||
Text.item ) |
|||
if type( quoting ) == "table" then |
|||
Text.quoteLang = quoting.langs |
|||
Text.quoteType = quoting.types |
|||
end |
|||
if type( Text.quoteLang ) ~= "table" then |
|||
Text.quoteLang = { } |
|||
end |
|||
if type( Text.quoteType ) ~= "table" then |
|||
Text.quoteType = { } |
|||
end |
|||
if type( Text.quoteLang.en ) ~= "string" then |
|||
Text.quoteLang.en = "ld" |
|||
end |
|||
if type( Text.quoteType[ Text.quoteLang.en ] ) ~= "table" then |
|||
Text.quoteType[ Text.quoteLang.en ] = { { 8220, 8221 }, |
|||
{ 8216, 8217 } } |
|||
end |
|||
hsb = "bd", |
|||
hu = "bd", |
|||
hy = "labd", |
|||
id = "rd", |
|||
is = "bd", |
|||
it = "ld", |
|||
ja = "x300C", |
|||
ka = "bd", |
|||
ko = "ld", |
|||
lt = "bd", |
|||
lv = "bd", |
|||
nl = "ld", |
|||
nn = "la", |
|||
no = "la", |
|||
pl = "bdla", |
|||
pt = "lald", |
|||
ro = "bdla", |
|||
ru = "labd", |
|||
sk = "bd", |
|||
sl = "bd", |
|||
sq = "la", |
|||
sr = "bx", |
|||
sv = "rd", |
|||
th = "ld", |
|||
tr = "ld", |
|||
uk = "la", |
|||
zh = "ld", |
|||
["de-ch"] = "la", |
|||
["en-gb"] = "lsld", |
|||
["en-us"] = "ld", |
|||
["fr-ch"] = "la", |
|||
["it-ch"] = "la", |
|||
["pt-br"] = "ldla", |
|||
["zh-tw"] = "x300C", |
|||
["zh-cn"] = "ld" } |
|||
end |
|||
if not QuoteType then |
|||
QuoteType = |
|||
{ bd = { { 8222, 8220 }, { 8218, 8217 } }, |
|||
bdla = { { 8222, 8220 }, { 171, 187 } }, |
|||
bx = { { 8222, 8221 }, { 8218, 8217 } }, |
|||
la = { { 171, 187 }, { 8249, 8250 } }, |
|||
laSPC = { { 171, 187 }, { 8249, 8250 }, true }, |
|||
labd = { { 171, 187 }, { 8222, 8220 } }, |
|||
lald = { { 171, 187 }, { 8220, 8221 } }, |
|||
ld = { { 8220, 8221 }, { 8216, 8217 } }, |
|||
ldla = { { 8220, 8221 }, { 171, 187 } }, |
|||
lsld = { { 8216, 8217 }, { 8220, 8221 } }, |
|||
rd = { { 8221, 8221 }, { 8217, 8217 } }, |
|||
x300C = { { 0x300C, 0x300D }, |
|||
{ 0x300E, 0x300F } } } |
|||
end |
end |
||
end -- |
end -- factoryQuote() |
||
| வரிசை 123: | வரிசை 101: | ||
-- alien -- string, with language code |
-- alien -- string, with language code |
||
-- advance -- number, with level 1 or 2 |
-- advance -- number, with level 1 or 2 |
||
local r = apply |
local r = apply |
||
local quotes, suite |
|||
alien = alien or "en" |
|||
factoryQuote() |
|||
advance = tonumber(advance) or 0 |
|||
if alien then |
|||
suite = mw.text.trim( alien ) |
|||
initQuoteData() |
|||
if suite == "" then |
|||
suite = false |
|||
suite = QuoteLang[alien] or slang and QuoteLang[slang] or QuoteLang["en"] |
|||
else |
|||
local |
local s = Text.quoteLang[ suite ] |
||
if |
if s then |
||
suite = s |
|||
if quotes[ 3 ] then |
|||
space = " " |
|||
else |
else |
||
local slang = suite:match( "^(%l+)-" ) |
|||
if slang then |
|||
suite = Text.quoteLang[ slang ] |
|||
end |
|||
r = mw.ustring.format( "%s%s%s%s%s", |
|||
mw.ustring.char( quotes[ 1 ] ), |
|||
space, |
|||
apply, |
|||
space, |
|||
mw.ustring.char( quotes[ 2 ] ) ) |
|||
end |
end |
||
end |
|||
end |
|||
if not suite then |
|||
suite = Text.quoteLang.en |
|||
end |
|||
quotes = Text.quoteType[ suite ] |
|||
if quotes then |
|||
local space |
|||
if quotes[ 3 ] then |
|||
space = " " |
|||
else |
else |
||
space = "" |
|||
end |
end |
||
quotes = quotes[ advance ] |
|||
if quotes then |
|||
r = mw.ustring.format( "%s%s%s%s%s", |
|||
mw.ustring.char( quotes[ 1 ] ), |
|||
space, |
|||
apply, |
|||
space, |
|||
mw.ustring.char( quotes[ 2 ] ) ) |
|||
end |
|||
else |
|||
mw.log( "fiatQuote() " .. suite ) |
|||
end |
end |
||
return r |
return r |
||
| வரிசை 164: | வரிசை 155: | ||
-- accept -- true, if no error messages to be appended |
-- accept -- true, if no error messages to be appended |
||
-- Returns: string |
-- Returns: string |
||
local r |
local r |
||
if type( apply ) == "table" then |
|||
local bad = { } |
|||
again = math.floor(tonumber(again) or 1) |
|||
local codes = { } |
|||
local s |
|||
for k, v in pairs( apply ) do |
|||
s = type( v ) |
|||
if s == "number" then |
|||
if v < 32 and v ~= 9 and v ~= 10 then |
|||
v = tostring( v ) |
|||
else |
|||
v = math.floor( v ) |
|||
s = false |
|||
end |
|||
elseif s ~= "string" then |
|||
v = tostring( v ) |
|||
end |
|||
if s then |
|||
table.insert( bad, v ) |
|||
else |
|||
table.insert( codes, v ) |
|||
end |
|||
end -- for k, v |
|||
if #bad == 0 then |
|||
if #codes > 0 then |
|||
r = mw.ustring.char( unpack( codes ) ) |
|||
if again then |
|||
if type( again ) == "number" then |
|||
local n = math.floor( again ) |
|||
if n > 1 then |
|||
r = r:rep( n ) |
|||
elseif n < 1 then |
|||
r = "" |
|||
end |
|||
else |
|||
s = "bad repetitions: " .. tostring( again ) |
|||
end |
|||
end |
|||
end |
|||
else |
|||
s = "bad codepoints: " .. table.concat( bad, " " ) |
|||
end |
|||
if s and not accept then |
|||
r = tostring( mw.html.create( "span" ) |
|||
:addClass( "error" ) |
|||
:wikitext( s ) ) |
|||
end |
|||
end |
end |
||
return r or "" |
|||
local codes = { } |
|||
for _, v in ipairs( apply ) do |
|||
local n = tonumber(v) |
|||
if not n or (n < 32 and n ~= 9 and n ~= 10) then |
|||
table.insert(bad, tostring(v)) |
|||
else |
|||
table.insert(codes, math.floor(n)) |
|||
end |
|||
end |
|||
if #bad > 0 then |
|||
if not accept then |
|||
r = tostring( mw.html.create( "span" ) |
|||
:addClass( "error" ) |
|||
:wikitext( "bad codepoints: " .. table.concat( bad, " " )) ) |
|||
end |
|||
return r |
|||
end |
|||
if #codes > 0 then |
|||
r = mw.ustring.char( unpack( codes ) ) |
|||
if again > 1 then |
|||
r = r:rep(again) |
|||
end |
|||
end |
|||
return r |
|||
end -- Text.char() |
end -- Text.char() |
||
local function trimAndFormat(args, fmt) |
|||
local result = {} |
|||
if type(args) ~= 'table' then |
|||
args = {args} |
|||
end |
|||
for _, v in ipairs(args) do |
|||
v = mw.text.trim(tostring(v)) |
|||
if v ~= "" then |
|||
table.insert(result,fmt and mw.ustring.format(fmt, v) or v) |
|||
end |
|||
end |
|||
return result |
|||
end |
|||
Text.concatParams = function ( args, apply, adapt ) |
Text.concatParams = function ( args, apply, adapt ) |
||
| வரிசை 219: | வரிசை 216: | ||
-- Returns: string |
-- Returns: string |
||
local collect = { } |
local collect = { } |
||
for k, v in pairs( args ) do |
|||
return table.concat(trimAndFormat(args,adapt), apply or "|") |
|||
if type( k ) == "number" then |
|||
v = mw.text.trim( v ) |
|||
if v ~= "" then |
|||
if adapt then |
|||
v = mw.ustring.format( adapt, v ) |
|||
end |
|||
table.insert( collect, v ) |
|||
end |
|||
end |
|||
end -- for k, v |
|||
return table.concat( collect, apply or "|" ) |
|||
end -- Text.concatParams() |
end -- Text.concatParams() |
||
Text.containsCJK = function ( |
Text.containsCJK = function ( analyse ) |
||
-- Is any CJK code within? |
-- Is any CJK code within? |
||
-- Parameter: |
-- Parameter: |
||
-- |
-- analyse -- string |
||
-- Returns: true, if CJK detected |
-- Returns: true, if CJK detected |
||
local r |
|||
s = s and tostring(s) or "" |
|||
if not |
if not Patterns.CJK then |
||
Patterns.CJK = mw.ustring.char( 91, |
|||
0x3400, 45, 0x9FFF, |
|||
0x20000, 45, 0x2B81F, |
|||
93 ) |
|||
44032, 45, 55215, |
|||
63744, 45, 64255, |
|||
65072, 45, 65103, |
|||
65381, 45, 65500, |
|||
131072, 45, 196607, |
|||
93 ) |
|||
end |
end |
||
if mw.ustring.find( analyse, Patterns.CJK ) then |
|||
r = true |
|||
else |
|||
r = false |
|||
end |
|||
return r |
|||
end -- Text.containsCJK() |
end -- Text.containsCJK() |
||
Text.removeDelimited = function (s, prefix, suffix) |
|||
-- Remove all text in s delimited by prefix and suffix (inclusive) |
|||
-- Arguments: |
|||
-- s = string to process |
|||
-- prefix = initial delimiter |
|||
-- suffix = ending delimiter |
|||
-- Returns: stripped string |
|||
s = s and tostring(s) or "" |
|||
prefix = prefix and tostring(prefix) or "" |
|||
suffix = suffix and tostring(suffix) or "" |
|||
local prefixLen = mw.ustring.len(prefix) |
|||
local suffixLen = mw.ustring.len(suffix) |
|||
if prefixLen == 0 or suffixLen == 0 then |
|||
return s |
|||
end |
|||
local i = s:find(prefix, 1, true) |
|||
local r = s |
|||
local j |
|||
while i do |
|||
j = r:find(suffix, i + prefixLen) |
|||
if j then |
|||
r = r:sub(1, i - 1)..r:sub(j+suffixLen) |
|||
else |
|||
r = r:sub(1, i - 1) |
|||
end |
|||
i = r:find(prefix, 1, true) |
|||
end |
|||
return r |
|||
end |
|||
Text.getPlain = function ( adjust ) |
Text.getPlain = function ( adjust ) |
||
| வரிசை 280: | வரிசை 259: | ||
-- adjust -- string |
-- adjust -- string |
||
-- Returns: string |
-- Returns: string |
||
local |
local i = adjust:find( "<!--", 1, true ) |
||
local r = adjust |
|||
local j |
|||
while i do |
|||
j = r:find( "-->", i + 3, true ) |
|||
if j then |
|||
r = r:sub( 1, i ) .. r:sub( j + 3 ) |
|||
else |
|||
r = r:sub( 1, i ) |
|||
end |
|||
i = r:find( "<!--", i, true ) |
|||
end -- "<!--" |
|||
r = r:gsub( "(</?%l[^>]*>)", "" ) |
r = r:gsub( "(</?%l[^>]*>)", "" ) |
||
:gsub( "'''", "" ) |
:gsub( "'''(.+)'''", "%1" ) |
||
:gsub( "''", "" ) |
:gsub( "''(.+)''", "%1" ) |
||
:gsub( " ", " " ) |
:gsub( " ", " " ) |
||
return r |
return mw.text.unstrip( r ) |
||
end -- Text.getPlain() |
end -- Text.getPlain() |
||
Text.isLatinRange = function (s) |
|||
Text.isLatinRange = function ( adjust ) |
|||
-- Are characters expected to be latin or symbols within latin texts? |
-- Are characters expected to be latin or symbols within latin texts? |
||
-- |
-- Precondition: |
||
-- |
-- adjust -- string, or nil for initialization |
||
-- Returns: true, if valid for latin only |
-- Returns: true, if valid for latin only |
||
local r |
|||
s = s and tostring(s) or "" --- ensure input is always string |
|||
if not RangesLatin then |
|||
initLatinData() |
|||
RangesLatin = { { 0x07, 0x02AF }, |
|||
return mw.ustring.match(s, PatternLatin) ~= nil |
|||
{ 0x1D6B, 0x1D9A }, |
|||
{ 0x1E00, 0x1EFF }, |
|||
{ 0x2002, 0x203A }, |
|||
{ 0x2190, 0x23BD } } |
|||
end |
|||
if not Patterns.Latin then |
|||
local range |
|||
Patterns.Latin = "^[" |
|||
for i = 1, #RangesLatin do |
|||
range = RangesLatin[ i ] |
|||
Patterns.Latin = Patterns.Latin .. |
|||
mw.ustring.char( range[ 1 ], 45, range[ 2 ] ) |
|||
end -- for i |
|||
Patterns.Latin = Patterns.Latin .. "]*$" |
|||
end |
|||
if adjust then |
|||
if mw.ustring.match( adjust, Patterns.Latin ) then |
|||
r = true |
|||
else |
|||
r = false |
|||
end |
|||
end |
|||
return r |
|||
end -- Text.isLatinRange() |
end -- Text.isLatinRange() |
||
Text.isQuote = function ( |
Text.isQuote = function ( ask ) |
||
-- Is this character any quotation mark? |
-- Is this character any quotation mark? |
||
-- Parameter: |
-- Parameter: |
||
-- |
-- ask -- string, with single character |
||
-- Returns: true, if |
-- Returns: true, if ask is quotation mark |
||
local r |
|||
s = s and tostring(s) or "" |
|||
if s == "" then |
|||
return false |
|||
end |
|||
if not SeekQuote then |
if not SeekQuote then |
||
SeekQuote = mw.ustring.char( 34, -- " |
SeekQuote = mw.ustring.char( 34, -- " |
||
| வரிசை 327: | வரிசை 339: | ||
0x300F ) -- CJK |
0x300F ) -- CJK |
||
end |
end |
||
if ask == "" then |
|||
return mw.ustring.find( SeekQuote, s, 1, true ) ~= nil |
|||
r = false |
|||
elseif mw.ustring.find( SeekQuote, ask, 1, true ) then |
|||
r = true |
|||
else |
|||
r = false |
|||
end |
|||
return r |
|||
end -- Text.isQuote() |
end -- Text.isQuote() |
||
| வரிசை 338: | வரிசை 357: | ||
-- adapt -- string (optional); format including "%s" |
-- adapt -- string (optional); format including "%s" |
||
-- Returns: string |
-- Returns: string |
||
local collect = { } |
|||
return mw.text.listToText(trimAndFormat(args, adapt)) |
|||
for k, v in pairs( args ) do |
|||
if type( k ) == "number" then |
|||
v = mw.text.trim( v ) |
|||
if v ~= "" then |
|||
if adapt then |
|||
v = mw.ustring.format( adapt, v ) |
|||
end |
|||
table.insert( collect, v ) |
|||
end |
|||
end |
|||
end -- for k, v |
|||
return mw.text.listToText( collect ) |
|||
end -- Text.listToText() |
end -- Text.listToText() |
||
| வரிசை 350: | வரிசை 381: | ||
-- advance -- number, with level 1 or 2, or nil |
-- advance -- number, with level 1 or 2, or nil |
||
-- Returns: quoted string |
-- Returns: quoted string |
||
apply = apply and tostring(apply) or "" |
|||
local mode, slang |
local mode, slang |
||
if type( alien ) == "string" then |
if type( alien ) == "string" then |
||
slang = mw.text.trim( alien ):lower() |
slang = mw.text.trim( alien ):lower() |
||
else |
else |
||
local pageLang = mw.title.getCurrentTitle().pageLanguage |
|||
if |
if pageLang then |
||
slang = pageLang.code |
|||
else |
|||
slang = mw.language.getContentLanguage():getCode() |
slang = mw.language.getContentLanguage():getCode() |
||
end |
end |
||
| வரிசை 378: | வரிசை 409: | ||
-- advance -- number, with level 1 or 2, or nil |
-- advance -- number, with level 1 or 2, or nil |
||
-- Returns: string; possibly quoted |
-- Returns: string; possibly quoted |
||
local r = mw.text.trim( apply |
local r = mw.text.trim( apply ) |
||
local s = mw.ustring.sub( r, 1, 1 ) |
local s = mw.ustring.sub( r, 1, 1 ) |
||
if s ~= "" and not Text.isQuote( s, advance ) then |
if s ~= "" and not Text.isQuote( s, advance ) then |
||
| வரிசை 398: | வரிசை 429: | ||
-- or basic greek or cyrillic or symbols etc. |
-- or basic greek or cyrillic or symbols etc. |
||
local cleanup, decomposed |
local cleanup, decomposed |
||
if not |
if not Patterns.Combined then |
||
Patterns.Combined = mw.ustring.char( 91, |
|||
0x0300, 45, 0x036F, |
0x0300, 45, 0x036F, |
||
0x1AB0, 45, 0x1AFF, |
0x1AB0, 45, 0x1AFF, |
||
0x1DC0, 45, 0x1DFF, |
0x1DC0, 45, 0x1DFF, |
||
0xFE20, 45, 0xFE2F, |
0xFE20, 45, 0xFE2F, |
||
93 ) |
93 ) |
||
end |
end |
||
decomposed = mw.ustring.toNFD( adjust |
decomposed = mw.ustring.toNFD( adjust ) |
||
cleanup = mw.ustring.gsub( decomposed, |
cleanup = mw.ustring.gsub( decomposed, Patterns.Combined, "" ) |
||
return mw.ustring.toNFC( cleanup ) |
return mw.ustring.toNFC( cleanup ) |
||
end -- Text.removeDiacritics() |
end -- Text.removeDiacritics() |
||
Text.removeWhitespace = function ( adjust ) |
|||
-- Remove all whitespace, or replace with ASCII space |
|||
-- Parameter: |
|||
-- adjust -- string |
|||
-- Returns: string; modified |
|||
local r = mw.text.decode( adjust ) |
|||
if r:find( "&", 1, true ) then |
|||
r = r:gsub( "‎", "" ) |
|||
:gsub( "‏", "" ) |
|||
:gsub( "‍", "" ) |
|||
:gsub( "‌", "" ) |
|||
:gsub( " ", " " ) |
|||
:gsub( " ", " " ) |
|||
:gsub( " ", " " ) |
|||
end |
|||
if not Patterns.Whitespace then |
|||
Patterns.Whitespace = mw.ustring.char( 0x00AD, |
|||
91, 0x200C, 45, 0x200F, 93, |
|||
91, 0x2028, 45, 0x202E, 93, |
|||
0x205F, |
|||
0x2060 ) |
|||
Patterns.Space = mw.ustring.char( 0x00A0, |
|||
0x1680, |
|||
91, 0x2000, 45, 0x200A, 93, |
|||
0x202F, |
|||
0x205F, |
|||
0x3000, |
|||
0x303F ) |
|||
end |
|||
r = mw.ustring.gsub( r, Patterns.Whitespace, "" ) |
|||
r = mw.ustring.gsub( r, Patterns.Space, " " ) |
|||
return mw.text.trim( r ) |
|||
end -- Text.removeWhitespace() |
|||
| வரிசை 419: | வரிசை 486: | ||
-- analyse -- string |
-- analyse -- string |
||
-- Returns: true, if sentence terminated |
-- Returns: true, if sentence terminated |
||
local r |
local r = mw.text.trim( analyse ) |
||
local lt = r:find( "<", 1, true ) |
|||
if not PatternTerminated then |
|||
if not Patterns.Terminated then |
|||
PatternTerminated = mw.ustring.char( 91, |
|||
Patterns.Terminated = mw.ustring.char( 91, |
|||
12290, |
|||
0x3002, |
|||
0xFF01, |
|||
0xFF0E, |
|||
0xFF1F ) |
|||
.. "!%.%?…][\"'%]‹›«»‘’“”]*$" |
|||
end |
end |
||
if |
if lt then |
||
r = r:gsub( "</span>", "" ) |
|||
end |
|||
if mw.ustring.find( r, Patterns.Terminated ) then |
|||
r = true |
r = true |
||
elseif lt then |
|||
local s = "<bdi[^>]* dir=\"([lr]t[rl])\".+</bdi></bdo>" |
|||
s = r:match( s ) |
|||
if s then |
|||
if mw.language.getContentLanguage():isRTL() then |
|||
r = ( s == "ltr" ) |
|||
else |
|||
r = ( s == "rtl" ) |
|||
end |
|||
else |
|||
r = false |
|||
end |
|||
else |
else |
||
r = false |
r = false |
||
| வரிசை 438: | வரிசை 521: | ||
Text. |
Text.tokenWords = function ( adjust ) |
||
-- Split text in words of digits or letters |
|||
-- Precondition: |
|||
-- adjust -- string |
|||
-- Returns: string with |
|||
local r = mw.uri.decode( adjust, "WIKI" ) |
|||
if r:find( "&", 1, true ) then |
|||
r = mw.text.decode( r ) |
|||
end |
|||
r = Text.removeWhitespace( r ) |
|||
r = mw.ustring.gsub( r, "[%p%s]+", " " ) |
|||
return r |
|||
end -- Text.tokenWords() |
|||
Text.ucfirstAll = function ( adjust ) |
|||
-- Capitalize all words |
-- Capitalize all words |
||
-- |
-- Precondition: |
||
-- adjust |
-- adjust -- string |
||
-- Returns: string with all first letters in upper case |
-- Returns: string with all first letters in upper case |
||
local r = " " .. adjust |
|||
local r = mw.text.decode(adjust,true) |
|||
local i = 1 |
local i = 1 |
||
local c, j, m |
local c, j, m |
||
if adjust:find( "&" ) then |
|||
r = " " |
r = r:gsub( "&", "&" ) |
||
:gsub( "<", "<" ) |
|||
:gsub( ">", ">" ) |
|||
:gsub( " ", " " ) |
|||
:gsub( " ", " " ) |
|||
:gsub( "‌", "‌" ) |
|||
:gsub( "‍", "‍" ) |
|||
:gsub( "‎", "‎" ) |
|||
:gsub( "‏", "‏" ) |
|||
m = true |
|||
end |
|||
while i do |
while i do |
||
i = mw.ustring.find( r, "%W%l", i ) |
i = mw.ustring.find( r, "%W%l", i ) |
||
| வரிசை 463: | வரிசை 571: | ||
r = r:sub( 2 ) |
r = r:sub( 2 ) |
||
if m then |
if m then |
||
r = r:gsub( "&", "&" ) |
|||
r = mw.text.encode(r) |
|||
:gsub( "<", "<" ) |
|||
:gsub( ">", ">" ) |
|||
:gsub( " ", " " ) |
|||
:gsub( " ", " " ) |
|||
:gsub( "‌", "‌" ) |
|||
:gsub( "‍", "‍" ) |
|||
:gsub( "‎", "‎" ) |
|||
:gsub( "‏", "‏" ) |
|||
:gsub( "&#X(%x+);", "&#x%1;" ) |
|||
end |
end |
||
return r |
return r |
||
end -- Text.ucfirstAll() |
end -- Text.ucfirstAll() |
||
| வரிசை 476: | வரிசை 594: | ||
-- Returns: string with non-latin parts enclosed in <span> |
-- Returns: string with non-latin parts enclosed in <span> |
||
local r |
local r |
||
Text.isLatinRange() |
|||
if mw.ustring.match( adjust, |
if mw.ustring.match( adjust, Patterns.Latin ) then |
||
-- latin only, horizontal dashes, quotes |
-- latin only, horizontal dashes, quotes |
||
r = adjust |
r = adjust |
||
else |
else |
||
local c |
local c |
||
local |
local e = mw.html.create( "span" ) |
||
local |
local j = false |
||
local |
local k = 1 |
||
local |
local m = false |
||
local |
local n = mw.ustring.len( adjust ) |
||
local p |
|||
local flat = function ( a ) |
local flat = function ( a ) |
||
-- isLatin |
-- isLatin |
||
| வரிசை 508: | வரிசை 627: | ||
end -- focus() |
end -- focus() |
||
local form = function ( a ) |
local form = function ( a ) |
||
return string.format( |
return string.format( p, |
||
r, |
r, |
||
mw.ustring.sub( adjust, k, j - 1 ), |
mw.ustring.sub( adjust, k, j - 1 ), |
||
mw.ustring.sub( adjust, j, a ) ) |
mw.ustring.sub( adjust, j, a ) ) |
||
end -- form() |
end -- form() |
||
e:attr( "dir", "auto" ) |
|||
:css( "font-style", "normal" ) |
|||
:wikitext( "%s" ) |
|||
p = "%s%s" .. tostring( e ) |
|||
r = "" |
r = "" |
||
for i = 1, n do |
for i = 1, n do |
||
| வரிசை 564: | வரிசை 687: | ||
return r |
return r |
||
end -- Text.uprightNonlatin() |
end -- Text.uprightNonlatin() |
||
Failsafe.failsafe = function ( atleast ) |
|||
-- Retrieve versioning and check for compliance |
|||
-- Precondition: |
|||
-- atleast -- string, with required version |
|||
-- or wikidata|item|~|@ or false |
|||
-- Postcondition: |
|||
-- Returns string -- with queried version/item, also if problem |
|||
-- false -- if appropriate |
|||
-- 2024-03-01 |
|||
local since = atleast |
|||
local last = ( since == "~" ) |
|||
local linked = ( since == "@" ) |
|||
local link = ( since == "item" ) |
|||
local r |
|||
if last or link or linked or since == "wikidata" then |
|||
local item = Failsafe.item |
|||
since = false |
|||
if type( item ) == "number" and item > 0 then |
|||
local suited = string.format( "Q%d", item ) |
|||
if link then |
|||
r = suited |
|||
else |
|||
local entity = mw.wikibase.getEntity( suited ) |
|||
if type( entity ) == "table" then |
|||
local seek = Failsafe.serialProperty or "P348" |
|||
local vsn = entity:formatPropertyValues( seek ) |
|||
if type( vsn ) == "table" and |
|||
type( vsn.value ) == "string" and |
|||
vsn.value ~= "" then |
|||
if last and vsn.value == Failsafe.serial then |
|||
r = false |
|||
elseif linked then |
|||
if mw.title.getCurrentTitle().prefixedText |
|||
== mw.wikibase.getSitelink( suited ) then |
|||
r = false |
|||
else |
|||
r = suited |
|||
end |
|||
else |
|||
r = vsn.value |
|||
end |
|||
end |
|||
end |
|||
end |
|||
elseif link then |
|||
r = false |
|||
end |
|||
end |
|||
if type( r ) == "nil" then |
|||
if not since or since <= Failsafe.serial then |
|||
r = Failsafe.serial |
|||
else |
|||
r = false |
|||
end |
|||
end |
|||
return r |
|||
end -- Failsafe.failsafe() |
|||
| வரிசை 569: | வரிசை 753: | ||
local r |
local r |
||
if about == "quote" then |
if about == "quote" then |
||
factoryQuote() |
|||
r = { |
r = { QuoteLang = Text.quoteLang, |
||
QuoteType = Text.quoteType } |
|||
r.QuoteType = QuoteType |
|||
end |
end |
||
return r |
return r |
||
| வரிசை 581: | வரிசை 764: | ||
-- Export |
-- Export |
||
local p = { } |
local p = { } |
||
for _, func in ipairs({'containsCJK','isLatinRange','isQuote','sentenceTerminated'}) do |
|||
p[func] = function (frame) |
|||
return Text[func]( frame.args[ 1 ] or "" ) and "1" or "" |
|||
end |
|||
end |
|||
for _, func in ipairs({'getPlain','removeDiacritics','ucfirstAll','uprightNonlatin'}) do |
|||
p[func] = function (frame) |
|||
return Text[func]( frame.args[ 1 ] or "" ) |
|||
end |
|||
end |
|||
function p.char( frame ) |
function p.char( frame ) |
||
| வரிசை 603: | வரிசை 774: | ||
end |
end |
||
if story then |
if story then |
||
local items = mw.text.split( |
local items = mw.text.split( story, "%s+" ) |
||
if #items > 0 then |
if #items > 0 then |
||
local j |
local j |
||
lenient = ( |
lenient = ( params.errors == "0" ) |
||
codes = { } |
codes = { } |
||
multiple = tonumber( params[ "*" ] ) |
multiple = tonumber( params[ "*" ] ) |
||
for |
for k, v in pairs( items ) do |
||
if v:sub( 1, 1 ) == "x" then |
|||
j = tonumber( "0" .. v ) |
|||
elseif v == "" then |
|||
v = false |
|||
else |
|||
j = tonumber( v ) |
|||
end |
|||
if v then |
|||
table.insert( codes, j or v ) |
|||
end |
|||
end -- for k, v |
|||
end |
end |
||
end |
end |
||
| வரிசை 634: | வரிசை 813: | ||
frame.args.format ) |
frame.args.format ) |
||
end |
end |
||
function p.containsCJK( frame ) |
|||
return Text.containsCJK( frame.args[ 1 ] or "" ) and "1" or "" |
|||
end |
|||
function p.getPlain( frame ) |
|||
return Text.getPlain( frame.args[ 1 ] or "" ) |
|||
end |
|||
function p.isLatinRange( frame ) |
|||
return Text.isLatinRange( frame.args[ 1 ] or "" ) and "1" or "" |
|||
end |
|||
function p.isQuote( frame ) |
|||
return Text.isQuote( frame.args[ 1 ] or "" ) and "1" or "" |
|||
end |
|||
| வரிசை 660: | வரிசை 856: | ||
result_line = pformat |
result_line = pformat |
||
for j = 1, #lists do |
for j = 1, #lists do |
||
result_line = mw.ustring.gsub(result_line, |
result_line = mw.ustring.gsub( result_line, |
||
"%%s", |
|||
lists[ j ][ i ], |
|||
1 ) |
|||
end |
end |
||
result = result .. result_line |
result = result .. result_line |
||
| வரிசை 714: | வரிசை 913: | ||
tonumber( frame.args[3] ) ) |
tonumber( frame.args[3] ) ) |
||
end |
end |
||
function p.removeDiacritics( frame ) |
|||
return Text.removeDiacritics( frame.args[ 1 ] or "" ) |
|||
end |
|||
function p.sentenceTerminated( frame ) |
|||
return Text.sentenceTerminated( frame.args[ 1 ] or "" ) and "1" or "" |
|||
end |
|||
function p.tokenWords( frame ) |
|||
return Text.tokenWords( frame.args[ 1 ] or "" ) |
|||
end |
|||
function p.ucfirstAll( frame ) |
|||
return Text.ucfirstAll( frame.args[ 1 ] or "" ) |
|||
end |
|||
function p.unstrip( frame ) |
|||
return mw.text.trim( mw.text.unstrip( frame.args[ 1 ] or "" ) ) |
|||
end |
|||
function p.uprightNonlatin( frame ) |
|||
return Text.uprightNonlatin( frame.args[ 1 ] or "" ) |
|||
end |
|||
| வரிசை 758: | வரிசை 984: | ||
p.failsafe = function ( frame ) |
|||
-- Versioning interface |
|||
return Text.serial |
|||
local s = type( frame ) |
|||
end |
|||
local since |
|||
if s == "table" then |
|||
since = frame.args[ 1 ] |
|||
elseif s == "string" then |
|||
since = frame |
|||
end |
|||
if since then |
|||
since = mw.text.trim( since ) |
|||
if since == "" then |
|||
since = false |
|||
end |
|||
end |
|||
return Failsafe.failsafe( since ) or "" |
|||
end -- p.failsafe() |
|||
| வரிசை 767: | வரிசை 1,007: | ||
return Text |
return Text |
||
end -- p.Text |
end -- p.Text |
||
setmetatable( p, { __call = function ( func, ... ) |
|||
setmetatable( p, nil ) |
|||
return Failsafe |
|||
end } ) |
|||
return p |
return p |
||
16:15, 29 சூலை 2024 இல் நிலவும் திருத்தம்
Documentation for this module may be created at Module:Text/doc
-- Library table; also carries the versioning data read by Failsafe.failsafe():
--     serial -- version string, compared against a requested minimum version
--     suite  -- name of this module
--     item   -- number of the Wikidata item, formatted as "Q%d" where needed
local Text = { serial = "2024-06-05",
suite = "Text",
item = 29387871 }
--[=[
Text utilities
]=]
-- Failsafe and GlobalMod are aliases of the very same table as Text
local Failsafe = Text
local GlobalMod = Text
-- Cache for pattern strings, created on first use by the functions below
local Patterns = { }
-- Table of codepoint ranges, created by Text.isLatinRange(); false until then
local RangesLatin = false
-- String of all quotation marks, created by Text.isQuote(); false until then
local SeekQuote = false
local foreignModule = function ( access, advanced, append, alt, alert )
-- Fetch global module
-- Precondition:
-- access -- string, with name of base module
-- advanced -- true, for require(); else mw.loadData()
-- append -- string, with subpage part, if any; or false
-- alt -- number, of wikidata item of root; or false
-- alert -- true, for throwing error on data problem
-- Postcondition:
-- Returns whatever, probably table
-- If loading failed and alert is not set, the value returned is
-- whatever the last pcall() delivered as second result (or nil if
-- no load was attempted); callers should check the type.
-- 2019-10-29
local storage = access
-- finer() extends the page name in storage by the subpage part, if any
local finer = function ()
if append then
storage = string.format( "%s/%s",
storage,
append )
end
end
local fun, lucky, r, suited
if advanced then
fun = require
else
fun = mw.loadData
end
-- Per base module cache: a sitelink string, or true if none was found
GlobalMod.globalModules = GlobalMod.globalModules or { }
suited = GlobalMod.globalModules[ access ]
if not suited then
-- Nothing cached yet: first attempt is the local page "Module:<access>"
finer()
lucky, r = pcall( fun, "Module:" .. storage )
end
-- lucky is nil here if the first attempt has been skipped
if not lucky then
if not suited and
type( alt ) == "number" and
alt > 0 then
-- Ask Wikidata for the local page title connected to item Q<alt>
suited = string.format( "Q%d", alt )
suited = mw.wikibase.getSitelink( suited )
GlobalMod.globalModules[ access ] = suited or true
end
if type( suited ) == "string" then
-- Second attempt: the sitelink title is used as it is,
-- no "Module:" is prepended here
storage = suited
finer()
lucky, r = pcall( fun, storage )
end
if not lucky and alert then
error( "Missing or invalid page: " .. storage, 0 )
end
end
return r
end -- foreignModule()
local function factoryQuote()
    -- Provide quotation mark definitions, once per run
    -- Postcondition:
    --     Text.quoteLang  -- table, mapping language code to type key
    --     Text.quoteType  -- table, mapping type key to codepoint pairs
    --     An entry for "en" and its type is guaranteed.
    if Text.quoteLang then
        return
    end
    local quoting = foreignModule( "Text", false, "quoting", Text.item )
    local fallback
    if type( quoting ) == "table" then
        Text.quoteLang = quoting.langs
        Text.quoteType = quoting.types
    end
    if type( Text.quoteLang ) ~= "table" then
        Text.quoteLang = { }
    end
    if type( Text.quoteType ) ~= "table" then
        Text.quoteType = { }
    end
    if type( Text.quoteLang.en ) ~= "string" then
        Text.quoteLang.en = "ld"
    end
    fallback = Text.quoteLang.en
    if type( Text.quoteType[ fallback ] ) ~= "table" then
        -- level 1: double curly quotes; level 2: single curly quotes
        Text.quoteType[ fallback ] = { { 8220, 8221 },
                                       { 8216, 8217 } }
    end
end -- factoryQuote()
local function fiatQuote( apply, alien, advance )
    -- Quote text
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code (or key of a quote
    --                 type); empty or false for the English default
    --     advance  -- number, with level 1 or 2
    -- Returns: string; apply unchanged if no marks are defined for the
    --          requested type and level
    local r = apply
    local quotes, suite
    factoryQuote()
    if alien then
        local code = mw.text.trim( alien )
        if code ~= "" then
            suite = Text.quoteLang[ code ]
            if not suite then
                local slang = code:match( "^(%l+)-" )
                if slang then
                    -- "xx-yy" unknown: try the primary subtag "xx"
                    suite = Text.quoteLang[ slang ]
                elseif Text.quoteType[ code ] then
                    -- not a language, but directly the key of a type
                    suite = code
                end
            end
        end
    end
    if not suite then
        -- Any unknown code gets the English marks. Formerly that was
        -- the case for codes with subtag only, while a plain unknown
        -- code returned the text without any quotation marks.
        suite = Text.quoteLang.en
    end
    quotes = Text.quoteType[ suite ]
    if quotes then
        local space = ""
        if quotes[ 3 ] then
            -- Third entry demands spacing between mark and text.
            -- NOTE(review): the literal was an invisible space character;
            -- U+00A0 NO-BREAK SPACE is assumed here -- confirm.
            space = mw.ustring.char( 160 )
        end
        quotes = quotes[ advance ]
        if quotes then
            r = mw.ustring.format( "%s%s%s%s%s",
                                   mw.ustring.char( quotes[ 1 ] ),
                                   space,
                                   apply,
                                   space,
                                   mw.ustring.char( quotes[ 2 ] ) )
        end
    else
        -- language is mapped to a type without definition
        mw.log( "fiatQuote() " .. tostring( suite ) )
    end
    return r
end -- fiatQuote()
Text.char = function ( apply, again, accept )
    -- Build a string from a list of codepoints
    -- Parameter:
    --     apply   -- table (sequence) with numerical codepoints, or nil
    --     again   -- number of repetitions, or nil
    --     accept  -- true, if no error messages to be appended
    -- Returns: string; empty if there is nothing to create
    if type( apply ) ~= "table" then
        return ""
    end
    local rejected = { }
    local points   = { }
    local problem, r
    for _, v in pairs( apply ) do
        local kind = type( v )
        if kind == "number" then
            if v < 32 and v ~= 9 and v ~= 10 then
                -- control codes other than TAB and LF are refused
                table.insert( rejected, tostring( v ) )
            else
                table.insert( points, math.floor( v ) )
            end
        elseif kind == "string" then
            table.insert( rejected, v )
        else
            table.insert( rejected, tostring( v ) )
        end
    end -- for _, v
    if #rejected > 0 then
        problem = "bad codepoints: " .. table.concat( rejected, " " )
    elseif #points > 0 then
        r = mw.ustring.char( unpack( points ) )
        if again then
            if type( again ) == "number" then
                local n = math.floor( again )
                if n > 1 then
                    r = r:rep( n )
                elseif n < 1 then
                    r = ""
                end
            else
                problem = "bad repetitions: " .. tostring( again )
            end
        end
    end
    if problem and not accept then
        local e = mw.html.create( "span" )
        e:addClass( "error" )
         :wikitext( problem )
        r = tostring( e )
    end
    return r or ""
end -- Text.char()
Text.concatParams = function ( args, apply, adapt )
    -- Concat list items into one string
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     apply  -- string (optional); separator (default: "|")
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string; trimmed non-empty items in ascending key order
    local collect = { }
    local order   = { }
    local v
    -- pairs() does not promise any order, in particular not on frame
    -- arguments; therefore numerical keys are gathered and sorted first.
    for k in pairs( args ) do
        if type( k ) == "number" then
            table.insert( order, k )
        end
    end -- for k
    table.sort( order )
    for i = 1, #order do
        v = mw.text.trim( args[ order[ i ] ] )
        if v ~= "" then
            if adapt then
                v = mw.ustring.format( adapt, v )
            end
            table.insert( collect, v )
        end
    end -- for i
    return table.concat( collect, apply or "|" )
end -- Text.concatParams()
Text.containsCJK = function ( analyse )
    -- Detect any CJK codepoint in a string
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if CJK detected; else false
    local seek = Patterns.CJK
    if not seek then
        -- character set: [ U+3400-U+9FFF U+20000-U+2B81F ]
        seek = mw.ustring.char( 91,
                                0x3400,  45, 0x9FFF,
                                0x20000, 45, 0x2B81F,
                                93 )
        Patterns.CJK = seek
    end
    return ( mw.ustring.find( analyse, seek ) ~= nil )
end -- Text.containsCJK()
Text.getPlain = function ( adjust )
    -- Remove wikisyntax from string, except templates
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; without HTML comments, tags, bold/italic
    --          apostrophes; no-break spaces turned into ASCII space
    local r = adjust
    local i = r:find( "<!--", 1, true )
    local j
    while i do
        j = r:find( "-->", i + 3, true )
        -- Keep text in front of the comment only: up to i - 1.
        -- Cutting at i left the "<" of every comment in the result.
        if j then
            r = r:sub( 1, i - 1 ) .. r:sub( j + 3 )
        else
            -- unterminated comment swallows the remainder
            r = r:sub( 1, i - 1 )
        end
        -- text following the removed comment starts at i now
        i = r:find( "<!--", i, true )
    end -- "<!--"
    r = r:gsub( "(</?%l[^>]*>)", "" )
         :gsub( "'''(.+)'''", "%1" )
         :gsub( "''(.+)''", "%1" )
         :gsub( "&nbsp;", " " )     -- entity, as written in wikitext
         :gsub( "\194\160", " " )   -- U+00A0 itself, UTF-8 encoded
    return mw.text.unstrip( r )
end -- Text.getPlain()
Text.isLatinRange = function ( adjust )
    -- Check whether all characters are latin, or symbols common in
    -- latin texts
    -- Precondition:
    --     adjust  -- string, or nil for initialization only
    -- Returns: true, if valid for latin only; false if not;
    --          nil, if adjust is nil
    if not RangesLatin then
        RangesLatin = { {   0x07, 0x02AF },
                        { 0x1D6B, 0x1D9A },
                        { 0x1E00, 0x1EFF },
                        { 0x2002, 0x203A },
                        { 0x2190, 0x23BD } }
    end
    if not Patterns.Latin then
        -- anchored pattern made of one set listing all ranges
        local parts = { "^[" }
        for _, range in ipairs( RangesLatin ) do
            table.insert( parts,
                          mw.ustring.char( range[ 1 ], 45, range[ 2 ] ) )
        end -- for _, range
        table.insert( parts, "]*$" )
        Patterns.Latin = table.concat( parts )
    end
    if adjust then
        return ( mw.ustring.match( adjust, Patterns.Latin ) ~= nil )
    end
    return nil
end -- Text.isLatinRange()
Text.isQuote = function ( ask )
    -- Check whether a character is any quotation mark
    -- Parameter:
    --     ask  -- string, with single character
    -- Returns: true, if ask is quotation mark; else false
    if not SeekQuote then
        local marks = {   34,      -- "
                          39,      -- '
                         171,      -- laquo
                         187,      -- raquo
                        8216,      -- lsquo
                        8217,      -- rsquo
                        8218,      -- sbquo
                        8220,      -- ldquo
                        8221,      -- rdquo
                        8222,      -- bdquo
                        8249,      -- lsaquo
                        8250,      -- rsaquo
                        0x300C,    -- CJK
                        0x300D,    -- CJK
                        0x300E,    -- CJK
                        0x300F }   -- CJK
        SeekQuote = mw.ustring.char( unpack( marks ) )
    end
    if ask == "" then
        return false
    end
    -- plain search of ask within the collection of marks
    return ( mw.ustring.find( SeekQuote, ask, 1, true ) ~= nil )
end -- Text.isQuote()
Text.listToText = function ( args, adapt )
    -- Format list items similar to mw.text.listToText()
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string; trimmed non-empty items in ascending key order
    local collect = { }
    local order   = { }
    local v
    -- pairs() does not promise any order, in particular not on frame
    -- arguments; therefore numerical keys are gathered and sorted first.
    for k in pairs( args ) do
        if type( k ) == "number" then
            table.insert( order, k )
        end
    end -- for k
    table.sort( order )
    for i = 1, #order do
        v = mw.text.trim( args[ order[ i ] ] )
        if v ~= "" then
            if adapt then
                v = mw.ustring.format( adapt, v )
            end
            table.insert( collect, v )
        end
    end -- for i
    return mw.text.listToText( collect )
end -- Text.listToText()
Text.quote = function ( apply, alien, advance )
    -- Enclose text in quotation marks
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: quoted string
    local mode = 1
    local slang
    if type( alien ) == "string" then
        slang = mw.text.trim( alien ):lower()
    else
        -- no language requested: language of page, else of project
        local pageLang = mw.title.getCurrentTitle().pageLanguage
        if pageLang then
            slang = pageLang.code
        else
            slang = mw.language.getContentLanguage():getCode()
        end
    end
    if advance == 2 then
        mode = 2
    end
    return fiatQuote( mw.text.trim( apply ), slang, mode )
end -- Text.quote()
Text.quoteUnquoted = function ( apply, alien, advance )
    -- Quote text, if not yet quoted and not empty
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: string; possibly quoted
    --          Trimmed text is returned unchanged if empty, or if the
    --          first or the last character is any quotation mark.
    local r = mw.text.trim( apply )
    local s = mw.ustring.sub( r, 1, 1 )
    if s ~= "" and not Text.isQuote( s ) then
        -- Last character; sub( r, -1, 1 ) yielded "" for every string
        -- longer than one character, so this check never took effect.
        s = mw.ustring.sub( r, -1 )
        if not Text.isQuote( s ) then
            r = Text.quote( r, alien, advance )
        end
    end
    return r
end -- Text.quoteUnquoted()
Text.removeDiacritics = function ( adjust )
    -- Strip all combining marks from a string
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; all latin letters should be ASCII
    --                  or basic greek or cyrillic or symbols etc.
    local seek = Patterns.Combined
    if not seek then
        -- one set with four ranges of combining characters
        seek = mw.ustring.char( 91,
                                0x0300, 45, 0x036F,
                                0x1AB0, 45, 0x1AFF,
                                0x1DC0, 45, 0x1DFF,
                                0xFE20, 45, 0xFE2F,
                                93 )
        Patterns.Combined = seek
    end
    -- decompose, drop the marks, compose again
    local bare = mw.ustring.gsub( mw.ustring.toNFD( adjust ), seek, "" )
    return mw.ustring.toNFC( bare )
end -- Text.removeDiacritics()
Text.removeWhitespace = function ( adjust )
    -- Remove all whitespace, or replace with ASCII space
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; modified
    --          Invisible formatting characters are dropped, any kind of
    --          space character becomes U+0020, result is trimmed.
    local r = mw.text.decode( adjust )
    if r:find( "&", 1, true ) then
        -- Named entities not resolved by the basic mw.text.decode().
        -- The literal characters themselves are covered by the
        -- character sets below.
        r = r:gsub( "&lrm;", "" )
             :gsub( "&rlm;", "" )
             :gsub( "&zwj;", "" )
             :gsub( "&zwnj;", "" )
             :gsub( "&thinsp;", " " )
             :gsub( "&ensp;", " " )
             :gsub( "&emsp;", " " )
    end
    if not Patterns.Whitespace then
        -- Both patterns need to be ONE set [...] each. The former
        -- sequence of single characters and several sets matched
        -- only if all of them occurred in a row, which means never.
        Patterns.Whitespace = mw.ustring.char( 91,
                                               0x00AD,
                                               0x200C, 45, 0x200F,
                                               0x2028, 45, 0x202E,
                                               0x2060,
                                               93 )
        -- U+205F is a space and listed here only, in order to be
        -- replaced rather than removed
        Patterns.Space = mw.ustring.char( 91,
                                          0x00A0,
                                          0x1680,
                                          0x2000, 45, 0x200A,
                                          0x202F,
                                          0x205F,
                                          0x3000,
                                          0x303F,
                                          93 )
    end
    r = mw.ustring.gsub( r, Patterns.Whitespace, "" )
    r = mw.ustring.gsub( r, Patterns.Space, " " )
    return mw.text.trim( r )
end -- Text.removeWhitespace()
Text.sentenceTerminated = function ( analyse )
    -- Check whether a string ends with dot, question or exclamation mark
    -- Closing quotation marks or brackets behind are tolerated
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if sentence terminated; else false
    local story  = mw.text.trim( analyse )
    local tagged = story:find( "<", 1, true )
    local dir
    if not Patterns.Terminated then
        -- set of terminators, followed by any number of closing marks
        Patterns.Terminated = mw.ustring.char( 91,
                                               0x3002,
                                               0xFF01,
                                               0xFF0E,
                                               0xFF1F )
                              .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
    end
    if tagged then
        story = story:gsub( "</span>", "" )
    end
    if mw.ustring.find( story, Patterns.Terminated ) then
        return true
    end
    if not tagged then
        return false
    end
    -- bidirectional isolation: decide by direction against project
    dir = story:match( "<bdi[^>]* dir=\"([lr]t[rl])\".+</bdi></bdo>" )
    if not dir then
        return false
    end
    if mw.language.getContentLanguage():isRTL() then
        return ( dir == "ltr" )
    end
    return ( dir == "rtl" )
end -- Text.sentenceTerminated()
Text.tokenWords = function ( adjust )
    -- Split text in words of digits or letters
    -- Precondition:
    --     adjust  -- string
    -- Returns: string; every run of punctuation or whitespace has
    --          been replaced by one single ASCII space
    local story = mw.uri.decode( adjust, "WIKI" )
    local words
    if story:find( "&", 1, true ) then
        story = mw.text.decode( story )
    end
    story = Text.removeWhitespace( story )
    -- assignment to a single variable drops the replacement counter
    words = mw.ustring.gsub( story, "[%p%s]+", " " )
    return words
end -- Text.tokenWords()
Text.ucfirstAll = function ( adjust )
    -- Capitalize all words
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with all first letters in upper case
    --          The named entities listed below are kept intact; they
    --          are returned in named form even if supplied numerical.
    -- Named entities would be damaged ("&nbsp;" -> "&Nbsp;"), therefore
    -- they are numerical while capitalizing and restored afterwards.
    -- Both substitution lists had degenerated into no-ops before.
    local entities = { { "amp",    "38"   },
                       { "lt",     "60"   },
                       { "gt",     "62"   },
                       { "nbsp",   "160"  },
                       { "thinsp", "8201" },
                       { "zwnj",   "8204" },
                       { "zwj",    "8205" },
                       { "lrm",    "8206" },
                       { "rlm",    "8207" } }
    -- leading space makes the very first letter follow a non-word
    local r = " " .. adjust
    local m = false
    local e
    if adjust:find( "&", 1, true ) then
        for i = 1, #entities do
            e = entities[ i ]
            r = r:gsub( "&" .. e[ 1 ] .. ";",
                        "&#" .. e[ 2 ] .. ";" )
        end -- for i
        m = true
    end
    -- single pass instead of rebuilding the string for every word
    r = mw.ustring.gsub( r,
                         "(%W)(%l)",
                         function ( before, letter )
                             return before .. mw.ustring.upper( letter )
                         end )
    r = r:sub( 2 )
    if m then
        for i = 1, #entities do
            e = entities[ i ]
            r = r:gsub( "&#" .. e[ 2 ] .. ";",
                        "&" .. e[ 1 ] .. ";" )
        end -- for i
        -- hexadecimal entities got a capitalized marker: &#x -> &#X
        r = r:gsub( "&#X(%x+);", "&#x%1;" )
    end
    return r
end -- Text.ucfirstAll()
Text.uprightNonlatin = function ( adjust )
-- Ensure non-italics for non-latin text parts
-- One single greek letter might be granted
-- Precondition:
-- adjust -- string
-- Returns: string with non-latin parts enclosed in <span>
local r
-- Call without argument: creates RangesLatin and Patterns.Latin only
Text.isLatinRange()
if mw.ustring.match( adjust, Patterns.Latin ) then
-- latin only, horizontal dashes, quotes
r = adjust
else
-- State while scanning:
-- c -- codepoint of current character (reused for a character string)
-- e -- template <span> element
-- j -- index where the pending non-latin run starts, or false
-- k -- index of first character not yet copied into r
-- m -- index bookkeeping for a single greek letter, or false
-- n -- number of characters
-- p -- format string: result so far, latin text, wrapped run
local c
local e = mw.html.create( "span" )
local j = false
local k = 1
local m = false
local n = mw.ustring.len( adjust )
local p
local flat = function ( a )
-- isLatin
-- true if codepoint a is within any of RangesLatin; else nil
local range
for i = 1, #RangesLatin do
range = RangesLatin[ i ]
if a >= range[ 1 ] and a <= range[ 2 ] then
return true
end
end -- for i
end -- flat()
local focus = function ( a )
-- char is not ambivalent
-- Above 64: everything but 8192...8212;
-- up to 64: nothing but the two codes below
local r = ( a > 64 )
if r then
r = ( a < 8192 or a > 8212 )
else
r = ( a == 38 or a == 60 ) -- '&' '<'
end
return r
end -- focus()
local form = function ( a )
-- Result so far, plus text k...j-1 as it is,
-- plus run j...a inserted into the <span>
return string.format( p,
r,
mw.ustring.sub( adjust, k, j - 1 ),
mw.ustring.sub( adjust, j, a ) )
end -- form()
-- The "%s" as content becomes the third placeholder of p
e:attr( "dir", "auto" )
:css( "font-style", "normal" )
:wikitext( "%s" )
p = "%s%s" .. tostring( e )
r = ""
for i = 1, n do
c = mw.ustring.codepoint( adjust, i, i )
if focus( c ) then
if flat( c ) then
-- latin character: terminates a pending run
if j then
if m then
if i == m then
-- single greek letter.
j = false
end
m = false
end
if j then
-- Spaces and opening parentheses directly in front
-- of this character are moved behind the <span>
local nx = i - 1
local s = ""
for ix = nx, 1, -1 do
c = mw.ustring.sub( adjust, ix, ix )
if c == " " or c == "(" then
nx = nx - 1
s = c .. s
else
break -- for ix
end
end -- for ix
r = form( nx ) .. s
j = false
k = i
end
end
elseif not j then
-- non-latin character opens a new run
j = i
if c >= 880 and c <= 1023 then
-- single greek letter?
m = i + 1
else
m = false
end
end
elseif m then
-- ambivalent character: shifts the expected position
m = m + 1
end
end -- for i
if j and ( not m or m < n ) then
-- pending run extends to the end
r = form( n )
else
r = r .. mw.ustring.sub( adjust, k )
end
end
return r
end -- Text.uprightNonlatin()
Failsafe.failsafe = function ( atleast )
-- Retrieve versioning and check for compliance
-- Precondition:
-- atleast -- string, with required version
-- or wikidata|item|~|@ or false
-- Postcondition:
-- Returns string -- with queried version/item, also if problem
-- false -- if appropriate
-- 2024-03-01
local since = atleast
-- Keywords: "~" compares Wikidata version with the local serial,
-- "@" compares the sitelink with the current page,
-- "item" asks for the item identifier
local last = ( since == "~" )
local linked = ( since == "@" )
local link = ( since == "item" )
local r
if last or link or linked or since == "wikidata" then
local item = Failsafe.item
-- a keyword is no version to compare with
since = false
if type( item ) == "number" and item > 0 then
local suited = string.format( "Q%d", item )
if link then
r = suited
else
local entity = mw.wikibase.getEntity( suited )
if type( entity ) == "table" then
-- property holding the version; P348 unless configured
local seek = Failsafe.serialProperty or "P348"
local vsn = entity:formatPropertyValues( seek )
if type( vsn ) == "table" and
type( vsn.value ) == "string" and
vsn.value ~= "" then
if last and vsn.value == Failsafe.serial then
-- local serial equals the version at Wikidata
r = false
elseif linked then
if mw.title.getCurrentTitle().prefixedText
== mw.wikibase.getSitelink( suited ) then
-- current page is the page connected to the item
r = false
else
r = suited
end
else
r = vsn.value
end
end
end
end
elseif link then
r = false
end
end
-- r is still nil if no keyword has been resolved above
if type( r ) == "nil" then
-- string comparison of the requested version and the local serial
if not since or since <= Failsafe.serial then
r = Failsafe.serial
else
r = false
end
end
return r
end -- Failsafe.failsafe()
Text.test = function ( about )
    -- Expose internal data for testing purposes
    -- Parameter:
    --     about  -- string, "quote" for the quotation definitions
    -- Returns: table with QuoteLang and QuoteType, or nil
    if about ~= "quote" then
        return nil
    end
    factoryQuote()
    return { QuoteLang = Text.quoteLang,
             QuoteType = Text.quoteType }
end -- Text.test()
-- Export
-- Table returned by this module; functions taking a frame are added below
local p = { }
function p.char( frame )
    -- Template interface for Text.char()
    --     1       -- codepoints separated by whitespace; decimal,
    --                or hexadecimal with leading "x"
    --     *       -- number of repetitions
    --     errors  -- "0" for suppressing error messages
    -- Parameters of the template transclusion take precedence;
    -- else those of #invoke are used.
    local params = frame:getParent().args
    local story  = params[ 1 ]
    local codes, lenient, multiple
    if not story then
        params = frame.args
        story  = params[ 1 ]
    end
    if story then
        local items = mw.text.split( story, "%s+" )
        if #items > 0 then
            lenient  = ( params.errors == "0" )
            multiple = tonumber( params[ "*" ] )
            codes    = { }
            for i = 1, #items do
                local item = items[ i ]
                if item ~= "" then
                    local n
                    if item:sub( 1, 1 ) == "x" then
                        -- "x41" is read as "0x41"
                        n = tonumber( "0" .. item )
                    else
                        n = tonumber( item )
                    end
                    -- non-numerical items are forwarded for complaint
                    table.insert( codes, n or item )
                end
            end -- for i
        end
    end
    return Text.char( codes, multiple, lenient )
end
function p.concatParams( frame )
    -- Template interface for Text.concatParams()
    --     template   -- "1" for using parameters of the calling template
    --     separator  -- string between items
    --     format     -- format string for each item
    local config   = frame.args
    local template = config.template
    local args     = config
    if type( template ) == "string" then
        template = ( mw.text.trim( template ) == "1" )
    end
    if template then
        args = frame:getParent().args
    end
    return Text.concatParams( args, config.separator, config.format )
end
function p.containsCJK( frame )
    -- Template interface: "1" if first parameter contains CJK, else ""
    local story = frame.args[ 1 ] or ""
    if Text.containsCJK( story ) then
        return "1"
    end
    return ""
end
function p.getPlain( frame )
    -- Template interface for Text.getPlain(); first parameter is text
    local story = frame.args[ 1 ]
    if not story then
        story = ""
    end
    return Text.getPlain( story )
end
function p.isLatinRange( frame )
    -- Template interface: "1" if first parameter is latin only, else ""
    local story = frame.args[ 1 ] or ""
    if Text.isLatinRange( story ) then
        return "1"
    end
    return ""
end
function p.isQuote( frame )
    -- Template interface: "1" if first parameter is a quotation mark,
    -- else ""
    local story = frame.args[ 1 ] or ""
    if Text.isQuote( story ) then
        return "1"
    end
    return ""
end
function p.listToFormat(frame)
    -- Combine several lists item by item, using a format string
    --     1, 2, ...  -- lists, items separated by sep
    --     format     -- string; n-th "%s" is replaced by the item of
    --                   the n-th list; empty if missing
    --     sep        -- separator pattern for lists (default: ";")
    -- Returns: string; format applied once per item position
    local lists = {}
    local pformat = frame.args["format"] or ""
    local sep = frame.args["sep"] or ";"
    -- Parse parameters: lists
    for k, v in pairs(frame.args) do
        local knum = tonumber(k)
        if knum then lists[knum] = v end
    end
    -- Split lists
    local maxListLen = 0
    for i = 1, #lists do
        lists[i] = mw.text.split(lists[i], sep)
        if #lists[i] > maxListLen then maxListLen = #lists[i] end
    end
    -- Generate result string
    local rows = {}
    for i = 1, maxListLen do
        local line = pformat
        for j = 1, #lists do
            -- A list shorter than the longest one contributes an empty
            -- item, as p.zip() does; nil would make gsub() fail.
            local item = lists[ j ][ i ] or ""
            -- Replacement by function: any "%" within the item is
            -- kept literally rather than read as capture reference.
            line = mw.ustring.gsub( line,
                                    "%%s",
                                    function ()
                                        return item
                                    end,
                                    1 )
        end
        rows[ i ] = line
    end
    return table.concat( rows )
end
function p.listToText( frame )
    -- Template interface for Text.listToText()
    --     template  -- "1" for using parameters of the calling template
    --     format    -- format string for each item
    local config   = frame.args
    local template = config.template
    local args     = config
    if type( template ) == "string" then
        template = ( mw.text.trim( template ) == "1" )
    end
    if template then
        args = frame:getParent().args
    end
    return Text.listToText( args, config.format )
end
function p.quote( frame )
    -- Template interface for Text.quote()
    --     1  -- text
    --     2  -- language code; page language if empty
    --     3  -- level, 1 or 2
    local params = frame.args
    local slang  = params[ 2 ]
    if type( slang ) == "string" then
        slang = mw.text.trim( slang )
        if slang == "" then
            slang = false
        end
    end
    return Text.quote( params[ 1 ] or "", slang, tonumber( params[ 3 ] ) )
end
function p.quoteUnquoted( frame )
    -- Template interface for Text.quoteUnquoted()
    --     1  -- text
    --     2  -- language code; page language if empty
    --     3  -- level, 1 or 2
    local params = frame.args
    local slang  = params[ 2 ]
    if type( slang ) == "string" then
        slang = mw.text.trim( slang )
        if slang == "" then
            slang = false
        end
    end
    return Text.quoteUnquoted( params[ 1 ] or "",
                               slang,
                               tonumber( params[ 3 ] ) )
end
function p.removeDiacritics( frame )
    -- Template interface for Text.removeDiacritics()
    local story = frame.args[ 1 ]
    if not story then
        story = ""
    end
    return Text.removeDiacritics( story )
end
function p.sentenceTerminated( frame )
    -- Template interface: "1" if first parameter is a terminated
    -- sentence, else ""
    local story = frame.args[ 1 ] or ""
    if Text.sentenceTerminated( story ) then
        return "1"
    end
    return ""
end
function p.tokenWords( frame )
    -- Template interface for Text.tokenWords()
    local story = frame.args[ 1 ]
    if not story then
        story = ""
    end
    return Text.tokenWords( story )
end
function p.ucfirstAll( frame )
    -- Template interface for Text.ucfirstAll()
    local story = frame.args[ 1 ]
    if not story then
        story = ""
    end
    return Text.ucfirstAll( story )
end
function p.unstrip( frame )
    -- Template interface: first parameter with strip markers
    -- resolved, trimmed
    local story = frame.args[ 1 ]
    if not story then
        story = ""
    end
    return mw.text.trim( mw.text.unstrip( story ) )
end
function p.uprightNonlatin( frame )
    -- Template interface for Text.uprightNonlatin()
    local story = frame.args[ 1 ]
    if not story then
        story = ""
    end
    return Text.uprightNonlatin( story )
end
function p.zip(frame)
    -- Interleave the items of several lists
    --     1, 2, ...     -- lists
    --     sep           -- default separator pattern of lists
    --     sep1, sep2    -- separator pattern of the particular list
    --     isep          -- string between items of the same position
    --     osep          -- string between groups of items
    local params = frame.args
    local defaultsep = params["sep"] or ""
    local innersep = params["isep"] or ""
    local outersep = params["osep"] or ""
    local lists = {}
    local seps = {}
    local pieces = {}
    local maxListLen = 0
    -- Parse parameters
    for k, v in pairs(params) do
        local knum = tonumber(k)
        if knum then
            lists[knum] = v
        elseif string.sub(k, 1, 3) == "sep" then
            local sepnum = tonumber(string.sub(k, 4))
            if sepnum then
                seps[sepnum] = v
            end
        end
    end
    -- Use the default separator where no explicit one has been given
    for i = 1, math.max(#seps, #lists) do
        seps[i] = seps[i] or defaultsep
    end
    -- Split lists
    for i = 1, #lists do
        local items = mw.text.split(lists[i], seps[i])
        lists[i] = items
        if #items > maxListLen then
            maxListLen = #items
        end
    end
    -- Collect all parts in output order
    for i = 1, maxListLen do
        if i > 1 then
            table.insert(pieces, outersep)
        end
        for j = 1, #lists do
            if j > 1 then
                table.insert(pieces, innersep)
            end
            table.insert(pieces, lists[j][i] or "")
        end
    end
    return table.concat(pieces)
end
p.failsafe = function ( frame )
    -- Versioning interface
    -- Parameter:
    --     frame  -- frame object with requirement as first parameter,
    --               or string with requirement, or anything else
    -- Returns: string; empty if Failsafe.failsafe() returned false
    local kind = type( frame )
    local since
    if kind == "table" then
        since = frame.args[ 1 ]
    elseif kind == "string" then
        since = frame
    end
    if since then
        since = mw.text.trim( since )
    end
    if since == "" then
        since = false
    end
    return Failsafe.failsafe( since ) or ""
end -- p.failsafe()
function p.Text()
    -- Lua interface: provide the library table
    return Text
end -- p.Text
-- Calling the export table like a function removes this metatable again
-- and returns Failsafe, which is the same table as Text.
setmetatable( p, { __call = function ( func, ... )
setmetatable( p, nil )
return Failsafe
end } )
return p