Aller au contenu

Module:HiddenUnicode

De Wikivoyage

La documentation pour ce module peut être créée à Module:HiddenUnicode/doc

require('strict')
local char = mw.ustring.char
local find = string.find

-- Invisible Unicode format-control characters to look for.
-- All of them are multibyte sequences in UTF-8 (none is part of ASCII).
-- Lua patterns offer no alternation like regular expressions do, and their 1-byte
-- character classes cannot match non-ASCII characters, so each character is stored
-- as its own string and has to be searched for separately, in a loop.
local hiddenUnicodeChars = {}
do
	-- Code points, in the order in which they end up in hiddenUnicodeChars.
	local codepoints = {

		-- Bidi controls, strongly discouraged in HTML (better use "bdi" or "bdo" elements)
		0x200E, -- LEFT-TO-RIGHT MARK (LRM)
		0x200F, -- RIGHT-TO-LEFT MARK (RLM)
		0x202A, -- LEFT-TO-RIGHT EMBEDDING (LRE)
		0x202B, -- RIGHT-TO-LEFT EMBEDDING (RLE)
		0x202C, -- POP DIRECTIONAL FORMATTING (PDF)
		0x202D, -- LEFT-TO-RIGHT OVERRIDE (LRO)
		0x202E, -- RIGHT-TO-LEFT OVERRIDE (RLO)
		0x2066, -- LEFT-TO-RIGHT ISOLATE (LRI)
		0x2067, -- RIGHT-TO-LEFT ISOLATE (RLI)
		0x2068, -- FIRST STRONG ISOLATE (FSI)
		0x2069, -- POP DIRECTIONAL ISOLATE (PDI)

		-- Invisible zero-width semantic characters
		0x200B, -- ZERO-WIDTH SPACE
		0x2060, -- WORD JOINER (WJ)

		-- Invisible zero-width mathematical characters (better use math elements in HTML)
		0x2061, -- FUNCTION APPLICATION (contiguity operator indicating function application)
		0x2062, -- INVISIBLE TIMES (contiguity operator indicating multiplication)
		0x2063, -- INVISIBLE SEPARATOR (invisible comma, contiguity operator indicating that
		        -- adjacent mathematical symbols form a list, e.g. when no visible comma is
		        -- used between multiple indices)
		0x2064, -- INVISIBLE PLUS (contiguity operator indicating addition)

		-- Deprecated: use of these characters is strongly discouraged in all texts (not only in HTML).
		0x206A, -- INHIBIT SYMMETRIC SWAPPING
		0x206B, -- ACTIVATE SYMMETRIC SWAPPING
		0x206C, -- INHIBIT ARABIC FORM SHAPING
		0x206D, -- ACTIVATE ARABIC FORM SHAPING
		0x206E, -- NATIONAL DIGIT SHAPES
		0x206F, -- NOMINAL DIGIT SHAPES
		0xFEFF, -- ZERO WIDTH NO-BREAK SPACE (ZWNBSP) (deprecated in favor of WJ, now used only for byte order marks)

	}

	-- Turn every code point into the string that encodes it, preserving the order.
	for index, codepoint in ipairs(codepoints) do
		hiddenUnicodeChars[index] = char(codepoint)
	end
end

local p = {}

-- Tells whether `text` contains at least one of the strings stored in
-- hiddenUnicodeChars.
local function hasHiddenChar(text)
	for index = 1, #hiddenUnicodeChars do
		-- The fourth argument (true) requests a plain substring search, so the
		-- needle is never interpreted as a Lua pattern.
		if text:find(hiddenUnicodeChars[index], 1, true) then
			return true
		end
	end
	return false
end

-- Entry point of the module.
-- Reads the first positional argument of `frame` (a missing argument is treated as
-- an empty string) and returns the tracking category link when that text contains
-- any character from hiddenUnicodeChars; otherwise returns an empty string.
function p.HiddenUnicode(frame)
	local text = frame.args[1] or ''
	if hasHiddenChar(text) then
		return '[[Catégorie:Listing avec un caractère unicode caché]]'
	end
	return ''
end

return p