Jump to content

Module:Make emoji zwj table

From Humanipedia

This module creates a new version of emoji_t for use in Module:Citation/CS1/Configuration.

To use this module:

  1. locate a copy of the new version of the Unicode file emoji-zwj-sequences.txt. This file might be found in https://unicode.org/Public/emoji/VV.V/ (where VV.V is the new Unicode version number).
  2. copy the whole content of emoji-zwj-sequences.txt to your clipboard
  3. edit this page (the module's documentation page)
  4. paste your clipboard into this page overwriting any previous version of the Unicode data; do not disturb the html comment tags.
  5. replace the url in the {{#invoke:}} with the url of the new emoji-zwj-sequences.txt file
  6. preview this page; if nothing wrong with the rendering, save.
  7. copy emoji_t to your clipboard and paste it over emoji_t in Module:Citation/CS1/Configuration/sandbox (always update the live module suite from its sandboxen)

emoji_t[edit source]

use this table to overwrite same-named table in Module:Citation/CS1/Configuration/sandbox

-- list of emoji that use a zwj character (U+200D) to combine with another emoji
-- from: https://unicode.org/Public/emoji/16.0/emoji-zwj-sequences.txt; version: 16.0; 2024-08-14
-- table created by: [[:en:Module:Make emoji zwj table]]
local emoji_t = {																-- indexes are decimal forms of the hex values in U+xxxx
	[8596] = true,																-- U+2194 ↔ left right arrow
	[8597] = true,																-- U+2195 ↕ up down arrow
	[9760] = true,																-- U+2620 ☠ skull and crossbones
	[9792] = true,																-- U+2640 ♀ female sign
	[9794] = true,																-- U+2642 ♂ male sign
	[9877] = true,																-- U+2695 ⚕ staff of aesculapius
	[9878] = true,																-- U+2696 ⚖ scales
	[9895] = true,																-- U+26A7 ⚧ male with stroke and male and female sign
	[9992] = true,																-- U+2708 ✈ airplane
	[10052] = true,																-- U+2744 ❄ snowflake
	[10084] = true,																-- U+2764 ❤ heavy black heart
	[10145] = true,																-- U+27A1 ➡ black rightwards arrow
	[11035] = true,																-- U+2B1B ⬛ black large square
	[127752] = true,															-- U+1F308 🌈 rainbow
	[127787] = true,															-- U+1F32B 🌫 fog
	[127806] = true,															-- U+1F33E 🌾 ear of rice
	[127859] = true,															-- U+1F373 🍳 cooking
	[127868] = true,															-- U+1F37C 🍼 baby bottle
	[127876] = true,															-- U+1F384 🎄 christmas tree
	[127891] = true,															-- U+1F393 🎓 graduation cap
	[127908] = true,															-- U+1F3A4 🎤 microphone
	[127912] = true,															-- U+1F3A8 🎨 artist palette
	[127979] = true,															-- U+1F3EB 🏫 school
	[127981] = true,															-- U+1F3ED 🏭 factory
	[128102] = true,															-- U+1F466 👦 boy
	[128103] = true,															-- U+1F467 👧 girl
	[128104] = true,															-- U+1F468 👨 man
	[128105] = true,															-- U+1F469 👩 woman
	[128139] = true,															-- U+1F48B 💋 kiss mark
	[128165] = true,															-- U+1F4A5 💥 collision symbol
	[128168] = true,															-- U+1F4A8 💨 dash symbol
	[128171] = true,															-- U+1F4AB 💫 dizzy symbol
	[128187] = true,															-- U+1F4BB 💻 personal computer
	[128188] = true,															-- U+1F4BC 💼 brief case
	[128293] = true,															-- U+1F525 🔥 fire
	[128295] = true,															-- U+1F527 🔧 wrench
	[128300] = true,															-- U+1F52C 🔬 microscope
	[128488] = true,															-- U+1F5E8 🗨 left speech bubble
	[128640] = true,															-- U+1F680 🚀 rocket
	[128658] = true,															-- U+1F692 🚒 fire engine
	[129001] = true,															-- U+1F7E9 🟩 large green square
	[129003] = true,															-- U+1F7EB 🟫 large brown square
	[129309] = true,															-- U+1F91D 🤝 handshake
	[129455] = true,															-- U+1F9AF 🦯 probing cane
	[129456] = true,															-- U+1F9B0 🦰 emoji component red hair
	[129457] = true,															-- U+1F9B1 🦱 emoji component curly hair
	[129458] = true,															-- U+1F9B2 🦲 emoji component bald
	[129459] = true,															-- U+1F9B3 🦳 emoji component white hair
	[129466] = true,															-- U+1F9BA 🦺 safety vest
	[129468] = true,															-- U+1F9BC 🦼 motorized wheelchair
	[129469] = true,															-- U+1F9BD 🦽 manual wheelchair
	[129489] = true,															-- U+1F9D1 🧑 adult
	[129490] = true,															-- U+1F9D2 🧒 child
	[129657] = true,															-- U+1FA79 🩹 adhesive bandage
	[129778] = true,															-- U+1FAF2 🫲 leftwards hand
	}

emoji_names_t[edit source]

use this table to overwrite same-named table in :en:Module:Make emoji zwj table; add missing names.

local emoji_names_t = {															-- keys are hex values from U+xxxx code points
	['2194'] = 'left right arrow',
	['2195'] = 'up down arrow',
	['2620'] = 'skull and crossbones',
	['2640'] = 'female sign',
	['2642'] = 'male sign',
	['2695'] = 'staff of aesculapius',
	['2696'] = 'scales',
	['26A7'] = 'male with stroke and male and female sign',
	['2708'] = 'airplane',
	['2744'] = 'snowflake',
	['2764'] = 'heavy black heart',
	['27A1'] = 'black rightwards arrow',
	['2B1B'] = 'black large square',
	['1F308'] = 'rainbow',
	['1F32B'] = 'fog',
	['1F33E'] = 'ear of rice',
	['1F373'] = 'cooking',
	['1F37C'] = 'baby bottle',
	['1F384'] = 'christmas tree',
	['1F393'] = 'graduation cap',
	['1F3A4'] = 'microphone',
	['1F3A8'] = 'artist palette',
	['1F3EB'] = 'school',
	['1F3ED'] = 'factory',
	['1F466'] = 'boy',
	['1F467'] = 'girl',
	['1F468'] = 'man',
	['1F469'] = 'woman',
	['1F48B'] = 'kiss mark',
	['1F4A5'] = 'collision symbol',
	['1F4A8'] = 'dash symbol',
	['1F4AB'] = 'dizzy symbol',
	['1F4BB'] = 'personal computer',
	['1F4BC'] = 'brief case',
	['1F525'] = 'fire',
	['1F527'] = 'wrench',
	['1F52C'] = 'microscope',
	['1F5E8'] = 'left speech bubble',
	['1F680'] = 'rocket',
	['1F692'] = 'fire engine',
	['1F7E9'] = 'large green square',
	['1F7EB'] = 'large brown square',
	['1F91D'] = 'handshake',
	['1F9AF'] = 'probing cane',
	['1F9B0'] = 'emoji component red hair',
	['1F9B1'] = 'emoji component curly hair',
	['1F9B2'] = 'emoji component bald',
	['1F9B3'] = 'emoji component white hair',
	['1F9BA'] = 'safety vest',
	['1F9BC'] = 'motorized wheelchair',
	['1F9BD'] = 'manual wheelchair',
	['1F9D1'] = 'adult',
	['1F9D2'] = 'child',
	['1FA79'] = 'adhesive bandage',
	['1FAF2'] = 'leftwards hand',
	}



--[[

This module creates an associative table of emoji code points that may follow a zero-width joiner character (U+200D).

The module reads a copy of the Unicode Emoji ZWJ Sequences for UTS (typically emoji-zwj-sequences.txt found in
https://unicode.org/Public/emoji/VV.V/ where VV.V is the Unicode version number).  The copy of the unicode data
file is held inside html comments in the module's /doc page.  From that file, the module extracts pairs of
<zwj> <emoji code point>. The module saves each unique code point, transformed as necessary, to build a new version
of emoji_t for use in Module:Citation/CS1/Configuration.

The module takes one positional parameter:
	{{#invoke:make emoji zwj table|main|<url>}}

<url> is the url that matches the Unicode data file.  Alas, Lua modules cannot read external data files so <url>
is merely used to document where the data may be found.

Use of this module is documented on its /doc page

]]

require('strict');

-- Lookup table of human-readable names for emoji code points.  main() uses it for two things: the name at the end of
-- each comment in the generated emoji_t, and the values in the regenerated copy of this table.  A code point that is
-- missing here gets an empty-string name in both outputs, so add names here by hand when new code points appear.
-- Keys are hexadecimal code point strings without the 'U+' prefix; every key here uses upper-case hex digits.
-- NOTE(review): the names look like lower-cased Unicode character names; confirm before adding new ones.
local emoji_names_t = {															-- keys are hex values from U+xxxx code points
	['2194'] = 'left right arrow',
	['2195'] = 'up down arrow',
	['2620'] = 'skull and crossbones',
	['2640'] = 'female sign',
	['2642'] = 'male sign',
	['2695'] = 'staff of aesculapius',
	['2696'] = 'scales',
	['26A7'] = 'male with stroke and male and female sign',
	['2708'] = 'airplane',
	['2744'] = 'snowflake',
	['2764'] = 'heavy black heart',
	['27A1'] = 'black rightwards arrow',
	['2B1B'] = 'black large square',
	['1F308'] = 'rainbow',
	['1F32B'] = 'fog',
	['1F33E'] = 'ear of rice',
	['1F373'] = 'cooking',
	['1F37C'] = 'baby bottle',
	['1F384'] = 'christmas tree',
	['1F393'] = 'graduation cap',
	['1F3A4'] = 'microphone',
	['1F3A8'] = 'artist palette',
	['1F3EB'] = 'school',
	['1F3ED'] = 'factory',
	['1F466'] = 'boy',
	['1F467'] = 'girl',
	['1F468'] = 'man',
	['1F469'] = 'woman',
	['1F48B'] = 'kiss mark',
	['1F4A5'] = 'collision symbol',
	['1F4A8'] = 'dash symbol',
	['1F4AB'] = 'dizzy symbol',
	['1F4BB'] = 'personal computer',
	['1F4BC'] = 'brief case',
	['1F525'] = 'fire',
	['1F527'] = 'wrench',
	['1F52C'] = 'microscope',
	['1F5E8'] = 'left speech bubble',
	['1F680'] = 'rocket',
	['1F692'] = 'fire engine',
	['1F7E9'] = 'large green square',
	['1F7EB'] = 'large brown square',
	['1F91D'] = 'handshake',
	['1F9AF'] = 'probing cane',
	['1F9B0'] = 'emoji component red hair',
	['1F9B1'] = 'emoji component curly hair',
	['1F9B2'] = 'emoji component bald',
	['1F9B3'] = 'emoji component white hair',
	['1F9BA'] = 'safety vest',
	['1F9BC'] = 'motorized wheelchair',
	['1F9BD'] = 'manual wheelchair',
	['1F9D1'] = 'adult',
	['1F9D2'] = 'child',
	['1FA79'] = 'adhesive bandage',
	['1FAF2'] = 'leftwards hand',
	}

--[[--------------------------< M A I N >----------------------------------------------------------------------

Entry point for {{#invoke:make emoji zwj table|main|<url>}}.

Reads the wikitext of the ~/doc page (the current page when it is the ~/doc page; otherwise the current page's ~/doc
subpage), collects every unique hexadecimal code point that directly follows '200D ' in that text, and returns
preprocessed wikitext holding two <pre> blocks:
	emoji_t: decimal code point keys, each with a comment giving the U+ code point, the character, and its name
	emoji_names_t: hexadecimal code point keys with the names known to this module (empty string when unknown)
Both tables are listed in ascending code point order.

<url> (frame.args[1]), the '# Version:' value, and the '# Date:' value are only written into the comment at the top
of emoji_t; a placeholder is written for any of them that is missing so that a missing value does not abort the module.

When the ~/doc page cannot be read, returns an error message instead of the tables.

]]

local function main (frame)
	local this_wiki = table.concat ({':', mw.language.getContentLanguage():getCode(), ':'});	-- this wiki's content-language code wrapped in colons
	local title_obj = mw.title.getCurrentTitle();
	local doc_title_obj = title_obj;											-- assume that we are viewing the ~/doc page standalone
	if not title_obj.prefixedText:match ('/doc$') then							-- when viewing the module page
		doc_title_obj = mw.title.new (title_obj.prefixedText .. '/doc');		-- get title object for the ~/doc page; nil when the title is invalid
	end

	local content = doc_title_obj and doc_title_obj:getContent();				-- nil when the ~/doc page does not exist
	if not content then															-- nothing to parse so say so instead of raising a script error
		return '<strong class="error">make emoji zwj table: cannot read the Unicode data from the /doc page</strong>';
	end

	local tabs_15 = string.rep ('\t', 15);										-- for six-digit keys
	local tabs_16 = string.rep ('\t', 16);										-- for keys that have fewer than six digits

	local source_url = frame.args[1];											-- documentation only; Lua modules cannot read external files
	if not source_url or '' == source_url then
		source_url = '(url not specified)';										-- table.concat() cannot handle nil so use a placeholder
	end
	local file_date = content:match ('# *Date: *(%d%d%d%d%-%d%d%-%d%d)') or '(unknown date)';	-- file date of the Unicode source
	local file_version = content:match ('# *Version: *([%d%.]+)') or '(unknown version)';		-- version of the Unicode source

	local seen_t = {};															-- set of hex code point strings that we have already collected
	local code_points_t = {};													-- sequence of unique code points that follow U+200D; one {hex, dec, name} record each

	for code_point in content:gmatch ('200D (%x+)') do							-- find each <zwj> <code point> pair
		local hex = code_point:upper();											-- emoji_names_t keys use upper-case hex digits
		if not seen_t[hex] then													-- if we have not seen this <code point> before
			seen_t[hex] = true;													-- remember that we have now seen this <code point>
			table.insert (code_points_t, {
				hex = hex,
				dec = tonumber (hex, 16),										-- decimal form is used as the emoji_t key
				name = emoji_names_t[hex] or '',								-- if we have a name for this code point, use it; empty string else
				});
		end
	end

	table.sort (code_points_t, function (a, b) return a.dec < b.dec; end);		-- ascending numerical sort; one sort serves both output tables

	local out_t = {};															-- final output goes here; one line of wikitext per member

	table.insert (out_t, '==<span style="font-family: monospace, monospace;">emoji_t</span>==');
	table.insert (out_t, 'use this table to overwrite same-named table in [[Module:Citation/CS1/Configuration/sandbox]]');
	table.insert (out_t, '<pre>-- list of emoji that use a zwj character (U+200D) to combine with another emoji');
	table.insert (out_t, table.concat ({'-- from: ', source_url, '; version: ', file_version, '; ', file_date}));
	table.insert (out_t, table.concat ({'-- table created by: [[', this_wiki, title_obj.nsText, ':', title_obj.baseText, ']]'}));
	table.insert (out_t, table.concat ({'local emoji_t = {', tabs_16, '-- indexes are decimal forms of the hex values in U+xxxx'}));

	for _, cp in ipairs (code_points_t) do										-- build an emoji_t line for each code point
		table.insert (out_t, table.concat ({
			'\t[',																-- open key markup
			cp.dec,																-- <code point> in decimal
			'] = true,',														-- close key and assign it the value 'true'
			(100000 <= cp.dec) and tabs_15 or tabs_16,							-- insert a bunch of tabs between the k/v pair and an associated comment
			'-- U+',															-- start the comment; prefix for the hex <code point>
			cp.hex,																-- add the <code point>
			' &#x',																-- hex html entity prefix for <code point>
			cp.hex,																-- add the <code point>
			'; ',																-- finish the html entity
			cp.name,															-- name for this code point or empty string
			}));
	end
	table.insert (out_t, '\t}</pre>');											-- close the table and the <pre> tag

	table.insert (out_t, '==<span style="font-family: monospace, monospace;">emoji_names_t</span>==');
	table.insert (out_t, table.concat ({'use this table to overwrite same-named table in ', this_wiki, title_obj.nsText, ':', title_obj.baseText, '; add missing names.'}));
	table.insert (out_t, table.concat ({'\n<pre>local emoji_names_t = {', tabs_15, '-- keys are hex values from U+xxxx code points'}));

	for _, cp in ipairs (code_points_t) do										-- build an emoji_names_t line for each code point
		table.insert (out_t, table.concat ({
			'\t[\'',															-- open key markup
			cp.hex,																-- <code point> in hex
			'\'] = \'',															-- close key, open quote mark and ready to assign it a name
			cp.name,															-- name for this code point or empty string
			'\',',																-- add closing quote mark and terminal comma
			}));
	end
	table.insert (out_t, '\t}</pre>');											-- close the table and the <pre> tag

	return frame:preprocess (table.concat (out_t, '\n'));						-- make a big string and done
end


--[[--------------------------< E X P O R T S >----------------------------------------------------------------
]]

local exports_t = {};															-- functions that {{#invoke:}} may call
exports_t.main = main;															-- the module's only entry point

return exports_t;