Module:Unicode data/category/make
Appearance
-- Table of exported functions; it is returned at the end of this module.
local p = {}
--- Fetch the unparsed content of a wiki page.
-- @param title  page title (passed to mw.title.new)
-- @return the page content as a string
-- Raises an error when the title is invalid or the page has no content,
-- instead of failing later with an "attempt to index a nil value" message.
local function read_page(title)
	local title_object = mw.title.new(title)
	if not title_object then
		-- mw.title.new yields nil for titles it cannot construct.
		error(("invalid page title: %s"):format(tostring(title)), 2)
	end
	local content = title_object:getContent()
	if not content then
		-- getContent yields nil when there is no page to read.
		error(("page does not exist or has no content: %s"):format(tostring(title)), 2)
	end
	return content
end
--- Parse general-category data out of two wiki pages.
-- @param derived_general_category_page  title of a page whose lines look like
--        "XXXX ; gc" or "XXXX..XXXX ; gc" (hexadecimal code points followed by
--        a two-letter category: one uppercase, one lowercase letter)
-- @param property_value_aliases_page  title of a page containing a
--        "# General_Category" section with "gc ; abbr ; Long_Name" lines
-- @return singles     table mapping a code point (number) to its category
-- @return ranges      array of { first_code_point, last_code_point, category }
-- @return long_names  table mapping a category abbreviation to its long name
-- Entries whose category is "Cn" are skipped.
-- Raises an error if the General_Category section cannot be located.
function p.make_singles_and_ranges(derived_general_category_page, property_value_aliases_page)
	local text = read_page(derived_general_category_page)
	local singles, ranges = {}, {}
	-- The frontier pattern anchors each match to the start of a line
	-- (or the start of the text).
	for code_point1, code_point2, general_category in text:gmatch "%f[^\n%z](%x+)%.?%.?(%x*)%s*;%s*(%u%l)" do
		if general_category ~= "Cn" then
			code_point1 = tonumber(code_point1, 16)
			if code_point2 ~= "" then
				-- XXXX..XXXX ; gc
				code_point2 = tonumber(code_point2, 16)
				ranges[#ranges + 1] = { code_point1, code_point2, general_category }
			else
				-- XXXX ; gc
				singles[code_point1] = general_category
			end
		end
	end

	local property_value_aliases = read_page(property_value_aliases_page)
	-- Capture everything between the "# General_Category" header line and the
	-- next line that begins with "#".
	local general_category_aliases = property_value_aliases:match "# General_Category[^\n]+%s*(.-)%s*%f[^\n]#"
	if not general_category_aliases then
		error(("General_Category section not found in page: %s"):format(
			tostring(property_value_aliases_page)))
	end

	local long_names = {}
	for abbr, long_name in general_category_aliases:gmatch "gc%s*;%s*(%S+)%s*;%s*([%a_]+)" do
		long_names[abbr] = long_name
	end
	return singles, ranges, long_names
end
--- Render parsed category data as the source text of a Lua table.
-- @param data  table with three fields:
--        singles    - code point (number) -> category string
--        ranges     - array of { first, last, category }
--        long_names - abbreviation -> long name
-- @return a string of Lua source beginning with "return {"
-- Note: data.ranges is sorted in place by the first code point of each range.
function p.print_data(data)
	local buffer = require "Module:array"()
	local sorted_pairs = require "Module:TableTools".sortedPairs

	-- Format a template and append the result to the output buffer.
	local function emit(template, ...)
		buffer:insert(template:format(...))
	end

	emit [[
return {
singles = {
]]
	-- Check that maximum "singles" codepoint is less than 0x100000?
	for code_point, gc in sorted_pairs(data.singles) do
		emit('\t\t[0x%05X] = "%s",\n', code_point, gc)
	end

	emit [[
},
ranges = {
]]
	local range_list = data.ranges
	-- Order ranges by their starting code point.
	table.sort(range_list, function(a, b)
		return a[1] < b[1]
	end)
	for i = 1, #range_list do
		emit('\t\t{ 0x%06X, 0x%06X, "%s" },\n', unpack(range_list[i]))
	end

	emit [[
},
long_names = {
]]
	for short_name, full_name in sorted_pairs(data.long_names) do
		emit('\t\t%-2s = "%s",\n', short_name, full_name)
	end

	emit [[
},
}]]
	return buffer:concat()
end
--- Entry point: build the data from the two source pages and return the
-- generated Lua source wrapped in a <syntaxhighlight lang="lua"> tag.
-- @param frame  frame object; only frame:extensionTag is used here
-- @return whatever frame:extensionTag returns for the highlighted source
function p.main(frame)
	local singles, ranges, long_names = p.make_singles_and_ranges(
		"User:Erutuon/Unicode/DerivedGeneralCategory.txt",
		"User:Erutuon/Unicode/PropertyValueAliases.txt")
	local generated_source = p.print_data {
		singles = singles,
		ranges = ranges,
		long_names = long_names,
	}
	return frame:extensionTag {
		name = "syntaxhighlight",
		content = generated_source,
		args = { lang = "lua" },
	}
end
-- Hand the table of exported functions to whoever loads this module.
return p