輸出GB 18030-2005字符編碼(Lua)

輸出

按單字節、雙字節、四字節及編碼順序輸出GB 18030-2005中規定的中文圖形字符二進制編碼,不包括保留區字符和用戶自定義區字符。

Lua實現代碼

考慮到最近《信息技術 中文編碼字符集》修訂版送審稿已通過專家審查,本文採用Lua腳本實現,以便日後修改。代碼使用了位運算符(<<、>>、&、|),需要Lua 5.3或以上版本。

--- Tell whether a two-byte code falls inside one of the user-defined areas.
-- The three areas tested are:
--   lead 0xAA..0xAF with trail 0xA1..0xFE,
--   lead 0xF8..0xFE with trail 0xA1..0xFE,
--   lead 0xA1..0xA7 with trail 0x40..0x7E or 0x80..0xA0.
-- @param ch1 lead byte (integer)
-- @param ch2 trail byte (integer)
-- @return true when (ch1, ch2) is in a user-defined area, false otherwise
function IsInDoubleBytesUserDefinedArea(ch1, ch2)
	-- The first two areas share the same trail-byte range, so classify the
	-- trail byte first and then look at the lead byte.
	if ch2 >= 0xA1 and ch2 <= 0xFE then
		return (ch1 >= 0xAA and ch1 <= 0xAF) or (ch1 >= 0xF8 and ch1 <= 0xFE)
	end

	-- Remaining area: trail byte below 0xA1, with 0x7F excluded.
	local low_trail = (ch2 >= 0x40 and ch2 <= 0x7E) or (ch2 >= 0x80 and ch2 <= 0xA0)
	return low_trail and ch1 >= 0xA1 and ch1 <= 0xA7
end

--- Tell whether a four-byte code falls inside one of the reserved ranges.
-- The four bytes are packed big-endian into one integer and compared against
-- 0x85308130..0x8539FE39, 0x86308130..0x8F39FE39 and 0xE4308130..0xFC39FE39.
-- @param ch1 first byte (integer)
-- @param ch2 second byte (integer)
-- @param ch3 third byte (integer)
-- @param ch4 fourth byte (integer)
-- @return true when the packed code is inside a reserved range, false otherwise
function IsInQuadBytesReservedArea(ch1, ch2, ch3, ch4)
	local code = ch4 | (ch3 << 8) | (ch2 << 16) | (ch1 << 24)
	return (code >= 0x85308130 and code <= 0x8539FE39)
		or (code >= 0x86308130 and code <= 0x8F39FE39)
		or (code >= 0xE4308130 and code <= 0xFC39FE39)
end

--- Tell whether a four-byte code falls inside the user-defined range.
-- The four bytes are packed big-endian into one integer and compared against
-- 0xFD308130..0xFE39FE39.
-- @param ch1 first byte (integer)
-- @param ch2 second byte (integer)
-- @param ch3 third byte (integer)
-- @param ch4 fourth byte (integer)
-- @return true when the packed code is inside the range, false otherwise
function IsInQuadBytesUserDefinedArea(ch1, ch2, ch3, ch4)
	local code = ch4 | (ch3 << 8) | (ch2 << 16) | (ch1 << 24)
	return code >= 0xFD308130 and code <= 0xFE39FE39
end

--- Write one cell of the single-byte table to standard output.
-- When the low nibble of `ch` is zero a new row is started, labelled with the
-- high nibble in hexadecimal. Each cell is three columns wide.
-- @param ch single-byte code (integer)
function PrintSingleByte(ch)
	if (ch & 0x0f) == 0 then
		io.write(string.format("\n %1X", ch >> 4))
	end
	if ch < 0x20 or ch == 0x7f then
		-- Control codes are not graphic characters; writing them raw (NUL,
		-- BS, TAB, LF, CR, ESC, DEL ...) would corrupt the table layout, so
		-- an empty cell of the same width is written instead.
		io.write("   ")
	else
		io.write(string.format("  %c", ch))
	end
end

--- Write one cell of the two-byte table to standard output.
-- A new page (blank line plus a column header labelled with the lead byte) is
-- started whenever the lead byte differs from that of the previously printed
-- code. A new row, labelled with the high nibble of the trail byte, is started
-- on a new page or when that nibble changes. Cells skipped since the previous
-- code are padded with blanks so columns stay aligned.
-- @param ch1 lead byte (integer)
-- @param ch2 trail byte (integer)
-- @param last_ch packed value returned by the previous call (0 for the first)
-- @return the packed code (ch1 << 8) | ch2, to be passed as the next `last_ch`
function PrintDoubleBytes(ch1, ch2, last_ch)
	local ch = (ch1 << 8) | ch2
	-- Must be local: a bare assignment here would create a global on every call.
	local bNewLine = false
	if (last_ch >> 8) ~= ch1 then
		io.write(string.format("\n\n%02X  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F", ch1))
		bNewLine = true
	end

	local column = ch2 & 0x0f
	local blanks
	if bNewLine or (last_ch & 0xf0) ~= (ch2 & 0xf0) then
		-- Fresh row: pad from column 0 up to this cell.
		io.write(string.format("\n %1X", ch2 >> 4))
		blanks = column
	else
		-- Same row: pad only the cells skipped since the previous code.
		blanks = column - (last_ch & 0x0f) - 1
	end
	-- string.rep yields "" for a zero or negative count.
	io.write(string.rep("   ", blanks))
	io.write(string.format(" %c%c", ch1, ch2))
	return ch
end

--- Write one cell of the four-byte table to standard output.
-- A new page (headed by the first two bytes in hexadecimal and a 0x30..0x39
-- column header) is started whenever the first two bytes differ from those of
-- the previously printed code. A new row, labelled with the third byte, is
-- started on a new page or when the third byte changes. Skipped cells are
-- padded with blanks.
-- @param ch1 first byte (integer)
-- @param ch2 second byte (integer)
-- @param ch3 third byte (integer)
-- @param ch4 fourth byte (integer)
-- @param last_ch packed value of the previously printed code
-- @return the packed 32-bit code, to be passed as the next `last_ch`
function PrintQuadBytes(ch1, ch2, ch3, ch4, last_ch)
	local prefix = (ch1 << 8) | ch2
	local code = (prefix << 16) | (ch3 << 8) | ch4

	local starts_page = (last_ch >> 16) ~= prefix
	if starts_page then
		io.write(string.format("\n\n%04X\n    30 31 32 33 34 35 36 37 38 39", prefix))
	end

	-- Number of empty cells to emit before this one.
	local gap
	if starts_page or ((last_ch >> 8) & 0xff) ~= ch3 then
		io.write(string.format("\n%02X", ch3))
		gap = ch4 - 0x30
	else
		gap = ch4 - (last_ch & 0xff) - 1
	end
	for _ = 1, gap do
		io.write("   ")
	end

	io.write(string.format(" %c%c%c%c", ch1, ch2, ch3, ch4))
	return code
end

-- main
-- Single-byte codes 0x00..0x7F, laid out under a 16-column header.
io.write("    0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F")
for code = 0, 0x7f do
	PrintSingleByte(code)
end

-- Two-byte codes: lead 0x81..0xFE, trail 0x40..0x7E then 0x80..0xFE (0x7F is
-- never a trail byte). Codes in the user-defined areas are skipped.
local last_ch = 0
local trail_ranges = { { 0x40, 0x7e }, { 0x80, 0xfe } }
for lead = 0x81, 0xfe do
	for _, range in ipairs(trail_ranges) do
		for trail = range[1], range[2] do
			if not IsInDoubleBytesUserDefinedArea(lead, trail) then
				last_ch = PrintDoubleBytes(lead, trail, last_ch)
			end
		end
	end
end

-- Four-byte codes: bytes 1 and 3 run over 0x81..0xFE, bytes 2 and 4 over
-- 0x30..0x39. Codes in the reserved or user-defined ranges are skipped.
for b1 = 0x81, 0xfe do
	for b2 = 0x30, 0x39 do
		for b3 = 0x81, 0xfe do
			for b4 = 0x30, 0x39 do
				local skipped = IsInQuadBytesReservedArea(b1, b2, b3, b4)
					or IsInQuadBytesUserDefinedArea(b1, b2, b3, b4)
				if not skipped then
					last_ch = PrintQuadBytes(b1, b2, b3, b4, last_ch)
				end
			end
		end
	end
end

參考

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章