You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
48 lines
1.1 KiB
48 lines
1.1 KiB
#!/usr/bin/perl -w

# Convert unicode mappings to nginx configuration file format.

# You may find useful mappings in various places, including
# unicode.org official site:
#
# http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT
# http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT

# Needs perl 5.6 or later.

# Written by Maxim Dounin, mdounin@mdounin.ru

###############################################################################

require 5.006;

use strict;
use warnings;

# Return the UTF-8 encoding of a Unicode code point as an uppercase hex
# string, two digits per byte (for example 0x0410 gives "D090").
sub utf8_hex {
    my ($code_point) = @_;

    # pack("U") builds a one-character string; unpacking it with "U0C*"
    # then yields the individual bytes of its UTF-8 representation.
    return join('',
        map { sprintf("%02X", $_) }
        unpack("U0C*", pack("U", $code_point))
    );
}

# Convert one mapping line of the form
#
#     0x<from-code> 0x<unicode-code> # <unicode-name>
#
# into an nginx charset map entry.  Returns the entry including its
# trailing newline, or nothing (undef in scalar context) when the line is
# not a well-formed mapping, e.g. when the unicode column is missing.
sub convert_mapping {
    my ($line) = @_;

    # Accept real hex digits only, so that a malformed code is reported as
    # an unrecognized line instead of being handed to hex().
    return
        unless $line =~ /^\s*0x([0-9A-Fa-f]{2})\s*0x([0-9A-Fa-f]{4})\s*(#.*)/;

    my ($cs_code, $un_code, $un_name) = ($1, $2, $3);

    return " $cs_code " . utf8_hex(hex($un_code)) . " ; $un_name\n";
}

while (<>) {
    # Skip comments (optionally indented) and empty lines
    next if /^\s*#/;
    next if /^\s*$/;

    # Remove the line terminator together with any trailing whitespace;
    # this also drops the CR of CRLF input so it cannot leak into the
    # generated configuration.
    s/\s+\z//;

    # Convert mappings
    my $entry = convert_mapping($_);

    if (defined $entry) {
        print $entry;
    } else {
        warn "Unrecognized line: '$_'";
    }
}

###############################################################################