“021740dc30d184e3b0fa7679936e65a56090c425”上不存在“arch/x86/mm/hugetlbpage.c”
提交 3f6679ba 编写于 作者: S sherman

6945564: Unicode script support in Character class

6948903: Make Unicode scripts available for use in regular expressions
Summary: added Unicode script suport
Reviewed-by: martin
上级 7a89f869
......@@ -34,6 +34,7 @@ JAVA_JAVA_java = \
java/lang/Thread.java \
java/lang/Character.java \
java/lang/CharacterData.java \
java/lang/CharacterName.java \
sun/misc/ASCIICaseInsensitiveComparator.java \
sun/misc/VM.java \
sun/misc/Signal.java \
......
......@@ -384,6 +384,27 @@ clean::
$(RM) $(GENSRCDIR)/java/lang/CharacterDataUndefined.java
$(RM) $(GENSRCDIR)/java/lang/CharacterDataPrivateUse.java
#
# Rules to generate classes/java/lang/uniName.dat
#
UNINAME = $(CLASSBINDIR)/java/lang/uniName.dat
GENERATEUNINAME_JARFILE = $(BUILDTOOLJARDIR)/generatecharacter.jar
build: $(UNINAME)
$(UNINAME): $(UNICODEDATA)/UnicodeData.txt \
$(GENERATECHARACTER_JARFILE)
@$(prep-target)
$(BOOT_JAVA_CMD) -classpath $(GENERATECHARACTER_JARFILE) \
build.tools.generatecharacter.CharacterName \
$(UNICODEDATA)/UnicodeData.txt $(UNINAME)
clean::
$(RM) $(UNINAME)
#
# End of rules to create $(GENSRCDIR)/java/lang/CharacterDataXX.java
#
......
# Scripts-5.2.0.txt
# Date: 2009-08-22, 04:58:43 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
# Property: Script
# All code points not explicitly listed for Script
# have the value Unknown (Zzzz).
# @missing: 0000..10FFFF; Unknown
# ================================================
0000..001F ; Common # Cc [32] <control-0000>..<control-001F>
0020 ; Common # Zs SPACE
0021..0023 ; Common # Po [3] EXCLAMATION MARK..NUMBER SIGN
0024 ; Common # Sc DOLLAR SIGN
0025..0027 ; Common # Po [3] PERCENT SIGN..APOSTROPHE
0028 ; Common # Ps LEFT PARENTHESIS
0029 ; Common # Pe RIGHT PARENTHESIS
002A ; Common # Po ASTERISK
002B ; Common # Sm PLUS SIGN
002C ; Common # Po COMMA
002D ; Common # Pd HYPHEN-MINUS
002E..002F ; Common # Po [2] FULL STOP..SOLIDUS
0030..0039 ; Common # Nd [10] DIGIT ZERO..DIGIT NINE
003A..003B ; Common # Po [2] COLON..SEMICOLON
003C..003E ; Common # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN
003F..0040 ; Common # Po [2] QUESTION MARK..COMMERCIAL AT
005B ; Common # Ps LEFT SQUARE BRACKET
005C ; Common # Po REVERSE SOLIDUS
005D ; Common # Pe RIGHT SQUARE BRACKET
005E ; Common # Sk CIRCUMFLEX ACCENT
005F ; Common # Pc LOW LINE
0060 ; Common # Sk GRAVE ACCENT
007B ; Common # Ps LEFT CURLY BRACKET
007C ; Common # Sm VERTICAL LINE
007D ; Common # Pe RIGHT CURLY BRACKET
007E ; Common # Sm TILDE
007F..009F ; Common # Cc [33] <control-007F>..<control-009F>
00A0 ; Common # Zs NO-BREAK SPACE
00A1 ; Common # Po INVERTED EXCLAMATION MARK
00A2..00A5 ; Common # Sc [4] CENT SIGN..YEN SIGN
00A6..00A7 ; Common # So [2] BROKEN BAR..SECTION SIGN
00A8 ; Common # Sk DIAERESIS
00A9 ; Common # So COPYRIGHT SIGN
00AB ; Common # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
00AC ; Common # Sm NOT SIGN
00AD ; Common # Cf SOFT HYPHEN
00AE ; Common # So REGISTERED SIGN
00AF ; Common # Sk MACRON
00B0 ; Common # So DEGREE SIGN
00B1 ; Common # Sm PLUS-MINUS SIGN
00B2..00B3 ; Common # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE
00B4 ; Common # Sk ACUTE ACCENT
00B5 ; Common # L& MICRO SIGN
00B6 ; Common # So PILCROW SIGN
00B7 ; Common # Po MIDDLE DOT
00B8 ; Common # Sk CEDILLA
00B9 ; Common # No SUPERSCRIPT ONE
00BB ; Common # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
00BC..00BE ; Common # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS
00BF ; Common # Po INVERTED QUESTION MARK
00D7 ; Common # Sm MULTIPLICATION SIGN
00F7 ; Common # Sm DIVISION SIGN
02B9..02C1 ; Common # Lm [9] MODIFIER LETTER PRIME..MODIFIER LETTER REVERSED GLOTTAL STOP
02C2..02C5 ; Common # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD
02C6..02D1 ; Common # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
02D2..02DF ; Common # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT
02E5..02EB ; Common # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK
02EC ; Common # Lm MODIFIER LETTER VOICING
02ED ; Common # Sk MODIFIER LETTER UNASPIRATED
02EE ; Common # Lm MODIFIER LETTER DOUBLE APOSTROPHE
02EF..02FF ; Common # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW
0374 ; Common # Lm GREEK NUMERAL SIGN
037E ; Common # Po GREEK QUESTION MARK
0385 ; Common # Sk GREEK DIALYTIKA TONOS
0387 ; Common # Po GREEK ANO TELEIA
0589 ; Common # Po ARMENIAN FULL STOP
0600..0603 ; Common # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
060C ; Common # Po ARABIC COMMA
061B ; Common # Po ARABIC SEMICOLON
061F ; Common # Po ARABIC QUESTION MARK
0640 ; Common # Lm ARABIC TATWEEL
0660..0669 ; Common # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
06DD ; Common # Cf ARABIC END OF AYAH
0964..0965 ; Common # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
0970 ; Common # Po DEVANAGARI ABBREVIATION SIGN
0CF1..0CF2 ; Common # So [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
0E3F ; Common # Sc THAI CURRENCY SYMBOL BAHT
0FD5..0FD8 ; Common # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS
10FB ; Common # Po GEORGIAN PARAGRAPH SEPARATOR
16EB..16ED ; Common # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION
1735..1736 ; Common # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
1802..1803 ; Common # Po [2] MONGOLIAN COMMA..MONGOLIAN FULL STOP
1805 ; Common # Po MONGOLIAN FOUR DOTS
1CD3 ; Common # Po VEDIC SIGN NIHSHVASA
1CE1 ; Common # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
1CE9..1CEC ; Common # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
1CEE..1CF1 ; Common # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA
1CF2 ; Common # Mc VEDIC SIGN ARDHAVISARGA
2000..200A ; Common # Zs [11] EN QUAD..HAIR SPACE
200B ; Common # Cf ZERO WIDTH SPACE
200E..200F ; Common # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
2010..2015 ; Common # Pd [6] HYPHEN..HORIZONTAL BAR
2016..2017 ; Common # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE
2018 ; Common # Pi LEFT SINGLE QUOTATION MARK
2019 ; Common # Pf RIGHT SINGLE QUOTATION MARK
201A ; Common # Ps SINGLE LOW-9 QUOTATION MARK
201B..201C ; Common # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK
201D ; Common # Pf RIGHT DOUBLE QUOTATION MARK
201E ; Common # Ps DOUBLE LOW-9 QUOTATION MARK
201F ; Common # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK
2020..2027 ; Common # Po [8] DAGGER..HYPHENATION POINT
2028 ; Common # Zl LINE SEPARATOR
2029 ; Common # Zp PARAGRAPH SEPARATOR
202A..202E ; Common # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
202F ; Common # Zs NARROW NO-BREAK SPACE
2030..2038 ; Common # Po [9] PER MILLE SIGN..CARET
2039 ; Common # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK
203A ; Common # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
203B..203E ; Common # Po [4] REFERENCE MARK..OVERLINE
203F..2040 ; Common # Pc [2] UNDERTIE..CHARACTER TIE
2041..2043 ; Common # Po [3] CARET INSERTION POINT..HYPHEN BULLET
2044 ; Common # Sm FRACTION SLASH
2045 ; Common # Ps LEFT SQUARE BRACKET WITH QUILL
2046 ; Common # Pe RIGHT SQUARE BRACKET WITH QUILL
2047..2051 ; Common # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY
2052 ; Common # Sm COMMERCIAL MINUS SIGN
2053 ; Common # Po SWUNG DASH
2054 ; Common # Pc INVERTED UNDERTIE
2055..205E ; Common # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS
205F ; Common # Zs MEDIUM MATHEMATICAL SPACE
2060..2064 ; Common # Cf [5] WORD JOINER..INVISIBLE PLUS
206A..206F ; Common # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
2070 ; Common # No SUPERSCRIPT ZERO
2074..2079 ; Common # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE
207A..207C ; Common # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN
207D ; Common # Ps SUPERSCRIPT LEFT PARENTHESIS
207E ; Common # Pe SUPERSCRIPT RIGHT PARENTHESIS
2080..2089 ; Common # No [10] SUBSCRIPT ZERO..SUBSCRIPT NINE
208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS
208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS
20A0..20B8 ; Common # Sc [25] EURO-CURRENCY SIGN..TENGE SIGN
2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
2102 ; Common # L& DOUBLE-STRUCK CAPITAL C
2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA
2107 ; Common # L& EULER CONSTANT
2108..2109 ; Common # So [2] SCRUPLE..DEGREE FAHRENHEIT
210A..2113 ; Common # L& [10] SCRIPT SMALL G..SCRIPT SMALL L
2114 ; Common # So L B BAR SYMBOL
2115 ; Common # L& DOUBLE-STRUCK CAPITAL N
2116..2118 ; Common # So [3] NUMERO SIGN..SCRIPT CAPITAL P
2119..211D ; Common # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
211E..2123 ; Common # So [6] PRESCRIPTION TAKE..VERSICLE
2124 ; Common # L& DOUBLE-STRUCK CAPITAL Z
2125 ; Common # So OUNCE SIGN
2127 ; Common # So INVERTED OHM SIGN
2128 ; Common # L& BLACK-LETTER CAPITAL Z
2129 ; Common # So TURNED GREEK SMALL LETTER IOTA
212C..212D ; Common # L& [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C
212E ; Common # So ESTIMATED SYMBOL
212F..2131 ; Common # L& [3] SCRIPT SMALL E..SCRIPT CAPITAL F
2133..2134 ; Common # L& [2] SCRIPT CAPITAL M..SCRIPT SMALL O
2135..2138 ; Common # Lo [4] ALEF SYMBOL..DALET SYMBOL
2139 ; Common # L& INFORMATION SOURCE
213A..213B ; Common # So [2] ROTATED CAPITAL Q..FACSIMILE SIGN
213C..213F ; Common # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI
2140..2144 ; Common # Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y
2145..2149 ; Common # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J
214A ; Common # So PROPERTY LINE
214B ; Common # Sm TURNED AMPERSAND
214C..214D ; Common # So [2] PER SIGN..AKTIESELSKAB
214F ; Common # So SYMBOL FOR SAMARITAN SOURCE
2150..215F ; Common # No [16] VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE
2189 ; Common # No VULGAR FRACTION ZERO THIRDS
2190..2194 ; Common # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW
2195..2199 ; Common # So [5] UP DOWN ARROW..SOUTH WEST ARROW
219A..219B ; Common # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE
219C..219F ; Common # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW
21A0 ; Common # Sm RIGHTWARDS TWO HEADED ARROW
21A1..21A2 ; Common # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL
21A3 ; Common # Sm RIGHTWARDS ARROW WITH TAIL
21A4..21A5 ; Common # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR
21A6 ; Common # Sm RIGHTWARDS ARROW FROM BAR
21A7..21AD ; Common # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW
21AE ; Common # Sm LEFT RIGHT ARROW WITH STROKE
21AF..21CD ; Common # So [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE
21CE..21CF ; Common # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE
21D0..21D1 ; Common # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW
21D2 ; Common # Sm RIGHTWARDS DOUBLE ARROW
21D3 ; Common # So DOWNWARDS DOUBLE ARROW
21D4 ; Common # Sm LEFT RIGHT DOUBLE ARROW
21D5..21F3 ; Common # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW
21F4..22FF ; Common # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP
2300..2307 ; Common # So [8] DIAMETER SIGN..WAVY LINE
2308..230B ; Common # Sm [4] LEFT CEILING..RIGHT FLOOR
230C..231F ; Common # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER
2320..2321 ; Common # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL
2322..2328 ; Common # So [7] FROWN..KEYBOARD
2329 ; Common # Ps LEFT-POINTING ANGLE BRACKET
232A ; Common # Pe RIGHT-POINTING ANGLE BRACKET
232B..237B ; Common # So [81] ERASE TO THE LEFT..NOT CHECK MARK
237C ; Common # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW
237D..239A ; Common # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL
239B..23B3 ; Common # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
23B4..23DB ; Common # So [40] TOP SQUARE BRACKET..FUSE
23DC..23E1 ; Common # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
23E2..23E8 ; Common # So [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL
2400..2426 ; Common # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
2440..244A ; Common # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
2460..249B ; Common # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
249C..24E9 ; Common # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
24EA..24FF ; Common # No [22] CIRCLED DIGIT ZERO..NEGATIVE CIRCLED DIGIT ZERO
2500..25B6 ; Common # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE
25B7 ; Common # Sm WHITE RIGHT-POINTING TRIANGLE
25B8..25C0 ; Common # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE
25C1 ; Common # Sm WHITE LEFT-POINTING TRIANGLE
25C2..25F7 ; Common # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT
25F8..25FF ; Common # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE
2600..266E ; Common # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN
266F ; Common # Sm MUSIC SHARP SIGN
2670..26CD ; Common # So [94] WEST SYRIAC CROSS..DISABLED CAR
26CF..26E1 ; Common # So [19] PICK..RESTRICTED LEFT ENTRY-2
26E3 ; Common # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
26E8..26FF ; Common # So [24] BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
2701..2704 ; Common # So [4] UPPER BLADE SCISSORS..WHITE SCISSORS
2706..2709 ; Common # So [4] TELEPHONE LOCATION SIGN..ENVELOPE
270C..2727 ; Common # So [28] VICTORY HAND..WHITE FOUR POINTED STAR
2729..274B ; Common # So [35] STRESS OUTLINED WHITE STAR..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
274D ; Common # So SHADOWED WHITE CIRCLE
274F..2752 ; Common # So [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE
2756..275E ; Common # So [9] BLACK DIAMOND MINUS WHITE X..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
2761..2767 ; Common # So [7] CURVED STEM PARAGRAPH SIGN ORNAMENT..ROTATED FLORAL HEART BULLET
2768 ; Common # Ps MEDIUM LEFT PARENTHESIS ORNAMENT
2769 ; Common # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT
276A ; Common # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
276B ; Common # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT
276C ; Common # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT
276D ; Common # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT
276E ; Common # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT
276F ; Common # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
2770 ; Common # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
2771 ; Common # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
2772 ; Common # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT
2773 ; Common # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
2774 ; Common # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT
2775 ; Common # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT
2776..2793 ; Common # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
2794 ; Common # So HEAVY WIDE-HEADED RIGHTWARDS ARROW
2798..27AF ; Common # So [24] HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW
27B1..27BE ; Common # So [14] NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW
27C0..27C4 ; Common # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET
27C5 ; Common # Ps LEFT S-SHAPED BAG DELIMITER
27C6 ; Common # Pe RIGHT S-SHAPED BAG DELIMITER
27C7..27CA ; Common # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE
27CC ; Common # Sm LONG DIVISION
27D0..27E5 ; Common # Sm [22] WHITE DIAMOND WITH CENTRED DOT..WHITE SQUARE WITH RIGHTWARDS TICK
27E6 ; Common # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET
27E7 ; Common # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET
27E8 ; Common # Ps MATHEMATICAL LEFT ANGLE BRACKET
27E9 ; Common # Pe MATHEMATICAL RIGHT ANGLE BRACKET
27EA ; Common # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET
27EB ; Common # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET
27EC ; Common # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET
27ED ; Common # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET
27EE ; Common # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS
27EF ; Common # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS
27F0..27FF ; Common # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW
2900..2982 ; Common # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON
2983 ; Common # Ps LEFT WHITE CURLY BRACKET
2984 ; Common # Pe RIGHT WHITE CURLY BRACKET
2985 ; Common # Ps LEFT WHITE PARENTHESIS
2986 ; Common # Pe RIGHT WHITE PARENTHESIS
2987 ; Common # Ps Z NOTATION LEFT IMAGE BRACKET
2988 ; Common # Pe Z NOTATION RIGHT IMAGE BRACKET
2989 ; Common # Ps Z NOTATION LEFT BINDING BRACKET
298A ; Common # Pe Z NOTATION RIGHT BINDING BRACKET
298B ; Common # Ps LEFT SQUARE BRACKET WITH UNDERBAR
298C ; Common # Pe RIGHT SQUARE BRACKET WITH UNDERBAR
298D ; Common # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
298E ; Common # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
298F ; Common # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
2990 ; Common # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER
2991 ; Common # Ps LEFT ANGLE BRACKET WITH DOT
2992 ; Common # Pe RIGHT ANGLE BRACKET WITH DOT
2993 ; Common # Ps LEFT ARC LESS-THAN BRACKET
2994 ; Common # Pe RIGHT ARC GREATER-THAN BRACKET
2995 ; Common # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET
2996 ; Common # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET
2997 ; Common # Ps LEFT BLACK TORTOISE SHELL BRACKET
2998 ; Common # Pe RIGHT BLACK TORTOISE SHELL BRACKET
2999..29D7 ; Common # Sm [63] DOTTED FENCE..BLACK HOURGLASS
29D8 ; Common # Ps LEFT WIGGLY FENCE
29D9 ; Common # Pe RIGHT WIGGLY FENCE
29DA ; Common # Ps LEFT DOUBLE WIGGLY FENCE
29DB ; Common # Pe RIGHT DOUBLE WIGGLY FENCE
29DC..29FB ; Common # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS
29FC ; Common # Ps LEFT-POINTING CURVED ANGLE BRACKET
29FD ; Common # Pe RIGHT-POINTING CURVED ANGLE BRACKET
29FE..2AFF ; Common # Sm [258] TINY..N-ARY WHITE VERTICAL BAR
2B00..2B2F ; Common # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE
2B30..2B44 ; Common # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET
2B45..2B46 ; Common # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW
2B47..2B4C ; Common # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
2B50..2B59 ; Common # So [10] WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE
2E00..2E01 ; Common # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
2E02 ; Common # Pi LEFT SUBSTITUTION BRACKET
2E03 ; Common # Pf RIGHT SUBSTITUTION BRACKET
2E04 ; Common # Pi LEFT DOTTED SUBSTITUTION BRACKET
2E05 ; Common # Pf RIGHT DOTTED SUBSTITUTION BRACKET
2E06..2E08 ; Common # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER
2E09 ; Common # Pi LEFT TRANSPOSITION BRACKET
2E0A ; Common # Pf RIGHT TRANSPOSITION BRACKET
2E0B ; Common # Po RAISED SQUARE
2E0C ; Common # Pi LEFT RAISED OMISSION BRACKET
2E0D ; Common # Pf RIGHT RAISED OMISSION BRACKET
2E0E..2E16 ; Common # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE
2E17 ; Common # Pd DOUBLE OBLIQUE HYPHEN
2E18..2E19 ; Common # Po [2] INVERTED INTERROBANG..PALM BRANCH
2E1A ; Common # Pd HYPHEN WITH DIAERESIS
2E1B ; Common # Po TILDE WITH RING ABOVE
2E1C ; Common # Pi LEFT LOW PARAPHRASE BRACKET
2E1D ; Common # Pf RIGHT LOW PARAPHRASE BRACKET
2E1E..2E1F ; Common # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW
2E20 ; Common # Pi LEFT VERTICAL BAR WITH QUILL
2E21 ; Common # Pf RIGHT VERTICAL BAR WITH QUILL
2E22 ; Common # Ps TOP LEFT HALF BRACKET
2E23 ; Common # Pe TOP RIGHT HALF BRACKET
2E24 ; Common # Ps BOTTOM LEFT HALF BRACKET
2E25 ; Common # Pe BOTTOM RIGHT HALF BRACKET
2E26 ; Common # Ps LEFT SIDEWAYS U BRACKET
2E27 ; Common # Pe RIGHT SIDEWAYS U BRACKET
2E28 ; Common # Ps LEFT DOUBLE PARENTHESIS
2E29 ; Common # Pe RIGHT DOUBLE PARENTHESIS
2E2A..2E2E ; Common # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK
2E2F ; Common # Lm VERTICAL TILDE
2E30..2E31 ; Common # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT
2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
3000 ; Common # Zs IDEOGRAPHIC SPACE
3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
3004 ; Common # So JAPANESE INDUSTRIAL STANDARD SYMBOL
3006 ; Common # Lo IDEOGRAPHIC CLOSING MARK
3008 ; Common # Ps LEFT ANGLE BRACKET
3009 ; Common # Pe RIGHT ANGLE BRACKET
300A ; Common # Ps LEFT DOUBLE ANGLE BRACKET
300B ; Common # Pe RIGHT DOUBLE ANGLE BRACKET
300C ; Common # Ps LEFT CORNER BRACKET
300D ; Common # Pe RIGHT CORNER BRACKET
300E ; Common # Ps LEFT WHITE CORNER BRACKET
300F ; Common # Pe RIGHT WHITE CORNER BRACKET
3010 ; Common # Ps LEFT BLACK LENTICULAR BRACKET
3011 ; Common # Pe RIGHT BLACK LENTICULAR BRACKET
3012..3013 ; Common # So [2] POSTAL MARK..GETA MARK
3014 ; Common # Ps LEFT TORTOISE SHELL BRACKET
3015 ; Common # Pe RIGHT TORTOISE SHELL BRACKET
3016 ; Common # Ps LEFT WHITE LENTICULAR BRACKET
3017 ; Common # Pe RIGHT WHITE LENTICULAR BRACKET
3018 ; Common # Ps LEFT WHITE TORTOISE SHELL BRACKET
3019 ; Common # Pe RIGHT WHITE TORTOISE SHELL BRACKET
301A ; Common # Ps LEFT WHITE SQUARE BRACKET
301B ; Common # Pe RIGHT WHITE SQUARE BRACKET
301C ; Common # Pd WAVE DASH
301D ; Common # Ps REVERSED DOUBLE PRIME QUOTATION MARK
301E..301F ; Common # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK
3020 ; Common # So POSTAL MARK FACE
3030 ; Common # Pd WAVY DASH
3031..3035 ; Common # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
3036..3037 ; Common # So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL
303C ; Common # Lo MASU MARK
303D ; Common # Po PART ALTERNATION MARK
303E..303F ; Common # So [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE
309B..309C ; Common # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
30A0 ; Common # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN
30FB ; Common # Po KATAKANA MIDDLE DOT
30FC ; Common # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK
3190..3191 ; Common # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
3192..3195 ; Common # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
3196..319F ; Common # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
31C0..31E3 ; Common # So [36] CJK STROKE T..CJK STROKE Q
3220..3229 ; Common # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
322A..3250 ; Common # So [39] PARENTHESIZED IDEOGRAPH MOON..PARTNERSHIP SIGN
3251..325F ; Common # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE
327F ; Common # So KOREAN STANDARD SYMBOL
3280..3289 ; Common # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN
328A..32B0 ; Common # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT
32B1..32BF ; Common # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY
32C0..32CF ; Common # So [16] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..LIMITED LIABILITY SIGN
3358..33FF ; Common # So [168] IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO..SQUARE GAL
4DC0..4DFF ; Common # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION
A700..A716 ; Common # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR
A717..A71F ; Common # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
A720..A721 ; Common # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE
A788 ; Common # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT
A789..A78A ; Common # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN
A830..A835 ; Common # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS
A836..A837 ; Common # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK
A838 ; Common # Sc NORTH INDIC RUPEE MARK
A839 ; Common # So NORTH INDIC QUANTITY MARK
FD3E ; Common # Ps ORNATE LEFT PARENTHESIS
FD3F ; Common # Pe ORNATE RIGHT PARENTHESIS
FDFD ; Common # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
FE10..FE16 ; Common # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK
FE17 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET
FE18 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET
FE19 ; Common # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
FE30 ; Common # Po PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
FE31..FE32 ; Common # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH
FE33..FE34 ; Common # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
FE35 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
FE36 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS
FE37 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET
FE38 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET
FE39 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET
FE3A ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET
FE3B ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET
FE3C ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET
FE3D ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET
FE3E ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET
FE3F ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET
FE40 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET
FE41 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET
FE42 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET
FE43 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET
FE44 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET
FE45..FE46 ; Common # Po [2] SESAME DOT..WHITE SESAME DOT
FE47 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET
FE48 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET
FE49..FE4C ; Common # Po [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE
FE4D..FE4F ; Common # Pc [3] DASHED LOW LINE..WAVY LOW LINE
FE50..FE52 ; Common # Po [3] SMALL COMMA..SMALL FULL STOP
FE54..FE57 ; Common # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK
FE58 ; Common # Pd SMALL EM DASH
FE59 ; Common # Ps SMALL LEFT PARENTHESIS
FE5A ; Common # Pe SMALL RIGHT PARENTHESIS
FE5B ; Common # Ps SMALL LEFT CURLY BRACKET
FE5C ; Common # Pe SMALL RIGHT CURLY BRACKET
FE5D ; Common # Ps SMALL LEFT TORTOISE SHELL BRACKET
FE5E ; Common # Pe SMALL RIGHT TORTOISE SHELL BRACKET
FE5F..FE61 ; Common # Po [3] SMALL NUMBER SIGN..SMALL ASTERISK
FE62 ; Common # Sm SMALL PLUS SIGN
FE63 ; Common # Pd SMALL HYPHEN-MINUS
FE64..FE66 ; Common # Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN
FE68 ; Common # Po SMALL REVERSE SOLIDUS
FE69 ; Common # Sc SMALL DOLLAR SIGN
FE6A..FE6B ; Common # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT
FEFF ; Common # Cf ZERO WIDTH NO-BREAK SPACE
FF01..FF03 ; Common # Po [3] FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN
FF04 ; Common # Sc FULLWIDTH DOLLAR SIGN
FF05..FF07 ; Common # Po [3] FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE
FF08 ; Common # Ps FULLWIDTH LEFT PARENTHESIS
FF09 ; Common # Pe FULLWIDTH RIGHT PARENTHESIS
FF0A ; Common # Po FULLWIDTH ASTERISK
FF0B ; Common # Sm FULLWIDTH PLUS SIGN
FF0C ; Common # Po FULLWIDTH COMMA
FF0D ; Common # Pd FULLWIDTH HYPHEN-MINUS
FF0E..FF0F ; Common # Po [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS
FF10..FF19 ; Common # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
FF1A..FF1B ; Common # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON
FF1C..FF1E ; Common # Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN
FF1F..FF20 ; Common # Po [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT
FF3B ; Common # Ps FULLWIDTH LEFT SQUARE BRACKET
FF3C ; Common # Po FULLWIDTH REVERSE SOLIDUS
FF3D ; Common # Pe FULLWIDTH RIGHT SQUARE BRACKET
FF3E ; Common # Sk FULLWIDTH CIRCUMFLEX ACCENT
FF3F ; Common # Pc FULLWIDTH LOW LINE
FF40 ; Common # Sk FULLWIDTH GRAVE ACCENT
FF5B ; Common # Ps FULLWIDTH LEFT CURLY BRACKET
FF5C ; Common # Sm FULLWIDTH VERTICAL LINE
FF5D ; Common # Pe FULLWIDTH RIGHT CURLY BRACKET
FF5E ; Common # Sm FULLWIDTH TILDE
FF5F ; Common # Ps FULLWIDTH LEFT WHITE PARENTHESIS
FF60 ; Common # Pe FULLWIDTH RIGHT WHITE PARENTHESIS
FF61 ; Common # Po HALFWIDTH IDEOGRAPHIC FULL STOP
FF62 ; Common # Ps HALFWIDTH LEFT CORNER BRACKET
FF63 ; Common # Pe HALFWIDTH RIGHT CORNER BRACKET
FF64..FF65 ; Common # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT
FF70 ; Common # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
FF9E..FF9F ; Common # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
FFE0..FFE1 ; Common # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN
FFE2 ; Common # Sm FULLWIDTH NOT SIGN
FFE3 ; Common # Sk FULLWIDTH MACRON
FFE4 ; Common # So FULLWIDTH BROKEN BAR
FFE5..FFE6 ; Common # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN
FFE8 ; Common # So HALFWIDTH FORMS LIGHT VERTICAL
FFE9..FFEC ; Common # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW
FFED..FFEE ; Common # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE
FFF9..FFFB ; Common # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
10100..10101 ; Common # Po [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT
10102 ; Common # So AEGEAN CHECK MARK
10107..10133 ; Common # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
10137..1013F ; Common # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
10190..1019B ; Common # So [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
101D0..101FC ; Common # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
1D000..1D0F5 ; Common # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
1D100..1D126 ; Common # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
1D129..1D164 ; Common # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
1D165..1D166 ; Common # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
1D16A..1D16C ; Common # So [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3
1D16D..1D172 ; Common # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
1D173..1D17A ; Common # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
1D183..1D184 ; Common # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN
1D18C..1D1A9 ; Common # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH
1D1AE..1D1DD ; Common # So [48] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL PES SUBPUNCTIS
1D300..1D356 ; Common # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
1D360..1D371 ; Common # No [18] COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE
1D400..1D454 ; Common # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
1D456..1D49C ; Common # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
1D49E..1D49F ; Common # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
1D4A2 ; Common # L& MATHEMATICAL SCRIPT CAPITAL G
1D4A5..1D4A6 ; Common # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K
1D4A9..1D4AC ; Common # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q
1D4AE..1D4B9 ; Common # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D
1D4BB ; Common # L& MATHEMATICAL SCRIPT SMALL F
1D4BD..1D4C3 ; Common # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N
1D4C5..1D505 ; Common # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B
1D507..1D50A ; Common # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G
1D50D..1D514 ; Common # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q
1D516..1D51C ; Common # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y
1D51E..1D539 ; Common # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B
1D53B..1D53E ; Common # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G
1D540..1D544 ; Common # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M
1D546 ; Common # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O
1D54A..1D550 ; Common # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
1D552..1D6A5 ; Common # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J
1D6A8..1D6C0 ; Common # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA
1D6C1 ; Common # Sm MATHEMATICAL BOLD NABLA
1D6C2..1D6DA ; Common # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA
1D6DB ; Common # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
1D6DC..1D6FA ; Common # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA
1D6FB ; Common # Sm MATHEMATICAL ITALIC NABLA
1D6FC..1D714 ; Common # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA
1D715 ; Common # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL
1D716..1D734 ; Common # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA
1D735 ; Common # Sm MATHEMATICAL BOLD ITALIC NABLA
1D736..1D74E ; Common # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA
1D74F ; Common # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL
1D750..1D76E ; Common # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
1D76F ; Common # Sm MATHEMATICAL SANS-SERIF BOLD NABLA
1D770..1D788 ; Common # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA
1D789 ; Common # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL
1D78A..1D7A8 ; Common # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
1D7A9 ; Common # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA
1D7AA..1D7C2 ; Common # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
1D7C3 ; Common # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
1D7C4..1D7CB ; Common # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
1D7CE..1D7FF ; Common # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
1F000..1F02B ; Common # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK
1F030..1F093 ; Common # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
1F100..1F10A ; Common # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA
1F110..1F12E ; Common # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
1F131 ; Common # So SQUARED LATIN CAPITAL LETTER B
1F13D ; Common # So SQUARED LATIN CAPITAL LETTER N
1F13F ; Common # So SQUARED LATIN CAPITAL LETTER P
1F142 ; Common # So SQUARED LATIN CAPITAL LETTER S
1F146 ; Common # So SQUARED LATIN CAPITAL LETTER W
1F14A..1F14E ; Common # So [5] SQUARED HV..SQUARED PPV
1F157 ; Common # So NEGATIVE CIRCLED LATIN CAPITAL LETTER H
1F15F ; Common # So NEGATIVE CIRCLED LATIN CAPITAL LETTER P
1F179 ; Common # So NEGATIVE SQUARED LATIN CAPITAL LETTER J
1F17B..1F17C ; Common # So [2] NEGATIVE SQUARED LATIN CAPITAL LETTER L..NEGATIVE SQUARED LATIN CAPITAL LETTER M
1F17F ; Common # So NEGATIVE SQUARED LATIN CAPITAL LETTER P
1F18A..1F18D ; Common # So [4] CROSSED NEGATIVE SQUARED LATIN CAPITAL LETTER P..NEGATIVE SQUARED SA
1F190 ; Common # So SQUARE DJ
1F210..1F231 ; Common # So [34] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-6253
1F240..1F248 ; Common # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
E0001 ; Common # Cf LANGUAGE TAG
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
# Total code points: 5395
# ================================================
0041..005A ; Latin # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
0061..007A ; Latin # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z
00AA ; Latin # L& FEMININE ORDINAL INDICATOR
00BA ; Latin # L& MASCULINE ORDINAL INDICATOR
00C0..00D6 ; Latin # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS
00D8..00F6 ; Latin # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS
00F8..01BA ; Latin # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL
01BB ; Latin # Lo LATIN LETTER TWO WITH STROKE
01BC..01BF ; Latin # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN
01C0..01C3 ; Latin # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
01C4..0293 ; Latin # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL
0294 ; Latin # Lo LATIN LETTER GLOTTAL STOP
0295..02AF ; Latin # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
02B0..02B8 ; Latin # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y
02E0..02E4 ; Latin # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
1D00..1D25 ; Latin # L& [38] LATIN LETTER SMALL CAPITAL A..LATIN LETTER AIN
1D2C..1D5C ; Latin # Lm [49] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL AIN
1D62..1D65 ; Latin # L& [4] LATIN SUBSCRIPT SMALL LETTER I..LATIN SUBSCRIPT SMALL LETTER V
1D6B..1D77 ; Latin # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G
1D79..1D9A ; Latin # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
1D9B..1DBE ; Latin # Lm [36] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL EZH
1E00..1EFF ; Latin # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP
2071 ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER I
207F ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER N
2090..2094 ; Latin # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
212A..212B ; Latin # L& [2] KELVIN SIGN..ANGSTROM SIGN
2132 ; Latin # L& TURNED CAPITAL F
214E ; Latin # L& TURNED SMALL F
2160..2182 ; Latin # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND
2183..2184 ; Latin # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C
2185..2188 ; Latin # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND
2C60..2C7C ; Latin # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J
2C7D ; Latin # Lm MODIFIER LETTER CAPITAL V
2C7E..2C7F ; Latin # L& [2] LATIN CAPITAL LETTER S WITH SWASH TAIL..LATIN CAPITAL LETTER Z WITH SWASH TAIL
A722..A76F ; Latin # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON
A770 ; Latin # Lm MODIFIER LETTER US
A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
A78B..A78C ; Latin # L& [2] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER SALTILLO
A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M
FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
# Total code points: 1244
# ================================================
0370..0373 ; Greek # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI
0375 ; Greek # Sk GREEK LOWER NUMERAL SIGN
0376..0377 ; Greek # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
037A ; Greek # Lm GREEK YPOGEGRAMMENI
037B..037D ; Greek # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
0384 ; Greek # Sk GREEK TONOS
0386 ; Greek # L& GREEK CAPITAL LETTER ALPHA WITH TONOS
0388..038A ; Greek # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
038C ; Greek # L& GREEK CAPITAL LETTER OMICRON WITH TONOS
038E..03A1 ; Greek # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO
03A3..03E1 ; Greek # L& [63] GREEK CAPITAL LETTER SIGMA..GREEK SMALL LETTER SAMPI
03F0..03F5 ; Greek # L& [6] GREEK KAPPA SYMBOL..GREEK LUNATE EPSILON SYMBOL
03F6 ; Greek # Sm GREEK REVERSED LUNATE EPSILON SYMBOL
03F7..03FF ; Greek # L& [9] GREEK CAPITAL LETTER SHO..GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
1D26..1D2A ; Greek # L& [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
1D5D..1D61 ; Greek # Lm [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
1D66..1D6A ; Greek # L& [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
1DBF ; Greek # Lm MODIFIER LETTER SMALL THETA
1F00..1F15 ; Greek # L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
1F18..1F1D ; Greek # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
1F20..1F45 ; Greek # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
1F48..1F4D ; Greek # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
1F50..1F57 ; Greek # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
1F59 ; Greek # L& GREEK CAPITAL LETTER UPSILON WITH DASIA
1F5B ; Greek # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
1F5D ; Greek # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
1F5F..1F7D ; Greek # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA
1F80..1FB4 ; Greek # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
1FB6..1FBC ; Greek # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
1FBD ; Greek # Sk GREEK KORONIS
1FBE ; Greek # L& GREEK PROSGEGRAMMENI
1FBF..1FC1 ; Greek # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
1FC2..1FC4 ; Greek # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
1FC6..1FCC ; Greek # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
1FCD..1FCF ; Greek # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
1FD0..1FD3 ; Greek # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
1FD6..1FDB ; Greek # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA
1FDD..1FDF ; Greek # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI
1FE0..1FEC ; Greek # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
1FED..1FEF ; Greek # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA
1FF2..1FF4 ; Greek # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FF6..1FFC ; Greek # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
1FFD..1FFE ; Greek # Sk [2] GREEK OXIA..GREEK DASIA
2126 ; Greek # L& OHM SIGN
10140..10174 ; Greek # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS
10175..10178 ; Greek # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN
10179..10189 ; Greek # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN
1018A ; Greek # No GREEK ZERO SIGN
1D200..1D241 ; Greek # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
1D242..1D244 ; Greek # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
1D245 ; Greek # So GREEK MUSICAL LEIMMA
# Total code points: 511
# ================================================
0400..0481 ; Cyrillic # L& [130] CYRILLIC CAPITAL LETTER IE WITH GRAVE..CYRILLIC SMALL LETTER KOPPA
0482 ; Cyrillic # So CYRILLIC THOUSANDS SIGN
0483..0484 ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PALATALIZATION
0487 ; Cyrillic # Mn COMBINING CYRILLIC POKRYTIE
0488..0489 ; Cyrillic # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
048A..0525 ; Cyrillic # L& [156] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER PE WITH DESCENDER
1D2B ; Cyrillic # L& CYRILLIC LETTER SMALL CAPITAL EL
1D78 ; Cyrillic # Lm MODIFIER LETTER CYRILLIC EN
2DE0..2DFF ; Cyrillic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
A640..A65F ; Cyrillic # L& [32] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER YN
A662..A66D ; Cyrillic # L& [12] CYRILLIC CAPITAL LETTER SOFT DE..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
A66E ; Cyrillic # Lo CYRILLIC LETTER MULTIOCULAR O
A66F ; Cyrillic # Mn COMBINING CYRILLIC VZMET
A670..A672 ; Cyrillic # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
A673 ; Cyrillic # Po SLAVONIC ASTERISK
A67C..A67D ; Cyrillic # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
A67E ; Cyrillic # Po CYRILLIC KAVYKA
A67F ; Cyrillic # Lm CYRILLIC PAYEROK
A680..A697 ; Cyrillic # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE
# Total code points: 404
# ================================================
0531..0556 ; Armenian # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
0559 ; Armenian # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
055A..055F ; Armenian # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
0561..0587 ; Armenian # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
058A ; Armenian # Pd ARMENIAN HYPHEN
FB13..FB17 ; Armenian # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
# Total code points: 90
# ================================================
0591..05BD ; Hebrew # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
05BE ; Hebrew # Pd HEBREW PUNCTUATION MAQAF
05BF ; Hebrew # Mn HEBREW POINT RAFE
05C0 ; Hebrew # Po HEBREW PUNCTUATION PASEQ
05C1..05C2 ; Hebrew # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
05C3 ; Hebrew # Po HEBREW PUNCTUATION SOF PASUQ
05C4..05C5 ; Hebrew # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
05C6 ; Hebrew # Po HEBREW PUNCTUATION NUN HAFUKHA
05C7 ; Hebrew # Mn HEBREW POINT QAMATS QATAN
05D0..05EA ; Hebrew # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV
05F0..05F2 ; Hebrew # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
05F3..05F4 ; Hebrew # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM
FB1D ; Hebrew # Lo HEBREW LETTER YOD WITH HIRIQ
FB1E ; Hebrew # Mn HEBREW POINT JUDEO-SPANISH VARIKA
FB1F..FB28 ; Hebrew # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
FB29 ; Hebrew # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN
FB2A..FB36 ; Hebrew # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH
FB38..FB3C ; Hebrew # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
FB3E ; Hebrew # Lo HEBREW LETTER MEM WITH DAGESH
FB40..FB41 ; Hebrew # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
FB43..FB44 ; Hebrew # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED
# Total code points: 133
# ================================================
0606..0608 ; Arabic # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY
0609..060A ; Arabic # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN
060B ; Arabic # Sc AFGHANI SIGN
060D ; Arabic # Po ARABIC DATE SEPARATOR
060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
0610..061A ; Arabic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK
0621..063F ; Arabic # Lo [31] ARABIC LETTER HAMZA..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
0656..065E ; Arabic # Mn [9] ARABIC SUBSCRIPT ALEF..ARABIC FATHA WITH TWO DOTS
066A..066D ; Arabic # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
066E..066F ; Arabic # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
0671..06D3 ; Arabic # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
06D4 ; Arabic # Po ARABIC FULL STOP
06D5 ; Arabic # Lo ARABIC LETTER AE
06D6..06DC ; Arabic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
06DE ; Arabic # Me ARABIC START OF RUB EL HIZB
06DF..06E4 ; Arabic # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
06E5..06E6 ; Arabic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH
06E7..06E8 ; Arabic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
06E9 ; Arabic # So ARABIC PLACE OF SAJDAH
06EA..06ED ; Arabic # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
06EE..06EF ; Arabic # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V
06F0..06F9 ; Arabic # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
06FA..06FC ; Arabic # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW
06FD..06FE ; Arabic # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN
06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V
0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
FD50..FD8F ; Arabic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
FD92..FDC7 ; Arabic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
FDF0..FDFB ; Arabic # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
FDFC ; Arabic # Sc RIAL SIGN
FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
# Total code points: 1030
# ================================================
0700..070D ; Syriac # Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS
070F ; Syriac # Cf SYRIAC ABBREVIATION MARK
0710 ; Syriac # Lo SYRIAC LETTER ALAPH
0711 ; Syriac # Mn SYRIAC LETTER SUPERSCRIPT ALAPH
0712..072F ; Syriac # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH
0730..074A ; Syriac # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
074D..074F ; Syriac # Lo [3] SYRIAC LETTER SOGDIAN ZHAIN..SYRIAC LETTER SOGDIAN FE
# Total code points: 77
# ================================================
0780..07A5 ; Thaana # Lo [38] THAANA LETTER HAA..THAANA LETTER WAAVU
07A6..07B0 ; Thaana # Mn [11] THAANA ABAFILI..THAANA SUKUN
07B1 ; Thaana # Lo THAANA LETTER NAA
# Total code points: 50
# ================================================
0900..0902 ; Devanagari # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
0903 ; Devanagari # Mc DEVANAGARI SIGN VISARGA
0904..0939 ; Devanagari # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
093C ; Devanagari # Mn DEVANAGARI SIGN NUKTA
093D ; Devanagari # Lo DEVANAGARI SIGN AVAGRAHA
093E..0940 ; Devanagari # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
0941..0948 ; Devanagari # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
0949..094C ; Devanagari # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
094D ; Devanagari # Mn DEVANAGARI SIGN VIRAMA
094E ; Devanagari # Mc DEVANAGARI VOWEL SIGN PRISHTHAMATRA E
0950 ; Devanagari # Lo DEVANAGARI OM
0953..0955 ; Devanagari # Mn [3] DEVANAGARI GRAVE ACCENT..DEVANAGARI VOWEL SIGN CANDRA LONG E
0958..0961 ; Devanagari # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
0962..0963 ; Devanagari # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
0966..096F ; Devanagari # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
0971 ; Devanagari # Lm DEVANAGARI SIGN HIGH SPACING DOT
0972 ; Devanagari # Lo DEVANAGARI LETTER CANDRA A
0979..097F ; Devanagari # Lo [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA
A8E0..A8F1 ; Devanagari # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A8F2..A8F7 ; Devanagari # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA
A8F8..A8FA ; Devanagari # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET
A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
# Total code points: 140
# ================================================
0981 ; Bengali # Mn BENGALI SIGN CANDRABINDU
0982..0983 ; Bengali # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
0985..098C ; Bengali # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
098F..0990 ; Bengali # Lo [2] BENGALI LETTER E..BENGALI LETTER AI
0993..09A8 ; Bengali # Lo [22] BENGALI LETTER O..BENGALI LETTER NA
09AA..09B0 ; Bengali # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA
09B2 ; Bengali # Lo BENGALI LETTER LA
09B6..09B9 ; Bengali # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA
09BC ; Bengali # Mn BENGALI SIGN NUKTA
09BD ; Bengali # Lo BENGALI SIGN AVAGRAHA
09BE..09C0 ; Bengali # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II
09C1..09C4 ; Bengali # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
09C7..09C8 ; Bengali # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
09CB..09CC ; Bengali # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU
09CD ; Bengali # Mn BENGALI SIGN VIRAMA
09CE ; Bengali # Lo BENGALI LETTER KHANDA TA
09D7 ; Bengali # Mc BENGALI AU LENGTH MARK
09DC..09DD ; Bengali # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA
09DF..09E1 ; Bengali # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL
09E2..09E3 ; Bengali # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
09E6..09EF ; Bengali # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
09F0..09F1 ; Bengali # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL
09F2..09F3 ; Bengali # Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN
09F4..09F9 ; Bengali # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN
09FA ; Bengali # So BENGALI ISSHAR
09FB ; Bengali # Sc BENGALI GANDA MARK
# Total code points: 92
# ================================================
0A01..0A02 ; Gurmukhi # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
0A03 ; Gurmukhi # Mc GURMUKHI SIGN VISARGA
0A05..0A0A ; Gurmukhi # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU
0A0F..0A10 ; Gurmukhi # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI
0A13..0A28 ; Gurmukhi # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA
0A2A..0A30 ; Gurmukhi # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA
0A32..0A33 ; Gurmukhi # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA
0A35..0A36 ; Gurmukhi # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA
0A38..0A39 ; Gurmukhi # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA
0A3C ; Gurmukhi # Mn GURMUKHI SIGN NUKTA
0A3E..0A40 ; Gurmukhi # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II
0A41..0A42 ; Gurmukhi # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
0A47..0A48 ; Gurmukhi # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
0A4B..0A4D ; Gurmukhi # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
0A51 ; Gurmukhi # Mn GURMUKHI SIGN UDAAT
0A59..0A5C ; Gurmukhi # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA
0A5E ; Gurmukhi # Lo GURMUKHI LETTER FA
0A66..0A6F ; Gurmukhi # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
0A70..0A71 ; Gurmukhi # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK
0A72..0A74 ; Gurmukhi # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR
0A75 ; Gurmukhi # Mn GURMUKHI SIGN YAKASH
# Total code points: 79
# ================================================
0A81..0A82 ; Gujarati # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
0A83 ; Gujarati # Mc GUJARATI SIGN VISARGA
0A85..0A8D ; Gujarati # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E
0A8F..0A91 ; Gujarati # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O
0A93..0AA8 ; Gujarati # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA
0AAA..0AB0 ; Gujarati # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA
0AB2..0AB3 ; Gujarati # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA
0AB5..0AB9 ; Gujarati # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA
0ABC ; Gujarati # Mn GUJARATI SIGN NUKTA
0ABD ; Gujarati # Lo GUJARATI SIGN AVAGRAHA
0ABE..0AC0 ; Gujarati # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II
0AC1..0AC5 ; Gujarati # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
0AC7..0AC8 ; Gujarati # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
0AC9 ; Gujarati # Mc GUJARATI VOWEL SIGN CANDRA O
0ACB..0ACC ; Gujarati # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
0ACD ; Gujarati # Mn GUJARATI SIGN VIRAMA
0AD0 ; Gujarati # Lo GUJARATI OM
0AE0..0AE1 ; Gujarati # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL
0AE2..0AE3 ; Gujarati # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
0AE6..0AEF ; Gujarati # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
0AF1 ; Gujarati # Sc GUJARATI RUPEE SIGN
# Total code points: 83
# ================================================
0B01 ; Oriya # Mn ORIYA SIGN CANDRABINDU
0B02..0B03 ; Oriya # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
0B05..0B0C ; Oriya # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L
0B0F..0B10 ; Oriya # Lo [2] ORIYA LETTER E..ORIYA LETTER AI
0B13..0B28 ; Oriya # Lo [22] ORIYA LETTER O..ORIYA LETTER NA
0B2A..0B30 ; Oriya # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA
0B32..0B33 ; Oriya # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA
0B35..0B39 ; Oriya # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA
0B3C ; Oriya # Mn ORIYA SIGN NUKTA
0B3D ; Oriya # Lo ORIYA SIGN AVAGRAHA
0B3E ; Oriya # Mc ORIYA VOWEL SIGN AA
0B3F ; Oriya # Mn ORIYA VOWEL SIGN I
0B40 ; Oriya # Mc ORIYA VOWEL SIGN II
0B41..0B44 ; Oriya # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
0B47..0B48 ; Oriya # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
0B4B..0B4C ; Oriya # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
0B4D ; Oriya # Mn ORIYA SIGN VIRAMA
0B56 ; Oriya # Mn ORIYA AI LENGTH MARK
0B57 ; Oriya # Mc ORIYA AU LENGTH MARK
0B5C..0B5D ; Oriya # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA
0B5F..0B61 ; Oriya # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL
0B62..0B63 ; Oriya # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
0B66..0B6F ; Oriya # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
0B70 ; Oriya # So ORIYA ISSHAR
0B71 ; Oriya # Lo ORIYA LETTER WA
# Total code points: 84
# ================================================
0B82 ; Tamil # Mn TAMIL SIGN ANUSVARA
0B83 ; Tamil # Lo TAMIL SIGN VISARGA
0B85..0B8A ; Tamil # Lo [6] TAMIL LETTER A..TAMIL LETTER UU
0B8E..0B90 ; Tamil # Lo [3] TAMIL LETTER E..TAMIL LETTER AI
0B92..0B95 ; Tamil # Lo [4] TAMIL LETTER O..TAMIL LETTER KA
0B99..0B9A ; Tamil # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA
0B9C ; Tamil # Lo TAMIL LETTER JA
0B9E..0B9F ; Tamil # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA
0BA3..0BA4 ; Tamil # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA
0BA8..0BAA ; Tamil # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA
0BAE..0BB9 ; Tamil # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA
0BBE..0BBF ; Tamil # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I
0BC0 ; Tamil # Mn TAMIL VOWEL SIGN II
0BC1..0BC2 ; Tamil # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU
0BC6..0BC8 ; Tamil # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
0BCA..0BCC ; Tamil # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU
0BCD ; Tamil # Mn TAMIL SIGN VIRAMA
0BD0 ; Tamil # Lo TAMIL OM
0BD7 ; Tamil # Mc TAMIL AU LENGTH MARK
0BE6..0BEF ; Tamil # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
0BF0..0BF2 ; Tamil # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND
0BF3..0BF8 ; Tamil # So [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN
0BF9 ; Tamil # Sc TAMIL RUPEE SIGN
0BFA ; Tamil # So TAMIL NUMBER SIGN
# Total code points: 72
# ================================================
0C01..0C03 ; Telugu # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
0C05..0C0C ; Telugu # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L
0C0E..0C10 ; Telugu # Lo [3] TELUGU LETTER E..TELUGU LETTER AI
0C12..0C28 ; Telugu # Lo [23] TELUGU LETTER O..TELUGU LETTER NA
0C2A..0C33 ; Telugu # Lo [10] TELUGU LETTER PA..TELUGU LETTER LLA
0C35..0C39 ; Telugu # Lo [5] TELUGU LETTER VA..TELUGU LETTER HA
0C3D ; Telugu # Lo TELUGU SIGN AVAGRAHA
0C3E..0C40 ; Telugu # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
0C41..0C44 ; Telugu # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
0C46..0C48 ; Telugu # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
0C4A..0C4D ; Telugu # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
0C55..0C56 ; Telugu # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
0C58..0C59 ; Telugu # Lo [2] TELUGU LETTER TSA..TELUGU LETTER DZA
0C60..0C61 ; Telugu # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C62..0C63 ; Telugu # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
0C66..0C6F ; Telugu # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
0C78..0C7E ; Telugu # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
0C7F ; Telugu # So TELUGU SIGN TUUMU
# Total code points: 93
# ================================================
0C82..0C83 ; Kannada # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
0C85..0C8C ; Kannada # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
0C8E..0C90 ; Kannada # Lo [3] KANNADA LETTER E..KANNADA LETTER AI
0C92..0CA8 ; Kannada # Lo [23] KANNADA LETTER O..KANNADA LETTER NA
0CAA..0CB3 ; Kannada # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA
0CB5..0CB9 ; Kannada # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA
0CBC ; Kannada # Mn KANNADA SIGN NUKTA
0CBD ; Kannada # Lo KANNADA SIGN AVAGRAHA
0CBE ; Kannada # Mc KANNADA VOWEL SIGN AA
0CBF ; Kannada # Mn KANNADA VOWEL SIGN I
0CC0..0CC4 ; Kannada # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR
0CC6 ; Kannada # Mn KANNADA VOWEL SIGN E
0CC7..0CC8 ; Kannada # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
0CCA..0CCB ; Kannada # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
0CCC..0CCD ; Kannada # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CD5..0CD6 ; Kannada # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
0CDE ; Kannada # Lo KANNADA LETTER FA
0CE0..0CE1 ; Kannada # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CE2..0CE3 ; Kannada # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
# Total code points: 84
# ================================================
0D02..0D03 ; Malayalam # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
0D05..0D0C ; Malayalam # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
0D0E..0D10 ; Malayalam # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
0D12..0D28 ; Malayalam # Lo [23] MALAYALAM LETTER O..MALAYALAM LETTER NA
0D2A..0D39 ; Malayalam # Lo [16] MALAYALAM LETTER PA..MALAYALAM LETTER HA
0D3D ; Malayalam # Lo MALAYALAM SIGN AVAGRAHA
0D3E..0D40 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
0D41..0D44 ; Malayalam # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
0D46..0D48 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
0D4A..0D4C ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
0D4D ; Malayalam # Mn MALAYALAM SIGN VIRAMA
0D57 ; Malayalam # Mc MALAYALAM AU LENGTH MARK
0D60..0D61 ; Malayalam # Lo [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL
0D62..0D63 ; Malayalam # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
0D66..0D6F ; Malayalam # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
0D70..0D75 ; Malayalam # No [6] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE QUARTERS
0D79 ; Malayalam # So MALAYALAM DATE MARK
0D7A..0D7F ; Malayalam # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
# Total code points: 95
# ================================================
0D82..0D83 ; Sinhala # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
0D85..0D96 ; Sinhala # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
0D9A..0DB1 ; Sinhala # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA
0DB3..0DBB ; Sinhala # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA
0DBD ; Sinhala # Lo SINHALA LETTER DANTAJA LAYANNA
0DC0..0DC6 ; Sinhala # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA
0DCA ; Sinhala # Mn SINHALA SIGN AL-LAKUNA
0DCF..0DD1 ; Sinhala # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA
0DD2..0DD4 ; Sinhala # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
0DD6 ; Sinhala # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA
0DD8..0DDF ; Sinhala # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA
0DF2..0DF3 ; Sinhala # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA
0DF4 ; Sinhala # Po SINHALA PUNCTUATION KUNDDALIYA
# Total code points: 80
# ================================================
0E01..0E30 ; Thai # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A
0E31 ; Thai # Mn THAI CHARACTER MAI HAN-AKAT
0E32..0E33 ; Thai # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM
0E34..0E3A ; Thai # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
0E40..0E45 ; Thai # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO
0E46 ; Thai # Lm THAI CHARACTER MAIYAMOK
0E47..0E4E ; Thai # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
0E4F ; Thai # Po THAI CHARACTER FONGMAN
0E50..0E59 ; Thai # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE
0E5A..0E5B ; Thai # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT
# Total code points: 86
# ================================================
0E81..0E82 ; Lao # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG
0E84 ; Lao # Lo LAO LETTER KHO TAM
0E87..0E88 ; Lao # Lo [2] LAO LETTER NGO..LAO LETTER CO
0E8A ; Lao # Lo LAO LETTER SO TAM
0E8D ; Lao # Lo LAO LETTER NYO
0E94..0E97 ; Lao # Lo [4] LAO LETTER DO..LAO LETTER THO TAM
0E99..0E9F ; Lao # Lo [7] LAO LETTER NO..LAO LETTER FO SUNG
0EA1..0EA3 ; Lao # Lo [3] LAO LETTER MO..LAO LETTER LO LING
0EA5 ; Lao # Lo LAO LETTER LO LOOT
0EA7 ; Lao # Lo LAO LETTER WO
0EAA..0EAB ; Lao # Lo [2] LAO LETTER SO SUNG..LAO LETTER HO SUNG
0EAD..0EB0 ; Lao # Lo [4] LAO LETTER O..LAO VOWEL SIGN A
0EB1 ; Lao # Mn LAO VOWEL SIGN MAI KAN
0EB2..0EB3 ; Lao # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM
0EB4..0EB9 ; Lao # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU
0EBB..0EBC ; Lao # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO
0EBD ; Lao # Lo LAO SEMIVOWEL SIGN NYO
0EC0..0EC4 ; Lao # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI
0EC6 ; Lao # Lm LAO KO LA
0EC8..0ECD ; Lao # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA
0ED0..0ED9 ; Lao # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE
0EDC..0EDD ; Lao # Lo [2] LAO HO NO..LAO HO MO
# Total code points: 65
# ================================================
0F00 ; Tibetan # Lo TIBETAN SYLLABLE OM
0F01..0F03 ; Tibetan # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA
0F04..0F12 ; Tibetan # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD
0F13..0F17 ; Tibetan # So [5] TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS
0F18..0F19 ; Tibetan # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
0F1A..0F1F ; Tibetan # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG
0F20..0F29 ; Tibetan # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
0F2A..0F33 ; Tibetan # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO
0F34 ; Tibetan # So TIBETAN MARK BSDUS RTAGS
0F35 ; Tibetan # Mn TIBETAN MARK NGAS BZUNG NYI ZLA
0F36 ; Tibetan # So TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN
0F37 ; Tibetan # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
0F38 ; Tibetan # So TIBETAN MARK CHE MGO
0F39 ; Tibetan # Mn TIBETAN MARK TSA -PHRU
0F3A ; Tibetan # Ps TIBETAN MARK GUG RTAGS GYON
0F3B ; Tibetan # Pe TIBETAN MARK GUG RTAGS GYAS
0F3C ; Tibetan # Ps TIBETAN MARK ANG KHANG GYON
0F3D ; Tibetan # Pe TIBETAN MARK ANG KHANG GYAS
0F3E..0F3F ; Tibetan # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES
0F40..0F47 ; Tibetan # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA
0F49..0F6C ; Tibetan # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA
0F71..0F7E ; Tibetan # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
0F7F ; Tibetan # Mc TIBETAN SIGN RNAM BCAD
0F80..0F84 ; Tibetan # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
0F85 ; Tibetan # Po TIBETAN MARK PALUTA
0F86..0F87 ; Tibetan # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
0F88..0F8B ; Tibetan # Lo [4] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN GRU MED RGYINGS
0F90..0F97 ; Tibetan # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA
0F99..0FBC ; Tibetan # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
0FBE..0FC5 ; Tibetan # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE
0FC6 ; Tibetan # Mn TIBETAN SYMBOL PADMA GDAN
0FC7..0FCC ; Tibetan # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL
0FCE..0FCF ; Tibetan # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM
0FD0..0FD4 ; Tibetan # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA
# Total code points: 201
# ================================================
1000..102A ; Myanmar # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU
102B..102C ; Myanmar # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA
102D..1030 ; Myanmar # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
1031 ; Myanmar # Mc MYANMAR VOWEL SIGN E
1032..1037 ; Myanmar # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW
1038 ; Myanmar # Mc MYANMAR SIGN VISARGA
1039..103A ; Myanmar # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT
103B..103C ; Myanmar # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA
103D..103E ; Myanmar # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA
103F ; Myanmar # Lo MYANMAR LETTER GREAT SA
1040..1049 ; Myanmar # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
104A..104F ; Myanmar # Po [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE
1050..1055 ; Myanmar # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL
1056..1057 ; Myanmar # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR
1058..1059 ; Myanmar # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
105A..105D ; Myanmar # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE
105E..1060 ; Myanmar # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA
1061 ; Myanmar # Lo MYANMAR LETTER SGAW KAREN SHA
1062..1064 ; Myanmar # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO
1065..1066 ; Myanmar # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA
1067..106D ; Myanmar # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5
106E..1070 ; Myanmar # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA
1071..1074 ; Myanmar # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE
1075..1081 ; Myanmar # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA
1082 ; Myanmar # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA
1083..1084 ; Myanmar # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E
1085..1086 ; Myanmar # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
1087..108C ; Myanmar # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
108D ; Myanmar # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
108E ; Myanmar # Lo MYANMAR LETTER RUMAI PALAUNG FA
108F ; Myanmar # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5
1090..1099 ; Myanmar # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE
109A..109C ; Myanmar # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A
109D ; Myanmar # Mn MYANMAR VOWEL SIGN AITON AI
109E..109F ; Myanmar # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION
AA60..AA6F ; Myanmar # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA
AA70 ; Myanmar # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
AA71..AA76 ; Myanmar # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM
AA77..AA79 ; Myanmar # So [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO
AA7A ; Myanmar # Lo MYANMAR LETTER AITON RA
AA7B ; Myanmar # Mc MYANMAR SIGN PAO KAREN TONE
# Total code points: 188
# ================================================
10A0..10C5 ; Georgian # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE
10D0..10FA ; Georgian # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN
10FC ; Georgian # Lm MODIFIER LETTER GEORGIAN NAR
2D00..2D25 ; Georgian # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
# Total code points: 120
# ================================================
1100..11FF ; Hangul # Lo [256] HANGUL CHOSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN
3131..318E ; Hangul # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
3200..321E ; Hangul # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
3260..327E ; Hangul # So [31] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL IEUNG U
A960..A97C ; Hangul # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH
AC00..D7A3 ; Hangul # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
D7B0..D7C6 ; Hangul # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E
D7CB..D7FB ; Hangul # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH
FFA0..FFBE ; Hangul # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH
FFC2..FFC7 ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E
FFCA..FFCF ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE
FFD2..FFD7 ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
# Total code points: 11737
# ================================================
1200..1248 ; Ethiopic # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA
124A..124D ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
1250..1256 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO
1258 ; Ethiopic # Lo ETHIOPIC SYLLABLE QHWA
125A..125D ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE
1260..1288 ; Ethiopic # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA
128A..128D ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE
1290..12B0 ; Ethiopic # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA
12B2..12B5 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE
12B8..12BE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO
12C0 ; Ethiopic # Lo ETHIOPIC SYLLABLE KXWA
12C2..12C5 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE
12C8..12D6 ; Ethiopic # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O
12D8..1310 ; Ethiopic # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA
1312..1315 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
1318..135A ; Ethiopic # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
135F ; Ethiopic # Mn ETHIOPIC COMBINING GEMINATION MARK
1360 ; Ethiopic # So ETHIOPIC SECTION MARK
1361..1368 ; Ethiopic # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR
1369..137C ; Ethiopic # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND
1380..138F ; Ethiopic # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE
1390..1399 ; Ethiopic # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT
2D80..2D96 ; Ethiopic # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE
2DA0..2DA6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO
2DA8..2DAE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO
2DB0..2DB6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO
2DB8..2DBE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO
2DC0..2DC6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO
2DC8..2DCE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO
2DD0..2DD6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO
2DD8..2DDE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
# Total code points: 461
# ================================================
13A0..13F4 ; Cherokee # Lo [85] CHEROKEE LETTER A..CHEROKEE LETTER YV
# Total code points: 85
# ================================================
1400 ; Canadian_Aboriginal # Pd CANADIAN SYLLABICS HYPHEN
1401..166C ; Canadian_Aboriginal # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA
166D..166E ; Canadian_Aboriginal # Po [2] CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP
166F..167F ; Canadian_Aboriginal # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W
18B0..18F5 ; Canadian_Aboriginal # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
# Total code points: 710
# ================================================
1680 ; Ogham # Zs OGHAM SPACE MARK
1681..169A ; Ogham # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH
169B ; Ogham # Ps OGHAM FEATHER MARK
169C ; Ogham # Pe OGHAM REVERSED FEATHER MARK
# Total code points: 29
# ================================================
16A0..16EA ; Runic # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
16EE..16F0 ; Runic # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
# Total code points: 78
# ================================================
1780..17B3 ; Khmer # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU
17B4..17B5 ; Khmer # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
17B6 ; Khmer # Mc KHMER VOWEL SIGN AA
17B7..17BD ; Khmer # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA
17BE..17C5 ; Khmer # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU
17C6 ; Khmer # Mn KHMER SIGN NIKAHIT
17C7..17C8 ; Khmer # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU
17C9..17D3 ; Khmer # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
17D4..17D6 ; Khmer # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH
17D7 ; Khmer # Lm KHMER SIGN LEK TOO
17D8..17DA ; Khmer # Po [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT
17DB ; Khmer # Sc KHMER CURRENCY SYMBOL RIEL
17DC ; Khmer # Lo KHMER SIGN AVAKRAHASANYA
17DD ; Khmer # Mn KHMER SIGN ATTHACAN
17E0..17E9 ; Khmer # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
17F0..17F9 ; Khmer # No [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON
19E0..19FF ; Khmer # So [32] KHMER SYMBOL PATHAMASAT..KHMER SYMBOL DAP-PRAM ROC
# Total code points: 146
# ================================================
1800..1801 ; Mongolian # Po [2] MONGOLIAN BIRGA..MONGOLIAN ELLIPSIS
1804 ; Mongolian # Po MONGOLIAN COLON
1806 ; Mongolian # Pd MONGOLIAN TODO SOFT HYPHEN
1807..180A ; Mongolian # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU
180B..180D ; Mongolian # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
180E ; Mongolian # Zs MONGOLIAN VOWEL SEPARATOR
1810..1819 ; Mongolian # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
1820..1842 ; Mongolian # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
1843 ; Mongolian # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
1844..1877 ; Mongolian # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA
1880..18A8 ; Mongolian # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA
18A9 ; Mongolian # Mn MONGOLIAN LETTER ALI GALI DAGALGA
18AA ; Mongolian # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA
# Total code points: 153
# ================================================
3041..3096 ; Hiragana # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
309D..309E ; Hiragana # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
309F ; Hiragana # Lo HIRAGANA DIGRAPH YORI
1F200 ; Hiragana # So SQUARE HIRAGANA HOKA
# Total code points: 90
# ================================================
30A1..30FA ; Katakana # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
30FD..30FE ; Katakana # Lm [2] KATAKANA ITERATION MARK..KATAKANA VOICED ITERATION MARK
30FF ; Katakana # Lo KATAKANA DIGRAPH KOTO
31F0..31FF ; Katakana # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
32D0..32FE ; Katakana # So [47] CIRCLED KATAKANA A..CIRCLED KATAKANA WO
3300..3357 ; Katakana # So [88] SQUARE APAATO..SQUARE WATTO
FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
# Total code points: 299
# ================================================
3105..312D ; Bopomofo # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH
31A0..31B7 ; Bopomofo # Lo [24] BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H
# Total code points: 65
# ================================================
2E80..2E99 ; Han # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP
2E9B..2EF3 ; Han # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
2F00..2FD5 ; Han # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
3005 ; Han # Lm IDEOGRAPHIC ITERATION MARK
3007 ; Han # Nl IDEOGRAPHIC NUMBER ZERO
3021..3029 ; Han # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
3038..303A ; Han # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
303B ; Han # Lm VERTICAL IDEOGRAPHIC ITERATION MARK
3400..4DB5 ; Han # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
4E00..9FCB ; Han # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB
F900..FA2D ; Han # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D
FA30..FA6D ; Han # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D
FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
# Total code points: 75738
# ================================================
A000..A014 ; Yi # Lo [21] YI SYLLABLE IT..YI SYLLABLE E
A015 ; Yi # Lm YI SYLLABLE WU
A016..A48C ; Yi # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
# Total code points: 1220
# ================================================
10300..1031E ; Old_Italic # Lo [31] OLD ITALIC LETTER A..OLD ITALIC LETTER UU
10320..10323 ; Old_Italic # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY
# Total code points: 35
# ================================================
10330..10340 ; Gothic # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA
10341 ; Gothic # Nl GOTHIC LETTER NINETY
10342..10349 ; Gothic # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL
1034A ; Gothic # Nl GOTHIC LETTER NINE HUNDRED
# Total code points: 27
# ================================================
10400..1044F ; Deseret # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW
# Total code points: 80
# ================================================
0300..036F ; Inherited # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
0485..0486 ; Inherited # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA
064B..0655 ; Inherited # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW
0670 ; Inherited # Mn ARABIC LETTER SUPERSCRIPT ALEF
0951..0952 ; Inherited # Mn [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA
1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
1CD4..1CE0 ; Inherited # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
1CED ; Inherited # Mn VEDIC SIGN TIRYAK
1DC0..1DE6 ; Inherited # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
1DFD..1DFF ; Inherited # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
20D0..20DC ; Inherited # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20DD..20E0 ; Inherited # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
20E1 ; Inherited # Mn COMBINING LEFT RIGHT ARROW ABOVE
20E2..20E4 ; Inherited # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
20E5..20F0 ; Inherited # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
302A..302F ; Inherited # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
3099..309A ; Inherited # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
FE00..FE0F ; Inherited # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
FE20..FE26 ; Inherited # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
101FD ; Inherited # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
1D167..1D169 ; Inherited # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
1D17B..1D182 ; Inherited # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
1D185..1D18B ; Inherited # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 523
# ================================================
1700..170C ; Tagalog # Lo [13] TAGALOG LETTER A..TAGALOG LETTER YA
170E..1711 ; Tagalog # Lo [4] TAGALOG LETTER LA..TAGALOG LETTER HA
1712..1714 ; Tagalog # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
# Total code points: 20
# ================================================
1720..1731 ; Hanunoo # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA
1732..1734 ; Hanunoo # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
# Total code points: 21
# ================================================
1740..1751 ; Buhid # Lo [18] BUHID LETTER A..BUHID LETTER HA
1752..1753 ; Buhid # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
# Total code points: 20
# ================================================
1760..176C ; Tagbanwa # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA
176E..1770 ; Tagbanwa # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA
1772..1773 ; Tagbanwa # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
# Total code points: 18
# ================================================
1900..191C ; Limbu # Lo [29] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA
1920..1922 ; Limbu # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
1923..1926 ; Limbu # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
1927..1928 ; Limbu # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
1929..192B ; Limbu # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA
1930..1931 ; Limbu # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA
1932 ; Limbu # Mn LIMBU SMALL LETTER ANUSVARA
1933..1938 ; Limbu # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA
1939..193B ; Limbu # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
1940 ; Limbu # So LIMBU SIGN LOO
1944..1945 ; Limbu # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
1946..194F ; Limbu # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
# Total code points: 66
# ================================================
1950..196D ; Tai_Le # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI
1970..1974 ; Tai_Le # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6
# Total code points: 35
# ================================================
10000..1000B ; Linear_B # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE
1000D..10026 ; Linear_B # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO
10028..1003A ; Linear_B # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO
1003C..1003D ; Linear_B # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE
1003F..1004D ; Linear_B # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO
10050..1005D ; Linear_B # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089
10080..100FA ; Linear_B # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305
# Total code points: 211
# ================================================
10380..1039D ; Ugaritic # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU
1039F ; Ugaritic # Po UGARITIC WORD DIVIDER
# Total code points: 31
# ================================================
10450..1047F ; Shavian # Lo [48] SHAVIAN LETTER PEEP..SHAVIAN LETTER YEW
# Total code points: 48
# ================================================
10480..1049D ; Osmanya # Lo [30] OSMANYA LETTER ALEF..OSMANYA LETTER OO
104A0..104A9 ; Osmanya # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
# Total code points: 40
# ================================================
10800..10805 ; Cypriot # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA
10808 ; Cypriot # Lo CYPRIOT SYLLABLE JO
1080A..10835 ; Cypriot # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
10837..10838 ; Cypriot # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE
1083C ; Cypriot # Lo CYPRIOT SYLLABLE ZA
1083F ; Cypriot # Lo CYPRIOT SYLLABLE ZO
# Total code points: 55
# ================================================
2800..28FF ; Braille # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678
# Total code points: 256
# ================================================
1A00..1A16 ; Buginese # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA
1A17..1A18 ; Buginese # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
1A19..1A1B ; Buginese # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
1A1E..1A1F ; Buginese # Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION
# Total code points: 30
# ================================================
03E2..03EF ; Coptic # L& [14] COPTIC CAPITAL LETTER SHEI..COPTIC SMALL LETTER DEI
2C80..2CE4 ; Coptic # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI
2CE5..2CEA ; Coptic # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA
2CEB..2CEE ; Coptic # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA
2CEF..2CF1 ; Coptic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
2CF9..2CFC ; Coptic # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER
2CFD ; Coptic # No COPTIC FRACTION ONE HALF
2CFE..2CFF ; Coptic # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER
# Total code points: 135
# ================================================
1980..19AB ; New_Tai_Lue # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA
19B0..19C0 ; New_Tai_Lue # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY
19C1..19C7 ; New_Tai_Lue # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B
19C8..19C9 ; New_Tai_Lue # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
19D0..19DA ; New_Tai_Lue # Nd [11] NEW TAI LUE DIGIT ZERO..NEW TAI LUE THAM DIGIT ONE
19DE..19DF ; New_Tai_Lue # Po [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV
# Total code points: 83
# ================================================
2C00..2C2E ; Glagolitic # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
2C30..2C5E ; Glagolitic # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
# Total code points: 94
# ================================================
2D30..2D65 ; Tifinagh # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
2D6F ; Tifinagh # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
# Total code points: 55
# ================================================
A800..A801 ; Syloti_Nagri # Lo [2] SYLOTI NAGRI LETTER A..SYLOTI NAGRI LETTER I
A802 ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN DVISVARA
A803..A805 ; Syloti_Nagri # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O
A806 ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN HASANTA
A807..A80A ; Syloti_Nagri # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO
A80B ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN ANUSVARA
A80C..A822 ; Syloti_Nagri # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO
A823..A824 ; Syloti_Nagri # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
A825..A826 ; Syloti_Nagri # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
A827 ; Syloti_Nagri # Mc SYLOTI NAGRI VOWEL SIGN OO
A828..A82B ; Syloti_Nagri # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4
# Total code points: 44
# ================================================
103A0..103C3 ; Old_Persian # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA
103C8..103CF ; Old_Persian # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH
103D0 ; Old_Persian # Po OLD PERSIAN WORD DIVIDER
103D1..103D5 ; Old_Persian # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED
# Total code points: 50
# ================================================
10A00 ; Kharoshthi # Lo KHAROSHTHI LETTER A
10A01..10A03 ; Kharoshthi # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
10A05..10A06 ; Kharoshthi # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
10A0C..10A0F ; Kharoshthi # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
10A10..10A13 ; Kharoshthi # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA
10A15..10A17 ; Kharoshthi # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA
10A19..10A33 ; Kharoshthi # Lo [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA
10A38..10A3A ; Kharoshthi # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
10A3F ; Kharoshthi # Mn KHAROSHTHI VIRAMA
10A40..10A47 ; Kharoshthi # No [8] KHAROSHTHI DIGIT ONE..KHAROSHTHI NUMBER ONE THOUSAND
10A50..10A58 ; Kharoshthi # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES
# Total code points: 65
# ================================================
1B00..1B03 ; Balinese # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B04 ; Balinese # Mc BALINESE SIGN BISAH
1B05..1B33 ; Balinese # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA
1B34 ; Balinese # Mn BALINESE SIGN REREKAN
1B35 ; Balinese # Mc BALINESE VOWEL SIGN TEDUNG
1B36..1B3A ; Balinese # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
1B3B ; Balinese # Mc BALINESE VOWEL SIGN RA REPA TEDUNG
1B3C ; Balinese # Mn BALINESE VOWEL SIGN LA LENGA
1B3D..1B41 ; Balinese # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
1B42 ; Balinese # Mn BALINESE VOWEL SIGN PEPET
1B43..1B44 ; Balinese # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
1B45..1B4B ; Balinese # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK
1B50..1B59 ; Balinese # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
1B5A..1B60 ; Balinese # Po [7] BALINESE PANTI..BALINESE PAMENENG
1B61..1B6A ; Balinese # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE
1B6B..1B73 ; Balinese # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
1B74..1B7C ; Balinese # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
# Total code points: 121
# ================================================
12000..1236E ; Cuneiform # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM
12400..12462 ; Cuneiform # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
12470..12473 ; Cuneiform # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON
# Total code points: 982
# ================================================
10900..10915 ; Phoenician # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU
10916..1091B ; Phoenician # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE
1091F ; Phoenician # Po PHOENICIAN WORD SEPARATOR
# Total code points: 29
# ================================================
A840..A873 ; Phags_Pa # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU
A874..A877 ; Phags_Pa # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD
# Total code points: 56
# ================================================
07C0..07C9 ; Nko # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE
07CA..07EA ; Nko # Lo [33] NKO LETTER A..NKO LETTER JONA RA
07EB..07F3 ; Nko # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
07F4..07F5 ; Nko # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
07F6 ; Nko # So NKO SYMBOL OO DENNEN
07F7..07F9 ; Nko # Po [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK
07FA ; Nko # Lm NKO LAJANYALAN
# Total code points: 59
# ================================================
1B80..1B81 ; Sundanese # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
1B82 ; Sundanese # Mc SUNDANESE SIGN PANGWISAD
1B83..1BA0 ; Sundanese # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA
1BA1 ; Sundanese # Mc SUNDANESE CONSONANT SIGN PAMINGKAL
1BA2..1BA5 ; Sundanese # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
1BA6..1BA7 ; Sundanese # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG
1BA8..1BA9 ; Sundanese # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
1BAA ; Sundanese # Mc SUNDANESE SIGN PAMAAEH
1BAE..1BAF ; Sundanese # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA
1BB0..1BB9 ; Sundanese # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
# Total code points: 55
# ================================================
1C00..1C23 ; Lepcha # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A
1C24..1C2B ; Lepcha # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU
1C2C..1C33 ; Lepcha # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
1C34..1C35 ; Lepcha # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
1C36..1C37 ; Lepcha # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
1C3B..1C3F ; Lepcha # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK
1C40..1C49 ; Lepcha # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE
1C4D..1C4F ; Lepcha # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
# Total code points: 74
# ================================================
1C50..1C59 ; Ol_Chiki # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE
1C5A..1C77 ; Ol_Chiki # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; Ol_Chiki # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
1C7E..1C7F ; Ol_Chiki # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
# Total code points: 48
# ================================================
A500..A60B ; Vai # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG
A60C ; Vai # Lm VAI SYLLABLE LENGTHENER
A60D..A60F ; Vai # Po [3] VAI COMMA..VAI QUESTION MARK
A610..A61F ; Vai # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG
A620..A629 ; Vai # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE
A62A..A62B ; Vai # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO
# Total code points: 300
# ================================================
A880..A881 ; Saurashtra # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
A882..A8B3 ; Saurashtra # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA
A8B4..A8C3 ; Saurashtra # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
A8C4 ; Saurashtra # Mn SAURASHTRA SIGN VIRAMA
A8CE..A8CF ; Saurashtra # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
A8D0..A8D9 ; Saurashtra # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
# Total code points: 81
# ================================================
A900..A909 ; Kayah_Li # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE
A90A..A925 ; Kayah_Li # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO
A926..A92D ; Kayah_Li # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
A92E..A92F ; Kayah_Li # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA
# Total code points: 48
# ================================================
A930..A946 ; Rejang # Lo [23] REJANG LETTER KA..REJANG LETTER A
A947..A951 ; Rejang # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
A952..A953 ; Rejang # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA
A95F ; Rejang # Po REJANG SECTION MARK
# Total code points: 37
# ================================================
10280..1029C ; Lycian # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X
# Total code points: 29
# ================================================
102A0..102D0 ; Carian # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3
# Total code points: 49
# ================================================
10920..10939 ; Lydian # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C
1093F ; Lydian # Po LYDIAN TRIANGULAR MARK
# Total code points: 27
# ================================================
AA00..AA28 ; Cham # Lo [41] CHAM LETTER A..CHAM LETTER HA
AA29..AA2E ; Cham # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
AA2F..AA30 ; Cham # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
AA31..AA32 ; Cham # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
AA33..AA34 ; Cham # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA
AA35..AA36 ; Cham # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
AA40..AA42 ; Cham # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG
AA43 ; Cham # Mn CHAM CONSONANT SIGN FINAL NG
AA44..AA4B ; Cham # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS
AA4C ; Cham # Mn CHAM CONSONANT SIGN FINAL M
AA4D ; Cham # Mc CHAM CONSONANT SIGN FINAL H
AA50..AA59 ; Cham # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE
AA5C..AA5F ; Cham # Po [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA
# Total code points: 83
# ================================================
1A20..1A54 ; Tai_Tham # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA
1A55 ; Tai_Tham # Mc TAI THAM CONSONANT SIGN MEDIAL RA
1A56 ; Tai_Tham # Mn TAI THAM CONSONANT SIGN MEDIAL LA
1A57 ; Tai_Tham # Mc TAI THAM CONSONANT SIGN LA TANG LAI
1A58..1A5E ; Tai_Tham # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
1A60 ; Tai_Tham # Mn TAI THAM SIGN SAKOT
1A61 ; Tai_Tham # Mc TAI THAM VOWEL SIGN A
1A62 ; Tai_Tham # Mn TAI THAM VOWEL SIGN MAI SAT
1A63..1A64 ; Tai_Tham # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA
1A65..1A6C ; Tai_Tham # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
1A6D..1A72 ; Tai_Tham # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
1A73..1A7C ; Tai_Tham # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
1A7F ; Tai_Tham # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1A80..1A89 ; Tai_Tham # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE
1A90..1A99 ; Tai_Tham # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
1AA0..1AA6 ; Tai_Tham # Po [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA
1AA7 ; Tai_Tham # Lm TAI THAM SIGN MAI YAMOK
1AA8..1AAD ; Tai_Tham # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG
# Total code points: 127
# ================================================
AA80..AAAF ; Tai_Viet # Lo [48] TAI VIET LETTER LOW KO..TAI VIET LETTER HIGH O
AAB0 ; Tai_Viet # Mn TAI VIET MAI KANG
AAB1 ; Tai_Viet # Lo TAI VIET VOWEL AA
AAB2..AAB4 ; Tai_Viet # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
AAB5..AAB6 ; Tai_Viet # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O
AAB7..AAB8 ; Tai_Viet # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
AAB9..AABD ; Tai_Viet # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN
AABE..AABF ; Tai_Viet # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK
AAC0 ; Tai_Viet # Lo TAI VIET TONE MAI NUENG
AAC1 ; Tai_Viet # Mn TAI VIET TONE MAI THO
AAC2 ; Tai_Viet # Lo TAI VIET TONE MAI SONG
AADB..AADC ; Tai_Viet # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG
AADD ; Tai_Viet # Lm TAI VIET SYMBOL SAM
AADE..AADF ; Tai_Viet # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI
# Total code points: 72
# ================================================
10B00..10B35 ; Avestan # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE
10B39..10B3F ; Avestan # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION
# Total code points: 61
# ================================================
13000..1342E ; Egyptian_Hieroglyphs # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
# Total code points: 1071
# ================================================
0800..0815 ; Samaritan # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
0816..0819 ; Samaritan # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
081A ; Samaritan # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT
081B..0823 ; Samaritan # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
0824 ; Samaritan # Lm SAMARITAN MODIFIER LETTER SHORT A
0825..0827 ; Samaritan # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0828 ; Samaritan # Lm SAMARITAN MODIFIER LETTER I
0829..082D ; Samaritan # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0830..083E ; Samaritan # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU
# Total code points: 61
# ================================================
A4D0..A4F7 ; Lisu # Lo [40] LISU LETTER BA..LISU LETTER OE
A4F8..A4FD ; Lisu # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU
A4FE..A4FF ; Lisu # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
# Total code points: 48
# ================================================
A6A0..A6E5 ; Bamum # Lo [70] BAMUM LETTER A..BAMUM LETTER KI
A6E6..A6EF ; Bamum # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM
A6F0..A6F1 ; Bamum # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
A6F2..A6F7 ; Bamum # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK
# Total code points: 88
# ================================================
A980..A982 ; Javanese # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
A983 ; Javanese # Mc JAVANESE SIGN WIGNYAN
A984..A9B2 ; Javanese # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA
A9B3 ; Javanese # Mn JAVANESE SIGN CECAK TELU
A9B4..A9B5 ; Javanese # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
A9B6..A9B9 ; Javanese # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
A9BA..A9BB ; Javanese # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
A9BC ; Javanese # Mn JAVANESE VOWEL SIGN PEPET
A9BD..A9C0 ; Javanese # Mc [4] JAVANESE CONSONANT SIGN KERET..JAVANESE PANGKON
A9C1..A9CD ; Javanese # Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH
A9CF ; Javanese # Lm JAVANESE PANGRANGKEP
A9D0..A9D9 ; Javanese # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE
A9DE..A9DF ; Javanese # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN
# Total code points: 91
# ================================================
ABC0..ABE2 ; Meetei_Mayek # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM
ABE3..ABE4 ; Meetei_Mayek # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
ABE5 ; Meetei_Mayek # Mn MEETEI MAYEK VOWEL SIGN ANAP
ABE6..ABE7 ; Meetei_Mayek # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP
ABE8 ; Meetei_Mayek # Mn MEETEI MAYEK VOWEL SIGN UNAP
ABE9..ABEA ; Meetei_Mayek # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG
ABEB ; Meetei_Mayek # Po MEETEI MAYEK CHEIKHEI
ABEC ; Meetei_Mayek # Mc MEETEI MAYEK LUM IYEK
ABED ; Meetei_Mayek # Mn MEETEI MAYEK APUN IYEK
ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
# Total code points: 56
# ================================================
10840..10855 ; Imperial_Aramaic # Lo [22] IMPERIAL ARAMAIC LETTER ALEPH..IMPERIAL ARAMAIC LETTER TAW
10857 ; Imperial_Aramaic # Po IMPERIAL ARAMAIC SECTION SIGN
10858..1085F ; Imperial_Aramaic # No [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND
# Total code points: 31
# ================================================
10A60..10A7C ; Old_South_Arabian # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH
10A7D..10A7E ; Old_South_Arabian # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY
10A7F ; Old_South_Arabian # Po OLD SOUTH ARABIAN NUMERIC INDICATOR
# Total code points: 32
# ================================================
10B40..10B55 ; Inscriptional_Parthian # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW
10B58..10B5F ; Inscriptional_Parthian # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND
# Total code points: 30
# ================================================
10B60..10B72 ; Inscriptional_Pahlavi # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW
10B78..10B7F ; Inscriptional_Pahlavi # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
# Total code points: 27
# ================================================
10C00..10C48 ; Old_Turkic # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH
# Total code points: 73
# ================================================
11080..11081 ; Kaithi # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA
11082 ; Kaithi # Mc KAITHI SIGN VISARGA
11083..110AF ; Kaithi # Lo [45] KAITHI LETTER A..KAITHI LETTER HA
110B0..110B2 ; Kaithi # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II
110B3..110B6 ; Kaithi # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
110B7..110B8 ; Kaithi # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU
110B9..110BA ; Kaithi # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
110BB..110BC ; Kaithi # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN
110BD ; Kaithi # Cf KAITHI NUMBER SIGN
110BE..110C1 ; Kaithi # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
# Total code points: 66
# EOF
package build.tools.generatecharacter;
import java.io.*;
import java.nio.*;
import java.util.*;
import java.util.zip.*;
public class CharacterName {
public static void main(String[] args) {
FileReader reader = null;
try {
if (args.length != 2) {
System.err.println("Usage: java CharacterName UniocdeData.txt uniName.dat");
System.exit(1);
}
reader = new FileReader(args[0]);
BufferedReader bfr = new BufferedReader(reader);
String line = null;
StringBuilder namePool = new StringBuilder();
byte[] cpPoolBytes = new byte[0x100000];
ByteBuffer cpBB = ByteBuffer.wrap(cpPoolBytes);
int lastCp = 0;
int cpNum = 0;
while ((line = bfr.readLine()) != null) {
if (line.startsWith("#"))
continue;
UnicodeSpec spec = UnicodeSpec.parse(line);
if (spec != null) {
int cp = spec.getCodePoint();
String name = spec.getName();
cpNum++;
if (name.equals("<control>") && spec.getOldName() != null) {
if (spec.getOldName().length() != 0)
name = spec.getOldName();
else
continue;
} else if (name.startsWith("<")) {
/*
3400 <CJK Ideograph Extension A, First>
4db5 <CJK Ideograph Extension A, Last>
4e00 <CJK Ideograph, First>
9fc3 <CJK Ideograph, Last>
ac00 <Hangul Syllable, First>
d7a3 <Hangul Syllable, Last>
d800 <Non Private Use High Surrogate, First>
db7f <Non Private Use High Surrogate, Last>
db80 <Private Use High Surrogate, First>
dbff <Private Use High Surrogate, Last>
dc00 <Low Surrogate, First>
dfff <Low Surrogate, Last>
e000 <Private Use, First>
f8ff <Private Use, Last>
20000 <CJK Ideograph Extension B, First>
2a6d6 <CJK Ideograph Extension B, Last>
f0000 <Plane 15 Private Use, First>
ffffd <Plane 15 Private Use, Last>
*/
continue;
}
if (cp == lastCp + 1) {
cpBB.put((byte)name.length());
} else {
cpBB.put((byte)0); // segment start flag
cpBB.putInt((name.length() << 24) | (cp & 0xffffff));
}
namePool.append(name);
lastCp = cp;
}
}
byte[] namePoolBytes = namePool.toString().getBytes("ASCII");
int cpLen = cpBB.position();
int total = cpLen + namePoolBytes.length;
DataOutputStream dos = new DataOutputStream(
new DeflaterOutputStream(
new FileOutputStream(args[1])));
dos.writeInt(total); // total
dos.writeInt(cpLen); // nameOff
dos.write(cpPoolBytes, 0, cpLen);
dos.write(namePoolBytes);
dos.close();
} catch (Throwable e) {
System.out.println("Unexpected exception:");
e.printStackTrace();
} finally {
if (reader != null) {
try {
reader.close();
} catch (Throwable ee) { ee.printStackTrace(); }
}
}
}
}
import java.util.regex.*;
import java.util.*;
import java.io.*;
public class CharacterScript {
// generate the code needed for j.l.C.UnicodeScript
static void fortest(String fmt, Object... o) {
//System.out.printf(fmt, o);
}
static void print(String fmt, Object... o) {
System.out.printf(fmt, o);
}
static void debug(String fmt, Object... o) {
//System.out.printf(fmt, o);
}
public static void main(String args[]){
try {
if (args.length != 1) {
System.out.println("java CharacterScript script.txt out");
System.exit(1);
}
int i, j;
BufferedReader sbfr = new BufferedReader(new FileReader(args[0]));
HashMap<String,Integer> scriptMap = new HashMap<String,Integer>();
String line = null;
Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher("");
int prevS = -1;
int prevE = -1;
String prevN = null;
int[][] scripts = new int[1024][3];
int scriptSize = 0;
while ((line = sbfr.readLine()) != null) {
if (line.length() <= 1 || line.charAt(0) == '#') {
continue;
}
m.reset(line);
if (m.matches()) {
int start = Integer.parseInt(m.group(1), 16);
int end = (m.group(2)==null)?start
:Integer.parseInt(m.group(2), 16);
String name = m.group(3);
if (name.equals(prevN) && start == prevE + 1) {
prevE = end;
} else {
if (prevS != -1) {
if (scriptMap.get(prevN) == null) {
scriptMap.put(prevN, scriptMap.size());
}
scripts[scriptSize][0] = prevS;
scripts[scriptSize][1] = prevE;
scripts[scriptSize][2] = scriptMap.get(prevN);
scriptSize++;
}
debug("%x-%x\t%s%n", prevS, prevE, prevN);
prevS = start; prevE = end; prevN = name;
}
} else {
debug("Warning: Unrecognized line <%s>%n", line);
}
}
//last one.
if (scriptMap.get(prevN) == null) {
scriptMap.put(prevN, scriptMap.size());
}
scripts[scriptSize][0] = prevS;
scripts[scriptSize][1] = prevE;
scripts[scriptSize][2] = scriptMap.get(prevN);
scriptSize++;
debug("%x-%x\t%s%n", prevS, prevE, prevN);
debug("-----------------%n");
debug("Total scripts=%s%n", scriptMap.size());
debug("-----------------%n%n");
String[] names = new String[scriptMap.size()];
for (String name: scriptMap.keySet()) {
names[scriptMap.get(name).intValue()] = name;
}
for (j = 0; j < scriptSize; j++) {
for (int cp = scripts[j][0]; cp <= scripts[j][1]; cp++) {
String name = names[scripts[j][2]].toUpperCase(Locale.ENGLISH);;
if (cp > 0xffff)
System.out.printf("%05X %s%n", cp, name);
else
System.out.printf("%05X %s%n", cp, name);
}
}
Arrays.sort(scripts, 0, scriptSize,
new Comparator<int[]>() {
public int compare(int[] a1, int[] a2) {
return a1[0] - a2[0];
}
public boolean compare(Object obj) {
return obj == this;
}
});
// Consolidation: there are lots of "reserved" code points
// embedded in those otherwise "sequential" blocks.
// To make the lookup table smaller, we combine those
// separated segments with the assumption that the lookup
// implementation checks
// Character.getType() != Character.UNASSIGNED
// first (return UNKNOWN for unassigned)
ArrayList<int[]> list = new ArrayList();
list.add(scripts[0]);
int[] last = scripts[0];
for (i = 1; i < scriptSize; i++) {
if (scripts[i][0] != (last[1] + 1)) {
boolean isNotUnassigned = false;
for (int cp = last[1] + 1; cp < scripts[i][0]; cp++) {
if (Character.getType(cp) != Character.UNASSIGNED) {
isNotUnassigned = true;
debug("Warning: [%x] is ASSIGNED but in NON script%n", cp);
break;
}
}
if (isNotUnassigned) {
// surrogates only?
int[] a = new int[3];
a[0] = last[1] + 1;
a[1] = scripts[i][0] - 1;
a[2] = -1; // unknown
list.add(a);
} else {
if (last[2] == scripts[i][2]) {
//combine
last[1] = scripts[i][1];
continue;
} else {
// expand last
last[1] = scripts[i][0] - 1;
}
}
}
list.add(scripts[i]);
last = scripts[i];
}
for (i = 0; i < list.size(); i++) {
int[] a = (int[])list.get(i);
String name = "UNKNOWN";
if (a[2] != -1)
name = names[a[2]].toUpperCase(Locale.US);
debug("0x%05x, 0x%05x %s%n", a[0], a[1], name);
}
debug("--->total=%d%n", list.size());
//////////////////OUTPUT//////////////////////////////////
print("public class Scripts {%n%n");
print(" public static enum UnicodeScript {%n");
for (i = 0; i < names.length; i++) {
print(" /**%n * Unicode script \"%s\".%n */%n", names[i]);
print(" %s,%n%n", names[i].toUpperCase(Locale.US));
}
print(" /**%n * Unicode script \"Unknown\".%n */%n UNKNOWN;%n%n");
// lookup table
print(" private static final int[] scriptStarts = {%n");
for (int[] a : list) {
String name = "UNKNOWN";
if (a[2] != -1)
name = names[a[2]].toUpperCase(Locale.US);
if (a[0] < 0x10000)
print(" 0x%04X, // %04X..%04X; %s%n",
a[0], a[0], a[1], name);
else
print(" 0x%05X, // %05X..%05X; %s%n",
a[0], a[0], a[1], name);
}
last = list.get(list.size() -1);
if (last[1] != Character.MAX_CODE_POINT)
print(" 0x%05X // %05X..%06X; %s%n",
last[1] + 1, last[1] + 1, Character.MAX_CODE_POINT,
"UNKNOWN");
print("%n };%n%n");
print(" private static final UnicodeScript[] scripts = {%n");
for (int[] a : list) {
String name = "UNKNOWN";
if (a[2] != -1)
name = names[a[2]].toUpperCase(Locale.US);
print(" %s,%n", name);
}
if (last[1] != Character.MAX_CODE_POINT)
print(" UNKNOWN%n");
print(" };%n");
print(" }%n");
print("}%n");
} catch (Exception e) {
e.printStackTrace();
}
}
}
......@@ -35,6 +35,8 @@ import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.File;
import build.tools.generatecharacter.CharacterName;
/**
* This program generates the source code for the class java.lang.Character.
* It also generates native C code that can perform the same operations.
......
......@@ -24,6 +24,7 @@
*/
package java.lang;
import java.util.Arrays;
import java.util.Map;
import java.util.HashMap;
import java.util.Locale;
......@@ -2546,6 +2547,1241 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
}
/**
* A family of character subsets representing the character scripts
* defined in the <a href="http://www.unicode.org/reports/tr24/">
* <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
* character is assigned to a single Unicode script, either a specific
* script, such as {@link Character.UnicodeScript#LATIN Latin}, or
* one of the following three special values,
* {@link Character.UnicodeScript#INHERITED Inherited},
* {@link Character.UnicodeScript#COMMON Common} or
* {@link Character.UnicodeScript#UNKNOWN Unknown}.
*
* @since 1.7
*/
public static enum UnicodeScript {
/**
* Unicode script "Common".
*/
COMMON,
/**
* Unicode script "Latin".
*/
LATIN,
/**
* Unicode script "Greek".
*/
GREEK,
/**
* Unicode script "Cyrillic".
*/
CYRILLIC,
/**
* Unicode script "Armenian".
*/
ARMENIAN,
/**
* Unicode script "Hebrew".
*/
HEBREW,
/**
* Unicode script "Arabic".
*/
ARABIC,
/**
* Unicode script "Syriac".
*/
SYRIAC,
/**
* Unicode script "Thaana".
*/
THAANA,
/**
* Unicode script "Devanagari".
*/
DEVANAGARI,
/**
* Unicode script "Bengali".
*/
BENGALI,
/**
* Unicode script "Gurmukhi".
*/
GURMUKHI,
/**
* Unicode script "Gujarati".
*/
GUJARATI,
/**
* Unicode script "Oriya".
*/
ORIYA,
/**
* Unicode script "Tamil".
*/
TAMIL,
/**
* Unicode script "Telugu".
*/
TELUGU,
/**
* Unicode script "Kannada".
*/
KANNADA,
/**
* Unicode script "Malayalam".
*/
MALAYALAM,
/**
* Unicode script "Sinhala".
*/
SINHALA,
/**
* Unicode script "Thai".
*/
THAI,
/**
* Unicode script "Lao".
*/
LAO,
/**
* Unicode script "Tibetan".
*/
TIBETAN,
/**
* Unicode script "Myanmar".
*/
MYANMAR,
/**
* Unicode script "Georgian".
*/
GEORGIAN,
/**
* Unicode script "Hangul".
*/
HANGUL,
/**
* Unicode script "Ethiopic".
*/
ETHIOPIC,
/**
* Unicode script "Cherokee".
*/
CHEROKEE,
/**
* Unicode script "Canadian_Aboriginal".
*/
CANADIAN_ABORIGINAL,
/**
* Unicode script "Ogham".
*/
OGHAM,
/**
* Unicode script "Runic".
*/
RUNIC,
/**
* Unicode script "Khmer".
*/
KHMER,
/**
* Unicode script "Mongolian".
*/
MONGOLIAN,
/**
* Unicode script "Hiragana".
*/
HIRAGANA,
/**
* Unicode script "Katakana".
*/
KATAKANA,
/**
* Unicode script "Bopomofo".
*/
BOPOMOFO,
/**
* Unicode script "Han".
*/
HAN,
/**
* Unicode script "Yi".
*/
YI,
/**
* Unicode script "Old_Italic".
*/
OLD_ITALIC,
/**
* Unicode script "Gothic".
*/
GOTHIC,
/**
* Unicode script "Deseret".
*/
DESERET,
/**
* Unicode script "Inherited".
*/
INHERITED,
/**
* Unicode script "Tagalog".
*/
TAGALOG,
/**
* Unicode script "Hanunoo".
*/
HANUNOO,
/**
* Unicode script "Buhid".
*/
BUHID,
/**
* Unicode script "Tagbanwa".
*/
TAGBANWA,
/**
* Unicode script "Limbu".
*/
LIMBU,
/**
* Unicode script "Tai_Le".
*/
TAI_LE,
/**
* Unicode script "Linear_B".
*/
LINEAR_B,
/**
* Unicode script "Ugaritic".
*/
UGARITIC,
/**
* Unicode script "Shavian".
*/
SHAVIAN,
/**
* Unicode script "Osmanya".
*/
OSMANYA,
/**
* Unicode script "Cypriot".
*/
CYPRIOT,
/**
* Unicode script "Braille".
*/
BRAILLE,
/**
* Unicode script "Buginese".
*/
BUGINESE,
/**
* Unicode script "Coptic".
*/
COPTIC,
/**
* Unicode script "New_Tai_Lue".
*/
NEW_TAI_LUE,
/**
* Unicode script "Glagolitic".
*/
GLAGOLITIC,
/**
* Unicode script "Tifinagh".
*/
TIFINAGH,
/**
* Unicode script "Syloti_Nagri".
*/
SYLOTI_NAGRI,
/**
* Unicode script "Old_Persian".
*/
OLD_PERSIAN,
/**
* Unicode script "Kharoshthi".
*/
KHAROSHTHI,
/**
* Unicode script "Balinese".
*/
BALINESE,
/**
* Unicode script "Cuneiform".
*/
CUNEIFORM,
/**
* Unicode script "Phoenician".
*/
PHOENICIAN,
/**
* Unicode script "Phags_Pa".
*/
PHAGS_PA,
/**
* Unicode script "Nko".
*/
NKO,
/**
* Unicode script "Sundanese".
*/
SUNDANESE,
/**
* Unicode script "Lepcha".
*/
LEPCHA,
/**
* Unicode script "Ol_Chiki".
*/
OL_CHIKI,
/**
* Unicode script "Vai".
*/
VAI,
/**
* Unicode script "Saurashtra".
*/
SAURASHTRA,
/**
* Unicode script "Kayah_Li".
*/
KAYAH_LI,
/**
* Unicode script "Rejang".
*/
REJANG,
/**
* Unicode script "Lycian".
*/
LYCIAN,
/**
* Unicode script "Carian".
*/
CARIAN,
/**
* Unicode script "Lydian".
*/
LYDIAN,
/**
* Unicode script "Cham".
*/
CHAM,
/**
* Unicode script "Tai_Tham".
*/
TAI_THAM,
/**
* Unicode script "Tai_Viet".
*/
TAI_VIET,
/**
* Unicode script "Avestan".
*/
AVESTAN,
/**
* Unicode script "Egyptian_Hieroglyphs".
*/
EGYPTIAN_HIEROGLYPHS,
/**
* Unicode script "Samaritan".
*/
SAMARITAN,
/**
* Unicode script "Lisu".
*/
LISU,
/**
* Unicode script "Bamum".
*/
BAMUM,
/**
* Unicode script "Javanese".
*/
JAVANESE,
/**
* Unicode script "Meetei_Mayek".
*/
MEETEI_MAYEK,
/**
* Unicode script "Imperial_Aramaic".
*/
IMPERIAL_ARAMAIC,
/**
* Unicode script "Old_South_Arabian".
*/
OLD_SOUTH_ARABIAN,
/**
* Unicode script "Inscriptional_Parthian".
*/
INSCRIPTIONAL_PARTHIAN,
/**
* Unicode script "Inscriptional_Pahlavi".
*/
INSCRIPTIONAL_PAHLAVI,
/**
* Unicode script "Old_Turkic".
*/
OLD_TURKIC,
/**
* Unicode script "Kaithi".
*/
KAITHI,
/**
* Unicode script "Unknown".
*/
UNKNOWN;
private static final int[] scriptStarts = {
0x0000, // 0000..0040; COMMON
0x0041, // 0041..005A; LATIN
0x005B, // 005B..0060; COMMON
0x0061, // 0061..007A; LATIN
0x007B, // 007B..00A9; COMMON
0x00AA, // 00AA..00AA; LATIN
0x00AB, // 00AB..00B9; COMMON
0x00BA, // 00BA..00BA; LATIN
0x00BB, // 00BB..00BF; COMMON
0x00C0, // 00C0..00D6; LATIN
0x00D7, // 00D7..00D7; COMMON
0x00D8, // 00D8..00F6; LATIN
0x00F7, // 00F7..00F7; COMMON
0x00F8, // 00F8..02B8; LATIN
0x02B9, // 02B9..02DF; COMMON
0x02E0, // 02E0..02E4; LATIN
0x02E5, // 02E5..02FF; COMMON
0x0300, // 0300..036F; INHERITED
0x0370, // 0370..0373; GREEK
0x0374, // 0374..0374; COMMON
0x0375, // 0375..037D; GREEK
0x037E, // 037E..0383; COMMON
0x0384, // 0384..0384; GREEK
0x0385, // 0385..0385; COMMON
0x0386, // 0386..0386; GREEK
0x0387, // 0387..0387; COMMON
0x0388, // 0388..03E1; GREEK
0x03E2, // 03E2..03EF; COPTIC
0x03F0, // 03F0..03FF; GREEK
0x0400, // 0400..0484; CYRILLIC
0x0485, // 0485..0486; INHERITED
0x0487, // 0487..0530; CYRILLIC
0x0531, // 0531..0588; ARMENIAN
0x0589, // 0589..0589; COMMON
0x058A, // 058A..0590; ARMENIAN
0x0591, // 0591..05FF; HEBREW
0x0600, // 0600..0605; COMMON
0x0606, // 0606..060B; ARABIC
0x060C, // 060C..060C; COMMON
0x060D, // 060D..061A; ARABIC
0x061B, // 061B..061D; COMMON
0x061E, // 061E..061E; ARABIC
0x061F, // 061F..0620; COMMON
0x0621, // 0621..063F; ARABIC
0x0640, // 0640..0640; COMMON
0x0641, // 0641..064A; ARABIC
0x064B, // 064B..0655; INHERITED
0x0656, // 0656..065F; ARABIC
0x0660, // 0660..0669; COMMON
0x066A, // 066A..066F; ARABIC
0x0670, // 0670..0670; INHERITED
0x0671, // 0671..06DC; ARABIC
0x06DD, // 06DD..06DD; COMMON
0x06DE, // 06DE..06FF; ARABIC
0x0700, // 0700..074F; SYRIAC
0x0750, // 0750..077F; ARABIC
0x0780, // 0780..07BF; THAANA
0x07C0, // 07C0..07FF; NKO
0x0800, // 0800..08FF; SAMARITAN
0x0900, // 0900..0950; DEVANAGARI
0x0951, // 0951..0952; INHERITED
0x0953, // 0953..0963; DEVANAGARI
0x0964, // 0964..0965; COMMON
0x0966, // 0966..096F; DEVANAGARI
0x0970, // 0970..0970; COMMON
0x0971, // 0971..0980; DEVANAGARI
0x0981, // 0981..0A00; BENGALI
0x0A01, // 0A01..0A80; GURMUKHI
0x0A81, // 0A81..0B00; GUJARATI
0x0B01, // 0B01..0B81; ORIYA
0x0B82, // 0B82..0C00; TAMIL
0x0C01, // 0C01..0C81; TELUGU
0x0C82, // 0C82..0CF0; KANNADA
0x0CF1, // 0CF1..0D01; COMMON
0x0D02, // 0D02..0D81; MALAYALAM
0x0D82, // 0D82..0E00; SINHALA
0x0E01, // 0E01..0E3E; THAI
0x0E3F, // 0E3F..0E3F; COMMON
0x0E40, // 0E40..0E80; THAI
0x0E81, // 0E81..0EFF; LAO
0x0F00, // 0F00..0FD4; TIBETAN
0x0FD5, // 0FD5..0FFF; COMMON
0x1000, // 1000..109F; MYANMAR
0x10A0, // 10A0..10FA; GEORGIAN
0x10FB, // 10FB..10FB; COMMON
0x10FC, // 10FC..10FF; GEORGIAN
0x1100, // 1100..11FF; HANGUL
0x1200, // 1200..139F; ETHIOPIC
0x13A0, // 13A0..13FF; CHEROKEE
0x1400, // 1400..167F; CANADIAN_ABORIGINAL
0x1680, // 1680..169F; OGHAM
0x16A0, // 16A0..16EA; RUNIC
0x16EB, // 16EB..16ED; COMMON
0x16EE, // 16EE..16FF; RUNIC
0x1700, // 1700..171F; TAGALOG
0x1720, // 1720..1734; HANUNOO
0x1735, // 1735..173F; COMMON
0x1740, // 1740..175F; BUHID
0x1760, // 1760..177F; TAGBANWA
0x1780, // 1780..17FF; KHMER
0x1800, // 1800..1801; MONGOLIAN
0x1802, // 1802..1803; COMMON
0x1804, // 1804..1804; MONGOLIAN
0x1805, // 1805..1805; COMMON
0x1806, // 1806..18AF; MONGOLIAN
0x18B0, // 18B0..18FF; CANADIAN_ABORIGINAL
0x1900, // 1900..194F; LIMBU
0x1950, // 1950..197F; TAI_LE
0x1980, // 1980..19DF; NEW_TAI_LUE
0x19E0, // 19E0..19FF; KHMER
0x1A00, // 1A00..1A1F; BUGINESE
0x1A20, // 1A20..1AFF; TAI_THAM
0x1B00, // 1B00..1B7F; BALINESE
0x1B80, // 1B80..1BFF; SUNDANESE
0x1C00, // 1C00..1C4F; LEPCHA
0x1C50, // 1C50..1CCF; OL_CHIKI
0x1CD0, // 1CD0..1CD2; INHERITED
0x1CD3, // 1CD3..1CD3; COMMON
0x1CD4, // 1CD4..1CE0; INHERITED
0x1CE1, // 1CE1..1CE1; COMMON
0x1CE2, // 1CE2..1CE8; INHERITED
0x1CE9, // 1CE9..1CEC; COMMON
0x1CED, // 1CED..1CED; INHERITED
0x1CEE, // 1CEE..1CFF; COMMON
0x1D00, // 1D00..1D25; LATIN
0x1D26, // 1D26..1D2A; GREEK
0x1D2B, // 1D2B..1D2B; CYRILLIC
0x1D2C, // 1D2C..1D5C; LATIN
0x1D5D, // 1D5D..1D61; GREEK
0x1D62, // 1D62..1D65; LATIN
0x1D66, // 1D66..1D6A; GREEK
0x1D6B, // 1D6B..1D77; LATIN
0x1D78, // 1D78..1D78; CYRILLIC
0x1D79, // 1D79..1DBE; LATIN
0x1DBF, // 1DBF..1DBF; GREEK
0x1DC0, // 1DC0..1DFF; INHERITED
0x1E00, // 1E00..1EFF; LATIN
0x1F00, // 1F00..1FFF; GREEK
0x2000, // 2000..200B; COMMON
0x200C, // 200C..200D; INHERITED
0x200E, // 200E..2070; COMMON
0x2071, // 2071..2073; LATIN
0x2074, // 2074..207E; COMMON
0x207F, // 207F..207F; LATIN
0x2080, // 2080..208F; COMMON
0x2090, // 2090..209F; LATIN
0x20A0, // 20A0..20CF; COMMON
0x20D0, // 20D0..20FF; INHERITED
0x2100, // 2100..2125; COMMON
0x2126, // 2126..2126; GREEK
0x2127, // 2127..2129; COMMON
0x212A, // 212A..212B; LATIN
0x212C, // 212C..2131; COMMON
0x2132, // 2132..2132; LATIN
0x2133, // 2133..214D; COMMON
0x214E, // 214E..214E; LATIN
0x214F, // 214F..215F; COMMON
0x2160, // 2160..2188; LATIN
0x2189, // 2189..27FF; COMMON
0x2800, // 2800..28FF; BRAILLE
0x2900, // 2900..2BFF; COMMON
0x2C00, // 2C00..2C5F; GLAGOLITIC
0x2C60, // 2C60..2C7F; LATIN
0x2C80, // 2C80..2CFF; COPTIC
0x2D00, // 2D00..2D2F; GEORGIAN
0x2D30, // 2D30..2D7F; TIFINAGH
0x2D80, // 2D80..2DDF; ETHIOPIC
0x2DE0, // 2DE0..2DFF; CYRILLIC
0x2E00, // 2E00..2E7F; COMMON
0x2E80, // 2E80..2FEF; HAN
0x2FF0, // 2FF0..3004; COMMON
0x3005, // 3005..3005; HAN
0x3006, // 3006..3006; COMMON
0x3007, // 3007..3007; HAN
0x3008, // 3008..3020; COMMON
0x3021, // 3021..3029; HAN
0x302A, // 302A..302F; INHERITED
0x3030, // 3030..3037; COMMON
0x3038, // 3038..303B; HAN
0x303C, // 303C..3040; COMMON
0x3041, // 3041..3098; HIRAGANA
0x3099, // 3099..309A; INHERITED
0x309B, // 309B..309C; COMMON
0x309D, // 309D..309F; HIRAGANA
0x30A0, // 30A0..30A0; COMMON
0x30A1, // 30A1..30FA; KATAKANA
0x30FB, // 30FB..30FC; COMMON
0x30FD, // 30FD..3104; KATAKANA
0x3105, // 3105..3130; BOPOMOFO
0x3131, // 3131..318F; HANGUL
0x3190, // 3190..319F; COMMON
0x31A0, // 31A0..31BF; BOPOMOFO
0x31C0, // 31C0..31EF; COMMON
0x31F0, // 31F0..31FF; KATAKANA
0x3200, // 3200..321F; HANGUL
0x3220, // 3220..325F; COMMON
0x3260, // 3260..327E; HANGUL
0x327F, // 327F..32CF; COMMON
0x32D0, // 32D0..3357; KATAKANA
0x3358, // 3358..33FF; COMMON
0x3400, // 3400..4DBF; HAN
0x4DC0, // 4DC0..4DFF; COMMON
0x4E00, // 4E00..9FFF; HAN
0xA000, // A000..A4CF; YI
0xA4D0, // A4D0..A4FF; LISU
0xA500, // A500..A63F; VAI
0xA640, // A640..A69F; CYRILLIC
0xA6A0, // A6A0..A6FF; BAMUM
0xA700, // A700..A721; COMMON
0xA722, // A722..A787; LATIN
0xA788, // A788..A78A; COMMON
0xA78B, // A78B..A7FF; LATIN
0xA800, // A800..A82F; SYLOTI_NAGRI
0xA830, // A830..A83F; COMMON
0xA840, // A840..A87F; PHAGS_PA
0xA880, // A880..A8DF; SAURASHTRA
0xA8E0, // A8E0..A8FF; DEVANAGARI
0xA900, // A900..A92F; KAYAH_LI
0xA930, // A930..A95F; REJANG
0xA960, // A960..A97F; HANGUL
0xA980, // A980..A9FF; JAVANESE
0xAA00, // AA00..AA5F; CHAM
0xAA60, // AA60..AA7F; MYANMAR
0xAA80, // AA80..ABBF; TAI_VIET
0xABC0, // ABC0..ABFF; MEETEI_MAYEK
0xAC00, // AC00..D7FB; HANGUL
0xD7FC, // D7FC..F8FF; UNKNOWN
0xF900, // F900..FAFF; HAN
0xFB00, // FB00..FB12; LATIN
0xFB13, // FB13..FB1C; ARMENIAN
0xFB1D, // FB1D..FB4F; HEBREW
0xFB50, // FB50..FD3D; ARABIC
0xFD3E, // FD3E..FD4F; COMMON
0xFD50, // FD50..FDFC; ARABIC
0xFDFD, // FDFD..FDFF; COMMON
0xFE00, // FE00..FE0F; INHERITED
0xFE10, // FE10..FE1F; COMMON
0xFE20, // FE20..FE2F; INHERITED
0xFE30, // FE30..FE6F; COMMON
0xFE70, // FE70..FEFE; ARABIC
0xFEFF, // FEFF..FF20; COMMON
0xFF21, // FF21..FF3A; LATIN
0xFF3B, // FF3B..FF40; COMMON
0xFF41, // FF41..FF5A; LATIN
0xFF5B, // FF5B..FF65; COMMON
0xFF66, // FF66..FF6F; KATAKANA
0xFF70, // FF70..FF70; COMMON
0xFF71, // FF71..FF9D; KATAKANA
0xFF9E, // FF9E..FF9F; COMMON
0xFFA0, // FFA0..FFDF; HANGUL
0xFFE0, // FFE0..FFFF; COMMON
0x10000, // 10000..100FF; LINEAR_B
0x10100, // 10100..1013F; COMMON
0x10140, // 10140..1018F; GREEK
0x10190, // 10190..101FC; COMMON
0x101FD, // 101FD..1027F; INHERITED
0x10280, // 10280..1029F; LYCIAN
0x102A0, // 102A0..102FF; CARIAN
0x10300, // 10300..1032F; OLD_ITALIC
0x10330, // 10330..1037F; GOTHIC
0x10380, // 10380..1039F; UGARITIC
0x103A0, // 103A0..103FF; OLD_PERSIAN
0x10400, // 10400..1044F; DESERET
0x10450, // 10450..1047F; SHAVIAN
0x10480, // 10480..107FF; OSMANYA
0x10800, // 10800..1083F; CYPRIOT
0x10840, // 10840..108FF; IMPERIAL_ARAMAIC
0x10900, // 10900..1091F; PHOENICIAN
0x10920, // 10920..109FF; LYDIAN
0x10A00, // 10A00..10A5F; KHAROSHTHI
0x10A60, // 10A60..10AFF; OLD_SOUTH_ARABIAN
0x10B00, // 10B00..10B3F; AVESTAN
0x10B40, // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
0x10B60, // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
0x10C00, // 10C00..10E5F; OLD_TURKIC
0x10E60, // 10E60..1107F; ARABIC
0x11080, // 11080..11FFF; KAITHI
0x12000, // 12000..12FFF; CUNEIFORM
0x13000, // 13000..1CFFF; EGYPTIAN_HIEROGLYPHS
0x1D000, // 1D000..1D166; COMMON
0x1D167, // 1D167..1D169; INHERITED
0x1D16A, // 1D16A..1D17A; COMMON
0x1D17B, // 1D17B..1D182; INHERITED
0x1D183, // 1D183..1D184; COMMON
0x1D185, // 1D185..1D18B; INHERITED
0x1D18C, // 1D18C..1D1A9; COMMON
0x1D1AA, // 1D1AA..1D1AD; INHERITED
0x1D1AE, // 1D1AE..1D1FF; COMMON
0x1D200, // 1D200..1D2FF; GREEK
0x1D300, // 1D300..1F1FF; COMMON
0x1F200, // 1F200..1F20F; HIRAGANA
0x1F210, // 1F210..1FFFF; COMMON
0x20000, // 20000..E0000; HAN
0xE0001, // E0001..E00FF; COMMON
0xE0100, // E0100..E01EF; INHERITED
0xE01F0 // E01F0..10FFFF; UNKNOWN
};
private static final UnicodeScript[] scripts = {
COMMON,
LATIN,
COMMON,
LATIN,
COMMON,
LATIN,
COMMON,
LATIN,
COMMON,
LATIN,
COMMON,
LATIN,
COMMON,
LATIN,
COMMON,
LATIN,
COMMON,
INHERITED,
GREEK,
COMMON,
GREEK,
COMMON,
GREEK,
COMMON,
GREEK,
COMMON,
GREEK,
COPTIC,
GREEK,
CYRILLIC,
INHERITED,
CYRILLIC,
ARMENIAN,
COMMON,
ARMENIAN,
HEBREW,
COMMON,
ARABIC,
COMMON,
ARABIC,
COMMON,
ARABIC,
COMMON,
ARABIC,
COMMON,
ARABIC,
INHERITED,
ARABIC,
COMMON,
ARABIC,
INHERITED,
ARABIC,
COMMON,
ARABIC,
SYRIAC,
ARABIC,
THAANA,
NKO,
SAMARITAN,
DEVANAGARI,
INHERITED,
DEVANAGARI,
COMMON,
DEVANAGARI,
COMMON,
DEVANAGARI,
BENGALI,
GURMUKHI,
GUJARATI,
ORIYA,
TAMIL,
TELUGU,
KANNADA,
COMMON,
MALAYALAM,
SINHALA,
THAI,
COMMON,
THAI,
LAO,
TIBETAN,
COMMON,
MYANMAR,
GEORGIAN,
COMMON,
GEORGIAN,
HANGUL,
ETHIOPIC,
CHEROKEE,
CANADIAN_ABORIGINAL,
OGHAM,
RUNIC,
COMMON,
RUNIC,
TAGALOG,
HANUNOO,
COMMON,
BUHID,
TAGBANWA,
KHMER,
MONGOLIAN,
COMMON,
MONGOLIAN,
COMMON,
MONGOLIAN,
CANADIAN_ABORIGINAL,
LIMBU,
TAI_LE,
NEW_TAI_LUE,
KHMER,
BUGINESE,
TAI_THAM,
BALINESE,
SUNDANESE,
LEPCHA,
OL_CHIKI,
INHERITED,
COMMON,
INHERITED,
COMMON,
INHERITED,
COMMON,
INHERITED,
COMMON,
LATIN,
GREEK,
CYRILLIC,
LATIN,
GREEK,
LATIN,
GREEK,
LATIN,
CYRILLIC,
LATIN,
GREEK,
INHERITED,
LATIN,
GREEK,
COMMON,
INHERITED,
COMMON,
LATIN,
COMMON,
LATIN,
COMMON,
LATIN,
COMMON,
INHERITED,
COMMON,
GREEK,
COMMON,
LATIN,
COMMON,
LATIN,
COMMON,
LATIN,
COMMON,
LATIN,
COMMON,
BRAILLE,
COMMON,
GLAGOLITIC,
LATIN,
COPTIC,
GEORGIAN,
TIFINAGH,
ETHIOPIC,
CYRILLIC,
COMMON,
HAN,
COMMON,
HAN,
COMMON,
HAN,
COMMON,
HAN,
INHERITED,
COMMON,
HAN,
COMMON,
HIRAGANA,
INHERITED,
COMMON,
HIRAGANA,
COMMON,
KATAKANA,
COMMON,
KATAKANA,
BOPOMOFO,
HANGUL,
COMMON,
BOPOMOFO,
COMMON,
KATAKANA,
HANGUL,
COMMON,
HANGUL,
COMMON,
KATAKANA,
COMMON,
HAN,
COMMON,
HAN,
YI,
LISU,
VAI,
CYRILLIC,
BAMUM,
COMMON,
LATIN,
COMMON,
LATIN,
SYLOTI_NAGRI,
COMMON,
PHAGS_PA,
SAURASHTRA,
DEVANAGARI,
KAYAH_LI,
REJANG,
HANGUL,
JAVANESE,
CHAM,
MYANMAR,
TAI_VIET,
MEETEI_MAYEK,
HANGUL,
UNKNOWN,
HAN,
LATIN,
ARMENIAN,
HEBREW,
ARABIC,
COMMON,
ARABIC,
COMMON,
INHERITED,
COMMON,
INHERITED,
COMMON,
ARABIC,
COMMON,
LATIN,
COMMON,
LATIN,
COMMON,
KATAKANA,
COMMON,
KATAKANA,
COMMON,
HANGUL,
COMMON,
LINEAR_B,
COMMON,
GREEK,
COMMON,
INHERITED,
LYCIAN,
CARIAN,
OLD_ITALIC,
GOTHIC,
UGARITIC,
OLD_PERSIAN,
DESERET,
SHAVIAN,
OSMANYA,
CYPRIOT,
IMPERIAL_ARAMAIC,
PHOENICIAN,
LYDIAN,
KHAROSHTHI,
OLD_SOUTH_ARABIAN,
AVESTAN,
INSCRIPTIONAL_PARTHIAN,
INSCRIPTIONAL_PAHLAVI,
OLD_TURKIC,
ARABIC,
KAITHI,
CUNEIFORM,
EGYPTIAN_HIEROGLYPHS,
COMMON,
INHERITED,
COMMON,
INHERITED,
COMMON,
INHERITED,
COMMON,
INHERITED,
COMMON,
GREEK,
COMMON,
HIRAGANA,
COMMON,
HAN,
COMMON,
INHERITED,
UNKNOWN
};
private static HashMap<String, Character.UnicodeScript> aliases;
static {
aliases = new HashMap<String, UnicodeScript>();
aliases.put("ARAB", ARABIC);
aliases.put("ARMI", IMPERIAL_ARAMAIC);
aliases.put("ARMN", ARMENIAN);
aliases.put("AVST", AVESTAN);
aliases.put("BALI", BALINESE);
aliases.put("BAMU", BAMUM);
aliases.put("BENG", BENGALI);
aliases.put("BOPO", BOPOMOFO);
aliases.put("BRAI", BRAILLE);
aliases.put("BUGI", BUGINESE);
aliases.put("BUHD", BUHID);
aliases.put("CANS", CANADIAN_ABORIGINAL);
aliases.put("CARI", CARIAN);
aliases.put("CHAM", CHAM);
aliases.put("CHER", CHEROKEE);
aliases.put("COPT", COPTIC);
aliases.put("CPRT", CYPRIOT);
aliases.put("CYRL", CYRILLIC);
aliases.put("DEVA", DEVANAGARI);
aliases.put("DSRT", DESERET);
aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
aliases.put("ETHI", ETHIOPIC);
aliases.put("GEOR", GEORGIAN);
aliases.put("GLAG", GLAGOLITIC);
aliases.put("GOTH", GOTHIC);
aliases.put("GREK", GREEK);
aliases.put("GUJR", GUJARATI);
aliases.put("GURU", GURMUKHI);
aliases.put("HANG", HANGUL);
aliases.put("HANI", HAN);
aliases.put("HANO", HANUNOO);
aliases.put("HEBR", HEBREW);
aliases.put("HIRA", HIRAGANA);
// it appears we don't have the KATAKANA_OR_HIRAGANA
//aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
aliases.put("ITAL", OLD_ITALIC);
aliases.put("JAVA", JAVANESE);
aliases.put("KALI", KAYAH_LI);
aliases.put("KANA", KATAKANA);
aliases.put("KHAR", KHAROSHTHI);
aliases.put("KHMR", KHMER);
aliases.put("KNDA", KANNADA);
aliases.put("KTHI", KAITHI);
aliases.put("LANA", TAI_THAM);
aliases.put("LAOO", LAO);
aliases.put("LATN", LATIN);
aliases.put("LEPC", LEPCHA);
aliases.put("LIMB", LIMBU);
aliases.put("LINB", LINEAR_B);
aliases.put("LISU", LISU);
aliases.put("LYCI", LYCIAN);
aliases.put("LYDI", LYDIAN);
aliases.put("MLYM", MALAYALAM);
aliases.put("MONG", MONGOLIAN);
aliases.put("MTEI", MEETEI_MAYEK);
aliases.put("MYMR", MYANMAR);
aliases.put("NKOO", NKO);
aliases.put("OGAM", OGHAM);
aliases.put("OLCK", OL_CHIKI);
aliases.put("ORKH", OLD_TURKIC);
aliases.put("ORYA", ORIYA);
aliases.put("OSMA", OSMANYA);
aliases.put("PHAG", PHAGS_PA);
aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
aliases.put("PHNX", PHOENICIAN);
aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
aliases.put("RJNG", REJANG);
aliases.put("RUNR", RUNIC);
aliases.put("SAMR", SAMARITAN);
aliases.put("SARB", OLD_SOUTH_ARABIAN);
aliases.put("SAUR", SAURASHTRA);
aliases.put("SHAW", SHAVIAN);
aliases.put("SINH", SINHALA);
aliases.put("SUND", SUNDANESE);
aliases.put("SYLO", SYLOTI_NAGRI);
aliases.put("SYRC", SYRIAC);
aliases.put("TAGB", TAGBANWA);
aliases.put("TALE", TAI_LE);
aliases.put("TALU", NEW_TAI_LUE);
aliases.put("TAML", TAMIL);
aliases.put("TAVT", TAI_VIET);
aliases.put("TELU", TELUGU);
aliases.put("TFNG", TIFINAGH);
aliases.put("TGLG", TAGALOG);
aliases.put("THAA", THAANA);
aliases.put("THAI", THAI);
aliases.put("TIBT", TIBETAN);
aliases.put("UGAR", UGARITIC);
aliases.put("VAII", VAI);
aliases.put("XPEO", OLD_PERSIAN);
aliases.put("XSUX", CUNEIFORM);
aliases.put("YIII", YI);
aliases.put("ZINH", INHERITED);
aliases.put("ZYYY", COMMON);
aliases.put("ZZZZ", UNKNOWN);
}
/**
* Returns the enum constant representing the Unicode script of which
* the given character (Unicode code point) is assigned to.
*
* @param codePoint the character (Unicode code point) in question.
* @return The <code>UnicodeScript</code> constant representing the
* Unicode script of which this character is assigned to.
*
* @exception IllegalArgumentException if the specified
* <code>codePoint</code> is an invalid Unicode code point.
* @see Character#isValidCodePoint(int)
*
*/
public static UnicodeScript of(int codePoint) {
if (!isValidCodePoint(codePoint))
throw new IllegalArgumentException();
int type = getType(codePoint);
// leave SURROGATE and PRIVATE_USE for table lookup
if (type == UNASSIGNED)
return UNKNOWN;
int index = Arrays.binarySearch(scriptStarts, codePoint);
if (index < 0)
index = -index - 2;
return scripts[index];
}
/**
* Returns the UnicodeScript constant with the given Unicode script
* name or the script name alias. Script names and their aliases are
* determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
* and PropertyValueAliases&lt;version&gt;.txt define script names
* and the script name aliases for a particular version of the
* standard. The {@link Character} class specifies the version of
* the standard that it supports.
* <p>
* Character case is ignored for all of the valid script names.
* The en_US locale's case mapping rules are used to provide
* case-insensitive string comparisons for script name validation.
* <p>
*
* @param scriptName A <code>UnicodeScript</code> name.
* @return The <code>UnicodeScript</code> constant identified
* by <code>scriptName</code>
* @throws IllegalArgumentException if <code>scriptName</code> is an
* invalid name
* @throws NullPointerException if <code>scriptName</code> is null
*/
public static final UnicodeScript forName(String scriptName) {
scriptName = scriptName.toUpperCase(Locale.ENGLISH);
//.replace(' ', '_'));
UnicodeScript sc = aliases.get(scriptName);
if (sc != null)
return sc;
return valueOf(scriptName);
}
}
/**
* The value of the <code>Character</code>.
*
......@@ -5042,4 +6278,51 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
public static char reverseBytes(char ch) {
return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
}
/**
* Returns the Unicode name of the specified character
* <code>codePoint</code>, or null if the code point is
* {@link #UNASSIGNED unassigned}.
* <p>
* Note: if the specified character is not assigned a name by
* the <i>UnicodeData</i> file (part of the Unicode Character
* Database maintained by the Unicode Consortium), the returned
* name is the same as the result of expression
*
* <blockquote><code>
* Character.UnicodeBlock.of(codePoint)
* .toString()
* .replace('_', ' ')
* + " "
* + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
*
* </code></blockquote>
*
* @param codePoint the character (Unicode code point)
*
* @return the Unicode name of the specified character, or null if
* the code point is unassigned.
*
* @exception IllegalArgumentException if the specified
* <code>codePoint</code> is not a valid Unicode
* code point.
*
* @since 1.7
*/
public static String getName(int codePoint) {
if (!isValidCodePoint(codePoint)) {
throw new IllegalArgumentException();
}
String name = CharacterName.get(codePoint);
if (name != null)
return name;
if (getType(codePoint) == UNASSIGNED)
return null;
UnicodeBlock block = UnicodeBlock.of(codePoint);
if (block != null)
return block.toString().replace('_', ' ') + " "
+ Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
// should never come here
return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
}
}
/*
* Copyright 2010 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Sun designates this
* particular file as subject to the "Classpath" exception as provided
* by Sun in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
package java.lang;
import java.io.DataInputStream;
import java.io.InputStream;
import java.lang.ref.SoftReference;
import java.util.Arrays;
import java.util.zip.InflaterInputStream;
import java.security.AccessController;
import java.security.PrivilegedAction;
class CharacterName {
private static SoftReference<byte[]> refStrPool;
private static int[][] lookup;
private static synchronized byte[] initNamePool() {
byte[] strPool = null;
if (refStrPool != null && (strPool = refStrPool.get()) != null)
return strPool;
DataInputStream dis = null;
try {
dis = new DataInputStream(new InflaterInputStream(
AccessController.doPrivileged(new PrivilegedAction<InputStream>()
{
public InputStream run() {
return getClass().getResourceAsStream("uniName.dat");
}
})));
lookup = new int[(Character.MAX_CODE_POINT + 1) >> 8][];
int total = dis.readInt();
int cpEnd = dis.readInt();
byte ba[] = new byte[cpEnd];
dis.readFully(ba);
int nameOff = 0;
int cpOff = 0;
int cp = 0;
do {
int len = ba[cpOff++] & 0xff;
if (len == 0) {
len = ba[cpOff++] & 0xff;
// always big-endian
cp = ((ba[cpOff++] & 0xff) << 16) |
((ba[cpOff++] & 0xff) << 8) |
((ba[cpOff++] & 0xff));
} else {
cp++;
}
int hi = cp >> 8;
if (lookup[hi] == null) {
lookup[hi] = new int[0x100];
}
lookup[hi][cp&0xff] = (nameOff << 8) | len;
nameOff += len;
} while (cpOff < cpEnd);
strPool = new byte[total - cpEnd];
dis.readFully(strPool);
refStrPool = new SoftReference<byte[]>(strPool);
} catch (Exception x) {
throw new InternalError(x.getMessage());
} finally {
try {
if (dis != null)
dis.close();
} catch (Exception xx) {}
}
return strPool;
}
public static String get(int cp) {
byte[] strPool = null;
if (refStrPool == null || (strPool = refStrPool.get()) == null)
strPool = initNamePool();
int off = 0;
if (lookup[cp>>8] == null ||
(off = lookup[cp>>8][cp&0xff]) == 0)
return null;
return new String(strPool, 0, off >>> 8, off & 0xff); // ASCII
}
}
......@@ -29,6 +29,7 @@ import java.security.AccessController;
import java.security.PrivilegedAction;
import java.text.CharacterIterator;
import java.text.Normalizer;
import java.util.Locale;
import java.util.Map;
import java.util.ArrayList;
import java.util.HashMap;
......@@ -200,8 +201,9 @@ import java.util.Arrays;
* <td>Equivalent to java.lang.Character.isMirrored()</td></tr>
*
* <tr><th>&nbsp;</th></tr>
* <tr align="left"><th colspan="2" id="unicode">Classes for Unicode blocks and categories</th></tr>
*
* <tr align="left"><th colspan="2" id="unicode">Classes for Unicode scripts, blocks and categories</th></tr>
* * <tr><td valign="top" headers="construct unicode"><tt>\p{IsLatin}</tt></td>
* <td headers="matches">A Latin&nbsp;script character (simple <a href="#ubc">script</a>)</td></tr>
* <tr><td valign="top" headers="construct unicode"><tt>\p{InGreek}</tt></td>
* <td headers="matches">A character in the Greek&nbsp;block (simple <a href="#ubc">block</a>)</td></tr>
* <tr><td valign="top" headers="construct unicode"><tt>\p{Lu}</tt></td>
......@@ -527,25 +529,40 @@ import java.util.Arrays;
* while not equal, compile into the same pattern, which matches the character
* with hexadecimal value <tt>0x2014</tt>.
*
* <a name="ubc"> <p>Unicode blocks and categories are written with the
* <tt>\p</tt> and <tt>\P</tt> constructs as in
* Perl. <tt>\p{</tt><i>prop</i><tt>}</tt> matches if the input has the
* property <i>prop</i>, while <tt>\P{</tt><i>prop</i><tt>}</tt> does not match if
* the input has that property. Blocks are specified with the prefix
* <tt>In</tt>, as in <tt>InMongolian</tt>. Categories may be specified with
* the optional prefix <tt>Is</tt>: Both <tt>\p{L}</tt> and <tt>\p{IsL}</tt>
* denote the category of Unicode letters. Blocks and categories can be used
* both inside and outside of a character class.
*
* <a name="ubc">
* <p>Unicode scripts, blocks and categories are written with the <tt>\p</tt> and
* <tt>\P</tt> constructs as in Perl. <tt>\p{</tt><i>prop</i><tt>}</tt> matches if
* the input has the property <i>prop</i>, while <tt>\P{</tt><i>prop</i><tt>}</tt>
* does not match if the input has that property.
* <p>
* Scripts are specified either with the prefix {@code Is}, as in
* {@code IsHiragana}, or by using the {@code script} keyword (or its short
* form {@code sc})as in {@code script=Hiragana} or {@code sc=Hiragana}.
* <p>
* Blocks are specified with the prefix {@code In}, as in
* {@code InMongolian}, or by using the keyword {@code block} (or its short
* form {@code blk}) as in {@code block=Mongolian} or {@code blk=Mongolian}.
* <p>
* Categories may be specified with the optional prefix {@code Is}:
* Both {@code \p{L}} and {@code \p{IsL}} denote the category of Unicode
* letters. Same as scripts and blocks, categories can also be specified
* by using the keyword {@code general_category} (or its short form
* {@code gc}) as in {@code general_category=Lu} or {@code gc=Lu}.
* <p>
* Scripts, blocks and categories can be used both inside and outside of a
* character class.
* <p> The supported categories are those of
* <a href="http://www.unicode.org/unicode/standard/standard.html">
* <i>The Unicode Standard</i></a> in the version specified by the
* {@link java.lang.Character Character} class. The category names are those
* defined in the Standard, both normative and informative.
* The script names supported by <code>Pattern</code> are the valid script names
* accepted and defined by
* {@link java.lang.Character.UnicodeScript#forName(String) UnicodeScript.forName}.
* The block names supported by <code>Pattern</code> are the valid block names
* accepted and defined by
* {@link java.lang.Character.UnicodeBlock#forName(String) UnicodeBlock.forName}.
*
* <p>
* <a name="jcc"> <p>Categories that behave like the java.lang.Character
* boolean is<i>methodname</i> methods (except for the deprecated ones) are
* available through the same <tt>\p{</tt><i>prop</i><tt>}</tt> syntax where
......@@ -2488,13 +2505,35 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
name = new String(temp, i, j-i-1);
}
int i = name.indexOf('=');
if (i != -1) {
// property construct \p{name=value}
String value = name.substring(i + 1);
name = name.substring(0, i).toLowerCase(Locale.ENGLISH);
if ("sc".equals(name) || "script".equals(name)) {
node = unicodeScriptPropertyFor(value);
} else if ("blk".equals(name) || "block".equals(name)) {
node = unicodeBlockPropertyFor(value);
} else if ("gc".equals(name) || "general_category".equals(name)) {
node = charPropertyNodeFor(value);
} else {
throw error("Unknown Unicode property {name=<" + name + ">, "
+ "value=<" + value + ">}");
}
} else {
if (name.startsWith("In")) {
// \p{inBlockName}
node = unicodeBlockPropertyFor(name.substring(2));
} else {
if (name.startsWith("Is"))
} else if (name.startsWith("Is")) {
// \p{isGeneralCategory} and \p{isScriptName}
name = name.substring(2);
node = CharPropertyNames.charPropertyFor(name);
if (node == null)
node = unicodeScriptPropertyFor(name);
} else {
node = charPropertyNodeFor(name);
}
}
if (maybeComplement) {
if (node instanceof Category || node instanceof Block)
hasSupplementary = true;
......@@ -2503,6 +2542,21 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
return node;
}
/**
* Returns a CharProperty matching all characters belong to
* a UnicodeScript.
*/
private CharProperty unicodeScriptPropertyFor(String name) {
final Character.UnicodeScript script;
try {
script = Character.UnicodeScript.forName(name);
} catch (IllegalArgumentException iae) {
throw error("Unknown character script name {" + name + "}");
}
return new Script(script);
}
/**
* Returns a CharProperty matching all characters in a UnicodeBlock.
*/
......@@ -3566,6 +3620,19 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
}
}
/**
* Node class that matches a Unicode script
*/
static final class Script extends CharProperty {
final Character.UnicodeScript script;
Script(Character.UnicodeScript script) {
this.script = script;
}
boolean isSatisfiedBy(int ch) {
return script == Character.UnicodeScript.of(ch);
}
}
/**
* Node class that matches a Unicode category.
*/
......
/**
* @test
* @bug 6945564
* @summary Check that the j.l.Character.UnicodeScript
* @ignore don't run until #6903266 is integrated
*/
import java.io.*;
import java.lang.reflect.*;
import java.util.*;
import java.util.regex.*;
import java.lang.Character.UnicodeScript;
public class CheckScript {
public static void main(String[] args) throws Exception {
if (args.length != 1) {
System.out.println("java CharacterScript script.txt");
System.exit(1);
}
BufferedReader sbfr = new BufferedReader(new FileReader(args[0]));
Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher("");
String line = null;
HashMap<String,ArrayList<Integer>> scripts = new HashMap<>();
while ((line = sbfr.readLine()) != null) {
if (line.length() <= 1 || line.charAt(0) == '#') {
continue;
}
m.reset(line);
if (m.matches()) {
int start = Integer.parseInt(m.group(1), 16);
int end = (m.group(2)==null)?start
:Integer.parseInt(m.group(2), 16);
String name = m.group(3).toLowerCase(Locale.ENGLISH);
ArrayList<Integer> ranges = scripts.get(name);
if (ranges == null) {
ranges = new ArrayList<Integer>();
scripts.put(name, ranges);
}
ranges.add(start);
ranges.add(end);
}
}
sbfr.close();
// check all defined ranges
Integer[] ZEROSIZEARRAY = new Integer[0];
for (String name : scripts.keySet()) {
System.out.println("Checking " + name + "...");
Integer[] ranges = scripts.get(name).toArray(ZEROSIZEARRAY);
Character.UnicodeScript expected =
Character.UnicodeScript.forName(name);
int off = 0;
while (off < ranges.length) {
int start = ranges[off++];
int end = ranges[off++];
for (int cp = start; cp <= end; cp++) {
Character.UnicodeScript script =
Character.UnicodeScript.of(cp);
if (script != expected) {
throw new RuntimeException(
"UnicodeScript failed: cp=" +
Integer.toHexString(cp) +
", of(cp)=<" + script + "> but <" +
expected + "> is expected");
}
}
}
}
// check all codepoints
for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) {
Character.UnicodeScript script = Character.UnicodeScript.of(cp);
if (script == Character.UnicodeScript.UNKNOWN) {
if (Character.getType(cp) != Character.UNASSIGNED &&
Character.getType(cp) != Character.SURROGATE &&
Character.getType(cp) != Character.PRIVATE_USE)
throw new RuntimeException(
"UnicodeScript failed: cp=" +
Integer.toHexString(cp) +
", of(cp)=<" + script + "> but UNKNOWN is expected");
} else {
Integer[] ranges =
scripts.get(script.name().toLowerCase(Locale.ENGLISH))
.toArray(ZEROSIZEARRAY);
int off = 0;
boolean found = false;
while (off < ranges.length) {
int start = ranges[off++];
int end = ranges[off++];
if (cp >= start && cp <= end)
found = true;
}
if (!found) {
throw new RuntimeException(
"UnicodeScript failed: cp=" +
Integer.toHexString(cp) +
", of(cp)=<" + script +
"> but NOT in ranges of this script");
}
}
}
}
}
# Scripts-5.2.0.txt
# Date: 2009-08-22, 04:58:43 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
# Property: Script
# All code points not explicitly listed for Script
# have the value Unknown (Zzzz).
# @missing: 0000..10FFFF; Unknown
# ================================================
0000..001F ; Common # Cc [32] <control-0000>..<control-001F>
0020 ; Common # Zs SPACE
0021..0023 ; Common # Po [3] EXCLAMATION MARK..NUMBER SIGN
0024 ; Common # Sc DOLLAR SIGN
0025..0027 ; Common # Po [3] PERCENT SIGN..APOSTROPHE
0028 ; Common # Ps LEFT PARENTHESIS
0029 ; Common # Pe RIGHT PARENTHESIS
002A ; Common # Po ASTERISK
002B ; Common # Sm PLUS SIGN
002C ; Common # Po COMMA
002D ; Common # Pd HYPHEN-MINUS
002E..002F ; Common # Po [2] FULL STOP..SOLIDUS
0030..0039 ; Common # Nd [10] DIGIT ZERO..DIGIT NINE
003A..003B ; Common # Po [2] COLON..SEMICOLON
003C..003E ; Common # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN
003F..0040 ; Common # Po [2] QUESTION MARK..COMMERCIAL AT
005B ; Common # Ps LEFT SQUARE BRACKET
005C ; Common # Po REVERSE SOLIDUS
005D ; Common # Pe RIGHT SQUARE BRACKET
005E ; Common # Sk CIRCUMFLEX ACCENT
005F ; Common # Pc LOW LINE
0060 ; Common # Sk GRAVE ACCENT
007B ; Common # Ps LEFT CURLY BRACKET
007C ; Common # Sm VERTICAL LINE
007D ; Common # Pe RIGHT CURLY BRACKET
007E ; Common # Sm TILDE
007F..009F ; Common # Cc [33] <control-007F>..<control-009F>
00A0 ; Common # Zs NO-BREAK SPACE
00A1 ; Common # Po INVERTED EXCLAMATION MARK
00A2..00A5 ; Common # Sc [4] CENT SIGN..YEN SIGN
00A6..00A7 ; Common # So [2] BROKEN BAR..SECTION SIGN
00A8 ; Common # Sk DIAERESIS
00A9 ; Common # So COPYRIGHT SIGN
00AB ; Common # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
00AC ; Common # Sm NOT SIGN
00AD ; Common # Cf SOFT HYPHEN
00AE ; Common # So REGISTERED SIGN
00AF ; Common # Sk MACRON
00B0 ; Common # So DEGREE SIGN
00B1 ; Common # Sm PLUS-MINUS SIGN
00B2..00B3 ; Common # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE
00B4 ; Common # Sk ACUTE ACCENT
00B5 ; Common # L& MICRO SIGN
00B6 ; Common # So PILCROW SIGN
00B7 ; Common # Po MIDDLE DOT
00B8 ; Common # Sk CEDILLA
00B9 ; Common # No SUPERSCRIPT ONE
00BB ; Common # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
00BC..00BE ; Common # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS
00BF ; Common # Po INVERTED QUESTION MARK
00D7 ; Common # Sm MULTIPLICATION SIGN
00F7 ; Common # Sm DIVISION SIGN
02B9..02C1 ; Common # Lm [9] MODIFIER LETTER PRIME..MODIFIER LETTER REVERSED GLOTTAL STOP
02C2..02C5 ; Common # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD
02C6..02D1 ; Common # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
02D2..02DF ; Common # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT
02E5..02EB ; Common # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK
02EC ; Common # Lm MODIFIER LETTER VOICING
02ED ; Common # Sk MODIFIER LETTER UNASPIRATED
02EE ; Common # Lm MODIFIER LETTER DOUBLE APOSTROPHE
02EF..02FF ; Common # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW
0374 ; Common # Lm GREEK NUMERAL SIGN
037E ; Common # Po GREEK QUESTION MARK
0385 ; Common # Sk GREEK DIALYTIKA TONOS
0387 ; Common # Po GREEK ANO TELEIA
0589 ; Common # Po ARMENIAN FULL STOP
0600..0603 ; Common # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
060C ; Common # Po ARABIC COMMA
061B ; Common # Po ARABIC SEMICOLON
061F ; Common # Po ARABIC QUESTION MARK
0640 ; Common # Lm ARABIC TATWEEL
0660..0669 ; Common # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
06DD ; Common # Cf ARABIC END OF AYAH
0964..0965 ; Common # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
0970 ; Common # Po DEVANAGARI ABBREVIATION SIGN
0CF1..0CF2 ; Common # So [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
0E3F ; Common # Sc THAI CURRENCY SYMBOL BAHT
0FD5..0FD8 ; Common # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS
10FB ; Common # Po GEORGIAN PARAGRAPH SEPARATOR
16EB..16ED ; Common # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION
1735..1736 ; Common # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
1802..1803 ; Common # Po [2] MONGOLIAN COMMA..MONGOLIAN FULL STOP
1805 ; Common # Po MONGOLIAN FOUR DOTS
1CD3 ; Common # Po VEDIC SIGN NIHSHVASA
1CE1 ; Common # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
1CE9..1CEC ; Common # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
1CEE..1CF1 ; Common # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA
1CF2 ; Common # Mc VEDIC SIGN ARDHAVISARGA
2000..200A ; Common # Zs [11] EN QUAD..HAIR SPACE
200B ; Common # Cf ZERO WIDTH SPACE
200E..200F ; Common # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
2010..2015 ; Common # Pd [6] HYPHEN..HORIZONTAL BAR
2016..2017 ; Common # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE
2018 ; Common # Pi LEFT SINGLE QUOTATION MARK
2019 ; Common # Pf RIGHT SINGLE QUOTATION MARK
201A ; Common # Ps SINGLE LOW-9 QUOTATION MARK
201B..201C ; Common # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK
201D ; Common # Pf RIGHT DOUBLE QUOTATION MARK
201E ; Common # Ps DOUBLE LOW-9 QUOTATION MARK
201F ; Common # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK
2020..2027 ; Common # Po [8] DAGGER..HYPHENATION POINT
2028 ; Common # Zl LINE SEPARATOR
2029 ; Common # Zp PARAGRAPH SEPARATOR
202A..202E ; Common # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
202F ; Common # Zs NARROW NO-BREAK SPACE
2030..2038 ; Common # Po [9] PER MILLE SIGN..CARET
2039 ; Common # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK
203A ; Common # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
203B..203E ; Common # Po [4] REFERENCE MARK..OVERLINE
203F..2040 ; Common # Pc [2] UNDERTIE..CHARACTER TIE
2041..2043 ; Common # Po [3] CARET INSERTION POINT..HYPHEN BULLET
2044 ; Common # Sm FRACTION SLASH
2045 ; Common # Ps LEFT SQUARE BRACKET WITH QUILL
2046 ; Common # Pe RIGHT SQUARE BRACKET WITH QUILL
2047..2051 ; Common # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY
2052 ; Common # Sm COMMERCIAL MINUS SIGN
2053 ; Common # Po SWUNG DASH
2054 ; Common # Pc INVERTED UNDERTIE
2055..205E ; Common # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS
205F ; Common # Zs MEDIUM MATHEMATICAL SPACE
2060..2064 ; Common # Cf [5] WORD JOINER..INVISIBLE PLUS
206A..206F ; Common # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
2070 ; Common # No SUPERSCRIPT ZERO
2074..2079 ; Common # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE
207A..207C ; Common # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN
207D ; Common # Ps SUPERSCRIPT LEFT PARENTHESIS
207E ; Common # Pe SUPERSCRIPT RIGHT PARENTHESIS
2080..2089 ; Common # No [10] SUBSCRIPT ZERO..SUBSCRIPT NINE
208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS
208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS
20A0..20B8 ; Common # Sc [25] EURO-CURRENCY SIGN..TENGE SIGN
2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
2102 ; Common # L& DOUBLE-STRUCK CAPITAL C
2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA
2107 ; Common # L& EULER CONSTANT
2108..2109 ; Common # So [2] SCRUPLE..DEGREE FAHRENHEIT
210A..2113 ; Common # L& [10] SCRIPT SMALL G..SCRIPT SMALL L
2114 ; Common # So L B BAR SYMBOL
2115 ; Common # L& DOUBLE-STRUCK CAPITAL N
2116..2118 ; Common # So [3] NUMERO SIGN..SCRIPT CAPITAL P
2119..211D ; Common # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
211E..2123 ; Common # So [6] PRESCRIPTION TAKE..VERSICLE
2124 ; Common # L& DOUBLE-STRUCK CAPITAL Z
2125 ; Common # So OUNCE SIGN
2127 ; Common # So INVERTED OHM SIGN
2128 ; Common # L& BLACK-LETTER CAPITAL Z
2129 ; Common # So TURNED GREEK SMALL LETTER IOTA
212C..212D ; Common # L& [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C
212E ; Common # So ESTIMATED SYMBOL
212F..2131 ; Common # L& [3] SCRIPT SMALL E..SCRIPT CAPITAL F
2133..2134 ; Common # L& [2] SCRIPT CAPITAL M..SCRIPT SMALL O
2135..2138 ; Common # Lo [4] ALEF SYMBOL..DALET SYMBOL
2139 ; Common # L& INFORMATION SOURCE
213A..213B ; Common # So [2] ROTATED CAPITAL Q..FACSIMILE SIGN
213C..213F ; Common # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI
2140..2144 ; Common # Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y
2145..2149 ; Common # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J
214A ; Common # So PROPERTY LINE
214B ; Common # Sm TURNED AMPERSAND
214C..214D ; Common # So [2] PER SIGN..AKTIESELSKAB
214F ; Common # So SYMBOL FOR SAMARITAN SOURCE
2150..215F ; Common # No [16] VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE
2189 ; Common # No VULGAR FRACTION ZERO THIRDS
2190..2194 ; Common # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW
2195..2199 ; Common # So [5] UP DOWN ARROW..SOUTH WEST ARROW
219A..219B ; Common # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE
219C..219F ; Common # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW
21A0 ; Common # Sm RIGHTWARDS TWO HEADED ARROW
21A1..21A2 ; Common # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL
21A3 ; Common # Sm RIGHTWARDS ARROW WITH TAIL
21A4..21A5 ; Common # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR
21A6 ; Common # Sm RIGHTWARDS ARROW FROM BAR
21A7..21AD ; Common # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW
21AE ; Common # Sm LEFT RIGHT ARROW WITH STROKE
21AF..21CD ; Common # So [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE
21CE..21CF ; Common # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE
21D0..21D1 ; Common # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW
21D2 ; Common # Sm RIGHTWARDS DOUBLE ARROW
21D3 ; Common # So DOWNWARDS DOUBLE ARROW
21D4 ; Common # Sm LEFT RIGHT DOUBLE ARROW
21D5..21F3 ; Common # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW
21F4..22FF ; Common # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP
2300..2307 ; Common # So [8] DIAMETER SIGN..WAVY LINE
2308..230B ; Common # Sm [4] LEFT CEILING..RIGHT FLOOR
230C..231F ; Common # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER
2320..2321 ; Common # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL
2322..2328 ; Common # So [7] FROWN..KEYBOARD
2329 ; Common # Ps LEFT-POINTING ANGLE BRACKET
232A ; Common # Pe RIGHT-POINTING ANGLE BRACKET
232B..237B ; Common # So [81] ERASE TO THE LEFT..NOT CHECK MARK
237C ; Common # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW
237D..239A ; Common # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL
239B..23B3 ; Common # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
23B4..23DB ; Common # So [40] TOP SQUARE BRACKET..FUSE
23DC..23E1 ; Common # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
23E2..23E8 ; Common # So [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL
2400..2426 ; Common # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
2440..244A ; Common # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
2460..249B ; Common # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
249C..24E9 ; Common # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
24EA..24FF ; Common # No [22] CIRCLED DIGIT ZERO..NEGATIVE CIRCLED DIGIT ZERO
2500..25B6 ; Common # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE
25B7 ; Common # Sm WHITE RIGHT-POINTING TRIANGLE
25B8..25C0 ; Common # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE
25C1 ; Common # Sm WHITE LEFT-POINTING TRIANGLE
25C2..25F7 ; Common # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT
25F8..25FF ; Common # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE
2600..266E ; Common # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN
266F ; Common # Sm MUSIC SHARP SIGN
2670..26CD ; Common # So [94] WEST SYRIAC CROSS..DISABLED CAR
26CF..26E1 ; Common # So [19] PICK..RESTRICTED LEFT ENTRY-2
26E3 ; Common # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
26E8..26FF ; Common # So [24] BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
2701..2704 ; Common # So [4] UPPER BLADE SCISSORS..WHITE SCISSORS
2706..2709 ; Common # So [4] TELEPHONE LOCATION SIGN..ENVELOPE
270C..2727 ; Common # So [28] VICTORY HAND..WHITE FOUR POINTED STAR
2729..274B ; Common # So [35] STRESS OUTLINED WHITE STAR..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
274D ; Common # So SHADOWED WHITE CIRCLE
274F..2752 ; Common # So [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE
2756..275E ; Common # So [9] BLACK DIAMOND MINUS WHITE X..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
2761..2767 ; Common # So [7] CURVED STEM PARAGRAPH SIGN ORNAMENT..ROTATED FLORAL HEART BULLET
2768 ; Common # Ps MEDIUM LEFT PARENTHESIS ORNAMENT
2769 ; Common # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT
276A ; Common # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
276B ; Common # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT
276C ; Common # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT
276D ; Common # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT
276E ; Common # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT
276F ; Common # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
2770 ; Common # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
2771 ; Common # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
2772 ; Common # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT
2773 ; Common # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
2774 ; Common # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT
2775 ; Common # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT
2776..2793 ; Common # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
2794 ; Common # So HEAVY WIDE-HEADED RIGHTWARDS ARROW
2798..27AF ; Common # So [24] HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW
27B1..27BE ; Common # So [14] NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW
27C0..27C4 ; Common # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET
27C5 ; Common # Ps LEFT S-SHAPED BAG DELIMITER
27C6 ; Common # Pe RIGHT S-SHAPED BAG DELIMITER
27C7..27CA ; Common # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE
27CC ; Common # Sm LONG DIVISION
27D0..27E5 ; Common # Sm [22] WHITE DIAMOND WITH CENTRED DOT..WHITE SQUARE WITH RIGHTWARDS TICK
27E6 ; Common # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET
27E7 ; Common # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET
27E8 ; Common # Ps MATHEMATICAL LEFT ANGLE BRACKET
27E9 ; Common # Pe MATHEMATICAL RIGHT ANGLE BRACKET
27EA ; Common # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET
27EB ; Common # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET
27EC ; Common # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET
27ED ; Common # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET
27EE ; Common # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS
27EF ; Common # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS
27F0..27FF ; Common # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW
2900..2982 ; Common # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON
2983 ; Common # Ps LEFT WHITE CURLY BRACKET
2984 ; Common # Pe RIGHT WHITE CURLY BRACKET
2985 ; Common # Ps LEFT WHITE PARENTHESIS
2986 ; Common # Pe RIGHT WHITE PARENTHESIS
2987 ; Common # Ps Z NOTATION LEFT IMAGE BRACKET
2988 ; Common # Pe Z NOTATION RIGHT IMAGE BRACKET
2989 ; Common # Ps Z NOTATION LEFT BINDING BRACKET
298A ; Common # Pe Z NOTATION RIGHT BINDING BRACKET
298B ; Common # Ps LEFT SQUARE BRACKET WITH UNDERBAR
298C ; Common # Pe RIGHT SQUARE BRACKET WITH UNDERBAR
298D ; Common # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
298E ; Common # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
298F ; Common # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
2990 ; Common # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER
2991 ; Common # Ps LEFT ANGLE BRACKET WITH DOT
2992 ; Common # Pe RIGHT ANGLE BRACKET WITH DOT
2993 ; Common # Ps LEFT ARC LESS-THAN BRACKET
2994 ; Common # Pe RIGHT ARC GREATER-THAN BRACKET
2995 ; Common # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET
2996 ; Common # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET
2997 ; Common # Ps LEFT BLACK TORTOISE SHELL BRACKET
2998 ; Common # Pe RIGHT BLACK TORTOISE SHELL BRACKET
2999..29D7 ; Common # Sm [63] DOTTED FENCE..BLACK HOURGLASS
29D8 ; Common # Ps LEFT WIGGLY FENCE
29D9 ; Common # Pe RIGHT WIGGLY FENCE
29DA ; Common # Ps LEFT DOUBLE WIGGLY FENCE
29DB ; Common # Pe RIGHT DOUBLE WIGGLY FENCE
29DC..29FB ; Common # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS
29FC ; Common # Ps LEFT-POINTING CURVED ANGLE BRACKET
29FD ; Common # Pe RIGHT-POINTING CURVED ANGLE BRACKET
29FE..2AFF ; Common # Sm [258] TINY..N-ARY WHITE VERTICAL BAR
2B00..2B2F ; Common # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE
2B30..2B44 ; Common # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET
2B45..2B46 ; Common # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW
2B47..2B4C ; Common # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
2B50..2B59 ; Common # So [10] WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE
2E00..2E01 ; Common # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
2E02 ; Common # Pi LEFT SUBSTITUTION BRACKET
2E03 ; Common # Pf RIGHT SUBSTITUTION BRACKET
2E04 ; Common # Pi LEFT DOTTED SUBSTITUTION BRACKET
2E05 ; Common # Pf RIGHT DOTTED SUBSTITUTION BRACKET
2E06..2E08 ; Common # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER
2E09 ; Common # Pi LEFT TRANSPOSITION BRACKET
2E0A ; Common # Pf RIGHT TRANSPOSITION BRACKET
2E0B ; Common # Po RAISED SQUARE
2E0C ; Common # Pi LEFT RAISED OMISSION BRACKET
2E0D ; Common # Pf RIGHT RAISED OMISSION BRACKET
2E0E..2E16 ; Common # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE
2E17 ; Common # Pd DOUBLE OBLIQUE HYPHEN
2E18..2E19 ; Common # Po [2] INVERTED INTERROBANG..PALM BRANCH
2E1A ; Common # Pd HYPHEN WITH DIAERESIS
2E1B ; Common # Po TILDE WITH RING ABOVE
2E1C ; Common # Pi LEFT LOW PARAPHRASE BRACKET
2E1D ; Common # Pf RIGHT LOW PARAPHRASE BRACKET
2E1E..2E1F ; Common # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW
2E20 ; Common # Pi LEFT VERTICAL BAR WITH QUILL
2E21 ; Common # Pf RIGHT VERTICAL BAR WITH QUILL
2E22 ; Common # Ps TOP LEFT HALF BRACKET
2E23 ; Common # Pe TOP RIGHT HALF BRACKET
2E24 ; Common # Ps BOTTOM LEFT HALF BRACKET
2E25 ; Common # Pe BOTTOM RIGHT HALF BRACKET
2E26 ; Common # Ps LEFT SIDEWAYS U BRACKET
2E27 ; Common # Pe RIGHT SIDEWAYS U BRACKET
2E28 ; Common # Ps LEFT DOUBLE PARENTHESIS
2E29 ; Common # Pe RIGHT DOUBLE PARENTHESIS
2E2A..2E2E ; Common # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK
2E2F ; Common # Lm VERTICAL TILDE
2E30..2E31 ; Common # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT
2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
3000 ; Common # Zs IDEOGRAPHIC SPACE
3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
3004 ; Common # So JAPANESE INDUSTRIAL STANDARD SYMBOL
3006 ; Common # Lo IDEOGRAPHIC CLOSING MARK
3008 ; Common # Ps LEFT ANGLE BRACKET
3009 ; Common # Pe RIGHT ANGLE BRACKET
300A ; Common # Ps LEFT DOUBLE ANGLE BRACKET
300B ; Common # Pe RIGHT DOUBLE ANGLE BRACKET
300C ; Common # Ps LEFT CORNER BRACKET
300D ; Common # Pe RIGHT CORNER BRACKET
300E ; Common # Ps LEFT WHITE CORNER BRACKET
300F ; Common # Pe RIGHT WHITE CORNER BRACKET
3010 ; Common # Ps LEFT BLACK LENTICULAR BRACKET
3011 ; Common # Pe RIGHT BLACK LENTICULAR BRACKET
3012..3013 ; Common # So [2] POSTAL MARK..GETA MARK
3014 ; Common # Ps LEFT TORTOISE SHELL BRACKET
3015 ; Common # Pe RIGHT TORTOISE SHELL BRACKET
3016 ; Common # Ps LEFT WHITE LENTICULAR BRACKET
3017 ; Common # Pe RIGHT WHITE LENTICULAR BRACKET
3018 ; Common # Ps LEFT WHITE TORTOISE SHELL BRACKET
3019 ; Common # Pe RIGHT WHITE TORTOISE SHELL BRACKET
301A ; Common # Ps LEFT WHITE SQUARE BRACKET
301B ; Common # Pe RIGHT WHITE SQUARE BRACKET
301C ; Common # Pd WAVE DASH
301D ; Common # Ps REVERSED DOUBLE PRIME QUOTATION MARK
301E..301F ; Common # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK
3020 ; Common # So POSTAL MARK FACE
3030 ; Common # Pd WAVY DASH
3031..3035 ; Common # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
3036..3037 ; Common # So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL
303C ; Common # Lo MASU MARK
303D ; Common # Po PART ALTERNATION MARK
303E..303F ; Common # So [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE
309B..309C ; Common # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
30A0 ; Common # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN
30FB ; Common # Po KATAKANA MIDDLE DOT
30FC ; Common # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK
3190..3191 ; Common # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
3192..3195 ; Common # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
3196..319F ; Common # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
31C0..31E3 ; Common # So [36] CJK STROKE T..CJK STROKE Q
3220..3229 ; Common # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
322A..3250 ; Common # So [39] PARENTHESIZED IDEOGRAPH MOON..PARTNERSHIP SIGN
3251..325F ; Common # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE
327F ; Common # So KOREAN STANDARD SYMBOL
3280..3289 ; Common # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN
328A..32B0 ; Common # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT
32B1..32BF ; Common # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY
32C0..32CF ; Common # So [16] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..LIMITED LIABILITY SIGN
3358..33FF ; Common # So [168] IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO..SQUARE GAL
4DC0..4DFF ; Common # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION
A700..A716 ; Common # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR
A717..A71F ; Common # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
A720..A721 ; Common # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE
A788 ; Common # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT
A789..A78A ; Common # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN
A830..A835 ; Common # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS
A836..A837 ; Common # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK
A838 ; Common # Sc NORTH INDIC RUPEE MARK
A839 ; Common # So NORTH INDIC QUANTITY MARK
FD3E ; Common # Ps ORNATE LEFT PARENTHESIS
FD3F ; Common # Pe ORNATE RIGHT PARENTHESIS
FDFD ; Common # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
FE10..FE16 ; Common # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK
FE17 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET
FE18 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET
FE19 ; Common # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
FE30 ; Common # Po PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
FE31..FE32 ; Common # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH
FE33..FE34 ; Common # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
FE35 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
FE36 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS
FE37 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET
FE38 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET
FE39 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET
FE3A ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET
FE3B ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET
FE3C ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET
FE3D ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET
FE3E ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET
FE3F ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET
FE40 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET
FE41 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET
FE42 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET
FE43 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET
FE44 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET
FE45..FE46 ; Common # Po [2] SESAME DOT..WHITE SESAME DOT
FE47 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET
FE48 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET
FE49..FE4C ; Common # Po [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE
FE4D..FE4F ; Common # Pc [3] DASHED LOW LINE..WAVY LOW LINE
FE50..FE52 ; Common # Po [3] SMALL COMMA..SMALL FULL STOP
FE54..FE57 ; Common # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK
FE58 ; Common # Pd SMALL EM DASH
FE59 ; Common # Ps SMALL LEFT PARENTHESIS
FE5A ; Common # Pe SMALL RIGHT PARENTHESIS
FE5B ; Common # Ps SMALL LEFT CURLY BRACKET
FE5C ; Common # Pe SMALL RIGHT CURLY BRACKET
FE5D ; Common # Ps SMALL LEFT TORTOISE SHELL BRACKET
FE5E ; Common # Pe SMALL RIGHT TORTOISE SHELL BRACKET
FE5F..FE61 ; Common # Po [3] SMALL NUMBER SIGN..SMALL ASTERISK
FE62 ; Common # Sm SMALL PLUS SIGN
FE63 ; Common # Pd SMALL HYPHEN-MINUS
FE64..FE66 ; Common # Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN
FE68 ; Common # Po SMALL REVERSE SOLIDUS
FE69 ; Common # Sc SMALL DOLLAR SIGN
FE6A..FE6B ; Common # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT
FEFF ; Common # Cf ZERO WIDTH NO-BREAK SPACE
FF01..FF03 ; Common # Po [3] FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN
FF04 ; Common # Sc FULLWIDTH DOLLAR SIGN
FF05..FF07 ; Common # Po [3] FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE
FF08 ; Common # Ps FULLWIDTH LEFT PARENTHESIS
FF09 ; Common # Pe FULLWIDTH RIGHT PARENTHESIS
FF0A ; Common # Po FULLWIDTH ASTERISK
FF0B ; Common # Sm FULLWIDTH PLUS SIGN
FF0C ; Common # Po FULLWIDTH COMMA
FF0D ; Common # Pd FULLWIDTH HYPHEN-MINUS
FF0E..FF0F ; Common # Po [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS
FF10..FF19 ; Common # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
FF1A..FF1B ; Common # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON
FF1C..FF1E ; Common # Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN
FF1F..FF20 ; Common # Po [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT
FF3B ; Common # Ps FULLWIDTH LEFT SQUARE BRACKET
FF3C ; Common # Po FULLWIDTH REVERSE SOLIDUS
FF3D ; Common # Pe FULLWIDTH RIGHT SQUARE BRACKET
FF3E ; Common # Sk FULLWIDTH CIRCUMFLEX ACCENT
FF3F ; Common # Pc FULLWIDTH LOW LINE
FF40 ; Common # Sk FULLWIDTH GRAVE ACCENT
FF5B ; Common # Ps FULLWIDTH LEFT CURLY BRACKET
FF5C ; Common # Sm FULLWIDTH VERTICAL LINE
FF5D ; Common # Pe FULLWIDTH RIGHT CURLY BRACKET
FF5E ; Common # Sm FULLWIDTH TILDE
FF5F ; Common # Ps FULLWIDTH LEFT WHITE PARENTHESIS
FF60 ; Common # Pe FULLWIDTH RIGHT WHITE PARENTHESIS
FF61 ; Common # Po HALFWIDTH IDEOGRAPHIC FULL STOP
FF62 ; Common # Ps HALFWIDTH LEFT CORNER BRACKET
FF63 ; Common # Pe HALFWIDTH RIGHT CORNER BRACKET
FF64..FF65 ; Common # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT
FF70 ; Common # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
FF9E..FF9F ; Common # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
FFE0..FFE1 ; Common # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN
FFE2 ; Common # Sm FULLWIDTH NOT SIGN
FFE3 ; Common # Sk FULLWIDTH MACRON
FFE4 ; Common # So FULLWIDTH BROKEN BAR
FFE5..FFE6 ; Common # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN
FFE8 ; Common # So HALFWIDTH FORMS LIGHT VERTICAL
FFE9..FFEC ; Common # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW
FFED..FFEE ; Common # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE
FFF9..FFFB ; Common # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
10100..10101 ; Common # Po [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT
10102 ; Common # So AEGEAN CHECK MARK
10107..10133 ; Common # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
10137..1013F ; Common # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
10190..1019B ; Common # So [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
101D0..101FC ; Common # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
1D000..1D0F5 ; Common # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
1D100..1D126 ; Common # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
1D129..1D164 ; Common # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
1D165..1D166 ; Common # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
1D16A..1D16C ; Common # So [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3
1D16D..1D172 ; Common # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
1D173..1D17A ; Common # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
1D183..1D184 ; Common # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN
1D18C..1D1A9 ; Common # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH
1D1AE..1D1DD ; Common # So [48] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL PES SUBPUNCTIS
1D300..1D356 ; Common # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
1D360..1D371 ; Common # No [18] COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE
1D400..1D454 ; Common # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
1D456..1D49C ; Common # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
1D49E..1D49F ; Common # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
1D4A2 ; Common # L& MATHEMATICAL SCRIPT CAPITAL G
1D4A5..1D4A6 ; Common # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K
1D4A9..1D4AC ; Common # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q
1D4AE..1D4B9 ; Common # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D
1D4BB ; Common # L& MATHEMATICAL SCRIPT SMALL F
1D4BD..1D4C3 ; Common # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N
1D4C5..1D505 ; Common # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B
1D507..1D50A ; Common # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G
1D50D..1D514 ; Common # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q
1D516..1D51C ; Common # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y
1D51E..1D539 ; Common # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B
1D53B..1D53E ; Common # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G
1D540..1D544 ; Common # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M
1D546 ; Common # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O
1D54A..1D550 ; Common # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
1D552..1D6A5 ; Common # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J
1D6A8..1D6C0 ; Common # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA
1D6C1 ; Common # Sm MATHEMATICAL BOLD NABLA
1D6C2..1D6DA ; Common # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA
1D6DB ; Common # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
1D6DC..1D6FA ; Common # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA
1D6FB ; Common # Sm MATHEMATICAL ITALIC NABLA
1D6FC..1D714 ; Common # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA
1D715 ; Common # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL
1D716..1D734 ; Common # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA
1D735 ; Common # Sm MATHEMATICAL BOLD ITALIC NABLA
1D736..1D74E ; Common # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA
1D74F ; Common # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL
1D750..1D76E ; Common # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
1D76F ; Common # Sm MATHEMATICAL SANS-SERIF BOLD NABLA
1D770..1D788 ; Common # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA
1D789 ; Common # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL
1D78A..1D7A8 ; Common # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
1D7A9 ; Common # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA
1D7AA..1D7C2 ; Common # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
1D7C3 ; Common # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
1D7C4..1D7CB ; Common # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
1D7CE..1D7FF ; Common # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
1F000..1F02B ; Common # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK
1F030..1F093 ; Common # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
1F100..1F10A ; Common # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA
1F110..1F12E ; Common # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
1F131 ; Common # So SQUARED LATIN CAPITAL LETTER B
1F13D ; Common # So SQUARED LATIN CAPITAL LETTER N
1F13F ; Common # So SQUARED LATIN CAPITAL LETTER P
1F142 ; Common # So SQUARED LATIN CAPITAL LETTER S
1F146 ; Common # So SQUARED LATIN CAPITAL LETTER W
1F14A..1F14E ; Common # So [5] SQUARED HV..SQUARED PPV
1F157 ; Common # So NEGATIVE CIRCLED LATIN CAPITAL LETTER H
1F15F ; Common # So NEGATIVE CIRCLED LATIN CAPITAL LETTER P
1F179 ; Common # So NEGATIVE SQUARED LATIN CAPITAL LETTER J
1F17B..1F17C ; Common # So [2] NEGATIVE SQUARED LATIN CAPITAL LETTER L..NEGATIVE SQUARED LATIN CAPITAL LETTER M
1F17F ; Common # So NEGATIVE SQUARED LATIN CAPITAL LETTER P
1F18A..1F18D ; Common # So [4] CROSSED NEGATIVE SQUARED LATIN CAPITAL LETTER P..NEGATIVE SQUARED SA
1F190 ; Common # So SQUARE DJ
1F210..1F231 ; Common # So [34] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-6253
1F240..1F248 ; Common # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
E0001 ; Common # Cf LANGUAGE TAG
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
# Total code points: 5395
# ================================================
0041..005A ; Latin # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
0061..007A ; Latin # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z
00AA ; Latin # L& FEMININE ORDINAL INDICATOR
00BA ; Latin # L& MASCULINE ORDINAL INDICATOR
00C0..00D6 ; Latin # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS
00D8..00F6 ; Latin # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS
00F8..01BA ; Latin # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL
01BB ; Latin # Lo LATIN LETTER TWO WITH STROKE
01BC..01BF ; Latin # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN
01C0..01C3 ; Latin # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
01C4..0293 ; Latin # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL
0294 ; Latin # Lo LATIN LETTER GLOTTAL STOP
0295..02AF ; Latin # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
02B0..02B8 ; Latin # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y
02E0..02E4 ; Latin # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
1D00..1D25 ; Latin # L& [38] LATIN LETTER SMALL CAPITAL A..LATIN LETTER AIN
1D2C..1D5C ; Latin # Lm [49] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL AIN
1D62..1D65 ; Latin # L& [4] LATIN SUBSCRIPT SMALL LETTER I..LATIN SUBSCRIPT SMALL LETTER V
1D6B..1D77 ; Latin # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G
1D79..1D9A ; Latin # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
1D9B..1DBE ; Latin # Lm [36] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL EZH
1E00..1EFF ; Latin # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP
2071 ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER I
207F ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER N
2090..2094 ; Latin # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
212A..212B ; Latin # L& [2] KELVIN SIGN..ANGSTROM SIGN
2132 ; Latin # L& TURNED CAPITAL F
214E ; Latin # L& TURNED SMALL F
2160..2182 ; Latin # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND
2183..2184 ; Latin # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C
2185..2188 ; Latin # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND
2C60..2C7C ; Latin # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J
2C7D ; Latin # Lm MODIFIER LETTER CAPITAL V
2C7E..2C7F ; Latin # L& [2] LATIN CAPITAL LETTER S WITH SWASH TAIL..LATIN CAPITAL LETTER Z WITH SWASH TAIL
A722..A76F ; Latin # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON
A770 ; Latin # Lm MODIFIER LETTER US
A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
A78B..A78C ; Latin # L& [2] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER SALTILLO
A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M
FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
# Total code points: 1244
# ================================================
0370..0373 ; Greek # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI
0375 ; Greek # Sk GREEK LOWER NUMERAL SIGN
0376..0377 ; Greek # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
037A ; Greek # Lm GREEK YPOGEGRAMMENI
037B..037D ; Greek # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
0384 ; Greek # Sk GREEK TONOS
0386 ; Greek # L& GREEK CAPITAL LETTER ALPHA WITH TONOS
0388..038A ; Greek # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
038C ; Greek # L& GREEK CAPITAL LETTER OMICRON WITH TONOS
038E..03A1 ; Greek # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO
03A3..03E1 ; Greek # L& [63] GREEK CAPITAL LETTER SIGMA..GREEK SMALL LETTER SAMPI
03F0..03F5 ; Greek # L& [6] GREEK KAPPA SYMBOL..GREEK LUNATE EPSILON SYMBOL
03F6 ; Greek # Sm GREEK REVERSED LUNATE EPSILON SYMBOL
03F7..03FF ; Greek # L& [9] GREEK CAPITAL LETTER SHO..GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
1D26..1D2A ; Greek # L& [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
1D5D..1D61 ; Greek # Lm [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
1D66..1D6A ; Greek # L& [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
1DBF ; Greek # Lm MODIFIER LETTER SMALL THETA
1F00..1F15 ; Greek # L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
1F18..1F1D ; Greek # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
1F20..1F45 ; Greek # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
1F48..1F4D ; Greek # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
1F50..1F57 ; Greek # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
1F59 ; Greek # L& GREEK CAPITAL LETTER UPSILON WITH DASIA
1F5B ; Greek # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
1F5D ; Greek # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
1F5F..1F7D ; Greek # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA
1F80..1FB4 ; Greek # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
1FB6..1FBC ; Greek # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
1FBD ; Greek # Sk GREEK KORONIS
1FBE ; Greek # L& GREEK PROSGEGRAMMENI
1FBF..1FC1 ; Greek # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
1FC2..1FC4 ; Greek # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
1FC6..1FCC ; Greek # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
1FCD..1FCF ; Greek # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
1FD0..1FD3 ; Greek # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
1FD6..1FDB ; Greek # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA
1FDD..1FDF ; Greek # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI
1FE0..1FEC ; Greek # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
1FED..1FEF ; Greek # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA
1FF2..1FF4 ; Greek # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FF6..1FFC ; Greek # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
1FFD..1FFE ; Greek # Sk [2] GREEK OXIA..GREEK DASIA
2126 ; Greek # L& OHM SIGN
10140..10174 ; Greek # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS
10175..10178 ; Greek # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN
10179..10189 ; Greek # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN
1018A ; Greek # No GREEK ZERO SIGN
1D200..1D241 ; Greek # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
1D242..1D244 ; Greek # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
1D245 ; Greek # So GREEK MUSICAL LEIMMA
# Total code points: 511
# ================================================
0400..0481 ; Cyrillic # L& [130] CYRILLIC CAPITAL LETTER IE WITH GRAVE..CYRILLIC SMALL LETTER KOPPA
0482 ; Cyrillic # So CYRILLIC THOUSANDS SIGN
0483..0484 ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PALATALIZATION
0487 ; Cyrillic # Mn COMBINING CYRILLIC POKRYTIE
0488..0489 ; Cyrillic # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
048A..0525 ; Cyrillic # L& [156] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER PE WITH DESCENDER
1D2B ; Cyrillic # L& CYRILLIC LETTER SMALL CAPITAL EL
1D78 ; Cyrillic # Lm MODIFIER LETTER CYRILLIC EN
2DE0..2DFF ; Cyrillic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
A640..A65F ; Cyrillic # L& [32] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER YN
A662..A66D ; Cyrillic # L& [12] CYRILLIC CAPITAL LETTER SOFT DE..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
A66E ; Cyrillic # Lo CYRILLIC LETTER MULTIOCULAR O
A66F ; Cyrillic # Mn COMBINING CYRILLIC VZMET
A670..A672 ; Cyrillic # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
A673 ; Cyrillic # Po SLAVONIC ASTERISK
A67C..A67D ; Cyrillic # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
A67E ; Cyrillic # Po CYRILLIC KAVYKA
A67F ; Cyrillic # Lm CYRILLIC PAYEROK
A680..A697 ; Cyrillic # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE
# Total code points: 404
# ================================================
0531..0556 ; Armenian # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
0559 ; Armenian # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
055A..055F ; Armenian # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
0561..0587 ; Armenian # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
058A ; Armenian # Pd ARMENIAN HYPHEN
FB13..FB17 ; Armenian # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
# Total code points: 90
# ================================================
0591..05BD ; Hebrew # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
05BE ; Hebrew # Pd HEBREW PUNCTUATION MAQAF
05BF ; Hebrew # Mn HEBREW POINT RAFE
05C0 ; Hebrew # Po HEBREW PUNCTUATION PASEQ
05C1..05C2 ; Hebrew # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
05C3 ; Hebrew # Po HEBREW PUNCTUATION SOF PASUQ
05C4..05C5 ; Hebrew # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
05C6 ; Hebrew # Po HEBREW PUNCTUATION NUN HAFUKHA
05C7 ; Hebrew # Mn HEBREW POINT QAMATS QATAN
05D0..05EA ; Hebrew # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV
05F0..05F2 ; Hebrew # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
05F3..05F4 ; Hebrew # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM
FB1D ; Hebrew # Lo HEBREW LETTER YOD WITH HIRIQ
FB1E ; Hebrew # Mn HEBREW POINT JUDEO-SPANISH VARIKA
FB1F..FB28 ; Hebrew # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
FB29 ; Hebrew # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN
FB2A..FB36 ; Hebrew # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH
FB38..FB3C ; Hebrew # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
FB3E ; Hebrew # Lo HEBREW LETTER MEM WITH DAGESH
FB40..FB41 ; Hebrew # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
FB43..FB44 ; Hebrew # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED
# Total code points: 133
# ================================================
0606..0608 ; Arabic # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY
0609..060A ; Arabic # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN
060B ; Arabic # Sc AFGHANI SIGN
060D ; Arabic # Po ARABIC DATE SEPARATOR
060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
0610..061A ; Arabic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK
0621..063F ; Arabic # Lo [31] ARABIC LETTER HAMZA..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
0656..065E ; Arabic # Mn [9] ARABIC SUBSCRIPT ALEF..ARABIC FATHA WITH TWO DOTS
066A..066D ; Arabic # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
066E..066F ; Arabic # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
0671..06D3 ; Arabic # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
06D4 ; Arabic # Po ARABIC FULL STOP
06D5 ; Arabic # Lo ARABIC LETTER AE
06D6..06DC ; Arabic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
06DE ; Arabic # Me ARABIC START OF RUB EL HIZB
06DF..06E4 ; Arabic # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
06E5..06E6 ; Arabic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH
06E7..06E8 ; Arabic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
06E9 ; Arabic # So ARABIC PLACE OF SAJDAH
06EA..06ED ; Arabic # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
06EE..06EF ; Arabic # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V
06F0..06F9 ; Arabic # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
06FA..06FC ; Arabic # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW
06FD..06FE ; Arabic # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN
06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V
0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
FD50..FD8F ; Arabic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
FD92..FDC7 ; Arabic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
FDF0..FDFB ; Arabic # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
FDFC ; Arabic # Sc RIAL SIGN
FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
# Total code points: 1030
# ================================================
0700..070D ; Syriac # Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS
070F ; Syriac # Cf SYRIAC ABBREVIATION MARK
0710 ; Syriac # Lo SYRIAC LETTER ALAPH
0711 ; Syriac # Mn SYRIAC LETTER SUPERSCRIPT ALAPH
0712..072F ; Syriac # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH
0730..074A ; Syriac # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
074D..074F ; Syriac # Lo [3] SYRIAC LETTER SOGDIAN ZHAIN..SYRIAC LETTER SOGDIAN FE
# Total code points: 77
# ================================================
0780..07A5 ; Thaana # Lo [38] THAANA LETTER HAA..THAANA LETTER WAAVU
07A6..07B0 ; Thaana # Mn [11] THAANA ABAFILI..THAANA SUKUN
07B1 ; Thaana # Lo THAANA LETTER NAA
# Total code points: 50
# ================================================
0900..0902 ; Devanagari # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
0903 ; Devanagari # Mc DEVANAGARI SIGN VISARGA
0904..0939 ; Devanagari # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
093C ; Devanagari # Mn DEVANAGARI SIGN NUKTA
093D ; Devanagari # Lo DEVANAGARI SIGN AVAGRAHA
093E..0940 ; Devanagari # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
0941..0948 ; Devanagari # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
0949..094C ; Devanagari # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
094D ; Devanagari # Mn DEVANAGARI SIGN VIRAMA
094E ; Devanagari # Mc DEVANAGARI VOWEL SIGN PRISHTHAMATRA E
0950 ; Devanagari # Lo DEVANAGARI OM
0953..0955 ; Devanagari # Mn [3] DEVANAGARI GRAVE ACCENT..DEVANAGARI VOWEL SIGN CANDRA LONG E
0958..0961 ; Devanagari # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
0962..0963 ; Devanagari # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
0966..096F ; Devanagari # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
0971 ; Devanagari # Lm DEVANAGARI SIGN HIGH SPACING DOT
0972 ; Devanagari # Lo DEVANAGARI LETTER CANDRA A
0979..097F ; Devanagari # Lo [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA
A8E0..A8F1 ; Devanagari # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A8F2..A8F7 ; Devanagari # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA
A8F8..A8FA ; Devanagari # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET
A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
# Total code points: 140
# ================================================
0981 ; Bengali # Mn BENGALI SIGN CANDRABINDU
0982..0983 ; Bengali # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
0985..098C ; Bengali # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
098F..0990 ; Bengali # Lo [2] BENGALI LETTER E..BENGALI LETTER AI
0993..09A8 ; Bengali # Lo [22] BENGALI LETTER O..BENGALI LETTER NA
09AA..09B0 ; Bengali # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA
09B2 ; Bengali # Lo BENGALI LETTER LA
09B6..09B9 ; Bengali # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA
09BC ; Bengali # Mn BENGALI SIGN NUKTA
09BD ; Bengali # Lo BENGALI SIGN AVAGRAHA
09BE..09C0 ; Bengali # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II
09C1..09C4 ; Bengali # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
09C7..09C8 ; Bengali # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
09CB..09CC ; Bengali # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU
09CD ; Bengali # Mn BENGALI SIGN VIRAMA
09CE ; Bengali # Lo BENGALI LETTER KHANDA TA
09D7 ; Bengali # Mc BENGALI AU LENGTH MARK
09DC..09DD ; Bengali # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA
09DF..09E1 ; Bengali # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL
09E2..09E3 ; Bengali # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
09E6..09EF ; Bengali # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
09F0..09F1 ; Bengali # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL
09F2..09F3 ; Bengali # Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN
09F4..09F9 ; Bengali # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN
09FA ; Bengali # So BENGALI ISSHAR
09FB ; Bengali # Sc BENGALI GANDA MARK
# Total code points: 92
# ================================================
0A01..0A02 ; Gurmukhi # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
0A03 ; Gurmukhi # Mc GURMUKHI SIGN VISARGA
0A05..0A0A ; Gurmukhi # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU
0A0F..0A10 ; Gurmukhi # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI
0A13..0A28 ; Gurmukhi # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA
0A2A..0A30 ; Gurmukhi # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA
0A32..0A33 ; Gurmukhi # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA
0A35..0A36 ; Gurmukhi # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA
0A38..0A39 ; Gurmukhi # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA
0A3C ; Gurmukhi # Mn GURMUKHI SIGN NUKTA
0A3E..0A40 ; Gurmukhi # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II
0A41..0A42 ; Gurmukhi # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
0A47..0A48 ; Gurmukhi # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
0A4B..0A4D ; Gurmukhi # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
0A51 ; Gurmukhi # Mn GURMUKHI SIGN UDAAT
0A59..0A5C ; Gurmukhi # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA
0A5E ; Gurmukhi # Lo GURMUKHI LETTER FA
0A66..0A6F ; Gurmukhi # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
0A70..0A71 ; Gurmukhi # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK
0A72..0A74 ; Gurmukhi # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR
0A75 ; Gurmukhi # Mn GURMUKHI SIGN YAKASH
# Total code points: 79
# ================================================
0A81..0A82 ; Gujarati # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
0A83 ; Gujarati # Mc GUJARATI SIGN VISARGA
0A85..0A8D ; Gujarati # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E
0A8F..0A91 ; Gujarati # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O
0A93..0AA8 ; Gujarati # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA
0AAA..0AB0 ; Gujarati # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA
0AB2..0AB3 ; Gujarati # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA
0AB5..0AB9 ; Gujarati # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA
0ABC ; Gujarati # Mn GUJARATI SIGN NUKTA
0ABD ; Gujarati # Lo GUJARATI SIGN AVAGRAHA
0ABE..0AC0 ; Gujarati # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II
0AC1..0AC5 ; Gujarati # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
0AC7..0AC8 ; Gujarati # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
0AC9 ; Gujarati # Mc GUJARATI VOWEL SIGN CANDRA O
0ACB..0ACC ; Gujarati # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
0ACD ; Gujarati # Mn GUJARATI SIGN VIRAMA
0AD0 ; Gujarati # Lo GUJARATI OM
0AE0..0AE1 ; Gujarati # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL
0AE2..0AE3 ; Gujarati # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
0AE6..0AEF ; Gujarati # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
0AF1 ; Gujarati # Sc GUJARATI RUPEE SIGN
# Total code points: 83
# ================================================
0B01 ; Oriya # Mn ORIYA SIGN CANDRABINDU
0B02..0B03 ; Oriya # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
0B05..0B0C ; Oriya # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L
0B0F..0B10 ; Oriya # Lo [2] ORIYA LETTER E..ORIYA LETTER AI
0B13..0B28 ; Oriya # Lo [22] ORIYA LETTER O..ORIYA LETTER NA
0B2A..0B30 ; Oriya # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA
0B32..0B33 ; Oriya # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA
0B35..0B39 ; Oriya # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA
0B3C ; Oriya # Mn ORIYA SIGN NUKTA
0B3D ; Oriya # Lo ORIYA SIGN AVAGRAHA
0B3E ; Oriya # Mc ORIYA VOWEL SIGN AA
0B3F ; Oriya # Mn ORIYA VOWEL SIGN I
0B40 ; Oriya # Mc ORIYA VOWEL SIGN II
0B41..0B44 ; Oriya # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
0B47..0B48 ; Oriya # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
0B4B..0B4C ; Oriya # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
0B4D ; Oriya # Mn ORIYA SIGN VIRAMA
0B56 ; Oriya # Mn ORIYA AI LENGTH MARK
0B57 ; Oriya # Mc ORIYA AU LENGTH MARK
0B5C..0B5D ; Oriya # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA
0B5F..0B61 ; Oriya # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL
0B62..0B63 ; Oriya # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
0B66..0B6F ; Oriya # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
0B70 ; Oriya # So ORIYA ISSHAR
0B71 ; Oriya # Lo ORIYA LETTER WA
# Total code points: 84
# ================================================
0B82 ; Tamil # Mn TAMIL SIGN ANUSVARA
0B83 ; Tamil # Lo TAMIL SIGN VISARGA
0B85..0B8A ; Tamil # Lo [6] TAMIL LETTER A..TAMIL LETTER UU
0B8E..0B90 ; Tamil # Lo [3] TAMIL LETTER E..TAMIL LETTER AI
0B92..0B95 ; Tamil # Lo [4] TAMIL LETTER O..TAMIL LETTER KA
0B99..0B9A ; Tamil # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA
0B9C ; Tamil # Lo TAMIL LETTER JA
0B9E..0B9F ; Tamil # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA
0BA3..0BA4 ; Tamil # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA
0BA8..0BAA ; Tamil # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA
0BAE..0BB9 ; Tamil # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA
0BBE..0BBF ; Tamil # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I
0BC0 ; Tamil # Mn TAMIL VOWEL SIGN II
0BC1..0BC2 ; Tamil # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU
0BC6..0BC8 ; Tamil # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
0BCA..0BCC ; Tamil # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU
0BCD ; Tamil # Mn TAMIL SIGN VIRAMA
0BD0 ; Tamil # Lo TAMIL OM
0BD7 ; Tamil # Mc TAMIL AU LENGTH MARK
0BE6..0BEF ; Tamil # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
0BF0..0BF2 ; Tamil # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND
0BF3..0BF8 ; Tamil # So [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN
0BF9 ; Tamil # Sc TAMIL RUPEE SIGN
0BFA ; Tamil # So TAMIL NUMBER SIGN
# Total code points: 72
# ================================================
0C01..0C03 ; Telugu # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
0C05..0C0C ; Telugu # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L
0C0E..0C10 ; Telugu # Lo [3] TELUGU LETTER E..TELUGU LETTER AI
0C12..0C28 ; Telugu # Lo [23] TELUGU LETTER O..TELUGU LETTER NA
0C2A..0C33 ; Telugu # Lo [10] TELUGU LETTER PA..TELUGU LETTER LLA
0C35..0C39 ; Telugu # Lo [5] TELUGU LETTER VA..TELUGU LETTER HA
0C3D ; Telugu # Lo TELUGU SIGN AVAGRAHA
0C3E..0C40 ; Telugu # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
0C41..0C44 ; Telugu # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
0C46..0C48 ; Telugu # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
0C4A..0C4D ; Telugu # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
0C55..0C56 ; Telugu # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
0C58..0C59 ; Telugu # Lo [2] TELUGU LETTER TSA..TELUGU LETTER DZA
0C60..0C61 ; Telugu # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C62..0C63 ; Telugu # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
0C66..0C6F ; Telugu # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
0C78..0C7E ; Telugu # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
0C7F ; Telugu # So TELUGU SIGN TUUMU
# Total code points: 93
# ================================================
0C82..0C83 ; Kannada # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
0C85..0C8C ; Kannada # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
0C8E..0C90 ; Kannada # Lo [3] KANNADA LETTER E..KANNADA LETTER AI
0C92..0CA8 ; Kannada # Lo [23] KANNADA LETTER O..KANNADA LETTER NA
0CAA..0CB3 ; Kannada # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA
0CB5..0CB9 ; Kannada # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA
0CBC ; Kannada # Mn KANNADA SIGN NUKTA
0CBD ; Kannada # Lo KANNADA SIGN AVAGRAHA
0CBE ; Kannada # Mc KANNADA VOWEL SIGN AA
0CBF ; Kannada # Mn KANNADA VOWEL SIGN I
0CC0..0CC4 ; Kannada # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR
0CC6 ; Kannada # Mn KANNADA VOWEL SIGN E
0CC7..0CC8 ; Kannada # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
0CCA..0CCB ; Kannada # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
0CCC..0CCD ; Kannada # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CD5..0CD6 ; Kannada # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
0CDE ; Kannada # Lo KANNADA LETTER FA
0CE0..0CE1 ; Kannada # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CE2..0CE3 ; Kannada # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
# Total code points: 84
# ================================================
0D02..0D03 ; Malayalam # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
0D05..0D0C ; Malayalam # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
0D0E..0D10 ; Malayalam # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
0D12..0D28 ; Malayalam # Lo [23] MALAYALAM LETTER O..MALAYALAM LETTER NA
0D2A..0D39 ; Malayalam # Lo [16] MALAYALAM LETTER PA..MALAYALAM LETTER HA
0D3D ; Malayalam # Lo MALAYALAM SIGN AVAGRAHA
0D3E..0D40 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
0D41..0D44 ; Malayalam # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
0D46..0D48 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
0D4A..0D4C ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
0D4D ; Malayalam # Mn MALAYALAM SIGN VIRAMA
0D57 ; Malayalam # Mc MALAYALAM AU LENGTH MARK
0D60..0D61 ; Malayalam # Lo [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL
0D62..0D63 ; Malayalam # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
0D66..0D6F ; Malayalam # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
0D70..0D75 ; Malayalam # No [6] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE QUARTERS
0D79 ; Malayalam # So MALAYALAM DATE MARK
0D7A..0D7F ; Malayalam # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
# Total code points: 95
# ================================================
0D82..0D83 ; Sinhala # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
0D85..0D96 ; Sinhala # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
0D9A..0DB1 ; Sinhala # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA
0DB3..0DBB ; Sinhala # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA
0DBD ; Sinhala # Lo SINHALA LETTER DANTAJA LAYANNA
0DC0..0DC6 ; Sinhala # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA
0DCA ; Sinhala # Mn SINHALA SIGN AL-LAKUNA
0DCF..0DD1 ; Sinhala # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA
0DD2..0DD4 ; Sinhala # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
0DD6 ; Sinhala # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA
0DD8..0DDF ; Sinhala # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA
0DF2..0DF3 ; Sinhala # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA
0DF4 ; Sinhala # Po SINHALA PUNCTUATION KUNDDALIYA
# Total code points: 80
# ================================================
0E01..0E30 ; Thai # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A
0E31 ; Thai # Mn THAI CHARACTER MAI HAN-AKAT
0E32..0E33 ; Thai # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM
0E34..0E3A ; Thai # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
0E40..0E45 ; Thai # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO
0E46 ; Thai # Lm THAI CHARACTER MAIYAMOK
0E47..0E4E ; Thai # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
0E4F ; Thai # Po THAI CHARACTER FONGMAN
0E50..0E59 ; Thai # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE
0E5A..0E5B ; Thai # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT
# Total code points: 86
# ================================================
0E81..0E82 ; Lao # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG
0E84 ; Lao # Lo LAO LETTER KHO TAM
0E87..0E88 ; Lao # Lo [2] LAO LETTER NGO..LAO LETTER CO
0E8A ; Lao # Lo LAO LETTER SO TAM
0E8D ; Lao # Lo LAO LETTER NYO
0E94..0E97 ; Lao # Lo [4] LAO LETTER DO..LAO LETTER THO TAM
0E99..0E9F ; Lao # Lo [7] LAO LETTER NO..LAO LETTER FO SUNG
0EA1..0EA3 ; Lao # Lo [3] LAO LETTER MO..LAO LETTER LO LING
0EA5 ; Lao # Lo LAO LETTER LO LOOT
0EA7 ; Lao # Lo LAO LETTER WO
0EAA..0EAB ; Lao # Lo [2] LAO LETTER SO SUNG..LAO LETTER HO SUNG
0EAD..0EB0 ; Lao # Lo [4] LAO LETTER O..LAO VOWEL SIGN A
0EB1 ; Lao # Mn LAO VOWEL SIGN MAI KAN
0EB2..0EB3 ; Lao # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM
0EB4..0EB9 ; Lao # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU
0EBB..0EBC ; Lao # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO
0EBD ; Lao # Lo LAO SEMIVOWEL SIGN NYO
0EC0..0EC4 ; Lao # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI
0EC6 ; Lao # Lm LAO KO LA
0EC8..0ECD ; Lao # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA
0ED0..0ED9 ; Lao # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE
0EDC..0EDD ; Lao # Lo [2] LAO HO NO..LAO HO MO
# Total code points: 65
# ================================================
0F00 ; Tibetan # Lo TIBETAN SYLLABLE OM
0F01..0F03 ; Tibetan # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA
0F04..0F12 ; Tibetan # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD
0F13..0F17 ; Tibetan # So [5] TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS
0F18..0F19 ; Tibetan # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
0F1A..0F1F ; Tibetan # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG
0F20..0F29 ; Tibetan # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
0F2A..0F33 ; Tibetan # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO
0F34 ; Tibetan # So TIBETAN MARK BSDUS RTAGS
0F35 ; Tibetan # Mn TIBETAN MARK NGAS BZUNG NYI ZLA
0F36 ; Tibetan # So TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN
0F37 ; Tibetan # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
0F38 ; Tibetan # So TIBETAN MARK CHE MGO
0F39 ; Tibetan # Mn TIBETAN MARK TSA -PHRU
0F3A ; Tibetan # Ps TIBETAN MARK GUG RTAGS GYON
0F3B ; Tibetan # Pe TIBETAN MARK GUG RTAGS GYAS
0F3C ; Tibetan # Ps TIBETAN MARK ANG KHANG GYON
0F3D ; Tibetan # Pe TIBETAN MARK ANG KHANG GYAS
0F3E..0F3F ; Tibetan # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES
0F40..0F47 ; Tibetan # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA
0F49..0F6C ; Tibetan # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA
0F71..0F7E ; Tibetan # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
0F7F ; Tibetan # Mc TIBETAN SIGN RNAM BCAD
0F80..0F84 ; Tibetan # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
0F85 ; Tibetan # Po TIBETAN MARK PALUTA
0F86..0F87 ; Tibetan # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
0F88..0F8B ; Tibetan # Lo [4] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN GRU MED RGYINGS
0F90..0F97 ; Tibetan # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA
0F99..0FBC ; Tibetan # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
0FBE..0FC5 ; Tibetan # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE
0FC6 ; Tibetan # Mn TIBETAN SYMBOL PADMA GDAN
0FC7..0FCC ; Tibetan # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL
0FCE..0FCF ; Tibetan # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM
0FD0..0FD4 ; Tibetan # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA
# Total code points: 201
# ================================================
1000..102A ; Myanmar # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU
102B..102C ; Myanmar # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA
102D..1030 ; Myanmar # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
1031 ; Myanmar # Mc MYANMAR VOWEL SIGN E
1032..1037 ; Myanmar # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW
1038 ; Myanmar # Mc MYANMAR SIGN VISARGA
1039..103A ; Myanmar # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT
103B..103C ; Myanmar # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA
103D..103E ; Myanmar # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA
103F ; Myanmar # Lo MYANMAR LETTER GREAT SA
1040..1049 ; Myanmar # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
104A..104F ; Myanmar # Po [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE
1050..1055 ; Myanmar # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL
1056..1057 ; Myanmar # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR
1058..1059 ; Myanmar # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
105A..105D ; Myanmar # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE
105E..1060 ; Myanmar # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA
1061 ; Myanmar # Lo MYANMAR LETTER SGAW KAREN SHA
1062..1064 ; Myanmar # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO
1065..1066 ; Myanmar # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA
1067..106D ; Myanmar # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5
106E..1070 ; Myanmar # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA
1071..1074 ; Myanmar # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE
1075..1081 ; Myanmar # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA
1082 ; Myanmar # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA
1083..1084 ; Myanmar # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E
1085..1086 ; Myanmar # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
1087..108C ; Myanmar # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
108D ; Myanmar # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
108E ; Myanmar # Lo MYANMAR LETTER RUMAI PALAUNG FA
108F ; Myanmar # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5
1090..1099 ; Myanmar # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE
109A..109C ; Myanmar # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A
109D ; Myanmar # Mn MYANMAR VOWEL SIGN AITON AI
109E..109F ; Myanmar # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION
AA60..AA6F ; Myanmar # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA
AA70 ; Myanmar # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
AA71..AA76 ; Myanmar # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM
AA77..AA79 ; Myanmar # So [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO
AA7A ; Myanmar # Lo MYANMAR LETTER AITON RA
AA7B ; Myanmar # Mc MYANMAR SIGN PAO KAREN TONE
# Total code points: 188
# ================================================
10A0..10C5 ; Georgian # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE
10D0..10FA ; Georgian # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN
10FC ; Georgian # Lm MODIFIER LETTER GEORGIAN NAR
2D00..2D25 ; Georgian # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
# Total code points: 120
# ================================================
1100..11FF ; Hangul # Lo [256] HANGUL CHOSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN
3131..318E ; Hangul # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
3200..321E ; Hangul # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
3260..327E ; Hangul # So [31] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL IEUNG U
A960..A97C ; Hangul # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH
AC00..D7A3 ; Hangul # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
D7B0..D7C6 ; Hangul # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E
D7CB..D7FB ; Hangul # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH
FFA0..FFBE ; Hangul # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH
FFC2..FFC7 ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E
FFCA..FFCF ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE
FFD2..FFD7 ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
# Total code points: 11737
# ================================================
1200..1248 ; Ethiopic # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA
124A..124D ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
1250..1256 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO
1258 ; Ethiopic # Lo ETHIOPIC SYLLABLE QHWA
125A..125D ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE
1260..1288 ; Ethiopic # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA
128A..128D ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE
1290..12B0 ; Ethiopic # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA
12B2..12B5 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE
12B8..12BE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO
12C0 ; Ethiopic # Lo ETHIOPIC SYLLABLE KXWA
12C2..12C5 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE
12C8..12D6 ; Ethiopic # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O
12D8..1310 ; Ethiopic # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA
1312..1315 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
1318..135A ; Ethiopic # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
135F ; Ethiopic # Mn ETHIOPIC COMBINING GEMINATION MARK
1360 ; Ethiopic # So ETHIOPIC SECTION MARK
1361..1368 ; Ethiopic # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR
1369..137C ; Ethiopic # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND
1380..138F ; Ethiopic # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE
1390..1399 ; Ethiopic # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT
2D80..2D96 ; Ethiopic # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE
2DA0..2DA6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO
2DA8..2DAE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO
2DB0..2DB6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO
2DB8..2DBE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO
2DC0..2DC6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO
2DC8..2DCE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO
2DD0..2DD6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO
2DD8..2DDE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
# Total code points: 461
# ================================================
13A0..13F4 ; Cherokee # Lo [85] CHEROKEE LETTER A..CHEROKEE LETTER YV
# Total code points: 85
# ================================================
1400 ; Canadian_Aboriginal # Pd CANADIAN SYLLABICS HYPHEN
1401..166C ; Canadian_Aboriginal # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA
166D..166E ; Canadian_Aboriginal # Po [2] CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP
166F..167F ; Canadian_Aboriginal # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W
18B0..18F5 ; Canadian_Aboriginal # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
# Total code points: 710
# ================================================
1680 ; Ogham # Zs OGHAM SPACE MARK
1681..169A ; Ogham # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH
169B ; Ogham # Ps OGHAM FEATHER MARK
169C ; Ogham # Pe OGHAM REVERSED FEATHER MARK
# Total code points: 29
# ================================================
16A0..16EA ; Runic # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
16EE..16F0 ; Runic # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
# Total code points: 78
# ================================================
1780..17B3 ; Khmer # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU
17B4..17B5 ; Khmer # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
17B6 ; Khmer # Mc KHMER VOWEL SIGN AA
17B7..17BD ; Khmer # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA
17BE..17C5 ; Khmer # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU
17C6 ; Khmer # Mn KHMER SIGN NIKAHIT
17C7..17C8 ; Khmer # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU
17C9..17D3 ; Khmer # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
17D4..17D6 ; Khmer # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH
17D7 ; Khmer # Lm KHMER SIGN LEK TOO
17D8..17DA ; Khmer # Po [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT
17DB ; Khmer # Sc KHMER CURRENCY SYMBOL RIEL
17DC ; Khmer # Lo KHMER SIGN AVAKRAHASANYA
17DD ; Khmer # Mn KHMER SIGN ATTHACAN
17E0..17E9 ; Khmer # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
17F0..17F9 ; Khmer # No [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON
19E0..19FF ; Khmer # So [32] KHMER SYMBOL PATHAMASAT..KHMER SYMBOL DAP-PRAM ROC
# Total code points: 146
# ================================================
1800..1801 ; Mongolian # Po [2] MONGOLIAN BIRGA..MONGOLIAN ELLIPSIS
1804 ; Mongolian # Po MONGOLIAN COLON
1806 ; Mongolian # Pd MONGOLIAN TODO SOFT HYPHEN
1807..180A ; Mongolian # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU
180B..180D ; Mongolian # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
180E ; Mongolian # Zs MONGOLIAN VOWEL SEPARATOR
1810..1819 ; Mongolian # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
1820..1842 ; Mongolian # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
1843 ; Mongolian # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
1844..1877 ; Mongolian # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA
1880..18A8 ; Mongolian # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA
18A9 ; Mongolian # Mn MONGOLIAN LETTER ALI GALI DAGALGA
18AA ; Mongolian # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA
# Total code points: 153
# ================================================
3041..3096 ; Hiragana # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
309D..309E ; Hiragana # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
309F ; Hiragana # Lo HIRAGANA DIGRAPH YORI
1F200 ; Hiragana # So SQUARE HIRAGANA HOKA
# Total code points: 90
# ================================================
30A1..30FA ; Katakana # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
30FD..30FE ; Katakana # Lm [2] KATAKANA ITERATION MARK..KATAKANA VOICED ITERATION MARK
30FF ; Katakana # Lo KATAKANA DIGRAPH KOTO
31F0..31FF ; Katakana # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
32D0..32FE ; Katakana # So [47] CIRCLED KATAKANA A..CIRCLED KATAKANA WO
3300..3357 ; Katakana # So [88] SQUARE APAATO..SQUARE WATTO
FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
# Total code points: 299
# ================================================
3105..312D ; Bopomofo # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH
31A0..31B7 ; Bopomofo # Lo [24] BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H
# Total code points: 65
# ================================================
2E80..2E99 ; Han # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP
2E9B..2EF3 ; Han # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
2F00..2FD5 ; Han # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
3005 ; Han # Lm IDEOGRAPHIC ITERATION MARK
3007 ; Han # Nl IDEOGRAPHIC NUMBER ZERO
3021..3029 ; Han # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
3038..303A ; Han # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
303B ; Han # Lm VERTICAL IDEOGRAPHIC ITERATION MARK
3400..4DB5 ; Han # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
4E00..9FCB ; Han # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB
F900..FA2D ; Han # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D
FA30..FA6D ; Han # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D
FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
# Total code points: 75738
# ================================================
A000..A014 ; Yi # Lo [21] YI SYLLABLE IT..YI SYLLABLE E
A015 ; Yi # Lm YI SYLLABLE WU
A016..A48C ; Yi # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
# Total code points: 1220
# ================================================
10300..1031E ; Old_Italic # Lo [31] OLD ITALIC LETTER A..OLD ITALIC LETTER UU
10320..10323 ; Old_Italic # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY
# Total code points: 35
# ================================================
10330..10340 ; Gothic # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA
10341 ; Gothic # Nl GOTHIC LETTER NINETY
10342..10349 ; Gothic # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL
1034A ; Gothic # Nl GOTHIC LETTER NINE HUNDRED
# Total code points: 27
# ================================================
10400..1044F ; Deseret # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW
# Total code points: 80
# ================================================
0300..036F ; Inherited # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
0485..0486 ; Inherited # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA
064B..0655 ; Inherited # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW
0670 ; Inherited # Mn ARABIC LETTER SUPERSCRIPT ALEF
0951..0952 ; Inherited # Mn [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA
1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
1CD4..1CE0 ; Inherited # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
1CED ; Inherited # Mn VEDIC SIGN TIRYAK
1DC0..1DE6 ; Inherited # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
1DFD..1DFF ; Inherited # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
20D0..20DC ; Inherited # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20DD..20E0 ; Inherited # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
20E1 ; Inherited # Mn COMBINING LEFT RIGHT ARROW ABOVE
20E2..20E4 ; Inherited # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
20E5..20F0 ; Inherited # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
302A..302F ; Inherited # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
3099..309A ; Inherited # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
FE00..FE0F ; Inherited # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
FE20..FE26 ; Inherited # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
101FD ; Inherited # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
1D167..1D169 ; Inherited # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
1D17B..1D182 ; Inherited # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
1D185..1D18B ; Inherited # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 523
# ================================================
1700..170C ; Tagalog # Lo [13] TAGALOG LETTER A..TAGALOG LETTER YA
170E..1711 ; Tagalog # Lo [4] TAGALOG LETTER LA..TAGALOG LETTER HA
1712..1714 ; Tagalog # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
# Total code points: 20
# ================================================
1720..1731 ; Hanunoo # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA
1732..1734 ; Hanunoo # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
# Total code points: 21
# ================================================
1740..1751 ; Buhid # Lo [18] BUHID LETTER A..BUHID LETTER HA
1752..1753 ; Buhid # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
# Total code points: 20
# ================================================
1760..176C ; Tagbanwa # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA
176E..1770 ; Tagbanwa # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA
1772..1773 ; Tagbanwa # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
# Total code points: 18
# ================================================
1900..191C ; Limbu # Lo [29] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA
1920..1922 ; Limbu # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
1923..1926 ; Limbu # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
1927..1928 ; Limbu # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
1929..192B ; Limbu # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA
1930..1931 ; Limbu # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA
1932 ; Limbu # Mn LIMBU SMALL LETTER ANUSVARA
1933..1938 ; Limbu # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA
1939..193B ; Limbu # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
1940 ; Limbu # So LIMBU SIGN LOO
1944..1945 ; Limbu # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
1946..194F ; Limbu # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
# Total code points: 66
# ================================================
1950..196D ; Tai_Le # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI
1970..1974 ; Tai_Le # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6
# Total code points: 35
# ================================================
10000..1000B ; Linear_B # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE
1000D..10026 ; Linear_B # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO
10028..1003A ; Linear_B # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO
1003C..1003D ; Linear_B # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE
1003F..1004D ; Linear_B # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO
10050..1005D ; Linear_B # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089
10080..100FA ; Linear_B # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305
# Total code points: 211
# ================================================
10380..1039D ; Ugaritic # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU
1039F ; Ugaritic # Po UGARITIC WORD DIVIDER
# Total code points: 31
# ================================================
10450..1047F ; Shavian # Lo [48] SHAVIAN LETTER PEEP..SHAVIAN LETTER YEW
# Total code points: 48
# ================================================
10480..1049D ; Osmanya # Lo [30] OSMANYA LETTER ALEF..OSMANYA LETTER OO
104A0..104A9 ; Osmanya # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
# Total code points: 40
# ================================================
10800..10805 ; Cypriot # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA
10808 ; Cypriot # Lo CYPRIOT SYLLABLE JO
1080A..10835 ; Cypriot # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
10837..10838 ; Cypriot # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE
1083C ; Cypriot # Lo CYPRIOT SYLLABLE ZA
1083F ; Cypriot # Lo CYPRIOT SYLLABLE ZO
# Total code points: 55
# ================================================
2800..28FF ; Braille # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678
# Total code points: 256
# ================================================
1A00..1A16 ; Buginese # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA
1A17..1A18 ; Buginese # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
1A19..1A1B ; Buginese # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
1A1E..1A1F ; Buginese # Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION
# Total code points: 30
# ================================================
03E2..03EF ; Coptic # L& [14] COPTIC CAPITAL LETTER SHEI..COPTIC SMALL LETTER DEI
2C80..2CE4 ; Coptic # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI
2CE5..2CEA ; Coptic # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA
2CEB..2CEE ; Coptic # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA
2CEF..2CF1 ; Coptic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
2CF9..2CFC ; Coptic # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER
2CFD ; Coptic # No COPTIC FRACTION ONE HALF
2CFE..2CFF ; Coptic # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER
# Total code points: 135
# ================================================
1980..19AB ; New_Tai_Lue # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA
19B0..19C0 ; New_Tai_Lue # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY
19C1..19C7 ; New_Tai_Lue # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B
19C8..19C9 ; New_Tai_Lue # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
19D0..19DA ; New_Tai_Lue # Nd [11] NEW TAI LUE DIGIT ZERO..NEW TAI LUE THAM DIGIT ONE
19DE..19DF ; New_Tai_Lue # Po [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV
# Total code points: 83
# ================================================
2C00..2C2E ; Glagolitic # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
2C30..2C5E ; Glagolitic # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
# Total code points: 94
# ================================================
2D30..2D65 ; Tifinagh # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
2D6F ; Tifinagh # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
# Total code points: 55
# ================================================
A800..A801 ; Syloti_Nagri # Lo [2] SYLOTI NAGRI LETTER A..SYLOTI NAGRI LETTER I
A802 ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN DVISVARA
A803..A805 ; Syloti_Nagri # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O
A806 ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN HASANTA
A807..A80A ; Syloti_Nagri # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO
A80B ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN ANUSVARA
A80C..A822 ; Syloti_Nagri # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO
A823..A824 ; Syloti_Nagri # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
A825..A826 ; Syloti_Nagri # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
A827 ; Syloti_Nagri # Mc SYLOTI NAGRI VOWEL SIGN OO
A828..A82B ; Syloti_Nagri # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4
# Total code points: 44
# ================================================
103A0..103C3 ; Old_Persian # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA
103C8..103CF ; Old_Persian # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH
103D0 ; Old_Persian # Po OLD PERSIAN WORD DIVIDER
103D1..103D5 ; Old_Persian # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED
# Total code points: 50
# ================================================
10A00 ; Kharoshthi # Lo KHAROSHTHI LETTER A
10A01..10A03 ; Kharoshthi # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
10A05..10A06 ; Kharoshthi # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
10A0C..10A0F ; Kharoshthi # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
10A10..10A13 ; Kharoshthi # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA
10A15..10A17 ; Kharoshthi # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA
10A19..10A33 ; Kharoshthi # Lo [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA
10A38..10A3A ; Kharoshthi # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
10A3F ; Kharoshthi # Mn KHAROSHTHI VIRAMA
10A40..10A47 ; Kharoshthi # No [8] KHAROSHTHI DIGIT ONE..KHAROSHTHI NUMBER ONE THOUSAND
10A50..10A58 ; Kharoshthi # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES
# Total code points: 65
# ================================================
1B00..1B03 ; Balinese # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B04 ; Balinese # Mc BALINESE SIGN BISAH
1B05..1B33 ; Balinese # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA
1B34 ; Balinese # Mn BALINESE SIGN REREKAN
1B35 ; Balinese # Mc BALINESE VOWEL SIGN TEDUNG
1B36..1B3A ; Balinese # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
1B3B ; Balinese # Mc BALINESE VOWEL SIGN RA REPA TEDUNG
1B3C ; Balinese # Mn BALINESE VOWEL SIGN LA LENGA
1B3D..1B41 ; Balinese # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
1B42 ; Balinese # Mn BALINESE VOWEL SIGN PEPET
1B43..1B44 ; Balinese # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
1B45..1B4B ; Balinese # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK
1B50..1B59 ; Balinese # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
1B5A..1B60 ; Balinese # Po [7] BALINESE PANTI..BALINESE PAMENENG
1B61..1B6A ; Balinese # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE
1B6B..1B73 ; Balinese # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
1B74..1B7C ; Balinese # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
# Total code points: 121
# ================================================
12000..1236E ; Cuneiform # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM
12400..12462 ; Cuneiform # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
12470..12473 ; Cuneiform # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON
# Total code points: 982
# ================================================
10900..10915 ; Phoenician # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU
10916..1091B ; Phoenician # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE
1091F ; Phoenician # Po PHOENICIAN WORD SEPARATOR
# Total code points: 29
# ================================================
A840..A873 ; Phags_Pa # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU
A874..A877 ; Phags_Pa # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD
# Total code points: 56
# ================================================
07C0..07C9 ; Nko # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE
07CA..07EA ; Nko # Lo [33] NKO LETTER A..NKO LETTER JONA RA
07EB..07F3 ; Nko # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
07F4..07F5 ; Nko # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
07F6 ; Nko # So NKO SYMBOL OO DENNEN
07F7..07F9 ; Nko # Po [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK
07FA ; Nko # Lm NKO LAJANYALAN
# Total code points: 59
# ================================================
1B80..1B81 ; Sundanese # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
1B82 ; Sundanese # Mc SUNDANESE SIGN PANGWISAD
1B83..1BA0 ; Sundanese # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA
1BA1 ; Sundanese # Mc SUNDANESE CONSONANT SIGN PAMINGKAL
1BA2..1BA5 ; Sundanese # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
1BA6..1BA7 ; Sundanese # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG
1BA8..1BA9 ; Sundanese # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
1BAA ; Sundanese # Mc SUNDANESE SIGN PAMAAEH
1BAE..1BAF ; Sundanese # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA
1BB0..1BB9 ; Sundanese # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
# Total code points: 55
# ================================================
1C00..1C23 ; Lepcha # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A
1C24..1C2B ; Lepcha # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU
1C2C..1C33 ; Lepcha # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
1C34..1C35 ; Lepcha # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
1C36..1C37 ; Lepcha # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
1C3B..1C3F ; Lepcha # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK
1C40..1C49 ; Lepcha # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE
1C4D..1C4F ; Lepcha # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
# Total code points: 74
# ================================================
1C50..1C59 ; Ol_Chiki # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE
1C5A..1C77 ; Ol_Chiki # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; Ol_Chiki # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
1C7E..1C7F ; Ol_Chiki # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
# Total code points: 48
# ================================================
A500..A60B ; Vai # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG
A60C ; Vai # Lm VAI SYLLABLE LENGTHENER
A60D..A60F ; Vai # Po [3] VAI COMMA..VAI QUESTION MARK
A610..A61F ; Vai # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG
A620..A629 ; Vai # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE
A62A..A62B ; Vai # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO
# Total code points: 300
# ================================================
A880..A881 ; Saurashtra # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
A882..A8B3 ; Saurashtra # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA
A8B4..A8C3 ; Saurashtra # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
A8C4 ; Saurashtra # Mn SAURASHTRA SIGN VIRAMA
A8CE..A8CF ; Saurashtra # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
A8D0..A8D9 ; Saurashtra # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
# Total code points: 81
# ================================================
A900..A909 ; Kayah_Li # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE
A90A..A925 ; Kayah_Li # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO
A926..A92D ; Kayah_Li # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
A92E..A92F ; Kayah_Li # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA
# Total code points: 48
# ================================================
A930..A946 ; Rejang # Lo [23] REJANG LETTER KA..REJANG LETTER A
A947..A951 ; Rejang # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
A952..A953 ; Rejang # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA
A95F ; Rejang # Po REJANG SECTION MARK
# Total code points: 37
# ================================================
10280..1029C ; Lycian # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X
# Total code points: 29
# ================================================
102A0..102D0 ; Carian # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3
# Total code points: 49
# ================================================
10920..10939 ; Lydian # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C
1093F ; Lydian # Po LYDIAN TRIANGULAR MARK
# Total code points: 27
# ================================================
AA00..AA28 ; Cham # Lo [41] CHAM LETTER A..CHAM LETTER HA
AA29..AA2E ; Cham # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
AA2F..AA30 ; Cham # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
AA31..AA32 ; Cham # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
AA33..AA34 ; Cham # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA
AA35..AA36 ; Cham # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
AA40..AA42 ; Cham # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG
AA43 ; Cham # Mn CHAM CONSONANT SIGN FINAL NG
AA44..AA4B ; Cham # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS
AA4C ; Cham # Mn CHAM CONSONANT SIGN FINAL M
AA4D ; Cham # Mc CHAM CONSONANT SIGN FINAL H
AA50..AA59 ; Cham # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE
AA5C..AA5F ; Cham # Po [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA
# Total code points: 83
# ================================================
1A20..1A54 ; Tai_Tham # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA
1A55 ; Tai_Tham # Mc TAI THAM CONSONANT SIGN MEDIAL RA
1A56 ; Tai_Tham # Mn TAI THAM CONSONANT SIGN MEDIAL LA
1A57 ; Tai_Tham # Mc TAI THAM CONSONANT SIGN LA TANG LAI
1A58..1A5E ; Tai_Tham # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
1A60 ; Tai_Tham # Mn TAI THAM SIGN SAKOT
1A61 ; Tai_Tham # Mc TAI THAM VOWEL SIGN A
1A62 ; Tai_Tham # Mn TAI THAM VOWEL SIGN MAI SAT
1A63..1A64 ; Tai_Tham # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA
1A65..1A6C ; Tai_Tham # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
1A6D..1A72 ; Tai_Tham # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
1A73..1A7C ; Tai_Tham # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
1A7F ; Tai_Tham # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1A80..1A89 ; Tai_Tham # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE
1A90..1A99 ; Tai_Tham # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
1AA0..1AA6 ; Tai_Tham # Po [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA
1AA7 ; Tai_Tham # Lm TAI THAM SIGN MAI YAMOK
1AA8..1AAD ; Tai_Tham # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG
# Total code points: 127
# ================================================
AA80..AAAF ; Tai_Viet # Lo [48] TAI VIET LETTER LOW KO..TAI VIET LETTER HIGH O
AAB0 ; Tai_Viet # Mn TAI VIET MAI KANG
AAB1 ; Tai_Viet # Lo TAI VIET VOWEL AA
AAB2..AAB4 ; Tai_Viet # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
AAB5..AAB6 ; Tai_Viet # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O
AAB7..AAB8 ; Tai_Viet # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
AAB9..AABD ; Tai_Viet # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN
AABE..AABF ; Tai_Viet # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK
AAC0 ; Tai_Viet # Lo TAI VIET TONE MAI NUENG
AAC1 ; Tai_Viet # Mn TAI VIET TONE MAI THO
AAC2 ; Tai_Viet # Lo TAI VIET TONE MAI SONG
AADB..AADC ; Tai_Viet # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG
AADD ; Tai_Viet # Lm TAI VIET SYMBOL SAM
AADE..AADF ; Tai_Viet # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI
# Total code points: 72
# ================================================
10B00..10B35 ; Avestan # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE
10B39..10B3F ; Avestan # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION
# Total code points: 61
# ================================================
13000..1342E ; Egyptian_Hieroglyphs # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
# Total code points: 1071
# ================================================
0800..0815 ; Samaritan # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
0816..0819 ; Samaritan # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
081A ; Samaritan # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT
081B..0823 ; Samaritan # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
0824 ; Samaritan # Lm SAMARITAN MODIFIER LETTER SHORT A
0825..0827 ; Samaritan # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0828 ; Samaritan # Lm SAMARITAN MODIFIER LETTER I
0829..082D ; Samaritan # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0830..083E ; Samaritan # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU
# Total code points: 61
# ================================================
A4D0..A4F7 ; Lisu # Lo [40] LISU LETTER BA..LISU LETTER OE
A4F8..A4FD ; Lisu # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU
A4FE..A4FF ; Lisu # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
# Total code points: 48
# ================================================
A6A0..A6E5 ; Bamum # Lo [70] BAMUM LETTER A..BAMUM LETTER KI
A6E6..A6EF ; Bamum # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM
A6F0..A6F1 ; Bamum # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
A6F2..A6F7 ; Bamum # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK
# Total code points: 88
# ================================================
A980..A982 ; Javanese # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
A983 ; Javanese # Mc JAVANESE SIGN WIGNYAN
A984..A9B2 ; Javanese # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA
A9B3 ; Javanese # Mn JAVANESE SIGN CECAK TELU
A9B4..A9B5 ; Javanese # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
A9B6..A9B9 ; Javanese # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
A9BA..A9BB ; Javanese # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
A9BC ; Javanese # Mn JAVANESE VOWEL SIGN PEPET
A9BD..A9C0 ; Javanese # Mc [4] JAVANESE CONSONANT SIGN KERET..JAVANESE PANGKON
A9C1..A9CD ; Javanese # Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH
A9CF ; Javanese # Lm JAVANESE PANGRANGKEP
A9D0..A9D9 ; Javanese # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE
A9DE..A9DF ; Javanese # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN
# Total code points: 91
# ================================================
ABC0..ABE2 ; Meetei_Mayek # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM
ABE3..ABE4 ; Meetei_Mayek # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
ABE5 ; Meetei_Mayek # Mn MEETEI MAYEK VOWEL SIGN ANAP
ABE6..ABE7 ; Meetei_Mayek # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP
ABE8 ; Meetei_Mayek # Mn MEETEI MAYEK VOWEL SIGN UNAP
ABE9..ABEA ; Meetei_Mayek # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG
ABEB ; Meetei_Mayek # Po MEETEI MAYEK CHEIKHEI
ABEC ; Meetei_Mayek # Mc MEETEI MAYEK LUM IYEK
ABED ; Meetei_Mayek # Mn MEETEI MAYEK APUN IYEK
ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
# Total code points: 56
# ================================================
10840..10855 ; Imperial_Aramaic # Lo [22] IMPERIAL ARAMAIC LETTER ALEPH..IMPERIAL ARAMAIC LETTER TAW
10857 ; Imperial_Aramaic # Po IMPERIAL ARAMAIC SECTION SIGN
10858..1085F ; Imperial_Aramaic # No [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND
# Total code points: 31
# ================================================
10A60..10A7C ; Old_South_Arabian # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH
10A7D..10A7E ; Old_South_Arabian # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY
10A7F ; Old_South_Arabian # Po OLD SOUTH ARABIAN NUMERIC INDICATOR
# Total code points: 32
# ================================================
10B40..10B55 ; Inscriptional_Parthian # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW
10B58..10B5F ; Inscriptional_Parthian # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND
# Total code points: 30
# ================================================
10B60..10B72 ; Inscriptional_Pahlavi # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW
10B78..10B7F ; Inscriptional_Pahlavi # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
# Total code points: 27
# ================================================
10C00..10C48 ; Old_Turkic # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH
# Total code points: 73
# ================================================
11080..11081 ; Kaithi # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA
11082 ; Kaithi # Mc KAITHI SIGN VISARGA
11083..110AF ; Kaithi # Lo [45] KAITHI LETTER A..KAITHI LETTER HA
110B0..110B2 ; Kaithi # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II
110B3..110B6 ; Kaithi # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
110B7..110B8 ; Kaithi # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU
110B9..110BA ; Kaithi # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
110BB..110BC ; Kaithi # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN
110BD ; Kaithi # Cf KAITHI NUMBER SIGN
110BE..110C1 ; Kaithi # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
# Total code points: 66
# EOF
......@@ -32,7 +32,7 @@
* 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
* 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
* 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
* 6350801 6676425 6878475 6919132 6931676
* 6350801 6676425 6878475 6919132 6931676 6948903
*/
import java.util.regex.*;
......@@ -135,7 +135,7 @@ public class RegExTest {
surrogatesInClassTest();
namedGroupCaptureTest();
nonBmpClassComplementTest();
unicodePropertiesTest();
if (failure)
throw new RuntimeException("Failure in the RE handling.");
else
......@@ -3515,7 +3515,7 @@ public class RegExTest {
report("NamedGroupCapture");
}
// This is for bug 6919132
// This is for bug 6969132
private static void nonBmpClassComplementTest() throws Exception {
Pattern p = Pattern.compile("\\P{Lu}");
Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
......@@ -3539,4 +3539,79 @@ public class RegExTest {
report("NonBmpClassComplement");
}
private static void unicodePropertiesTest() throws Exception {
// different forms
if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
!Pattern.compile("\\p{Lu}").matcher("A").matches() ||
!Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
!Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
!Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
!Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
!Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
!Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
!Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
!Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
failCount++;
Matcher common = Pattern.compile("\\p{script=Common}").matcher("");
Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
Matcher lastSM = common;
Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher("");
Matcher greek = Pattern.compile("\\p{InGreek}").matcher("");
Matcher lastBM = latin;
Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
if (cp >= 0x30000 && (cp & 0x70) == 0){
continue; // only pick couple code points, they are the same
}
// Unicode Script
Character.UnicodeScript script = Character.UnicodeScript.of(cp);
Matcher m;
String str = new String(Character.toChars(cp));
if (script == lastScript) {
m = lastSM;
m.reset(str);
} else {
m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
}
if (!m.matches()) {
failCount++;
}
Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
other.reset(str);
if (other.matches()) {
failCount++;
}
lastSM = m;
lastScript = script;
// Unicode Block
Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
if (block == null) {
//System.out.printf("Not a Block: cp=%x%n", cp);
continue;
}
if (block == lastBlock) {
m = lastBM;
m.reset(str);
} else {
m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
}
if (!m.matches()) {
failCount++;
}
other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
other.reset(str);
if (other.matches()) {
failCount++;
}
lastBM = m;
lastBlock = block;
}
report("unicodeProperties");
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册