提交 aed29e12 编写于 作者: P peytoia

6959267: Support Unicode 6.0.0

Reviewed-by: okutsu
上级 f90d6618
/* /*
* Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -111,7 +111,7 @@ class CharacterData00 extends CharacterData { ...@@ -111,7 +111,7 @@ class CharacterData00 extends CharacterData {
if ((val & $$maskCaseOffset) == $$maskCaseOffset) { if ((val & $$maskCaseOffset) == $$maskCaseOffset) {
switch(ch) { switch(ch) {
// map the offset overflow chars // map the offset overflow chars
case 0x0130 : mapChar = 0x0069; break; case 0x0130 : mapChar = 0x0069; break;
case 0x2126 : mapChar = 0x03C9; break; case 0x2126 : mapChar = 0x03C9; break;
case 0x212A : mapChar = 0x006B; break; case 0x212A : mapChar = 0x006B; break;
case 0x212B : mapChar = 0x00E5; break; case 0x212B : mapChar = 0x00E5; break;
...@@ -192,7 +192,11 @@ class CharacterData00 extends CharacterData { ...@@ -192,7 +192,11 @@ class CharacterData00 extends CharacterData {
case 0x2C6D : mapChar = 0x0251; break; case 0x2C6D : mapChar = 0x0251; break;
case 0x2C6E : mapChar = 0x0271; break; case 0x2C6E : mapChar = 0x0271; break;
case 0x2C6F : mapChar = 0x0250; break; case 0x2C6F : mapChar = 0x0250; break;
case 0x2C70 : mapChar = 0x0252; break;
case 0x2C7E : mapChar = 0x023F; break;
case 0x2C7F : mapChar = 0x0240; break;
case 0xA77D : mapChar = 0x1D79; break; case 0xA77D : mapChar = 0x1D79; break;
case 0xA78D : mapChar = 0x0265; break;
// default mapChar is already set, so no // default mapChar is already set, so no
// need to redo it here. // need to redo it here.
// default : mapChar = ch; // default : mapChar = ch;
...@@ -246,8 +250,12 @@ class CharacterData00 extends CharacterData { ...@@ -246,8 +250,12 @@ class CharacterData00 extends CharacterData {
case 0x1FC3 : mapChar = 0x1FCC; break; case 0x1FC3 : mapChar = 0x1FCC; break;
case 0x1FF3 : mapChar = 0x1FFC; break; case 0x1FF3 : mapChar = 0x1FFC; break;
case 0x023F : mapChar = 0x2C7E; break;
case 0x0240 : mapChar = 0x2C7F; break;
case 0x0250 : mapChar = 0x2C6F; break; case 0x0250 : mapChar = 0x2C6F; break;
case 0x0251 : mapChar = 0x2C6D; break; case 0x0251 : mapChar = 0x2C6D; break;
case 0x0252 : mapChar = 0x2C70; break;
case 0x0265 : mapChar = 0xA78D; break;
case 0x026B : mapChar = 0x2C62; break; case 0x026B : mapChar = 0x2C62; break;
case 0x0271 : mapChar = 0x2C6E; break; case 0x0271 : mapChar = 0x2C6E; break;
case 0x027D : mapChar = 0x2C64; break; case 0x027D : mapChar = 0x2C64; break;
...@@ -487,8 +495,12 @@ class CharacterData00 extends CharacterData { ...@@ -487,8 +495,12 @@ class CharacterData00 extends CharacterData {
case 0x017F : mapChar = 0x0053; break; case 0x017F : mapChar = 0x0053; break;
case 0x1FBE : mapChar = 0x0399; break; case 0x1FBE : mapChar = 0x0399; break;
case 0x023F : mapChar = 0x2C7E; break;
case 0x0240 : mapChar = 0x2C7F; break;
case 0x0250 : mapChar = 0x2C6F; break; case 0x0250 : mapChar = 0x2C6F; break;
case 0x0251 : mapChar = 0x2C6D; break; case 0x0251 : mapChar = 0x2C6D; break;
case 0x0252 : mapChar = 0x2C70; break;
case 0x0265 : mapChar = 0xA78D; break;
case 0x026B : mapChar = 0x2C62; break; case 0x026B : mapChar = 0x2C62; break;
case 0x0271 : mapChar = 0x2C6E; break; case 0x0271 : mapChar = 0x2C6E; break;
case 0x027D : mapChar = 0x2C64; break; case 0x027D : mapChar = 0x2C64; break;
......
/* /*
* Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -251,9 +251,40 @@ class CharacterData01 extends CharacterData { ...@@ -251,9 +251,40 @@ class CharacterData01 extends CharacterData {
case 0x010341: retval = 90; break; // GOTHIC LETTER NINETY case 0x010341: retval = 90; break; // GOTHIC LETTER NINETY
case 0x01034A: retval = 900; break; // GOTHIC LETTER NINE HUNDRED case 0x01034A: retval = 900; break; // GOTHIC LETTER NINE HUNDRED
case 0x0103D5: retval = 100; break; // OLD PERSIAN NUMBER HUNDRED case 0x0103D5: retval = 100; break; // OLD PERSIAN NUMBER HUNDRED
case 0x01085D: retval = 100; break; // IMPERIAL ARAMAIC NUMBER ONE HUNDRED
case 0x01085E: retval = 1000; break; // IMPERIAL ARAMAIC NUMBER ONE THOUSAND
case 0x01085F: retval = 10000; break; // IMPERIAL ARAMAIC NUMBER TEN THOUSAND
case 0x010919: retval = 100; break; // PHOENICIAN NUMBER ONE HUNDRED case 0x010919: retval = 100; break; // PHOENICIAN NUMBER ONE HUNDRED
case 0x010A46: retval = 100; break; // KHAROSHTHI NUMBER ONE HUNDRED case 0x010A46: retval = 100; break; // KHAROSHTHI NUMBER ONE HUNDRED
case 0x010A47: retval = 1000; break; // KHAROSHTHI NUMBER ONE THOUSAND case 0x010A47: retval = 1000; break; // KHAROSHTHI NUMBER ONE THOUSAND
case 0x010A7E: retval = 50; break; // OLD SOUTH ARABIAN NUMBER FIFTY
case 0x010B5E: retval = 100; break; // INSCRIPTIONAL PARTHIAN NUMBER ONE HUNDRED
case 0x010B5F: retval = 1000; break; // INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND
case 0x010B7E: retval = 100; break; // INSCRIPTIONAL PAHLAVI NUMBER ONE HUNDRED
case 0x010B7F: retval = 1000; break; // INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
case 0x010E6C: retval = 40; break; // RUMI NUMBER FORTY
case 0x010E6D: retval = 50; break; // RUMI NUMBER FIFTY
case 0x010E6E: retval = 60; break; // RUMI NUMBER SIXTY
case 0x010E6F: retval = 70; break; // RUMI NUMBER SEVENTY
case 0x010E70: retval = 80; break; // RUMI NUMBER EIGHTY
case 0x010E71: retval = 90; break; // RUMI NUMBER NINETY
case 0x010E72: retval = 100; break; // RUMI NUMBER ONE HUNDRED
case 0x010E73: retval = 200; break; // RUMI NUMBER TWO HUNDRED
case 0x010E74: retval = 300; break; // RUMI NUMBER THREE HUNDRED
case 0x010E75: retval = 400; break; // RUMI NUMBER FOUR HUNDRED
case 0x010E76: retval = 500; break; // RUMI NUMBER FIVE HUNDRED
case 0x010E77: retval = 600; break; // RUMI NUMBER SIX HUNDRED
case 0x010E78: retval = 700; break; // RUMI NUMBER SEVEN HUNDRED
case 0x010E79: retval = 800; break; // RUMI NUMBER EIGHT HUNDRED
case 0x010E7A: retval = 900; break; // RUMI NUMBER NINE HUNDRED
case 0x01105E: retval = 40; break; // BRAHMI NUMBER FORTY
case 0x01105F: retval = 50; break; // BRAHMI NUMBER FIFTY
case 0x011060: retval = 60; break; // BRAHMI NUMBER SIXTY
case 0x011061: retval = 70; break; // BRAHMI NUMBER SEVENTY
case 0x011062: retval = 80; break; // BRAHMI NUMBER EIGHTY
case 0x011063: retval = 90; break; // BRAHMI NUMBER NINETY
case 0x011064: retval = 100; break; // BRAHMI NUMBER ONE HUNDRED
case 0x011065: retval = 1000; break; // BRAHMI NUMBER ONE THOUSAND
case 0x01D36C: retval = 40; break; // COUNTING ROD TENS DIGIT FOUR case 0x01D36C: retval = 40; break; // COUNTING ROD TENS DIGIT FOUR
case 0x01D36D: retval = 50; break; // COUNTING ROD TENS DIGIT FIVE case 0x01D36D: retval = 50; break; // COUNTING ROD TENS DIGIT FIVE
case 0x01D36E: retval = 60; break; // COUNTING ROD TENS DIGIT SIX case 0x01D36E: retval = 60; break; // COUNTING ROD TENS DIGIT SIX
......
# Scripts-5.2.0.txt # Scripts-6.0.0.txt
# Date: 2009-08-22, 04:58:43 GMT [MD] # Date: 2010-08-19, 00:48:47 GMT [MD]
# #
# Unicode Character Database # Unicode Character Database
# Copyright (c) 1991-2009 Unicode, Inc. # Copyright (c) 1991-2010 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html # For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/ # For documentation, see http://www.unicode.org/reports/tr44/
...@@ -73,7 +73,7 @@ ...@@ -73,7 +73,7 @@
02C2..02C5 ; Common # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD 02C2..02C5 ; Common # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD
02C6..02D1 ; Common # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02C6..02D1 ; Common # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
02D2..02DF ; Common # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT 02D2..02DF ; Common # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT
02E5..02EB ; Common # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK 02E5..02E9 ; Common # Sk [5] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER EXTRA-LOW TONE BAR
02EC ; Common # Lm MODIFIER LETTER VOICING 02EC ; Common # Lm MODIFIER LETTER VOICING
02ED ; Common # Sk MODIFIER LETTER UNASPIRATED 02ED ; Common # Sk MODIFIER LETTER UNASPIRATED
02EE ; Common # Lm MODIFIER LETTER DOUBLE APOSTROPHE 02EE ; Common # Lm MODIFIER LETTER DOUBLE APOSTROPHE
...@@ -83,7 +83,6 @@ ...@@ -83,7 +83,6 @@
0385 ; Common # Sk GREEK DIALYTIKA TONOS 0385 ; Common # Sk GREEK DIALYTIKA TONOS
0387 ; Common # Po GREEK ANO TELEIA 0387 ; Common # Po GREEK ANO TELEIA
0589 ; Common # Po ARMENIAN FULL STOP 0589 ; Common # Po ARMENIAN FULL STOP
0600..0603 ; Common # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
060C ; Common # Po ARABIC COMMA 060C ; Common # Po ARABIC COMMA
061B ; Common # Po ARABIC SEMICOLON 061B ; Common # Po ARABIC SEMICOLON
061F ; Common # Po ARABIC QUESTION MARK 061F ; Common # Po ARABIC QUESTION MARK
...@@ -92,7 +91,6 @@ ...@@ -92,7 +91,6 @@
06DD ; Common # Cf ARABIC END OF AYAH 06DD ; Common # Cf ARABIC END OF AYAH
0964..0965 ; Common # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA 0964..0965 ; Common # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
0970 ; Common # Po DEVANAGARI ABBREVIATION SIGN 0970 ; Common # Po DEVANAGARI ABBREVIATION SIGN
0CF1..0CF2 ; Common # So [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
0E3F ; Common # Sc THAI CURRENCY SYMBOL BAHT 0E3F ; Common # Sc THAI CURRENCY SYMBOL BAHT
0FD5..0FD8 ; Common # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS 0FD5..0FD8 ; Common # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS
10FB ; Common # Po GEORGIAN PARAGRAPH SEPARATOR 10FB ; Common # Po GEORGIAN PARAGRAPH SEPARATOR
...@@ -148,7 +146,7 @@ ...@@ -148,7 +146,7 @@
208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS 208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS
208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS 208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS
20A0..20B8 ; Common # Sc [25] EURO-CURRENCY SIGN..TENGE SIGN 20A0..20B9 ; Common # Sc [26] EURO-CURRENCY SIGN..INDIAN RUPEE SIGN
2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT 2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
2102 ; Common # L& DOUBLE-STRUCK CAPITAL C 2102 ; Common # L& DOUBLE-STRUCK CAPITAL C
2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA 2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA
...@@ -157,7 +155,8 @@ ...@@ -157,7 +155,8 @@
210A..2113 ; Common # L& [10] SCRIPT SMALL G..SCRIPT SMALL L 210A..2113 ; Common # L& [10] SCRIPT SMALL G..SCRIPT SMALL L
2114 ; Common # So L B BAR SYMBOL 2114 ; Common # So L B BAR SYMBOL
2115 ; Common # L& DOUBLE-STRUCK CAPITAL N 2115 ; Common # L& DOUBLE-STRUCK CAPITAL N
2116..2118 ; Common # So [3] NUMERO SIGN..SCRIPT CAPITAL P 2116..2117 ; Common # So [2] NUMERO SIGN..SOUND RECORDING COPYRIGHT
2118 ; Common # Sm SCRIPT CAPITAL P
2119..211D ; Common # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R 2119..211D ; Common # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
211E..2123 ; Common # So [6] PRESCRIPTION TAKE..VERSICLE 211E..2123 ; Common # So [6] PRESCRIPTION TAKE..VERSICLE
2124 ; Common # L& DOUBLE-STRUCK CAPITAL Z 2124 ; Common # L& DOUBLE-STRUCK CAPITAL Z
...@@ -213,7 +212,7 @@ ...@@ -213,7 +212,7 @@
239B..23B3 ; Common # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM 239B..23B3 ; Common # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
23B4..23DB ; Common # So [40] TOP SQUARE BRACKET..FUSE 23B4..23DB ; Common # So [40] TOP SQUARE BRACKET..FUSE
23DC..23E1 ; Common # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET 23DC..23E1 ; Common # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
23E2..23E8 ; Common # So [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL 23E2..23F3 ; Common # So [18] WHITE TRAPEZIUM..HOURGLASS WITH FLOWING SAND
2400..2426 ; Common # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO 2400..2426 ; Common # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
2440..244A ; Common # So [11] OCR HOOK..OCR DOUBLE BACKSLASH 2440..244A ; Common # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
2460..249B ; Common # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP 2460..249B ; Common # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
...@@ -227,18 +226,8 @@ ...@@ -227,18 +226,8 @@
25F8..25FF ; Common # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE 25F8..25FF ; Common # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE
2600..266E ; Common # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN 2600..266E ; Common # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN
266F ; Common # Sm MUSIC SHARP SIGN 266F ; Common # Sm MUSIC SHARP SIGN
2670..26CD ; Common # So [94] WEST SYRIAC CROSS..DISABLED CAR 2670..26FF ; Common # So [144] WEST SYRIAC CROSS..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
26CF..26E1 ; Common # So [19] PICK..RESTRICTED LEFT ENTRY-2 2701..2767 ; Common # So [103] UPPER BLADE SCISSORS..ROTATED FLORAL HEART BULLET
26E3 ; Common # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
26E8..26FF ; Common # So [24] BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
2701..2704 ; Common # So [4] UPPER BLADE SCISSORS..WHITE SCISSORS
2706..2709 ; Common # So [4] TELEPHONE LOCATION SIGN..ENVELOPE
270C..2727 ; Common # So [28] VICTORY HAND..WHITE FOUR POINTED STAR
2729..274B ; Common # So [35] STRESS OUTLINED WHITE STAR..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
274D ; Common # So SHADOWED WHITE CIRCLE
274F..2752 ; Common # So [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE
2756..275E ; Common # So [9] BLACK DIAMOND MINUS WHITE X..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
2761..2767 ; Common # So [7] CURVED STEM PARAGRAPH SIGN ORNAMENT..ROTATED FLORAL HEART BULLET
2768 ; Common # Ps MEDIUM LEFT PARENTHESIS ORNAMENT 2768 ; Common # Ps MEDIUM LEFT PARENTHESIS ORNAMENT
2769 ; Common # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT 2769 ; Common # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT
276A ; Common # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT 276A ; Common # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
...@@ -254,15 +243,13 @@ ...@@ -254,15 +243,13 @@
2774 ; Common # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT 2774 ; Common # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT
2775 ; Common # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT 2775 ; Common # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT
2776..2793 ; Common # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN 2776..2793 ; Common # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
2794 ; Common # So HEAVY WIDE-HEADED RIGHTWARDS ARROW 2794..27BF ; Common # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP
2798..27AF ; Common # So [24] HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW
27B1..27BE ; Common # So [14] NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW
27C0..27C4 ; Common # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET 27C0..27C4 ; Common # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET
27C5 ; Common # Ps LEFT S-SHAPED BAG DELIMITER 27C5 ; Common # Ps LEFT S-SHAPED BAG DELIMITER
27C6 ; Common # Pe RIGHT S-SHAPED BAG DELIMITER 27C6 ; Common # Pe RIGHT S-SHAPED BAG DELIMITER
27C7..27CA ; Common # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE 27C7..27CA ; Common # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE
27CC ; Common # Sm LONG DIVISION 27CC ; Common # Sm LONG DIVISION
27D0..27E5 ; Common # Sm [22] WHITE DIAMOND WITH CENTRED DOT..WHITE SQUARE WITH RIGHTWARDS TICK 27CE..27E5 ; Common # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK
27E6 ; Common # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET 27E6 ; Common # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET
27E7 ; Common # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET 27E7 ; Common # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET
27E8 ; Common # Ps MATHEMATICAL LEFT ANGLE BRACKET 27E8 ; Common # Ps MATHEMATICAL LEFT ANGLE BRACKET
...@@ -555,27 +542,51 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR ...@@ -555,27 +542,51 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
1D7CE..1D7FF ; Common # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1D7CE..1D7FF ; Common # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
1F000..1F02B ; Common # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK 1F000..1F02B ; Common # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK
1F030..1F093 ; Common # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 1F030..1F093 ; Common # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
1F0A0..1F0AE ; Common # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES
1F0B1..1F0BE ; Common # So [14] PLAYING CARD ACE OF HEARTS..PLAYING CARD KING OF HEARTS
1F0C1..1F0CF ; Common # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER
1F0D1..1F0DF ; Common # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER
1F100..1F10A ; Common # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA 1F100..1F10A ; Common # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA
1F110..1F12E ; Common # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ 1F110..1F12E ; Common # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
1F131 ; Common # So SQUARED LATIN CAPITAL LETTER B 1F130..1F169 ; Common # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
1F13D ; Common # So SQUARED LATIN CAPITAL LETTER N 1F170..1F19A ; Common # So [43] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS
1F13F ; Common # So SQUARED LATIN CAPITAL LETTER P 1F1E6..1F1FF ; Common # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
1F142 ; Common # So SQUARED LATIN CAPITAL LETTER S 1F201..1F202 ; Common # So [2] SQUARED KATAKANA KOKO..SQUARED KATAKANA SA
1F146 ; Common # So SQUARED LATIN CAPITAL LETTER W 1F210..1F23A ; Common # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6
1F14A..1F14E ; Common # So [5] SQUARED HV..SQUARED PPV
1F157 ; Common # So NEGATIVE CIRCLED LATIN CAPITAL LETTER H
1F15F ; Common # So NEGATIVE CIRCLED LATIN CAPITAL LETTER P
1F179 ; Common # So NEGATIVE SQUARED LATIN CAPITAL LETTER J
1F17B..1F17C ; Common # So [2] NEGATIVE SQUARED LATIN CAPITAL LETTER L..NEGATIVE SQUARED LATIN CAPITAL LETTER M
1F17F ; Common # So NEGATIVE SQUARED LATIN CAPITAL LETTER P
1F18A..1F18D ; Common # So [4] CROSSED NEGATIVE SQUARED LATIN CAPITAL LETTER P..NEGATIVE SQUARED SA
1F190 ; Common # So SQUARE DJ
1F210..1F231 ; Common # So [34] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-6253
1F240..1F248 ; Common # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 1F240..1F248 ; Common # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
1F250..1F251 ; Common # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
1F300..1F320 ; Common # So [33] CYCLONE..SHOOTING STAR
1F330..1F335 ; Common # So [6] CHESTNUT..CACTUS
1F337..1F37C ; Common # So [70] TULIP..BABY BOTTLE
1F380..1F393 ; Common # So [20] RIBBON..GRADUATION CAP
1F3A0..1F3C4 ; Common # So [37] CAROUSEL HORSE..SURFER
1F3C6..1F3CA ; Common # So [5] TROPHY..SWIMMER
1F3E0..1F3F0 ; Common # So [17] HOUSE BUILDING..EUROPEAN CASTLE
1F400..1F43E ; Common # So [63] RAT..PAW PRINTS
1F440 ; Common # So EYES
1F442..1F4F7 ; Common # So [182] EAR..CAMERA
1F4F9..1F4FC ; Common # So [4] VIDEO CAMERA..VIDEOCASSETTE
1F500..1F53D ; Common # So [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE
1F550..1F567 ; Common # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY
1F5FB..1F5FF ; Common # So [5] MOUNT FUJI..MOYAI
1F601..1F610 ; Common # So [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE
1F612..1F614 ; Common # So [3] UNAMUSED FACE..PENSIVE FACE
1F616 ; Common # So CONFOUNDED FACE
1F618 ; Common # So FACE THROWING A KISS
1F61A ; Common # So KISSING FACE WITH CLOSED EYES
1F61C..1F61E ; Common # So [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE
1F620..1F625 ; Common # So [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE
1F628..1F62B ; Common # So [4] FEARFUL FACE..TIRED FACE
1F62D ; Common # So LOUDLY CRYING FACE
1F630..1F633 ; Common # So [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE
1F635..1F640 ; Common # So [12] DIZZY FACE..WEARY CAT FACE
1F645..1F64F ; Common # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS
1F680..1F6C5 ; Common # So [70] ROCKET..LEFT LUGGAGE
1F700..1F773 ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
E0001 ; Common # Cf LANGUAGE TAG E0001 ; Common # Cf LANGUAGE TAG
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
# Total code points: 5395 # Total code points: 6379
# ================================================ # ================================================
...@@ -603,7 +614,7 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG ...@@ -603,7 +614,7 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
1E00..1EFF ; Latin # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP 1E00..1EFF ; Latin # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP
2071 ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER I 2071 ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER I
207F ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER N 207F ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER N
2090..2094 ; Latin # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA 2090..209C ; Latin # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T
212A..212B ; Latin # L& [2] KELVIN SIGN..ANGSTROM SIGN 212A..212B ; Latin # L& [2] KELVIN SIGN..ANGSTROM SIGN
2132 ; Latin # L& TURNED CAPITAL F 2132 ; Latin # L& TURNED CAPITAL F
214E ; Latin # L& TURNED SMALL F 214E ; Latin # L& TURNED SMALL F
...@@ -616,13 +627,16 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG ...@@ -616,13 +627,16 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
A722..A76F ; Latin # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON A722..A76F ; Latin # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON
A770 ; Latin # Lm MODIFIER LETTER US A770 ; Latin # Lm MODIFIER LETTER US
A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
A78B..A78C ; Latin # L& [2] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER SALTILLO A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
A790..A791 ; Latin # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER
A7A0..A7A9 ; Latin # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE
A7FA ; Latin # L& LATIN LETTER SMALL CAPITAL TURNED M
A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M
FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
# Total code points: 1244 # Total code points: 1267
# ================================================ # ================================================
...@@ -687,12 +701,11 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN ...@@ -687,12 +701,11 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
0483..0484 ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PALATALIZATION 0483..0484 ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PALATALIZATION
0487 ; Cyrillic # Mn COMBINING CYRILLIC POKRYTIE 0487 ; Cyrillic # Mn COMBINING CYRILLIC POKRYTIE
0488..0489 ; Cyrillic # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN 0488..0489 ; Cyrillic # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
048A..0525 ; Cyrillic # L& [156] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER PE WITH DESCENDER 048A..0527 ; Cyrillic # L& [158] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER SHHA WITH DESCENDER
1D2B ; Cyrillic # L& CYRILLIC LETTER SMALL CAPITAL EL 1D2B ; Cyrillic # L& CYRILLIC LETTER SMALL CAPITAL EL
1D78 ; Cyrillic # Lm MODIFIER LETTER CYRILLIC EN 1D78 ; Cyrillic # Lm MODIFIER LETTER CYRILLIC EN
2DE0..2DFF ; Cyrillic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS 2DE0..2DFF ; Cyrillic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
A640..A65F ; Cyrillic # L& [32] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER YN A640..A66D ; Cyrillic # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
A662..A66D ; Cyrillic # L& [12] CYRILLIC CAPITAL LETTER SOFT DE..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
A66E ; Cyrillic # Lo CYRILLIC LETTER MULTIOCULAR O A66E ; Cyrillic # Lo CYRILLIC LETTER MULTIOCULAR O
A66F ; Cyrillic # Mn COMBINING CYRILLIC VZMET A66F ; Cyrillic # Mn COMBINING CYRILLIC VZMET
A670..A672 ; Cyrillic # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN A670..A672 ; Cyrillic # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
...@@ -702,7 +715,7 @@ A67E ; Cyrillic # Po CYRILLIC KAVYKA ...@@ -702,7 +715,7 @@ A67E ; Cyrillic # Po CYRILLIC KAVYKA
A67F ; Cyrillic # Lm CYRILLIC PAYEROK A67F ; Cyrillic # Lm CYRILLIC PAYEROK
A680..A697 ; Cyrillic # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE A680..A697 ; Cyrillic # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE
# Total code points: 404 # Total code points: 408
# ================================================ # ================================================
...@@ -744,6 +757,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU ...@@ -744,6 +757,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
# ================================================ # ================================================
0600..0603 ; Arabic # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
0606..0608 ; Arabic # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY 0606..0608 ; Arabic # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY
0609..060A ; Arabic # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN 0609..060A ; Arabic # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN
060B ; Arabic # Sc AFGHANI SIGN 060B ; Arabic # Sc AFGHANI SIGN
...@@ -751,7 +765,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU ...@@ -751,7 +765,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA 060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
0610..061A ; Arabic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA 0610..061A ; Arabic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK 061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK
0621..063F ; Arabic # Lo [31] ARABIC LETTER HAMZA..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE 0620..063F ; Arabic # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH 0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
0656..065E ; Arabic # Mn [9] ARABIC SUBSCRIPT ALEF..ARABIC FATHA WITH TWO DOTS 0656..065E ; Arabic # Mn [9] ARABIC SUBSCRIPT ALEF..ARABIC FATHA WITH TWO DOTS
066A..066D ; Arabic # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR 066A..066D ; Arabic # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
...@@ -760,7 +774,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU ...@@ -760,7 +774,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
06D4 ; Arabic # Po ARABIC FULL STOP 06D4 ; Arabic # Po ARABIC FULL STOP
06D5 ; Arabic # Lo ARABIC LETTER AE 06D5 ; Arabic # Lo ARABIC LETTER AE
06D6..06DC ; Arabic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN 06D6..06DC ; Arabic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
06DE ; Arabic # Me ARABIC START OF RUB EL HIZB 06DE ; Arabic # So ARABIC START OF RUB EL HIZB
06DF..06E4 ; Arabic # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA 06DF..06E4 ; Arabic # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
06E5..06E6 ; Arabic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH 06E5..06E6 ; Arabic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH
06E7..06E8 ; Arabic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON 06E7..06E8 ; Arabic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
...@@ -773,6 +787,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU ...@@ -773,6 +787,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V 06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V
0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE 0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
FBB2..FBC1 ; Arabic # Sk [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW
FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
FD50..FD8F ; Arabic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM FD50..FD8F ; Arabic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
FD92..FDC7 ; Arabic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM FD92..FDC7 ; Arabic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
...@@ -782,7 +797,7 @@ FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ...@@ -782,7 +797,7 @@ FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN
FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS 10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
# Total code points: 1030 # Total code points: 1051
# ================================================ # ================================================
...@@ -809,27 +824,29 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA ...@@ -809,27 +824,29 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
0900..0902 ; Devanagari # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0900..0902 ; Devanagari # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
0903 ; Devanagari # Mc DEVANAGARI SIGN VISARGA 0903 ; Devanagari # Mc DEVANAGARI SIGN VISARGA
0904..0939 ; Devanagari # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 0904..0939 ; Devanagari # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
093A ; Devanagari # Mn DEVANAGARI VOWEL SIGN OE
093B ; Devanagari # Mc DEVANAGARI VOWEL SIGN OOE
093C ; Devanagari # Mn DEVANAGARI SIGN NUKTA 093C ; Devanagari # Mn DEVANAGARI SIGN NUKTA
093D ; Devanagari # Lo DEVANAGARI SIGN AVAGRAHA 093D ; Devanagari # Lo DEVANAGARI SIGN AVAGRAHA
093E..0940 ; Devanagari # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II 093E..0940 ; Devanagari # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
0941..0948 ; Devanagari # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI 0941..0948 ; Devanagari # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
0949..094C ; Devanagari # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU 0949..094C ; Devanagari # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
094D ; Devanagari # Mn DEVANAGARI SIGN VIRAMA 094D ; Devanagari # Mn DEVANAGARI SIGN VIRAMA
094E ; Devanagari # Mc DEVANAGARI VOWEL SIGN PRISHTHAMATRA E 094E..094F ; Devanagari # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW
0950 ; Devanagari # Lo DEVANAGARI OM 0950 ; Devanagari # Lo DEVANAGARI OM
0953..0955 ; Devanagari # Mn [3] DEVANAGARI GRAVE ACCENT..DEVANAGARI VOWEL SIGN CANDRA LONG E 0953..0957 ; Devanagari # Mn [5] DEVANAGARI GRAVE ACCENT..DEVANAGARI VOWEL SIGN UUE
0958..0961 ; Devanagari # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL 0958..0961 ; Devanagari # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
0962..0963 ; Devanagari # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL 0962..0963 ; Devanagari # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
0966..096F ; Devanagari # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE 0966..096F ; Devanagari # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
0971 ; Devanagari # Lm DEVANAGARI SIGN HIGH SPACING DOT 0971 ; Devanagari # Lm DEVANAGARI SIGN HIGH SPACING DOT
0972 ; Devanagari # Lo DEVANAGARI LETTER CANDRA A 0972..0977 ; Devanagari # Lo [6] DEVANAGARI LETTER CANDRA A..DEVANAGARI LETTER UUE
0979..097F ; Devanagari # Lo [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA 0979..097F ; Devanagari # Lo [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA
A8E0..A8F1 ; Devanagari # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA A8E0..A8F1 ; Devanagari # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A8F2..A8F7 ; Devanagari # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA A8F2..A8F7 ; Devanagari # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA
A8F8..A8FA ; Devanagari # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET A8F8..A8FA ; Devanagari # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET
A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
# Total code points: 140 # Total code points: 150
# ================================================ # ================================================
...@@ -941,8 +958,9 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE ...@@ -941,8 +958,9 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
0B66..0B6F ; Oriya # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE 0B66..0B6F ; Oriya # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
0B70 ; Oriya # So ORIYA ISSHAR 0B70 ; Oriya # So ORIYA ISSHAR
0B71 ; Oriya # Lo ORIYA LETTER WA 0B71 ; Oriya # Lo ORIYA LETTER WA
0B72..0B77 ; Oriya # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS
# Total code points: 84 # Total code points: 90
# ================================================ # ================================================
...@@ -1018,22 +1036,23 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE ...@@ -1018,22 +1036,23 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
0CE0..0CE1 ; Kannada # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE0..0CE1 ; Kannada # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CE2..0CE3 ; Kannada # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE2..0CE3 ; Kannada # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE 0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
0CF1..0CF2 ; Kannada # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
# Total code points: 84 # Total code points: 86
# ================================================ # ================================================
0D02..0D03 ; Malayalam # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA 0D02..0D03 ; Malayalam # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
0D05..0D0C ; Malayalam # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L 0D05..0D0C ; Malayalam # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
0D0E..0D10 ; Malayalam # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI 0D0E..0D10 ; Malayalam # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
0D12..0D28 ; Malayalam # Lo [23] MALAYALAM LETTER O..MALAYALAM LETTER NA 0D12..0D3A ; Malayalam # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
0D2A..0D39 ; Malayalam # Lo [16] MALAYALAM LETTER PA..MALAYALAM LETTER HA
0D3D ; Malayalam # Lo MALAYALAM SIGN AVAGRAHA 0D3D ; Malayalam # Lo MALAYALAM SIGN AVAGRAHA
0D3E..0D40 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II 0D3E..0D40 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
0D41..0D44 ; Malayalam # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR 0D41..0D44 ; Malayalam # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
0D46..0D48 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI 0D46..0D48 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
0D4A..0D4C ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU 0D4A..0D4C ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
0D4D ; Malayalam # Mn MALAYALAM SIGN VIRAMA 0D4D ; Malayalam # Mn MALAYALAM SIGN VIRAMA
0D4E ; Malayalam # Lo MALAYALAM LETTER DOT REPH
0D57 ; Malayalam # Mc MALAYALAM AU LENGTH MARK 0D57 ; Malayalam # Mc MALAYALAM AU LENGTH MARK
0D60..0D61 ; Malayalam # Lo [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL 0D60..0D61 ; Malayalam # Lo [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL
0D62..0D63 ; Malayalam # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL 0D62..0D63 ; Malayalam # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
...@@ -1042,7 +1061,7 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE ...@@ -1042,7 +1061,7 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
0D79 ; Malayalam # So MALAYALAM DATE MARK 0D79 ; Malayalam # So MALAYALAM DATE MARK
0D7A..0D7F ; Malayalam # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K 0D7A..0D7F ; Malayalam # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
# Total code points: 95 # Total code points: 98
# ================================================ # ================================================
...@@ -1132,16 +1151,17 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE ...@@ -1132,16 +1151,17 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
0F80..0F84 ; Tibetan # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA 0F80..0F84 ; Tibetan # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
0F85 ; Tibetan # Po TIBETAN MARK PALUTA 0F85 ; Tibetan # Po TIBETAN MARK PALUTA
0F86..0F87 ; Tibetan # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS 0F86..0F87 ; Tibetan # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
0F88..0F8B ; Tibetan # Lo [4] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN GRU MED RGYINGS 0F88..0F8C ; Tibetan # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN
0F90..0F97 ; Tibetan # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA 0F8D..0F97 ; Tibetan # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA
0F99..0FBC ; Tibetan # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA 0F99..0FBC ; Tibetan # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
0FBE..0FC5 ; Tibetan # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE 0FBE..0FC5 ; Tibetan # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE
0FC6 ; Tibetan # Mn TIBETAN SYMBOL PADMA GDAN 0FC6 ; Tibetan # Mn TIBETAN SYMBOL PADMA GDAN
0FC7..0FCC ; Tibetan # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL 0FC7..0FCC ; Tibetan # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL
0FCE..0FCF ; Tibetan # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM 0FCE..0FCF ; Tibetan # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM
0FD0..0FD4 ; Tibetan # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA 0FD0..0FD4 ; Tibetan # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA
0FD9..0FDA ; Tibetan # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS
# Total code points: 201 # Total code points: 207
# ================================================ # ================================================
...@@ -1201,6 +1221,7 @@ AA7B ; Myanmar # Mc MYANMAR SIGN PAO KAREN TONE ...@@ -1201,6 +1221,7 @@ AA7B ; Myanmar # Mc MYANMAR SIGN PAO KAREN TONE
# ================================================ # ================================================
1100..11FF ; Hangul # Lo [256] HANGUL CHOSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN 1100..11FF ; Hangul # Lo [256] HANGUL CHOSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN
302E..302F ; Hangul # Mn [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
3131..318E ; Hangul # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE 3131..318E ; Hangul # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
3200..321E ; Hangul # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU 3200..321E ; Hangul # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
3260..327E ; Hangul # So [31] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL IEUNG U 3260..327E ; Hangul # So [31] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL IEUNG U
...@@ -1214,7 +1235,7 @@ FFCA..FFCF ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL ...@@ -1214,7 +1235,7 @@ FFCA..FFCF ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL
FFD2..FFD7 ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU FFD2..FFD7 ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
# Total code points: 11737 # Total code points: 11739
# ================================================ # ================================================
...@@ -1234,7 +1255,7 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L ...@@ -1234,7 +1255,7 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L
12D8..1310 ; Ethiopic # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA 12D8..1310 ; Ethiopic # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA
1312..1315 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE 1312..1315 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
1318..135A ; Ethiopic # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA 1318..135A ; Ethiopic # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
135F ; Ethiopic # Mn ETHIOPIC COMBINING GEMINATION MARK 135D..135F ; Ethiopic # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
1360 ; Ethiopic # So ETHIOPIC SECTION MARK 1360 ; Ethiopic # So ETHIOPIC SECTION MARK
1361..1368 ; Ethiopic # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR 1361..1368 ; Ethiopic # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR
1369..137C ; Ethiopic # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND 1369..137C ; Ethiopic # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND
...@@ -1249,8 +1270,13 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L ...@@ -1249,8 +1270,13 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L
2DC8..2DCE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO 2DC8..2DCE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO
2DD0..2DD6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO 2DD0..2DD6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO
2DD8..2DDE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO 2DD8..2DDE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
AB01..AB06 ; Ethiopic # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO
AB09..AB0E ; Ethiopic # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO
AB11..AB16 ; Ethiopic # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO
AB20..AB26 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO
AB28..AB2E ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO
# Total code points: 461 # Total code points: 495
# ================================================ # ================================================
...@@ -1329,9 +1355,10 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L ...@@ -1329,9 +1355,10 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L
3041..3096 ; Hiragana # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE 3041..3096 ; Hiragana # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
309D..309E ; Hiragana # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK 309D..309E ; Hiragana # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
309F ; Hiragana # Lo HIRAGANA DIGRAPH YORI 309F ; Hiragana # Lo HIRAGANA DIGRAPH YORI
1B001 ; Hiragana # Lo HIRAGANA LETTER ARCHAIC YE
1F200 ; Hiragana # So SQUARE HIRAGANA HOKA 1F200 ; Hiragana # So SQUARE HIRAGANA HOKA
# Total code points: 90 # Total code points: 91
# ================================================ # ================================================
...@@ -1343,15 +1370,17 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L ...@@ -1343,15 +1370,17 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L
3300..3357 ; Katakana # So [88] SQUARE APAATO..SQUARE WATTO 3300..3357 ; Katakana # So [88] SQUARE APAATO..SQUARE WATTO
FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
1B000 ; Katakana # Lo KATAKANA LETTER ARCHAIC E
# Total code points: 299 # Total code points: 300
# ================================================ # ================================================
02EA..02EB ; Bopomofo # Sk [2] MODIFIER LETTER YIN DEPARTING TONE MARK..MODIFIER LETTER YANG DEPARTING TONE MARK
3105..312D ; Bopomofo # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH 3105..312D ; Bopomofo # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH
31A0..31B7 ; Bopomofo # Lo [24] BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H 31A0..31BA ; Bopomofo # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY
# Total code points: 65 # Total code points: 70
# ================================================ # ================================================
...@@ -1370,9 +1399,10 @@ FA30..FA6D ; Han # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILI ...@@ -1370,9 +1399,10 @@ FA30..FA6D ; Han # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILI
FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
# Total code points: 75738 # Total code points: 75960
# ================================================ # ================================================
...@@ -1410,6 +1440,7 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE ...@@ -1410,6 +1440,7 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
0300..036F ; Inherited # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X 0300..036F ; Inherited # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
0485..0486 ; Inherited # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA 0485..0486 ; Inherited # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA
064B..0655 ; Inherited # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW 064B..0655 ; Inherited # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW
065F ; Inherited # Mn ARABIC WAVY HAMZA BELOW
0670 ; Inherited # Mn ARABIC LETTER SUPERSCRIPT ALEF 0670 ; Inherited # Mn ARABIC LETTER SUPERSCRIPT ALEF
0951..0952 ; Inherited # Mn [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA 0951..0952 ; Inherited # Mn [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA
1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA 1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
...@@ -1417,14 +1448,14 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE ...@@ -1417,14 +1448,14 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
1CED ; Inherited # Mn VEDIC SIGN TIRYAK 1CED ; Inherited # Mn VEDIC SIGN TIRYAK
1DC0..1DE6 ; Inherited # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z 1DC0..1DE6 ; Inherited # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
1DFD..1DFF ; Inherited # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 1DFC..1DFF ; Inherited # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER 200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
20D0..20DC ; Inherited # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20D0..20DC ; Inherited # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20DD..20E0 ; Inherited # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH 20DD..20E0 ; Inherited # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
20E1 ; Inherited # Mn COMBINING LEFT RIGHT ARROW ABOVE 20E1 ; Inherited # Mn COMBINING LEFT RIGHT ARROW ABOVE
20E2..20E4 ; Inherited # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE 20E2..20E4 ; Inherited # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
20E5..20F0 ; Inherited # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE 20E5..20F0 ; Inherited # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
302A..302F ; Inherited # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK 302A..302D ; Inherited # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
3099..309A ; Inherited # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 3099..309A ; Inherited # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
FE00..FE0F ; Inherited # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 FE00..FE0F ; Inherited # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
FE20..FE26 ; Inherited # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON FE20..FE26 ; Inherited # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
...@@ -1568,8 +1599,9 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2 ...@@ -1568,8 +1599,9 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
19B0..19C0 ; New_Tai_Lue # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY 19B0..19C0 ; New_Tai_Lue # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY
19C1..19C7 ; New_Tai_Lue # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B 19C1..19C7 ; New_Tai_Lue # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B
19C8..19C9 ; New_Tai_Lue # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2 19C8..19C9 ; New_Tai_Lue # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
19D0..19DA ; New_Tai_Lue # Nd [11] NEW TAI LUE DIGIT ZERO..NEW TAI LUE THAM DIGIT ONE 19D0..19D9 ; New_Tai_Lue # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
19DE..19DF ; New_Tai_Lue # Po [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV 19DA ; New_Tai_Lue # No NEW TAI LUE THAM DIGIT ONE
19DE..19DF ; New_Tai_Lue # So [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV
# Total code points: 83 # Total code points: 83
...@@ -1584,8 +1616,10 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2 ...@@ -1584,8 +1616,10 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
2D30..2D65 ; Tifinagh # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ 2D30..2D65 ; Tifinagh # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
2D6F ; Tifinagh # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D6F ; Tifinagh # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
2D70 ; Tifinagh # Po TIFINAGH SEPARATOR MARK
2D7F ; Tifinagh # Mn TIFINAGH CONSONANT JOINER
# Total code points: 55 # Total code points: 57
# ================================================ # ================================================
...@@ -1882,8 +1916,9 @@ A6A0..A6E5 ; Bamum # Lo [70] BAMUM LETTER A..BAMUM LETTER KI ...@@ -1882,8 +1916,9 @@ A6A0..A6E5 ; Bamum # Lo [70] BAMUM LETTER A..BAMUM LETTER KI
A6E6..A6EF ; Bamum # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM A6E6..A6EF ; Bamum # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM
A6F0..A6F1 ; Bamum # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A6F0..A6F1 ; Bamum # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
A6F2..A6F7 ; Bamum # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK A6F2..A6F7 ; Bamum # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK
16800..16A38 ; Bamum # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
# Total code points: 88 # Total code points: 657
# ================================================ # ================================================
...@@ -1969,4 +2004,40 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI ...@@ -1969,4 +2004,40 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
# Total code points: 66 # Total code points: 66
# ================================================
1BC0..1BE5 ; Batak # Lo [38] BATAK LETTER A..BATAK LETTER U
1BE6 ; Batak # Mn BATAK SIGN TOMPI
1BE7 ; Batak # Mc BATAK VOWEL SIGN E
1BE8..1BE9 ; Batak # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE
1BEA..1BEC ; Batak # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O
1BED ; Batak # Mn BATAK VOWEL SIGN KARO O
1BEE ; Batak # Mc BATAK VOWEL SIGN U
1BEF..1BF1 ; Batak # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H
1BF2..1BF3 ; Batak # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN
1BFC..1BFF ; Batak # Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT
# Total code points: 56
# ================================================
11000 ; Brahmi # Mc BRAHMI SIGN CANDRABINDU
11001 ; Brahmi # Mn BRAHMI SIGN ANUSVARA
11002 ; Brahmi # Mc BRAHMI SIGN VISARGA
11003..11037 ; Brahmi # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA
11038..11046 ; Brahmi # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
11047..1104D ; Brahmi # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS
11052..11065 ; Brahmi # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND
11066..1106F ; Brahmi # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE
# Total code points: 108
# ================================================
0840..0858 ; Mandaic # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
0859..085B ; Mandaic # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
085E ; Mandaic # Po MANDAIC PUNCTUATION
# Total code points: 29
# EOF # EOF
# SpecialCasing-5.1.0.txt # SpecialCasing-6.0.0.txt
# Date: 2008-03-03, 21:58:10 GMT [MD] # Date: 2010-05-18, 00:49:39 GMT [MD]
# #
# Unicode Character Database # Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc. # Copyright (c) 1991-2010 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html # For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html # For documentation, see http://www.unicode.org/reports/tr44/
# #
# Special Casing Properties # Special Casing Properties
# #
...@@ -106,11 +106,11 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH ...@@ -106,11 +106,11 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI 1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI 1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
# IMPORTANT-when capitalizing iota-subscript (0345) # IMPORTANT-when iota-subscript (0345) is uppercased or titlecased,
# It MUST be in normalized form--moved to the end of any sequence of combining marks. # the result will be incorrect unless the iota-subscript is moved to the end
# This is because logically it represents a following base character! # of any sequence of combining marks. Otherwise, the accents will go on the capital iota.
# E.g. <iota_subscript> (<Mn> | <Mc> | <Me>)+ => (<Mn> | <Mc> | <Me>)+ <iota_subscript> # This process can be achieved by first transforming the text to NFC before casing.
# It should never be the first character in a word, so in titlecasing it can be left as is. # E.g. <alpha><iota_subscript><acute> is uppercased to <ALPHA><acute><IOTA>
# The following cases are already in the UnicodeData file, so are only commented here. # The following cases are already in the UnicodeData file, so are only commented here.
......
因为 它太大了无法显示 source diff 。你可以改为 查看blob
/* /*
* Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -262,7 +262,23 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -262,7 +262,23 @@ public final class NumericShaper implements java.io.Serializable {
/** /**
* The Cham range with the Cham digits. * The Cham range with the Cham digits.
*/ */
CHAM ('\uaa50', '\uaa00', '\uaa60'); CHAM ('\uaa50', '\uaa00', '\uaa60'),
/**
* The Tai Tham Hora range with the Tai Tham Hora digits.
*/
TAI_THAM_HORA ('\u1a80', '\u1a20', '\u1ab0'),
/**
* The Tai Tham Tham range with the Tai Tham Tham digits.
*/
TAI_THAM_THAM ('\u1a90', '\u1a20', '\u1ab0'),
/**
* The Javanese range with the Javanese digits.
*/
JAVANESE ('\ua9d0', '\ua980', '\ua9e0'),
/**
* The Meetei Mayek range with the Meetei Mayek digits.
*/
MEETEI_MAYEK ('\uabf0', '\uabc0', '\uac00');
private static int toRangeIndex(Range script) { private static int toRangeIndex(Range script) {
int index = script.ordinal(); int index = script.ordinal();
...@@ -592,10 +608,16 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -592,10 +608,16 @@ public final class NumericShaper implements java.io.Serializable {
0x07a6, 0x07b1, 0x07a6, 0x07b1,
0x07eb, 0x07f4, 0x07eb, 0x07f4,
0x07f6, 0x07fa, 0x07f6, 0x07fa,
0x0901, 0x0903, 0x0816, 0x081a,
0x081b, 0x0824,
0x0825, 0x0828,
0x0829, 0x0830,
0x0859, 0x085e,
0x0900, 0x0903,
0x093a, 0x093b,
0x093c, 0x093d, 0x093c, 0x093d,
0x0941, 0x0949, 0x0941, 0x0949,
0x094d, 0x0950, 0x094d, 0x094e,
0x0951, 0x0958, 0x0951, 0x0958,
0x0962, 0x0964, 0x0962, 0x0964,
0x0981, 0x0982, 0x0981, 0x0982,
...@@ -604,7 +626,7 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -604,7 +626,7 @@ public final class NumericShaper implements java.io.Serializable {
0x09cd, 0x09ce, 0x09cd, 0x09ce,
0x09e2, 0x09e6, 0x09e2, 0x09e6,
0x09f2, 0x09f4, 0x09f2, 0x09f4,
0x0a01, 0x0a03, 0x09fb, 0x0a03,
0x0a3c, 0x0a3e, 0x0a3c, 0x0a3e,
0x0a41, 0x0a59, 0x0a41, 0x0a59,
0x0a70, 0x0a72, 0x0a70, 0x0a72,
...@@ -630,9 +652,8 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -630,9 +652,8 @@ public final class NumericShaper implements java.io.Serializable {
0x0cbc, 0x0cbd, 0x0cbc, 0x0cbd,
0x0ccc, 0x0cd5, 0x0ccc, 0x0cd5,
0x0ce2, 0x0ce6, 0x0ce2, 0x0ce6,
0x0cf1, 0x0d02,
0x0d41, 0x0d46, 0x0d41, 0x0d46,
0x0d4d, 0x0d57, 0x0d4d, 0x0d4e,
0x0d62, 0x0d66, 0x0d62, 0x0d66,
0x0dca, 0x0dcf, 0x0dca, 0x0dcf,
0x0dd2, 0x0dd8, 0x0dd2, 0x0dd8,
...@@ -649,7 +670,7 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -649,7 +670,7 @@ public final class NumericShaper implements java.io.Serializable {
0x0f71, 0x0f7f, 0x0f71, 0x0f7f,
0x0f80, 0x0f85, 0x0f80, 0x0f85,
0x0f86, 0x0f88, 0x0f86, 0x0f88,
0x0f90, 0x0fbe, 0x0f8d, 0x0fbe,
0x0fc6, 0x0fc7, 0x0fc6, 0x0fc7,
0x102d, 0x1031, 0x102d, 0x1031,
0x1032, 0x1038, 0x1032, 0x1038,
...@@ -661,8 +682,10 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -661,8 +682,10 @@ public final class NumericShaper implements java.io.Serializable {
0x1082, 0x1083, 0x1082, 0x1083,
0x1085, 0x1087, 0x1085, 0x1087,
0x108d, 0x108e, 0x108d, 0x108e,
0x135f, 0x1360, 0x109d, 0x109e,
0x135d, 0x1360,
0x1390, 0x13a0, 0x1390, 0x13a0,
0x1400, 0x1401,
0x1680, 0x1681, 0x1680, 0x1681,
0x169b, 0x16a0, 0x169b, 0x16a0,
0x1712, 0x1720, 0x1712, 0x1720,
...@@ -682,6 +705,11 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -682,6 +705,11 @@ public final class NumericShaper implements java.io.Serializable {
0x1939, 0x1946, 0x1939, 0x1946,
0x19de, 0x1a00, 0x19de, 0x1a00,
0x1a17, 0x1a19, 0x1a17, 0x1a19,
0x1a56, 0x1a57,
0x1a58, 0x1a61,
0x1a62, 0x1a63,
0x1a65, 0x1a6d,
0x1a73, 0x1a80,
0x1b00, 0x1b04, 0x1b00, 0x1b04,
0x1b34, 0x1b35, 0x1b34, 0x1b35,
0x1b36, 0x1b3b, 0x1b36, 0x1b3b,
...@@ -691,8 +719,16 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -691,8 +719,16 @@ public final class NumericShaper implements java.io.Serializable {
0x1b80, 0x1b82, 0x1b80, 0x1b82,
0x1ba2, 0x1ba6, 0x1ba2, 0x1ba6,
0x1ba8, 0x1baa, 0x1ba8, 0x1baa,
0x1be6, 0x1be7,
0x1be8, 0x1bea,
0x1bed, 0x1bee,
0x1bef, 0x1bf2,
0x1c2c, 0x1c34, 0x1c2c, 0x1c34,
0x1c36, 0x1c3b, 0x1c36, 0x1c3b,
0x1cd0, 0x1cd3,
0x1cd4, 0x1ce1,
0x1ce2, 0x1ce9,
0x1ced, 0x1cee,
0x1dc0, 0x1e00, 0x1dc0, 0x1e00,
0x1fbd, 0x1fbe, 0x1fbd, 0x1fbe,
0x1fbf, 0x1fc2, 0x1fbf, 0x1fc2,
...@@ -716,14 +752,16 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -716,14 +752,16 @@ public final class NumericShaper implements java.io.Serializable {
0x213a, 0x213c, 0x213a, 0x213c,
0x2140, 0x2145, 0x2140, 0x2145,
0x214a, 0x214e, 0x214a, 0x214e,
0x2153, 0x2160, 0x2150, 0x2160,
0x2190, 0x2336, 0x2189, 0x2336,
0x237b, 0x2395, 0x237b, 0x2395,
0x2396, 0x249c, 0x2396, 0x249c,
0x24ea, 0x26ac, 0x24ea, 0x26ac,
0x26ad, 0x2800, 0x26ad, 0x2800,
0x2900, 0x2c00, 0x2900, 0x2c00,
0x2ce5, 0x2d00, 0x2ce5, 0x2ceb,
0x2cef, 0x2d00,
0x2d7f, 0x2d80,
0x2de0, 0x3005, 0x2de0, 0x3005,
0x3008, 0x3021, 0x3008, 0x3021,
0x302a, 0x3031, 0x302a, 0x3031,
...@@ -742,25 +780,40 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -742,25 +780,40 @@ public final class NumericShaper implements java.io.Serializable {
0x33de, 0x33e0, 0x33de, 0x33e0,
0x33ff, 0x3400, 0x33ff, 0x3400,
0x4dc0, 0x4e00, 0x4dc0, 0x4e00,
0xa490, 0xa500, 0xa490, 0xa4d0,
0xa60d, 0xa610, 0xa60d, 0xa610,
0xa66f, 0xa680, 0xa66f, 0xa680,
0xa6f0, 0xa6f2,
0xa700, 0xa722, 0xa700, 0xa722,
0xa788, 0xa789, 0xa788, 0xa789,
0xa802, 0xa803, 0xa802, 0xa803,
0xa806, 0xa807, 0xa806, 0xa807,
0xa80b, 0xa80c, 0xa80b, 0xa80c,
0xa825, 0xa827, 0xa825, 0xa827,
0xa828, 0xa840, 0xa828, 0xa830,
0xa838, 0xa840,
0xa874, 0xa880, 0xa874, 0xa880,
0xa8c4, 0xa8ce, 0xa8c4, 0xa8ce,
0xa8e0, 0xa8f2,
0xa926, 0xa92e, 0xa926, 0xa92e,
0xa947, 0xa952, 0xa947, 0xa952,
0xa980, 0xa983,
0xa9b3, 0xa9b4,
0xa9b6, 0xa9ba,
0xa9bc, 0xa9bd,
0xaa29, 0xaa2f, 0xaa29, 0xaa2f,
0xaa31, 0xaa33, 0xaa31, 0xaa33,
0xaa35, 0xaa40, 0xaa35, 0xaa40,
0xaa43, 0xaa44, 0xaa43, 0xaa44,
0xaa4c, 0xaa4d, 0xaa4c, 0xaa4d,
0xaab0, 0xaab1,
0xaab2, 0xaab5,
0xaab7, 0xaab9,
0xaabe, 0xaac0,
0xaac1, 0xaac2,
0xabe5, 0xabe6,
0xabe8, 0xabe9,
0xabed, 0xabf0,
0xfb1e, 0xfb1f, 0xfb1e, 0xfb1f,
0xfb29, 0xfb2a, 0xfb29, 0xfb2a,
0xfd3e, 0xfd50, 0xfd3e, 0xfd50,
...@@ -775,12 +828,28 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -775,12 +828,28 @@ public final class NumericShaper implements java.io.Serializable {
0x1091f, 0x10920, 0x1091f, 0x10920,
0x10a01, 0x10a10, 0x10a01, 0x10a10,
0x10a38, 0x10a40, 0x10a38, 0x10a40,
0x10b39, 0x10b40,
0x10e60, 0x11000,
0x11001, 0x11002,
0x11038, 0x11047,
0x11052, 0x11066,
0x11080, 0x11082,
0x110b3, 0x110b7,
0x110b9, 0x110bb,
0x1d167, 0x1d16a, 0x1d167, 0x1d16a,
0x1d173, 0x1d183, 0x1d173, 0x1d183,
0x1d185, 0x1d18c, 0x1d185, 0x1d18c,
0x1d1aa, 0x1d1ae, 0x1d1aa, 0x1d1ae,
0x1d200, 0x1d360, 0x1d200, 0x1d360,
0x1d7ce, 0x20000, 0x1d6db, 0x1d6dc,
0x1d715, 0x1d716,
0x1d74f, 0x1d750,
0x1d789, 0x1d78a,
0x1d7c3, 0x1d7c4,
0x1d7ce, 0x1f110,
0x1f300, 0x1f48c,
0x1f48d, 0x1f524,
0x1f525, 0x20000,
0xe0001, 0xf0000, 0xe0001, 0xf0000,
0x10fffe, 0x10ffff // sentinel 0x10fffe, 0x10ffff // sentinel
}; };
...@@ -947,6 +1016,14 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -947,6 +1016,14 @@ public final class NumericShaper implements java.io.Serializable {
&& rangeSet.contains(Range.ARABIC)) { && rangeSet.contains(Range.ARABIC)) {
rangeSet.remove(Range.ARABIC); rangeSet.remove(Range.ARABIC);
} }
// As with the above case, give precedence to TAI_THAM_THAM if both
// TAI_THAM_HORA and TAI_THAM_THAM are specified.
if (rangeSet.contains(Range.TAI_THAM_THAM)
&& rangeSet.contains(Range.TAI_THAM_HORA)) {
rangeSet.remove(Range.TAI_THAM_HORA);
}
rangeArray = rangeSet.toArray(new Range[rangeSet.size()]); rangeArray = rangeSet.toArray(new Range[rangeSet.size()]);
if (rangeArray.length > BSEARCH_THRESHOLD) { if (rangeArray.length > BSEARCH_THRESHOLD) {
// sort rangeArray for binary search // sort rangeArray for binary search
......
/* /*
* Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -40,7 +40,7 @@ import java.util.Locale; ...@@ -40,7 +40,7 @@ import java.util.Locale;
* a character's category (lowercase letter, digit, etc.) and for converting * a character's category (lowercase letter, digit, etc.) and for converting
* characters from uppercase to lowercase and vice versa. * characters from uppercase to lowercase and vice versa.
* <p> * <p>
* Character information is based on the Unicode Standard, version 5.1.0. * Character information is based on the Unicode Standard, version 6.0.0.
* <p> * <p>
* The methods and data of class <code>Character</code> are defined by * The methods and data of class <code>Character</code> are defined by
* the information in the <i>UnicodeData</i> file that is part of the * the information in the <i>UnicodeData</i> file that is part of the
...@@ -910,7 +910,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -910,7 +910,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
"GENERALPUNCTUATION"); "GENERALPUNCTUATION");
/** /**
* Constant for the "Superscripts and Subscripts" Unicode character block. * Constant for the "Superscripts and Subscripts" Unicode character
* block.
* @since 1.2 * @since 1.2
*/ */
public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
...@@ -928,7 +929,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -928,7 +929,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
"CURRENCYSYMBOLS"); "CURRENCYSYMBOLS");
/** /**
* Constant for the "Combining Diacritical Marks for Symbols" Unicode character block. * Constant for the "Combining Diacritical Marks for Symbols" Unicode
* character block.
* <p> * <p>
* This block was previously known as "Combining Marks for Symbols". * This block was previously known as "Combining Marks for Symbols".
* @since 1.2 * @since 1.2
...@@ -1145,7 +1147,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1145,7 +1147,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
"PRIVATEUSEAREA"); "PRIVATEUSEAREA");
/** /**
* Constant for the "CJK Compatibility Ideographs" Unicode character block. * Constant for the "CJK Compatibility Ideographs" Unicode character
* block.
* @since 1.2 * @since 1.2
*/ */
public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
...@@ -1163,7 +1166,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1163,7 +1166,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
"ALPHABETICPRESENTATIONFORMS"); "ALPHABETICPRESENTATIONFORMS");
/** /**
* Constant for the "Arabic Presentation Forms-A" Unicode character block. * Constant for the "Arabic Presentation Forms-A" Unicode character
* block.
* @since 1.2 * @since 1.2
*/ */
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
...@@ -1208,7 +1212,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1208,7 +1212,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
"ARABICPRESENTATIONFORMS-B"); "ARABICPRESENTATIONFORMS-B");
/** /**
* Constant for the "Halfwidth and Fullwidth Forms" Unicode character block. * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
* block.
* @since 1.2 * @since 1.2
*/ */
public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
...@@ -1386,7 +1391,6 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1386,7 +1391,6 @@ class Character implements java.io.Serializable, Comparable<Character> {
"YI RADICALS", "YI RADICALS",
"YIRADICALS"); "YIRADICALS");
/** /**
* Constant for the "Cyrillic Supplementary" Unicode character block. * Constant for the "Cyrillic Supplementary" Unicode character block.
* @since 1.5 * @since 1.5
...@@ -1488,7 +1492,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1488,7 +1492,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
"SUPPLEMENTALARROWS-B"); "SUPPLEMENTALARROWS-B");
/** /**
* Constant for the "Miscellaneous Mathematical Symbols-B" Unicode character block. * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
* character block.
* @since 1.5 * @since 1.5
*/ */
public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
...@@ -1497,7 +1502,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1497,7 +1502,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
"MISCELLANEOUSMATHEMATICALSYMBOLS-B"); "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
/** /**
* Constant for the "Supplemental Mathematical Operators" Unicode character block. * Constant for the "Supplemental Mathematical Operators" Unicode
* character block.
* @since 1.5 * @since 1.5
*/ */
public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
...@@ -1506,7 +1512,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1506,7 +1512,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
"SUPPLEMENTALMATHEMATICALOPERATORS"); "SUPPLEMENTALMATHEMATICALOPERATORS");
/** /**
* Constant for the "Miscellaneous Symbols and Arrows" Unicode character block. * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
* block.
* @since 1.5 * @since 1.5
*/ */
public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
...@@ -1515,7 +1522,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1515,7 +1522,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
"MISCELLANEOUSSYMBOLSANDARROWS"); "MISCELLANEOUSSYMBOLSANDARROWS");
/** /**
* Constant for the "Katakana Phonetic Extensions" Unicode character block. * Constant for the "Katakana Phonetic Extensions" Unicode character
* block.
* @since 1.5 * @since 1.5
*/ */
public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
...@@ -1649,7 +1657,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1649,7 +1657,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
"TAIXUANJINGSYMBOLS"); "TAIXUANJINGSYMBOLS");
/** /**
* Constant for the "Mathematical Alphanumeric Symbols" Unicode character block. * Constant for the "Mathematical Alphanumeric Symbols" Unicode
* character block.
* @since 1.5 * @since 1.5
*/ */
public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
...@@ -1658,7 +1667,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1658,7 +1667,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
"MATHEMATICALALPHANUMERICSYMBOLS"); "MATHEMATICALALPHANUMERICSYMBOLS");
/** /**
* Constant for the "CJK Unified Ideographs Extension B" Unicode character block. * Constant for the "CJK Unified Ideographs Extension B" Unicode
* character block.
* @since 1.5 * @since 1.5
*/ */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
...@@ -1683,7 +1693,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1683,7 +1693,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
new UnicodeBlock("TAGS"); new UnicodeBlock("TAGS");
/** /**
* Constant for the "Variation Selectors Supplement" Unicode character block. * Constant for the "Variation Selectors Supplement" Unicode character
* block.
* @since 1.5 * @since 1.5
*/ */
public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
...@@ -1692,7 +1703,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1692,7 +1703,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
"VARIATIONSELECTORSSUPPLEMENT"); "VARIATIONSELECTORSSUPPLEMENT");
/** /**
* Constant for the "Supplementary Private Use Area-A" Unicode character block. * Constant for the "Supplementary Private Use Area-A" Unicode character
* block.
* @since 1.5 * @since 1.5
*/ */
public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
...@@ -1701,7 +1713,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1701,7 +1713,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
"SUPPLEMENTARYPRIVATEUSEAREA-A"); "SUPPLEMENTARYPRIVATEUSEAREA-A");
/** /**
* Constant for the "Supplementary Private Use Area-B" Unicode character block. * Constant for the "Supplementary Private Use Area-B" Unicode character
* block.
* @since 1.5 * @since 1.5
*/ */
public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
...@@ -1722,9 +1735,10 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1722,9 +1735,10 @@ class Character implements java.io.Serializable, Comparable<Character> {
"HIGHSURROGATES"); "HIGHSURROGATES");
/** /**
* Constant for the "High Private Use Surrogates" Unicode character block. * Constant for the "High Private Use Surrogates" Unicode character
* This block represents codepoint values in the private use high surrogate * block.
* range: U+DB80 through U+DBFF * This block represents codepoint values in the private use high
* surrogate range: U+DB80 through U+DBFF
* *
* @since 1.5 * @since 1.5
*/ */
...@@ -1761,6 +1775,20 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1761,6 +1775,20 @@ class Character implements java.io.Serializable, Comparable<Character> {
public static final UnicodeBlock NKO = public static final UnicodeBlock NKO =
new UnicodeBlock("NKO"); new UnicodeBlock("NKO");
/**
* Constant for the "Samaritan" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock SAMARITAN =
new UnicodeBlock("SAMARITAN");
/**
* Constant for the "Mandaic" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock MANDAIC =
new UnicodeBlock("MANDAIC");
/** /**
* Constant for the "Ethiopic Supplement" Unicode character block. * Constant for the "Ethiopic Supplement" Unicode character block.
* @since 1.7 * @since 1.7
...@@ -1770,6 +1798,16 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1770,6 +1798,16 @@ class Character implements java.io.Serializable, Comparable<Character> {
"ETHIOPIC SUPPLEMENT", "ETHIOPIC SUPPLEMENT",
"ETHIOPICSUPPLEMENT"); "ETHIOPICSUPPLEMENT");
/**
* Constant for the "Unified Canadian Aboriginal Syllabics Extended"
* Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
"UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
"UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
/** /**
* Constant for the "New Tai Lue" Unicode character block. * Constant for the "New Tai Lue" Unicode character block.
* @since 1.7 * @since 1.7
...@@ -1786,6 +1824,15 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1786,6 +1824,15 @@ class Character implements java.io.Serializable, Comparable<Character> {
public static final UnicodeBlock BUGINESE = public static final UnicodeBlock BUGINESE =
new UnicodeBlock("BUGINESE"); new UnicodeBlock("BUGINESE");
/**
* Constant for the "Tai Tham" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock TAI_THAM =
new UnicodeBlock("TAI_THAM",
"TAI THAM",
"TAITHAM");
/** /**
* Constant for the "Balinese" Unicode character block. * Constant for the "Balinese" Unicode character block.
* @since 1.7 * @since 1.7
...@@ -1800,6 +1847,13 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1800,6 +1847,13 @@ class Character implements java.io.Serializable, Comparable<Character> {
public static final UnicodeBlock SUNDANESE = public static final UnicodeBlock SUNDANESE =
new UnicodeBlock("SUNDANESE"); new UnicodeBlock("SUNDANESE");
/**
* Constant for the "Batak" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock BATAK =
new UnicodeBlock("BATAK");
/** /**
* Constant for the "Lepcha" Unicode character block. * Constant for the "Lepcha" Unicode character block.
* @since 1.7 * @since 1.7
...@@ -1816,6 +1870,15 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1816,6 +1870,15 @@ class Character implements java.io.Serializable, Comparable<Character> {
"OL CHIKI", "OL CHIKI",
"OLCHIKI"); "OLCHIKI");
/**
* Constant for the "Vedic Extensions" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock VEDIC_EXTENSIONS =
new UnicodeBlock("VEDIC_EXTENSIONS",
"VEDIC EXTENSIONS",
"VEDICEXTENSIONS");
/** /**
* Constant for the "Phonetic Extensions Supplement" Unicode character * Constant for the "Phonetic Extensions Supplement" Unicode character
* block. * block.
...@@ -1911,6 +1974,13 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1911,6 +1974,13 @@ class Character implements java.io.Serializable, Comparable<Character> {
"CJK STROKES", "CJK STROKES",
"CJKSTROKES"); "CJKSTROKES");
/**
* Constant for the "Lisu" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock LISU =
new UnicodeBlock("LISU");
/** /**
* Constant for the "Vai" Unicode character block. * Constant for the "Vai" Unicode character block.
* @since 1.7 * @since 1.7
...@@ -1927,6 +1997,13 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1927,6 +1997,13 @@ class Character implements java.io.Serializable, Comparable<Character> {
"CYRILLIC EXTENDED-B", "CYRILLIC EXTENDED-B",
"CYRILLICEXTENDED-B"); "CYRILLICEXTENDED-B");
/**
* Constant for the "Bamum" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock BAMUM =
new UnicodeBlock("BAMUM");
/** /**
* Constant for the "Modifier Tone Letters" Unicode character block. * Constant for the "Modifier Tone Letters" Unicode character block.
* @since 1.7 * @since 1.7
...@@ -1954,6 +2031,15 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1954,6 +2031,15 @@ class Character implements java.io.Serializable, Comparable<Character> {
"SYLOTI NAGRI", "SYLOTI NAGRI",
"SYLOTINAGRI"); "SYLOTINAGRI");
/**
* Constant for the "Common Indic Number Forms" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
"COMMON INDIC NUMBER FORMS",
"COMMONINDICNUMBERFORMS");
/** /**
* Constant for the "Phags-pa" Unicode character block. * Constant for the "Phags-pa" Unicode character block.
* @since 1.7 * @since 1.7
...@@ -1969,6 +2055,15 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1969,6 +2055,15 @@ class Character implements java.io.Serializable, Comparable<Character> {
public static final UnicodeBlock SAURASHTRA = public static final UnicodeBlock SAURASHTRA =
new UnicodeBlock("SAURASHTRA"); new UnicodeBlock("SAURASHTRA");
/**
* Constant for the "Devanagari Extended" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock DEVANAGARI_EXTENDED =
new UnicodeBlock("DEVANAGARI_EXTENDED",
"DEVANAGARI EXTENDED",
"DEVANAGARIEXTENDED");
/** /**
* Constant for the "Kayah Li" Unicode character block. * Constant for the "Kayah Li" Unicode character block.
* @since 1.7 * @since 1.7
...@@ -1985,6 +2080,22 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1985,6 +2080,22 @@ class Character implements java.io.Serializable, Comparable<Character> {
public static final UnicodeBlock REJANG = public static final UnicodeBlock REJANG =
new UnicodeBlock("REJANG"); new UnicodeBlock("REJANG");
/**
* Constant for the "Hangul Jamo Extended-A" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
"HANGUL JAMO EXTENDED-A",
"HANGULJAMOEXTENDED-A");
/**
* Constant for the "Javanese" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock JAVANESE =
new UnicodeBlock("JAVANESE");
/** /**
* Constant for the "Cham" Unicode character block. * Constant for the "Cham" Unicode character block.
* @since 1.7 * @since 1.7
...@@ -1992,6 +2103,51 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -1992,6 +2103,51 @@ class Character implements java.io.Serializable, Comparable<Character> {
public static final UnicodeBlock CHAM = public static final UnicodeBlock CHAM =
new UnicodeBlock("CHAM"); new UnicodeBlock("CHAM");
/**
* Constant for the "Myanmar Extended-A" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock MYANMAR_EXTENDED_A =
new UnicodeBlock("MYANMAR_EXTENDED_A",
"MYANMAR EXTENDED-A",
"MYANMAREXTENDED-A");
/**
* Constant for the "Tai Viet" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock TAI_VIET =
new UnicodeBlock("TAI_VIET",
"TAI VIET",
"TAIVIET");
/**
* Constant for the "Ethiopic Extended-A" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
new UnicodeBlock("ETHIOPIC_EXTENDED_A",
"ETHIOPIC EXTENDED-A",
"ETHIOPICEXTENDED-A");
/**
* Constant for the "Meetei Mayek" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock MEETEI_MAYEK =
new UnicodeBlock("MEETEI_MAYEK",
"MEETEI MAYEK",
"MEETEIMAYEK");
/**
* Constant for the "Hangul Jamo Extended-B" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
"HANGUL JAMO EXTENDED-B",
"HANGULJAMOEXTENDED-B");
/** /**
* Constant for the "Vertical Forms" Unicode character block. * Constant for the "Vertical Forms" Unicode character block.
* @since 1.7 * @since 1.7
...@@ -2051,6 +2207,15 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -2051,6 +2207,15 @@ class Character implements java.io.Serializable, Comparable<Character> {
"OLD PERSIAN", "OLD PERSIAN",
"OLDPERSIAN"); "OLDPERSIAN");
/**
* Constant for the "Imperial Aramaic" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock IMPERIAL_ARAMAIC =
new UnicodeBlock("IMPERIAL_ARAMAIC",
"IMPERIAL ARAMAIC",
"IMPERIALARAMAIC");
/** /**
* Constant for the "Phoenician" Unicode character block. * Constant for the "Phoenician" Unicode character block.
* @since 1.7 * @since 1.7
...@@ -2072,6 +2237,72 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -2072,6 +2237,72 @@ class Character implements java.io.Serializable, Comparable<Character> {
public static final UnicodeBlock KHAROSHTHI = public static final UnicodeBlock KHAROSHTHI =
new UnicodeBlock("KHAROSHTHI"); new UnicodeBlock("KHAROSHTHI");
/**
* Constant for the "Old South Arabian" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock OLD_SOUTH_ARABIAN =
new UnicodeBlock("OLD_SOUTH_ARABIAN",
"OLD SOUTH ARABIAN",
"OLDSOUTHARABIAN");
/**
* Constant for the "Avestan" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock AVESTAN =
new UnicodeBlock("AVESTAN");
/**
* Constant for the "Inscriptional Parthian" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
"INSCRIPTIONAL PARTHIAN",
"INSCRIPTIONALPARTHIAN");
/**
* Constant for the "Inscriptional Pahlavi" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
"INSCRIPTIONAL PAHLAVI",
"INSCRIPTIONALPAHLAVI");
/**
* Constant for the "Old Turkic" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock OLD_TURKIC =
new UnicodeBlock("OLD_TURKIC",
"OLD TURKIC",
"OLDTURKIC");
/**
* Constant for the "Rumi Numeral Symbols" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
"RUMI NUMERAL SYMBOLS",
"RUMINUMERALSYMBOLS");
/**
* Constant for the "Brahmi" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock BRAHMI =
new UnicodeBlock("BRAHMI");
/**
* Constant for the "Kaithi" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock KAITHI =
new UnicodeBlock("KAITHI");
/** /**
* Constant for the "Cuneiform" Unicode character block. * Constant for the "Cuneiform" Unicode character block.
* @since 1.7 * @since 1.7
...@@ -2089,6 +2320,33 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -2089,6 +2320,33 @@ class Character implements java.io.Serializable, Comparable<Character> {
"CUNEIFORM NUMBERS AND PUNCTUATION", "CUNEIFORM NUMBERS AND PUNCTUATION",
"CUNEIFORMNUMBERSANDPUNCTUATION"); "CUNEIFORMNUMBERSANDPUNCTUATION");
/**
* Constant for the "Egyptian Hieroglyphs" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
"EGYPTIAN HIEROGLYPHS",
"EGYPTIANHIEROGLYPHS");
/**
* Constant for the "Bamum Supplement" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock BAMUM_SUPPLEMENT =
new UnicodeBlock("BAMUM_SUPPLEMENT",
"BAMUM SUPPLEMENT",
"BAMUMSUPPLEMENT");
/**
* Constant for the "Kana Supplement" Unicode character block.
* @since 1.7
*/
public static final UnicodeBlock KANA_SUPPLEMENT =
new UnicodeBlock("KANA_SUPPLEMENT",
"KANA SUPPLEMENT",
"KANASUPPLEMENT");
/** /**
* Constant for the "Ancient Greek Musical Notation" Unicode character * Constant for the "Ancient Greek Musical Notation" Unicode character
* block. * block.
...@@ -2126,6 +2384,90 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -2126,6 +2384,90 @@ class Character implements java.io.Serializable, Comparable<Character> {
"DOMINO TILES", "DOMINO TILES",
"DOMINOTILES"); "DOMINOTILES");
/**
* Constant for the "Playing Cards" Unicode character block.
* Covers code points U+1F0A0 through U+1F0FF.
* @since 1.7
*/
public static final UnicodeBlock PLAYING_CARDS =
new UnicodeBlock("PLAYING_CARDS",
"PLAYING CARDS",
"PLAYINGCARDS");
/**
* Constant for the "Enclosed Alphanumeric Supplement" Unicode character
* block.
* Covers code points U+1F100 through U+1F1FF.
* @since 1.7
*/
public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
"ENCLOSED ALPHANUMERIC SUPPLEMENT",
"ENCLOSEDALPHANUMERICSUPPLEMENT");
/**
* Constant for the "Enclosed Ideographic Supplement" Unicode character
* block.
* Covers code points U+1F200 through U+1F2FF.
* @since 1.7
*/
public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
"ENCLOSED IDEOGRAPHIC SUPPLEMENT",
"ENCLOSEDIDEOGRAPHICSUPPLEMENT");
/**
* Constant for the "Miscellaneous Symbols And Pictographs" Unicode
* character block.
* Covers code points U+1F300 through U+1F5FF.
* @since 1.7
*/
public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
"MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
"MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
/**
* Constant for the "Emoticons" Unicode character block.
* Covers code points U+1F600 through U+1F64F.
* @since 1.7
*/
public static final UnicodeBlock EMOTICONS =
// Single-word block name: only the identifier is passed, no alternate spellings.
new UnicodeBlock("EMOTICONS");
/**
* Constant for the "Transport And Map Symbols" Unicode character block.
* Covers code points U+1F680 through U+1F6FF.
* @since 1.7
*/
public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
"TRANSPORT AND MAP SYMBOLS",
"TRANSPORTANDMAPSYMBOLS");
/**
* Constant for the "Alchemical Symbols" Unicode character block.
* Covers code points U+1F700 through U+1F77F.
* @since 1.7
*/
public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
new UnicodeBlock("ALCHEMICAL_SYMBOLS",
"ALCHEMICAL SYMBOLS",
"ALCHEMICALSYMBOLS");
/**
* Constant for the "CJK Unified Ideographs Extension C" Unicode
* character block.
* Covers code points U+2A700 through U+2B73F.
* @since 1.7
*/
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
"CJK UNIFIED IDEOGRAPHS EXTENSION C",
"CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
/**
* Constant for the "CJK Unified Ideographs Extension D" Unicode
* character block.
* Covers code points U+2B740 through U+2B81F.
* @since 1.7
*/
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
"CJK UNIFIED IDEOGRAPHS EXTENSION D",
"CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
private static final int blockStarts[] = { private static final int blockStarts[] = {
0x0000, // 0000..007F; Basic Latin 0x0000, // 0000..007F; Basic Latin
0x0080, // 0080..00FF; Latin-1 Supplement 0x0080, // 0080..00FF; Latin-1 Supplement
...@@ -2144,7 +2486,9 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -2144,7 +2486,9 @@ class Character implements java.io.Serializable, Comparable<Character> {
0x0750, // 0750..077F; Arabic Supplement 0x0750, // 0750..077F; Arabic Supplement
0x0780, // 0780..07BF; Thaana 0x0780, // 0780..07BF; Thaana
0x07C0, // 07C0..07FF; NKo 0x07C0, // 07C0..07FF; NKo
0x0800, // unassigned 0x0800, // 0800..083F; Samaritan
0x0840, // 0840..085F; Mandaic
0x0860, // unassigned
0x0900, // 0900..097F; Devanagari 0x0900, // 0900..097F; Devanagari
0x0980, // 0980..09FF; Bengali 0x0980, // 0980..09FF; Bengali
0x0A00, // 0A00..0A7F; Gurmukhi 0x0A00, // 0A00..0A7F; Gurmukhi
...@@ -2173,19 +2517,21 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -2173,19 +2517,21 @@ class Character implements java.io.Serializable, Comparable<Character> {
0x1760, // 1760..177F; Tagbanwa 0x1760, // 1760..177F; Tagbanwa
0x1780, // 1780..17FF; Khmer 0x1780, // 1780..17FF; Khmer
0x1800, // 1800..18AF; Mongolian 0x1800, // 1800..18AF; Mongolian
0x18B0, // unassigned 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
0x1900, // 1900..194F; Limbu 0x1900, // 1900..194F; Limbu
0x1950, // 1950..197F; Tai Le 0x1950, // 1950..197F; Tai Le
0x1980, // 1980..19DF; New Tai Lue 0x1980, // 1980..19DF; New Tai Lue
0x19E0, // 19E0..19FF; Khmer Symbols 0x19E0, // 19E0..19FF; Khmer Symbols
0x1A00, // 1A00..1A1F; Buginese 0x1A00, // 1A00..1A1F; Buginese
0x1A20, // unassigned 0x1A20, // 1A20..1AAF; Tai Tham
0x1AB0, // unassigned
0x1B00, // 1B00..1B7F; Balinese 0x1B00, // 1B00..1B7F; Balinese
0x1B80, // 1B80..1BBF; Sundanese 0x1B80, // 1B80..1BBF; Sundanese
0x1BC0, // unassigned 0x1BC0, // 1BC0..1BFF; Batak
0x1C00, // 1C00..1C4F; Lepcha 0x1C00, // 1C00..1C4F; Lepcha
0x1C50, // 1C50..1C7F; Ol Chiki 0x1C50, // 1C50..1C7F; Ol Chiki
0x1C80, // unassigned 0x1C80, // unassigned
0x1CD0, // 1CD0..1CFF; Vedic Extensions
0x1D00, // 1D00..1D7F; Phonetic Extensions 0x1D00, // 1D00..1D7F; Phonetic Extensions
0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement
0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement
...@@ -2240,27 +2586,34 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -2240,27 +2586,34 @@ class Character implements java.io.Serializable, Comparable<Character> {
0x3300, // 3300..33FF; CJK Compatibility 0x3300, // 3300..33FF; CJK Compatibility
0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A
0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols
0x4E00, // 4E00..9FFF; CJK Unified Ideograph 0x4E00, // 4E00..9FFF; CJK Unified Ideographs
0xA000, // A000..A48F; Yi Syllables 0xA000, // A000..A48F; Yi Syllables
0xA490, // A490..A4CF; Yi Radicals 0xA490, // A490..A4CF; Yi Radicals
0xA4D0, // unassigned 0xA4D0, // A4D0..A4FF; Lisu
0xA500, // A500..A63F; Vai 0xA500, // A500..A63F; Vai
0xA640, // A640..A69F; Cyrillic Extended-B 0xA640, // A640..A69F; Cyrillic Extended-B
0xA6A0, // unassigned 0xA6A0, // A6A0..A6FF; Bamum
0xA700, // A700..A71F; Modifier Tone Letters 0xA700, // A700..A71F; Modifier Tone Letters
0xA720, // A720..A7FF; Latin Extended-D 0xA720, // A720..A7FF; Latin Extended-D
0xA800, // A800..A82F; Syloti Nagri 0xA800, // A800..A82F; Syloti Nagri
0xA830, // unassigned 0xA830, // A830..A83F; Common Indic Number Forms
0xA840, // A840..A87F; Phags-pa 0xA840, // A840..A87F; Phags-pa
0xA880, // A880..A8DF; Saurashtra 0xA880, // A880..A8DF; Saurashtra
0xA8E0, // unassigned 0xA8E0, // A8E0..A8FF; Devanagari Extended
0xA900, // A900..A92F; Kayah Li 0xA900, // A900..A92F; Kayah Li
0xA930, // A930..A95F; Rejang 0xA930, // A930..A95F; Rejang
0xA960, // unassigned 0xA960, // A960..A97F; Hangul Jamo Extended-A
0xA980, // A980..A9DF; Javanese
0xA9E0, // unassigned
0xAA00, // AA00..AA5F; Cham 0xAA00, // AA00..AA5F; Cham
0xAA60, // unassigned 0xAA60, // AA60..AA7F; Myanmar Extended-A
0xAA80, // AA80..AADF; Tai Viet
0xAAE0, // unassigned
0xAB00, // AB00..AB2F; Ethiopic Extended-A
0xAB30, // unassigned
0xABC0, // ABC0..ABFF; Meetei Mayek
0xAC00, // AC00..D7AF; Hangul Syllables 0xAC00, // AC00..D7AF; Hangul Syllables
0xD7B0, // unassigned 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B
0xD800, // D800..DB7F; High Surrogates 0xD800, // D800..DB7F; High Surrogates
0xDB80, // DB80..DBFF; High Private Use Surrogates 0xDB80, // DB80..DBFF; High Private Use Surrogates
0xDC00, // DC00..DFFF; Low Surrogates 0xDC00, // DC00..DFFF; Low Surrogates
...@@ -2292,20 +2645,39 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -2292,20 +2645,39 @@ class Character implements java.io.Serializable, Comparable<Character> {
0x10380, // 10380..1039F; Ugaritic 0x10380, // 10380..1039F; Ugaritic
0x103A0, // 103A0..103DF; Old Persian 0x103A0, // 103A0..103DF; Old Persian
0x103E0, // unassigned 0x103E0, // unassigned
0x10400, // 10400..1044F; Desere 0x10400, // 10400..1044F; Deseret
0x10450, // 10450..1047F; Shavian 0x10450, // 10450..1047F; Shavian
0x10480, // 10480..104AF; Osmanya 0x10480, // 10480..104AF; Osmanya
0x104B0, // unassigned 0x104B0, // unassigned
0x10800, // 10800..1083F; Cypriot Syllabary 0x10800, // 10800..1083F; Cypriot Syllabary
0x10840, // unassigned 0x10840, // 10840..1085F; Imperial Aramaic
0x10860, // unassigned
0x10900, // 10900..1091F; Phoenician 0x10900, // 10900..1091F; Phoenician
0x10920, // 10920..1093F; Lydian 0x10920, // 10920..1093F; Lydian
0x10940, // unassigned 0x10940, // unassigned
0x10A00, // 10A00..10A5F; Kharoshthi 0x10A00, // 10A00..10A5F; Kharoshthi
0x10A60, // unassigned 0x10A60, // 10A60..10A7F; Old South Arabian
0x10A80, // unassigned
0x10B00, // 10B00..10B3F; Avestan
0x10B40, // 10B40..10B5F; Inscriptional Parthian
0x10B60, // 10B60..10B7F; Inscriptional Pahlavi
0x10B80, // unassigned
0x10C00, // 10C00..10C4F; Old Turkic
0x10C50, // unassigned
0x10E60, // 10E60..10E7F; Rumi Numeral Symbols
0x10E80, // unassigned
0x11000, // 11000..1107F; Brahmi
0x11080, // 11080..110CF; Kaithi
0x110D0, // unassigned
0x12000, // 12000..123FF; Cuneiform 0x12000, // 12000..123FF; Cuneiform
0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation
0x12480, // unassigned 0x12480, // unassigned
0x13000, // 13000..1342F; Egyptian Hieroglyphs
0x13430, // unassigned
0x16800, // 16800..16A3F; Bamum Supplement
0x16A40, // unassigned
0x1B000, // 1B000..1B0FF; Kana Supplement
0x1B100, // unassigned
0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols
0x1D100, // 1D100..1D1FF; Musical Symbols 0x1D100, // 1D100..1D1FF; Musical Symbols
0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation
...@@ -2317,9 +2689,20 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -2317,9 +2689,20 @@ class Character implements java.io.Serializable, Comparable<Character> {
0x1D800, // unassigned 0x1D800, // unassigned
0x1F000, // 1F000..1F02F; Mahjong Tiles 0x1F000, // 1F000..1F02F; Mahjong Tiles
0x1F030, // 1F030..1F09F; Domino Tiles 0x1F030, // 1F030..1F09F; Domino Tiles
0x1F0A0, // unassigned 0x1F0A0, // 1F0A0..1F0FF; Playing Cards
0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement
0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement
0x1F300, // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
0x1F600, // 1F600..1F64F; Emoticons
0x1F650, // unassigned
0x1F680, // 1F680..1F6FF; Transport And Map Symbols
0x1F700, // 1F700..1F77F; Alchemical Symbols
0x1F780, // unassigned
0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B
0x2A6E0, // unassigned 0x2A6E0, // unassigned
0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C
0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D
0x2B820, // unassigned
0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
0x2FA20, // unassigned 0x2FA20, // unassigned
0xE0000, // E0000..E007F; Tags 0xE0000, // E0000..E007F; Tags
...@@ -2327,7 +2710,7 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -2327,7 +2710,7 @@ class Character implements java.io.Serializable, Comparable<Character> {
0xE0100, // E0100..E01EF; Variation Selectors Supplement 0xE0100, // E0100..E01EF; Variation Selectors Supplement
0xE01F0, // unassigned 0xE01F0, // unassigned
0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A 0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A
0x100000, // 100000..10FFFF; Supplementary Private Use Area-B 0x100000 // 100000..10FFFF; Supplementary Private Use Area-B
}; };
private static final UnicodeBlock[] blocks = { private static final UnicodeBlock[] blocks = {
...@@ -2348,6 +2731,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -2348,6 +2731,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
ARABIC_SUPPLEMENT, ARABIC_SUPPLEMENT,
THAANA, THAANA,
NKO, NKO,
SAMARITAN,
MANDAIC,
null, null,
DEVANAGARI, DEVANAGARI,
BENGALI, BENGALI,
...@@ -2377,19 +2762,21 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -2377,19 +2762,21 @@ class Character implements java.io.Serializable, Comparable<Character> {
TAGBANWA, TAGBANWA,
KHMER, KHMER,
MONGOLIAN, MONGOLIAN,
null, UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
LIMBU, LIMBU,
TAI_LE, TAI_LE,
NEW_TAI_LUE, NEW_TAI_LUE,
KHMER_SYMBOLS, KHMER_SYMBOLS,
BUGINESE, BUGINESE,
TAI_THAM,
null, null,
BALINESE, BALINESE,
SUNDANESE, SUNDANESE,
null, BATAK,
LEPCHA, LEPCHA,
OL_CHIKI, OL_CHIKI,
null, null,
VEDIC_EXTENSIONS,
PHONETIC_EXTENSIONS, PHONETIC_EXTENSIONS,
PHONETIC_EXTENSIONS_SUPPLEMENT, PHONETIC_EXTENSIONS_SUPPLEMENT,
COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
...@@ -2447,24 +2834,31 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -2447,24 +2834,31 @@ class Character implements java.io.Serializable, Comparable<Character> {
CJK_UNIFIED_IDEOGRAPHS, CJK_UNIFIED_IDEOGRAPHS,
YI_SYLLABLES, YI_SYLLABLES,
YI_RADICALS, YI_RADICALS,
null, LISU,
VAI, VAI,
CYRILLIC_EXTENDED_B, CYRILLIC_EXTENDED_B,
null, BAMUM,
MODIFIER_TONE_LETTERS, MODIFIER_TONE_LETTERS,
LATIN_EXTENDED_D, LATIN_EXTENDED_D,
SYLOTI_NAGRI, SYLOTI_NAGRI,
null, COMMON_INDIC_NUMBER_FORMS,
PHAGS_PA, PHAGS_PA,
SAURASHTRA, SAURASHTRA,
null, DEVANAGARI_EXTENDED,
KAYAH_LI, KAYAH_LI,
REJANG, REJANG,
HANGUL_JAMO_EXTENDED_A,
JAVANESE,
null, null,
CHAM, CHAM,
MYANMAR_EXTENDED_A,
TAI_VIET,
null, null,
HANGUL_SYLLABLES, ETHIOPIC_EXTENDED_A,
null, null,
MEETEI_MAYEK,
HANGUL_SYLLABLES,
HANGUL_JAMO_EXTENDED_B,
HIGH_SURROGATES, HIGH_SURROGATES,
HIGH_PRIVATE_USE_SURROGATES, HIGH_PRIVATE_USE_SURROGATES,
LOW_SURROGATES, LOW_SURROGATES,
...@@ -2501,15 +2895,34 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -2501,15 +2895,34 @@ class Character implements java.io.Serializable, Comparable<Character> {
OSMANYA, OSMANYA,
null, null,
CYPRIOT_SYLLABARY, CYPRIOT_SYLLABARY,
IMPERIAL_ARAMAIC,
null, null,
PHOENICIAN, PHOENICIAN,
LYDIAN, LYDIAN,
null, null,
KHAROSHTHI, KHAROSHTHI,
OLD_SOUTH_ARABIAN,
null,
AVESTAN,
INSCRIPTIONAL_PARTHIAN,
INSCRIPTIONAL_PAHLAVI,
null,
OLD_TURKIC,
null,
RUMI_NUMERAL_SYMBOLS,
null,
BRAHMI,
KAITHI,
null, null,
CUNEIFORM, CUNEIFORM,
CUNEIFORM_NUMBERS_AND_PUNCTUATION, CUNEIFORM_NUMBERS_AND_PUNCTUATION,
null, null,
EGYPTIAN_HIEROGLYPHS,
null,
BAMUM_SUPPLEMENT,
null,
KANA_SUPPLEMENT,
null,
BYZANTINE_MUSICAL_SYMBOLS, BYZANTINE_MUSICAL_SYMBOLS,
MUSICAL_SYMBOLS, MUSICAL_SYMBOLS,
ANCIENT_GREEK_MUSICAL_NOTATION, ANCIENT_GREEK_MUSICAL_NOTATION,
...@@ -2521,9 +2934,20 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -2521,9 +2934,20 @@ class Character implements java.io.Serializable, Comparable<Character> {
null, null,
MAHJONG_TILES, MAHJONG_TILES,
DOMINO_TILES, DOMINO_TILES,
PLAYING_CARDS,
ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
EMOTICONS,
null,
TRANSPORT_AND_MAP_SYMBOLS,
ALCHEMICAL_SYMBOLS,
null, null,
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
null, null,
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
null,
CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
null, null,
TAGS, TAGS,
...@@ -2988,6 +3412,11 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -2988,6 +3412,11 @@ class Character implements java.io.Serializable, Comparable<Character> {
*/ */
SUNDANESE, SUNDANESE,
/**
* Unicode script "Batak".
*/
BATAK,
/** /**
* Unicode script "Lepcha". * Unicode script "Lepcha".
*/ */
...@@ -3063,6 +3492,11 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3063,6 +3492,11 @@ class Character implements java.io.Serializable, Comparable<Character> {
*/ */
SAMARITAN, SAMARITAN,
/**
* Unicode script "Mandaic".
*/
MANDAIC,
/** /**
* Unicode script "Lisu". * Unicode script "Lisu".
*/ */
...@@ -3108,6 +3542,11 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3108,6 +3542,11 @@ class Character implements java.io.Serializable, Comparable<Character> {
*/ */
OLD_TURKIC, OLD_TURKIC,
/**
* Unicode script "Brahmi".
*/
BRAHMI,
/** /**
* Unicode script "Kaithi". * Unicode script "Kaithi".
*/ */
...@@ -3135,7 +3574,9 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3135,7 +3574,9 @@ class Character implements java.io.Serializable, Comparable<Character> {
0x00F8, // 00F8..02B8; LATIN 0x00F8, // 00F8..02B8; LATIN
0x02B9, // 02B9..02DF; COMMON 0x02B9, // 02B9..02DF; COMMON
0x02E0, // 02E0..02E4; LATIN 0x02E0, // 02E0..02E4; LATIN
0x02E5, // 02E5..02FF; COMMON 0x02E5, // 02E5..02E9; COMMON
0x02EA, // 02EA..02EB; BOPOMOFO
0x02EC, // 02EC..02FF; COMMON
0x0300, // 0300..036F; INHERITED 0x0300, // 0300..036F; INHERITED
0x0370, // 0370..0373; GREEK 0x0370, // 0370..0373; GREEK
0x0374, // 0374..0374; COMMON 0x0374, // 0374..0374; COMMON
...@@ -3155,18 +3596,18 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3155,18 +3596,18 @@ class Character implements java.io.Serializable, Comparable<Character> {
0x0589, // 0589..0589; COMMON 0x0589, // 0589..0589; COMMON
0x058A, // 058A..0590; ARMENIAN 0x058A, // 058A..0590; ARMENIAN
0x0591, // 0591..05FF; HEBREW 0x0591, // 0591..05FF; HEBREW
0x0600, // 0600..0605; COMMON 0x0600, // 0600..060B; ARABIC
0x0606, // 0606..060B; ARABIC
0x060C, // 060C..060C; COMMON 0x060C, // 060C..060C; COMMON
0x060D, // 060D..061A; ARABIC 0x060D, // 060D..061A; ARABIC
0x061B, // 061B..061D; COMMON 0x061B, // 061B..061D; COMMON
0x061E, // 061E..061E; ARABIC 0x061E, // 061E..061E; ARABIC
0x061F, // 061F..0620; COMMON 0x061F, // 061F..061F; COMMON
0x0621, // 0621..063F; ARABIC 0x0620, // 0620..063F; ARABIC
0x0640, // 0640..0640; COMMON 0x0640, // 0640..0640; COMMON
0x0641, // 0641..064A; ARABIC 0x0641, // 0641..064A; ARABIC
0x064B, // 064B..0655; INHERITED 0x064B, // 064B..0655; INHERITED
0x0656, // 0656..065F; ARABIC 0x0656, // 0656..065E; ARABIC
0x065F, // 065F..065F; INHERITED
0x0660, // 0660..0669; COMMON 0x0660, // 0660..0669; COMMON
0x066A, // 066A..066F; ARABIC 0x066A, // 066A..066F; ARABIC
0x0670, // 0670..0670; INHERITED 0x0670, // 0670..0670; INHERITED
...@@ -3177,7 +3618,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3177,7 +3618,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
0x0750, // 0750..077F; ARABIC 0x0750, // 0750..077F; ARABIC
0x0780, // 0780..07BF; THAANA 0x0780, // 0780..07BF; THAANA
0x07C0, // 07C0..07FF; NKO 0x07C0, // 07C0..07FF; NKO
0x0800, // 0800..08FF; SAMARITAN 0x0800, // 0800..083F; SAMARITAN
0x0840, // 0840..08FF; MANDAIC
0x0900, // 0900..0950; DEVANAGARI 0x0900, // 0900..0950; DEVANAGARI
0x0951, // 0951..0952; INHERITED 0x0951, // 0951..0952; INHERITED
0x0953, // 0953..0963; DEVANAGARI 0x0953, // 0953..0963; DEVANAGARI
...@@ -3192,7 +3634,6 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3192,7 +3634,6 @@ class Character implements java.io.Serializable, Comparable<Character> {
0x0B82, // 0B82..0C00; TAMIL 0x0B82, // 0B82..0C00; TAMIL
0x0C01, // 0C01..0C81; TELUGU 0x0C01, // 0C01..0C81; TELUGU
0x0C82, // 0C82..0CF0; KANNADA 0x0C82, // 0C82..0CF0; KANNADA
0x0CF1, // 0CF1..0D01; COMMON
0x0D02, // 0D02..0D81; MALAYALAM 0x0D02, // 0D02..0D81; MALAYALAM
0x0D82, // 0D82..0E00; SINHALA 0x0D82, // 0D82..0E00; SINHALA
0x0E01, // 0E01..0E3E; THAI 0x0E01, // 0E01..0E3E; THAI
...@@ -3200,7 +3641,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3200,7 +3641,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
0x0E40, // 0E40..0E80; THAI 0x0E40, // 0E40..0E80; THAI
0x0E81, // 0E81..0EFF; LAO 0x0E81, // 0E81..0EFF; LAO
0x0F00, // 0F00..0FD4; TIBETAN 0x0F00, // 0F00..0FD4; TIBETAN
0x0FD5, // 0FD5..0FFF; COMMON 0x0FD5, // 0FD5..0FD8; COMMON
0x0FD9, // 0FD9..0FFF; TIBETAN
0x1000, // 1000..109F; MYANMAR 0x1000, // 1000..109F; MYANMAR
0x10A0, // 10A0..10FA; GEORGIAN 0x10A0, // 10A0..10FA; GEORGIAN
0x10FB, // 10FB..10FB; COMMON 0x10FB, // 10FB..10FB; COMMON
...@@ -3232,7 +3674,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3232,7 +3674,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
0x1A00, // 1A00..1A1F; BUGINESE 0x1A00, // 1A00..1A1F; BUGINESE
0x1A20, // 1A20..1AFF; TAI_THAM 0x1A20, // 1A20..1AFF; TAI_THAM
0x1B00, // 1B00..1B7F; BALINESE 0x1B00, // 1B00..1B7F; BALINESE
0x1B80, // 1B80..1BFF; SUNDANESE 0x1B80, // 1B80..1BBF; SUNDANESE
0x1BC0, // 1BC0..1BFF; BATAK
0x1C00, // 1C00..1C4F; LEPCHA 0x1C00, // 1C00..1C4F; LEPCHA
0x1C50, // 1C50..1CCF; OL_CHIKI 0x1C50, // 1C50..1CCF; OL_CHIKI
0x1CD0, // 1CD0..1CD2; INHERITED 0x1CD0, // 1CD0..1CD2; INHERITED
...@@ -3295,7 +3738,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3295,7 +3738,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
0x3007, // 3007..3007; HAN 0x3007, // 3007..3007; HAN
0x3008, // 3008..3020; COMMON 0x3008, // 3008..3020; COMMON
0x3021, // 3021..3029; HAN 0x3021, // 3021..3029; HAN
0x302A, // 302A..302F; INHERITED 0x302A, // 302A..302D; INHERITED
0x302E, // 302E..302F; HANGUL
0x3030, // 3030..3037; COMMON 0x3030, // 3030..3037; COMMON
0x3038, // 3038..303B; HAN 0x3038, // 3038..303B; HAN
0x303C, // 303C..3040; COMMON 0x303C, // 303C..3040; COMMON
...@@ -3342,7 +3786,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3342,7 +3786,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
0xA980, // A980..A9FF; JAVANESE 0xA980, // A980..A9FF; JAVANESE
0xAA00, // AA00..AA5F; CHAM 0xAA00, // AA00..AA5F; CHAM
0xAA60, // AA60..AA7F; MYANMAR 0xAA60, // AA60..AA7F; MYANMAR
0xAA80, // AA80..ABBF; TAI_VIET 0xAA80, // AA80..AB00; TAI_VIET
0xAB01, // AB01..ABBF; ETHIOPIC
0xABC0, // ABC0..ABFF; MEETEI_MAYEK 0xABC0, // ABC0..ABFF; MEETEI_MAYEK
0xAC00, // AC00..D7FB; HANGUL 0xAC00, // AC00..D7FB; HANGUL
0xD7FC, // D7FC..F8FF; UNKNOWN 0xD7FC, // D7FC..F8FF; UNKNOWN
...@@ -3394,10 +3839,14 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3394,10 +3839,14 @@ class Character implements java.io.Serializable, Comparable<Character> {
0x10B40, // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN 0x10B40, // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
0x10B60, // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI 0x10B60, // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
0x10C00, // 10C00..10E5F; OLD_TURKIC 0x10C00, // 10C00..10E5F; OLD_TURKIC
0x10E60, // 10E60..1107F; ARABIC 0x10E60, // 10E60..10FFF; ARABIC
0x11000, // 11000..1107F; BRAHMI
0x11080, // 11080..11FFF; KAITHI 0x11080, // 11080..11FFF; KAITHI
0x12000, // 12000..12FFF; CUNEIFORM 0x12000, // 12000..12FFF; CUNEIFORM
0x13000, // 13000..1CFFF; EGYPTIAN_HIEROGLYPHS 0x13000, // 13000..167FF; EGYPTIAN_HIEROGLYPHS
0x16800, // 16800..16A38; BAMUM
0x1B000, // 1B000..1B000; KATAKANA
0x1B001, // 1B001..1CFFF; HIRAGANA
0x1D000, // 1D000..1D166; COMMON 0x1D000, // 1D000..1D166; COMMON
0x1D167, // 1D167..1D169; INHERITED 0x1D167, // 1D167..1D169; INHERITED
0x1D16A, // 1D16A..1D17A; COMMON 0x1D16A, // 1D16A..1D17A; COMMON
...@@ -3409,8 +3858,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3409,8 +3858,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
0x1D1AE, // 1D1AE..1D1FF; COMMON 0x1D1AE, // 1D1AE..1D1FF; COMMON
0x1D200, // 1D200..1D2FF; GREEK 0x1D200, // 1D200..1D2FF; GREEK
0x1D300, // 1D300..1F1FF; COMMON 0x1D300, // 1D300..1F1FF; COMMON
0x1F200, // 1F200..1F20F; HIRAGANA 0x1F200, // 1F200..1F200; HIRAGANA
0x1F210, // 1F210..1FFFF; COMMON 0x1F201, // 1F201..1FFFF; COMMON
0x20000, // 20000..E0000; HAN 0x20000, // 20000..E0000; HAN
0xE0001, // E0001..E00FF; COMMON 0xE0001, // E0001..E00FF; COMMON
0xE0100, // E0100..E01EF; INHERITED 0xE0100, // E0100..E01EF; INHERITED
...@@ -3436,6 +3885,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3436,6 +3885,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
COMMON, COMMON,
LATIN, LATIN,
COMMON, COMMON,
BOPOMOFO,
COMMON,
INHERITED, INHERITED,
GREEK, GREEK,
COMMON, COMMON,
...@@ -3455,7 +3906,6 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3455,7 +3906,6 @@ class Character implements java.io.Serializable, Comparable<Character> {
COMMON, COMMON,
ARMENIAN, ARMENIAN,
HEBREW, HEBREW,
COMMON,
ARABIC, ARABIC,
COMMON, COMMON,
ARABIC, ARABIC,
...@@ -3467,6 +3917,7 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3467,6 +3917,7 @@ class Character implements java.io.Serializable, Comparable<Character> {
ARABIC, ARABIC,
INHERITED, INHERITED,
ARABIC, ARABIC,
INHERITED,
COMMON, COMMON,
ARABIC, ARABIC,
INHERITED, INHERITED,
...@@ -3478,6 +3929,7 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3478,6 +3929,7 @@ class Character implements java.io.Serializable, Comparable<Character> {
THAANA, THAANA,
NKO, NKO,
SAMARITAN, SAMARITAN,
MANDAIC,
DEVANAGARI, DEVANAGARI,
INHERITED, INHERITED,
DEVANAGARI, DEVANAGARI,
...@@ -3492,7 +3944,6 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3492,7 +3944,6 @@ class Character implements java.io.Serializable, Comparable<Character> {
TAMIL, TAMIL,
TELUGU, TELUGU,
KANNADA, KANNADA,
COMMON,
MALAYALAM, MALAYALAM,
SINHALA, SINHALA,
THAI, THAI,
...@@ -3501,6 +3952,7 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3501,6 +3952,7 @@ class Character implements java.io.Serializable, Comparable<Character> {
LAO, LAO,
TIBETAN, TIBETAN,
COMMON, COMMON,
TIBETAN,
MYANMAR, MYANMAR,
GEORGIAN, GEORGIAN,
COMMON, COMMON,
...@@ -3533,6 +3985,7 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3533,6 +3985,7 @@ class Character implements java.io.Serializable, Comparable<Character> {
TAI_THAM, TAI_THAM,
BALINESE, BALINESE,
SUNDANESE, SUNDANESE,
BATAK,
LEPCHA, LEPCHA,
OL_CHIKI, OL_CHIKI,
INHERITED, INHERITED,
...@@ -3596,6 +4049,7 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3596,6 +4049,7 @@ class Character implements java.io.Serializable, Comparable<Character> {
COMMON, COMMON,
HAN, HAN,
INHERITED, INHERITED,
HANGUL,
COMMON, COMMON,
HAN, HAN,
COMMON, COMMON,
...@@ -3643,6 +4097,7 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3643,6 +4097,7 @@ class Character implements java.io.Serializable, Comparable<Character> {
CHAM, CHAM,
MYANMAR, MYANMAR,
TAI_VIET, TAI_VIET,
ETHIOPIC,
MEETEI_MAYEK, MEETEI_MAYEK,
HANGUL, HANGUL,
UNKNOWN, UNKNOWN,
...@@ -3695,9 +4150,13 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -3695,9 +4150,13 @@ class Character implements java.io.Serializable, Comparable<Character> {
INSCRIPTIONAL_PAHLAVI, INSCRIPTIONAL_PAHLAVI,
OLD_TURKIC, OLD_TURKIC,
ARABIC, ARABIC,
BRAHMI,
KAITHI, KAITHI,
CUNEIFORM, CUNEIFORM,
EGYPTIAN_HIEROGLYPHS, EGYPTIAN_HIEROGLYPHS,
BAMUM,
KATAKANA,
HIRAGANA,
COMMON, COMMON,
INHERITED, INHERITED,
COMMON, COMMON,
......
/* /*
* Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -1472,19 +1472,9 @@ public final class NormalizerImpl { ...@@ -1472,19 +1472,9 @@ public final class NormalizerImpl {
} }
--remove; --remove;
} }
} else if(value2!=0) { } else if(value2!=0) { // for U+1109A, U+1109C, and U+110AB
/* the composition is longer than the starter,
* move the intermediate characters back one */
starterIsSupplementary=true; starterIsSupplementary=true;
/* temporarily increment for the loop boundary */ args.source[starter+1]=(char)value2;
++starter;
q=remove;
r=++remove;
while(starter<q) {
args.source[--r]=args.source[--q];
}
args.source[starter]=(char)value2;
--starter; /* undo the temporary increment */
/* } else { both are on the BMP, nothing more to do */ /* } else { both are on the BMP, nothing more to do */
} }
......
...@@ -23,8 +23,8 @@ ...@@ -23,8 +23,8 @@
/* /*
* @test * @test
* @bug 6842557 6943963 * @bug 6842557 6943963 6959267
* @summary confirm that shaping works as expected. (Mainly for new characters which were added in Unicode 5) * @summary confirm that shaping works as expected. (Mainly for new characters which were added in Unicode 5 and 6)
* used where appropriate. * used where appropriate.
*/ */
...@@ -39,6 +39,7 @@ public class ShapingTest { ...@@ -39,6 +39,7 @@ public class ShapingTest {
public static void main(String[] args) { public static void main(String[] args) {
test6842557(); test6842557();
test6943963(); test6943963();
test6903266();
if (err) { if (err) {
throw new RuntimeException("shape() returned unexpected value."); throw new RuntimeException("shape() returned unexpected value.");
...@@ -109,6 +110,34 @@ public class ShapingTest { ...@@ -109,6 +110,34 @@ public class ShapingTest {
checkResult("Range.ARABIC, Range.EASTERN_ARABIC", ns, given, expected_EASTERN_ARABIC); checkResult("Range.ARABIC, Range.EASTERN_ARABIC", ns, given, expected_EASTERN_ARABIC);
} }
/**
 * Exercises contextual digit shaping for the TAI_THAM_HORA, TAI_THAM_THAM,
 * JAVANESE and MEETEI_MAYEK ranges. Each case feeds a script letter followed
 * by the ASCII digits "012" and hands the expected shaped text to
 * checkResult, which does the comparison.
 */
private static void test6903266() {
    // Hora only: digits are expected at U+1A80..U+1A82.
    NumericShaper horaShaper =
        getContextualShaper(EnumSet.of(Range.TAI_THAM_HORA));
    checkResult("Range.TAI_THAM_HORA",
                horaShaper,
                "\u1a20 012",
                "\u1a20 \u1a80\u1a81\u1a82");

    // Hora and Tham together: Tham digits are prioritized (U+1A90..U+1A92).
    NumericShaper horaAndThamShaper =
        getContextualShaper(EnumSet.of(Range.TAI_THAM_HORA,
                                       Range.TAI_THAM_THAM));
    checkResult("Range.TAI_THAM_HORA, Range.TAI_THAM_THAM",
                horaAndThamShaper,
                "\u1a20 012",
                "\u1a20 \u1a90\u1a91\u1a92");

    // Javanese: digits are expected at U+A9D0..U+A9D2.
    NumericShaper javaneseShaper =
        getContextualShaper(EnumSet.of(Range.JAVANESE));
    checkResult("Range.JAVANESE",
                javaneseShaper,
                "\ua984 012",
                "\ua984 \ua9d0\ua9d1\ua9d2");

    // Tham only: same expected digits as the combined Hora/Tham case.
    NumericShaper thamShaper =
        getContextualShaper(EnumSet.of(Range.TAI_THAM_THAM));
    checkResult("Range.TAI_THAM_THAM",
                thamShaper,
                "\u1a20 012",
                "\u1a20 \u1a90\u1a91\u1a92");

    // Meetei Mayek: digits are expected at U+ABF0..U+ABF2.
    NumericShaper meeteiMayekShaper =
        getContextualShaper(EnumSet.of(Range.MEETEI_MAYEK));
    checkResult("Range.MEETEI_MAYEK",
                meeteiMayekShaper,
                "\uabc0 012",
                "\uabc0 \uabf0\uabf1\uabf2");
}
private static void checkResult(String ranges, NumericShaper ns, private static void checkResult(String ranges, NumericShaper ns,
String given, String expected) { String given, String expected) {
char[] text = given.toCharArray(); char[] text = given.toCharArray();
......
/** /**
* @test * @test
* @bug 6945564 * @bug 6945564 6959267
* @summary Check that the j.l.Character.UnicodeScript * @summary Check that the j.l.Character.UnicodeScript
* @ignore don't run until #6903266 is integrated
*/ */
import java.io.*; import java.io.*;
...@@ -15,11 +14,15 @@ public class CheckScript { ...@@ -15,11 +14,15 @@ public class CheckScript {
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
if (args.length != 1) { BufferedReader sbfr = null;
System.out.println("java CharacterScript script.txt"); if (args.length == 0) {
System.exit(1); sbfr = new BufferedReader(new FileReader(new File(System.getProperty("test.src", "."), "Scripts.txt")));
} else if (args.length == 1) {
sbfr = new BufferedReader(new FileReader(args[0]));
} else {
System.out.println("java CharacterScript Scripts.txt");
throw new RuntimeException("Datafile name should be specified.");
} }
BufferedReader sbfr = new BufferedReader(new FileReader(args[0]));
Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher(""); Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher("");
String line = null; String line = null;
HashMap<String,ArrayList<Integer>> scripts = new HashMap<>(); HashMap<String,ArrayList<Integer>> scripts = new HashMap<>();
......
# Scripts-5.2.0.txt # Scripts-6.0.0.txt
# Date: 2009-08-22, 04:58:43 GMT [MD] # Date: 2010-08-19, 00:48:47 GMT [MD]
# #
# Unicode Character Database # Unicode Character Database
# Copyright (c) 1991-2009 Unicode, Inc. # Copyright (c) 1991-2010 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html # For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/ # For documentation, see http://www.unicode.org/reports/tr44/
...@@ -73,7 +73,7 @@ ...@@ -73,7 +73,7 @@
02C2..02C5 ; Common # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD 02C2..02C5 ; Common # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD
02C6..02D1 ; Common # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 02C6..02D1 ; Common # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
02D2..02DF ; Common # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT 02D2..02DF ; Common # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT
02E5..02EB ; Common # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK 02E5..02E9 ; Common # Sk [5] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER EXTRA-LOW TONE BAR
02EC ; Common # Lm MODIFIER LETTER VOICING 02EC ; Common # Lm MODIFIER LETTER VOICING
02ED ; Common # Sk MODIFIER LETTER UNASPIRATED 02ED ; Common # Sk MODIFIER LETTER UNASPIRATED
02EE ; Common # Lm MODIFIER LETTER DOUBLE APOSTROPHE 02EE ; Common # Lm MODIFIER LETTER DOUBLE APOSTROPHE
...@@ -83,7 +83,6 @@ ...@@ -83,7 +83,6 @@
0385 ; Common # Sk GREEK DIALYTIKA TONOS 0385 ; Common # Sk GREEK DIALYTIKA TONOS
0387 ; Common # Po GREEK ANO TELEIA 0387 ; Common # Po GREEK ANO TELEIA
0589 ; Common # Po ARMENIAN FULL STOP 0589 ; Common # Po ARMENIAN FULL STOP
0600..0603 ; Common # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
060C ; Common # Po ARABIC COMMA 060C ; Common # Po ARABIC COMMA
061B ; Common # Po ARABIC SEMICOLON 061B ; Common # Po ARABIC SEMICOLON
061F ; Common # Po ARABIC QUESTION MARK 061F ; Common # Po ARABIC QUESTION MARK
...@@ -92,7 +91,6 @@ ...@@ -92,7 +91,6 @@
06DD ; Common # Cf ARABIC END OF AYAH 06DD ; Common # Cf ARABIC END OF AYAH
0964..0965 ; Common # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA 0964..0965 ; Common # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
0970 ; Common # Po DEVANAGARI ABBREVIATION SIGN 0970 ; Common # Po DEVANAGARI ABBREVIATION SIGN
0CF1..0CF2 ; Common # So [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
0E3F ; Common # Sc THAI CURRENCY SYMBOL BAHT 0E3F ; Common # Sc THAI CURRENCY SYMBOL BAHT
0FD5..0FD8 ; Common # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS 0FD5..0FD8 ; Common # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS
10FB ; Common # Po GEORGIAN PARAGRAPH SEPARATOR 10FB ; Common # Po GEORGIAN PARAGRAPH SEPARATOR
...@@ -148,7 +146,7 @@ ...@@ -148,7 +146,7 @@
208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS 208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS
208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS 208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS
20A0..20B8 ; Common # Sc [25] EURO-CURRENCY SIGN..TENGE SIGN 20A0..20B9 ; Common # Sc [26] EURO-CURRENCY SIGN..INDIAN RUPEE SIGN
2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT 2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
2102 ; Common # L& DOUBLE-STRUCK CAPITAL C 2102 ; Common # L& DOUBLE-STRUCK CAPITAL C
2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA 2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA
...@@ -157,7 +155,8 @@ ...@@ -157,7 +155,8 @@
210A..2113 ; Common # L& [10] SCRIPT SMALL G..SCRIPT SMALL L 210A..2113 ; Common # L& [10] SCRIPT SMALL G..SCRIPT SMALL L
2114 ; Common # So L B BAR SYMBOL 2114 ; Common # So L B BAR SYMBOL
2115 ; Common # L& DOUBLE-STRUCK CAPITAL N 2115 ; Common # L& DOUBLE-STRUCK CAPITAL N
2116..2118 ; Common # So [3] NUMERO SIGN..SCRIPT CAPITAL P 2116..2117 ; Common # So [2] NUMERO SIGN..SOUND RECORDING COPYRIGHT
2118 ; Common # Sm SCRIPT CAPITAL P
2119..211D ; Common # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R 2119..211D ; Common # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
211E..2123 ; Common # So [6] PRESCRIPTION TAKE..VERSICLE 211E..2123 ; Common # So [6] PRESCRIPTION TAKE..VERSICLE
2124 ; Common # L& DOUBLE-STRUCK CAPITAL Z 2124 ; Common # L& DOUBLE-STRUCK CAPITAL Z
...@@ -213,7 +212,7 @@ ...@@ -213,7 +212,7 @@
239B..23B3 ; Common # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM 239B..23B3 ; Common # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
23B4..23DB ; Common # So [40] TOP SQUARE BRACKET..FUSE 23B4..23DB ; Common # So [40] TOP SQUARE BRACKET..FUSE
23DC..23E1 ; Common # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET 23DC..23E1 ; Common # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
23E2..23E8 ; Common # So [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL 23E2..23F3 ; Common # So [18] WHITE TRAPEZIUM..HOURGLASS WITH FLOWING SAND
2400..2426 ; Common # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO 2400..2426 ; Common # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
2440..244A ; Common # So [11] OCR HOOK..OCR DOUBLE BACKSLASH 2440..244A ; Common # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
2460..249B ; Common # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP 2460..249B ; Common # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
...@@ -227,18 +226,8 @@ ...@@ -227,18 +226,8 @@
25F8..25FF ; Common # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE 25F8..25FF ; Common # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE
2600..266E ; Common # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN 2600..266E ; Common # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN
266F ; Common # Sm MUSIC SHARP SIGN 266F ; Common # Sm MUSIC SHARP SIGN
2670..26CD ; Common # So [94] WEST SYRIAC CROSS..DISABLED CAR 2670..26FF ; Common # So [144] WEST SYRIAC CROSS..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
26CF..26E1 ; Common # So [19] PICK..RESTRICTED LEFT ENTRY-2 2701..2767 ; Common # So [103] UPPER BLADE SCISSORS..ROTATED FLORAL HEART BULLET
26E3 ; Common # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
26E8..26FF ; Common # So [24] BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
2701..2704 ; Common # So [4] UPPER BLADE SCISSORS..WHITE SCISSORS
2706..2709 ; Common # So [4] TELEPHONE LOCATION SIGN..ENVELOPE
270C..2727 ; Common # So [28] VICTORY HAND..WHITE FOUR POINTED STAR
2729..274B ; Common # So [35] STRESS OUTLINED WHITE STAR..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
274D ; Common # So SHADOWED WHITE CIRCLE
274F..2752 ; Common # So [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE
2756..275E ; Common # So [9] BLACK DIAMOND MINUS WHITE X..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
2761..2767 ; Common # So [7] CURVED STEM PARAGRAPH SIGN ORNAMENT..ROTATED FLORAL HEART BULLET
2768 ; Common # Ps MEDIUM LEFT PARENTHESIS ORNAMENT 2768 ; Common # Ps MEDIUM LEFT PARENTHESIS ORNAMENT
2769 ; Common # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT 2769 ; Common # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT
276A ; Common # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT 276A ; Common # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
...@@ -254,15 +243,13 @@ ...@@ -254,15 +243,13 @@
2774 ; Common # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT 2774 ; Common # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT
2775 ; Common # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT 2775 ; Common # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT
2776..2793 ; Common # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN 2776..2793 ; Common # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
2794 ; Common # So HEAVY WIDE-HEADED RIGHTWARDS ARROW 2794..27BF ; Common # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP
2798..27AF ; Common # So [24] HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW
27B1..27BE ; Common # So [14] NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW
27C0..27C4 ; Common # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET 27C0..27C4 ; Common # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET
27C5 ; Common # Ps LEFT S-SHAPED BAG DELIMITER 27C5 ; Common # Ps LEFT S-SHAPED BAG DELIMITER
27C6 ; Common # Pe RIGHT S-SHAPED BAG DELIMITER 27C6 ; Common # Pe RIGHT S-SHAPED BAG DELIMITER
27C7..27CA ; Common # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE 27C7..27CA ; Common # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE
27CC ; Common # Sm LONG DIVISION 27CC ; Common # Sm LONG DIVISION
27D0..27E5 ; Common # Sm [22] WHITE DIAMOND WITH CENTRED DOT..WHITE SQUARE WITH RIGHTWARDS TICK 27CE..27E5 ; Common # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK
27E6 ; Common # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET 27E6 ; Common # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET
27E7 ; Common # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET 27E7 ; Common # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET
27E8 ; Common # Ps MATHEMATICAL LEFT ANGLE BRACKET 27E8 ; Common # Ps MATHEMATICAL LEFT ANGLE BRACKET
...@@ -555,27 +542,51 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR ...@@ -555,27 +542,51 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
1D7CE..1D7FF ; Common # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1D7CE..1D7FF ; Common # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
1F000..1F02B ; Common # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK 1F000..1F02B ; Common # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK
1F030..1F093 ; Common # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 1F030..1F093 ; Common # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
1F0A0..1F0AE ; Common # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES
1F0B1..1F0BE ; Common # So [14] PLAYING CARD ACE OF HEARTS..PLAYING CARD KING OF HEARTS
1F0C1..1F0CF ; Common # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER
1F0D1..1F0DF ; Common # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER
1F100..1F10A ; Common # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA 1F100..1F10A ; Common # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA
1F110..1F12E ; Common # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ 1F110..1F12E ; Common # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
1F131 ; Common # So SQUARED LATIN CAPITAL LETTER B 1F130..1F169 ; Common # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
1F13D ; Common # So SQUARED LATIN CAPITAL LETTER N 1F170..1F19A ; Common # So [43] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS
1F13F ; Common # So SQUARED LATIN CAPITAL LETTER P 1F1E6..1F1FF ; Common # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
1F142 ; Common # So SQUARED LATIN CAPITAL LETTER S 1F201..1F202 ; Common # So [2] SQUARED KATAKANA KOKO..SQUARED KATAKANA SA
1F146 ; Common # So SQUARED LATIN CAPITAL LETTER W 1F210..1F23A ; Common # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6
1F14A..1F14E ; Common # So [5] SQUARED HV..SQUARED PPV
1F157 ; Common # So NEGATIVE CIRCLED LATIN CAPITAL LETTER H
1F15F ; Common # So NEGATIVE CIRCLED LATIN CAPITAL LETTER P
1F179 ; Common # So NEGATIVE SQUARED LATIN CAPITAL LETTER J
1F17B..1F17C ; Common # So [2] NEGATIVE SQUARED LATIN CAPITAL LETTER L..NEGATIVE SQUARED LATIN CAPITAL LETTER M
1F17F ; Common # So NEGATIVE SQUARED LATIN CAPITAL LETTER P
1F18A..1F18D ; Common # So [4] CROSSED NEGATIVE SQUARED LATIN CAPITAL LETTER P..NEGATIVE SQUARED SA
1F190 ; Common # So SQUARE DJ
1F210..1F231 ; Common # So [34] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-6253
1F240..1F248 ; Common # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 1F240..1F248 ; Common # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
1F250..1F251 ; Common # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
1F300..1F320 ; Common # So [33] CYCLONE..SHOOTING STAR
1F330..1F335 ; Common # So [6] CHESTNUT..CACTUS
1F337..1F37C ; Common # So [70] TULIP..BABY BOTTLE
1F380..1F393 ; Common # So [20] RIBBON..GRADUATION CAP
1F3A0..1F3C4 ; Common # So [37] CAROUSEL HORSE..SURFER
1F3C6..1F3CA ; Common # So [5] TROPHY..SWIMMER
1F3E0..1F3F0 ; Common # So [17] HOUSE BUILDING..EUROPEAN CASTLE
1F400..1F43E ; Common # So [63] RAT..PAW PRINTS
1F440 ; Common # So EYES
1F442..1F4F7 ; Common # So [182] EAR..CAMERA
1F4F9..1F4FC ; Common # So [4] VIDEO CAMERA..VIDEOCASSETTE
1F500..1F53D ; Common # So [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE
1F550..1F567 ; Common # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY
1F5FB..1F5FF ; Common # So [5] MOUNT FUJI..MOYAI
1F601..1F610 ; Common # So [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE
1F612..1F614 ; Common # So [3] UNAMUSED FACE..PENSIVE FACE
1F616 ; Common # So CONFOUNDED FACE
1F618 ; Common # So FACE THROWING A KISS
1F61A ; Common # So KISSING FACE WITH CLOSED EYES
1F61C..1F61E ; Common # So [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE
1F620..1F625 ; Common # So [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE
1F628..1F62B ; Common # So [4] FEARFUL FACE..TIRED FACE
1F62D ; Common # So LOUDLY CRYING FACE
1F630..1F633 ; Common # So [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE
1F635..1F640 ; Common # So [12] DIZZY FACE..WEARY CAT FACE
1F645..1F64F ; Common # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS
1F680..1F6C5 ; Common # So [70] ROCKET..LEFT LUGGAGE
1F700..1F773 ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
E0001 ; Common # Cf LANGUAGE TAG E0001 ; Common # Cf LANGUAGE TAG
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
# Total code points: 5395 # Total code points: 6379
# ================================================ # ================================================
...@@ -603,7 +614,7 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG ...@@ -603,7 +614,7 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
1E00..1EFF ; Latin # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP 1E00..1EFF ; Latin # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP
2071 ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER I 2071 ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER I
207F ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER N 207F ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER N
2090..2094 ; Latin # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA 2090..209C ; Latin # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T
212A..212B ; Latin # L& [2] KELVIN SIGN..ANGSTROM SIGN 212A..212B ; Latin # L& [2] KELVIN SIGN..ANGSTROM SIGN
2132 ; Latin # L& TURNED CAPITAL F 2132 ; Latin # L& TURNED CAPITAL F
214E ; Latin # L& TURNED SMALL F 214E ; Latin # L& TURNED SMALL F
...@@ -616,13 +627,16 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG ...@@ -616,13 +627,16 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
A722..A76F ; Latin # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON A722..A76F ; Latin # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON
A770 ; Latin # Lm MODIFIER LETTER US A770 ; Latin # Lm MODIFIER LETTER US
A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
A78B..A78C ; Latin # L& [2] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER SALTILLO A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
A790..A791 ; Latin # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER
A7A0..A7A9 ; Latin # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE
A7FA ; Latin # L& LATIN LETTER SMALL CAPITAL TURNED M
A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M
FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
# Total code points: 1244 # Total code points: 1267
# ================================================ # ================================================
...@@ -687,12 +701,11 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN ...@@ -687,12 +701,11 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
0483..0484 ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PALATALIZATION 0483..0484 ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PALATALIZATION
0487 ; Cyrillic # Mn COMBINING CYRILLIC POKRYTIE 0487 ; Cyrillic # Mn COMBINING CYRILLIC POKRYTIE
0488..0489 ; Cyrillic # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN 0488..0489 ; Cyrillic # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
048A..0525 ; Cyrillic # L& [156] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER PE WITH DESCENDER 048A..0527 ; Cyrillic # L& [158] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER SHHA WITH DESCENDER
1D2B ; Cyrillic # L& CYRILLIC LETTER SMALL CAPITAL EL 1D2B ; Cyrillic # L& CYRILLIC LETTER SMALL CAPITAL EL
1D78 ; Cyrillic # Lm MODIFIER LETTER CYRILLIC EN 1D78 ; Cyrillic # Lm MODIFIER LETTER CYRILLIC EN
2DE0..2DFF ; Cyrillic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS 2DE0..2DFF ; Cyrillic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
A640..A65F ; Cyrillic # L& [32] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER YN A640..A66D ; Cyrillic # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
A662..A66D ; Cyrillic # L& [12] CYRILLIC CAPITAL LETTER SOFT DE..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
A66E ; Cyrillic # Lo CYRILLIC LETTER MULTIOCULAR O A66E ; Cyrillic # Lo CYRILLIC LETTER MULTIOCULAR O
A66F ; Cyrillic # Mn COMBINING CYRILLIC VZMET A66F ; Cyrillic # Mn COMBINING CYRILLIC VZMET
A670..A672 ; Cyrillic # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN A670..A672 ; Cyrillic # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
...@@ -702,7 +715,7 @@ A67E ; Cyrillic # Po CYRILLIC KAVYKA ...@@ -702,7 +715,7 @@ A67E ; Cyrillic # Po CYRILLIC KAVYKA
A67F ; Cyrillic # Lm CYRILLIC PAYEROK A67F ; Cyrillic # Lm CYRILLIC PAYEROK
A680..A697 ; Cyrillic # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE A680..A697 ; Cyrillic # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE
# Total code points: 404 # Total code points: 408
# ================================================ # ================================================
...@@ -744,6 +757,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU ...@@ -744,6 +757,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
# ================================================ # ================================================
0600..0603 ; Arabic # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
0606..0608 ; Arabic # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY 0606..0608 ; Arabic # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY
0609..060A ; Arabic # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN 0609..060A ; Arabic # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN
060B ; Arabic # Sc AFGHANI SIGN 060B ; Arabic # Sc AFGHANI SIGN
...@@ -751,7 +765,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU ...@@ -751,7 +765,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA 060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
0610..061A ; Arabic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA 0610..061A ; Arabic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK 061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK
0621..063F ; Arabic # Lo [31] ARABIC LETTER HAMZA..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE 0620..063F ; Arabic # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH 0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
0656..065E ; Arabic # Mn [9] ARABIC SUBSCRIPT ALEF..ARABIC FATHA WITH TWO DOTS 0656..065E ; Arabic # Mn [9] ARABIC SUBSCRIPT ALEF..ARABIC FATHA WITH TWO DOTS
066A..066D ; Arabic # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR 066A..066D ; Arabic # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
...@@ -760,7 +774,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU ...@@ -760,7 +774,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
06D4 ; Arabic # Po ARABIC FULL STOP 06D4 ; Arabic # Po ARABIC FULL STOP
06D5 ; Arabic # Lo ARABIC LETTER AE 06D5 ; Arabic # Lo ARABIC LETTER AE
06D6..06DC ; Arabic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN 06D6..06DC ; Arabic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
06DE ; Arabic # Me ARABIC START OF RUB EL HIZB 06DE ; Arabic # So ARABIC START OF RUB EL HIZB
06DF..06E4 ; Arabic # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA 06DF..06E4 ; Arabic # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
06E5..06E6 ; Arabic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH 06E5..06E6 ; Arabic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH
06E7..06E8 ; Arabic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON 06E7..06E8 ; Arabic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
...@@ -773,6 +787,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU ...@@ -773,6 +787,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V 06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V
0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE 0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
FBB2..FBC1 ; Arabic # Sk [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW
FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
FD50..FD8F ; Arabic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM FD50..FD8F ; Arabic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
FD92..FDC7 ; Arabic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM FD92..FDC7 ; Arabic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
...@@ -782,7 +797,7 @@ FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ...@@ -782,7 +797,7 @@ FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN
FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS 10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
# Total code points: 1030 # Total code points: 1051
# ================================================ # ================================================
...@@ -809,27 +824,29 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA ...@@ -809,27 +824,29 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
0900..0902 ; Devanagari # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0900..0902 ; Devanagari # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
0903 ; Devanagari # Mc DEVANAGARI SIGN VISARGA 0903 ; Devanagari # Mc DEVANAGARI SIGN VISARGA
0904..0939 ; Devanagari # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 0904..0939 ; Devanagari # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
093A ; Devanagari # Mn DEVANAGARI VOWEL SIGN OE
093B ; Devanagari # Mc DEVANAGARI VOWEL SIGN OOE
093C ; Devanagari # Mn DEVANAGARI SIGN NUKTA 093C ; Devanagari # Mn DEVANAGARI SIGN NUKTA
093D ; Devanagari # Lo DEVANAGARI SIGN AVAGRAHA 093D ; Devanagari # Lo DEVANAGARI SIGN AVAGRAHA
093E..0940 ; Devanagari # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II 093E..0940 ; Devanagari # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
0941..0948 ; Devanagari # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI 0941..0948 ; Devanagari # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
0949..094C ; Devanagari # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU 0949..094C ; Devanagari # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
094D ; Devanagari # Mn DEVANAGARI SIGN VIRAMA 094D ; Devanagari # Mn DEVANAGARI SIGN VIRAMA
094E ; Devanagari # Mc DEVANAGARI VOWEL SIGN PRISHTHAMATRA E 094E..094F ; Devanagari # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW
0950 ; Devanagari # Lo DEVANAGARI OM 0950 ; Devanagari # Lo DEVANAGARI OM
0953..0955 ; Devanagari # Mn [3] DEVANAGARI GRAVE ACCENT..DEVANAGARI VOWEL SIGN CANDRA LONG E 0953..0957 ; Devanagari # Mn [5] DEVANAGARI GRAVE ACCENT..DEVANAGARI VOWEL SIGN UUE
0958..0961 ; Devanagari # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL 0958..0961 ; Devanagari # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
0962..0963 ; Devanagari # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL 0962..0963 ; Devanagari # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
0966..096F ; Devanagari # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE 0966..096F ; Devanagari # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
0971 ; Devanagari # Lm DEVANAGARI SIGN HIGH SPACING DOT 0971 ; Devanagari # Lm DEVANAGARI SIGN HIGH SPACING DOT
0972 ; Devanagari # Lo DEVANAGARI LETTER CANDRA A 0972..0977 ; Devanagari # Lo [6] DEVANAGARI LETTER CANDRA A..DEVANAGARI LETTER UUE
0979..097F ; Devanagari # Lo [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA 0979..097F ; Devanagari # Lo [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA
A8E0..A8F1 ; Devanagari # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA A8E0..A8F1 ; Devanagari # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A8F2..A8F7 ; Devanagari # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA A8F2..A8F7 ; Devanagari # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA
A8F8..A8FA ; Devanagari # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET A8F8..A8FA ; Devanagari # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET
A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
# Total code points: 140 # Total code points: 150
# ================================================ # ================================================
...@@ -941,8 +958,9 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE ...@@ -941,8 +958,9 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
0B66..0B6F ; Oriya # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE 0B66..0B6F ; Oriya # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
0B70 ; Oriya # So ORIYA ISSHAR 0B70 ; Oriya # So ORIYA ISSHAR
0B71 ; Oriya # Lo ORIYA LETTER WA 0B71 ; Oriya # Lo ORIYA LETTER WA
0B72..0B77 ; Oriya # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS
# Total code points: 84 # Total code points: 90
# ================================================ # ================================================
...@@ -1018,22 +1036,23 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE ...@@ -1018,22 +1036,23 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
0CE0..0CE1 ; Kannada # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE0..0CE1 ; Kannada # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CE2..0CE3 ; Kannada # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE2..0CE3 ; Kannada # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE 0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
0CF1..0CF2 ; Kannada # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
# Total code points: 84 # Total code points: 86
# ================================================ # ================================================
0D02..0D03 ; Malayalam # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA 0D02..0D03 ; Malayalam # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
0D05..0D0C ; Malayalam # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L 0D05..0D0C ; Malayalam # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
0D0E..0D10 ; Malayalam # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI 0D0E..0D10 ; Malayalam # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
0D12..0D28 ; Malayalam # Lo [23] MALAYALAM LETTER O..MALAYALAM LETTER NA 0D12..0D3A ; Malayalam # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
0D2A..0D39 ; Malayalam # Lo [16] MALAYALAM LETTER PA..MALAYALAM LETTER HA
0D3D ; Malayalam # Lo MALAYALAM SIGN AVAGRAHA 0D3D ; Malayalam # Lo MALAYALAM SIGN AVAGRAHA
0D3E..0D40 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II 0D3E..0D40 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
0D41..0D44 ; Malayalam # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR 0D41..0D44 ; Malayalam # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
0D46..0D48 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI 0D46..0D48 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
0D4A..0D4C ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU 0D4A..0D4C ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
0D4D ; Malayalam # Mn MALAYALAM SIGN VIRAMA 0D4D ; Malayalam # Mn MALAYALAM SIGN VIRAMA
0D4E ; Malayalam # Lo MALAYALAM LETTER DOT REPH
0D57 ; Malayalam # Mc MALAYALAM AU LENGTH MARK 0D57 ; Malayalam # Mc MALAYALAM AU LENGTH MARK
0D60..0D61 ; Malayalam # Lo [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL 0D60..0D61 ; Malayalam # Lo [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL
0D62..0D63 ; Malayalam # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL 0D62..0D63 ; Malayalam # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
...@@ -1042,7 +1061,7 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE ...@@ -1042,7 +1061,7 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
0D79 ; Malayalam # So MALAYALAM DATE MARK 0D79 ; Malayalam # So MALAYALAM DATE MARK
0D7A..0D7F ; Malayalam # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K 0D7A..0D7F ; Malayalam # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
# Total code points: 95 # Total code points: 98
# ================================================ # ================================================
...@@ -1132,16 +1151,17 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE ...@@ -1132,16 +1151,17 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
0F80..0F84 ; Tibetan # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA 0F80..0F84 ; Tibetan # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
0F85 ; Tibetan # Po TIBETAN MARK PALUTA 0F85 ; Tibetan # Po TIBETAN MARK PALUTA
0F86..0F87 ; Tibetan # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS 0F86..0F87 ; Tibetan # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
0F88..0F8B ; Tibetan # Lo [4] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN GRU MED RGYINGS 0F88..0F8C ; Tibetan # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN
0F90..0F97 ; Tibetan # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA 0F8D..0F97 ; Tibetan # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA
0F99..0FBC ; Tibetan # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA 0F99..0FBC ; Tibetan # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
0FBE..0FC5 ; Tibetan # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE 0FBE..0FC5 ; Tibetan # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE
0FC6 ; Tibetan # Mn TIBETAN SYMBOL PADMA GDAN 0FC6 ; Tibetan # Mn TIBETAN SYMBOL PADMA GDAN
0FC7..0FCC ; Tibetan # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL 0FC7..0FCC ; Tibetan # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL
0FCE..0FCF ; Tibetan # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM 0FCE..0FCF ; Tibetan # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM
0FD0..0FD4 ; Tibetan # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA 0FD0..0FD4 ; Tibetan # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA
0FD9..0FDA ; Tibetan # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS
# Total code points: 201 # Total code points: 207
# ================================================ # ================================================
...@@ -1201,6 +1221,7 @@ AA7B ; Myanmar # Mc MYANMAR SIGN PAO KAREN TONE ...@@ -1201,6 +1221,7 @@ AA7B ; Myanmar # Mc MYANMAR SIGN PAO KAREN TONE
# ================================================ # ================================================
1100..11FF ; Hangul # Lo [256] HANGUL CHOSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN 1100..11FF ; Hangul # Lo [256] HANGUL CHOSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN
302E..302F ; Hangul # Mn [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
3131..318E ; Hangul # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE 3131..318E ; Hangul # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
3200..321E ; Hangul # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU 3200..321E ; Hangul # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
3260..327E ; Hangul # So [31] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL IEUNG U 3260..327E ; Hangul # So [31] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL IEUNG U
...@@ -1214,7 +1235,7 @@ FFCA..FFCF ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL ...@@ -1214,7 +1235,7 @@ FFCA..FFCF ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL
FFD2..FFD7 ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU FFD2..FFD7 ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
# Total code points: 11737 # Total code points: 11739
# ================================================ # ================================================
...@@ -1234,7 +1255,7 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L ...@@ -1234,7 +1255,7 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L
12D8..1310 ; Ethiopic # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA 12D8..1310 ; Ethiopic # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA
1312..1315 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE 1312..1315 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
1318..135A ; Ethiopic # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA 1318..135A ; Ethiopic # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
135F ; Ethiopic # Mn ETHIOPIC COMBINING GEMINATION MARK 135D..135F ; Ethiopic # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
1360 ; Ethiopic # So ETHIOPIC SECTION MARK 1360 ; Ethiopic # So ETHIOPIC SECTION MARK
1361..1368 ; Ethiopic # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR 1361..1368 ; Ethiopic # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR
1369..137C ; Ethiopic # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND 1369..137C ; Ethiopic # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND
...@@ -1249,8 +1270,13 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L ...@@ -1249,8 +1270,13 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L
2DC8..2DCE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO 2DC8..2DCE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO
2DD0..2DD6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO 2DD0..2DD6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO
2DD8..2DDE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO 2DD8..2DDE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
AB01..AB06 ; Ethiopic # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO
AB09..AB0E ; Ethiopic # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO
AB11..AB16 ; Ethiopic # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO
AB20..AB26 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO
AB28..AB2E ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO
# Total code points: 461 # Total code points: 495
# ================================================ # ================================================
...@@ -1329,9 +1355,10 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L ...@@ -1329,9 +1355,10 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L
3041..3096 ; Hiragana # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE 3041..3096 ; Hiragana # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
309D..309E ; Hiragana # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK 309D..309E ; Hiragana # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
309F ; Hiragana # Lo HIRAGANA DIGRAPH YORI 309F ; Hiragana # Lo HIRAGANA DIGRAPH YORI
1B001 ; Hiragana # Lo HIRAGANA LETTER ARCHAIC YE
1F200 ; Hiragana # So SQUARE HIRAGANA HOKA 1F200 ; Hiragana # So SQUARE HIRAGANA HOKA
# Total code points: 90 # Total code points: 91
# ================================================ # ================================================
...@@ -1343,15 +1370,17 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L ...@@ -1343,15 +1370,17 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L
3300..3357 ; Katakana # So [88] SQUARE APAATO..SQUARE WATTO 3300..3357 ; Katakana # So [88] SQUARE APAATO..SQUARE WATTO
FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
1B000 ; Katakana # Lo KATAKANA LETTER ARCHAIC E
# Total code points: 299 # Total code points: 300
# ================================================ # ================================================
02EA..02EB ; Bopomofo # Sk [2] MODIFIER LETTER YIN DEPARTING TONE MARK..MODIFIER LETTER YANG DEPARTING TONE MARK
3105..312D ; Bopomofo # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH 3105..312D ; Bopomofo # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH
31A0..31B7 ; Bopomofo # Lo [24] BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H 31A0..31BA ; Bopomofo # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY
# Total code points: 65 # Total code points: 70
# ================================================ # ================================================
...@@ -1370,9 +1399,10 @@ FA30..FA6D ; Han # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILI ...@@ -1370,9 +1399,10 @@ FA30..FA6D ; Han # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILI
FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
# Total code points: 75738 # Total code points: 75960
# ================================================ # ================================================
...@@ -1410,6 +1440,7 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE ...@@ -1410,6 +1440,7 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
0300..036F ; Inherited # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X 0300..036F ; Inherited # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
0485..0486 ; Inherited # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA 0485..0486 ; Inherited # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA
064B..0655 ; Inherited # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW 064B..0655 ; Inherited # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW
065F ; Inherited # Mn ARABIC WAVY HAMZA BELOW
0670 ; Inherited # Mn ARABIC LETTER SUPERSCRIPT ALEF 0670 ; Inherited # Mn ARABIC LETTER SUPERSCRIPT ALEF
0951..0952 ; Inherited # Mn [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA 0951..0952 ; Inherited # Mn [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA
1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA 1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
...@@ -1417,14 +1448,14 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE ...@@ -1417,14 +1448,14 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
1CED ; Inherited # Mn VEDIC SIGN TIRYAK 1CED ; Inherited # Mn VEDIC SIGN TIRYAK
1DC0..1DE6 ; Inherited # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z 1DC0..1DE6 ; Inherited # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
1DFD..1DFF ; Inherited # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 1DFC..1DFF ; Inherited # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER 200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
20D0..20DC ; Inherited # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20D0..20DC ; Inherited # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20DD..20E0 ; Inherited # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH 20DD..20E0 ; Inherited # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
20E1 ; Inherited # Mn COMBINING LEFT RIGHT ARROW ABOVE 20E1 ; Inherited # Mn COMBINING LEFT RIGHT ARROW ABOVE
20E2..20E4 ; Inherited # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE 20E2..20E4 ; Inherited # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
20E5..20F0 ; Inherited # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE 20E5..20F0 ; Inherited # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
302A..302F ; Inherited # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK 302A..302D ; Inherited # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
3099..309A ; Inherited # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 3099..309A ; Inherited # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
FE00..FE0F ; Inherited # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 FE00..FE0F ; Inherited # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
FE20..FE26 ; Inherited # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON FE20..FE26 ; Inherited # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
...@@ -1568,8 +1599,9 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2 ...@@ -1568,8 +1599,9 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
19B0..19C0 ; New_Tai_Lue # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY 19B0..19C0 ; New_Tai_Lue # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY
19C1..19C7 ; New_Tai_Lue # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B 19C1..19C7 ; New_Tai_Lue # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B
19C8..19C9 ; New_Tai_Lue # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2 19C8..19C9 ; New_Tai_Lue # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
19D0..19DA ; New_Tai_Lue # Nd [11] NEW TAI LUE DIGIT ZERO..NEW TAI LUE THAM DIGIT ONE 19D0..19D9 ; New_Tai_Lue # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
19DE..19DF ; New_Tai_Lue # Po [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV 19DA ; New_Tai_Lue # No NEW TAI LUE THAM DIGIT ONE
19DE..19DF ; New_Tai_Lue # So [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV
# Total code points: 83 # Total code points: 83
...@@ -1584,8 +1616,10 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2 ...@@ -1584,8 +1616,10 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
2D30..2D65 ; Tifinagh # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ 2D30..2D65 ; Tifinagh # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
2D6F ; Tifinagh # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D6F ; Tifinagh # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
2D70 ; Tifinagh # Po TIFINAGH SEPARATOR MARK
2D7F ; Tifinagh # Mn TIFINAGH CONSONANT JOINER
# Total code points: 55 # Total code points: 57
# ================================================ # ================================================
...@@ -1882,8 +1916,9 @@ A6A0..A6E5 ; Bamum # Lo [70] BAMUM LETTER A..BAMUM LETTER KI ...@@ -1882,8 +1916,9 @@ A6A0..A6E5 ; Bamum # Lo [70] BAMUM LETTER A..BAMUM LETTER KI
A6E6..A6EF ; Bamum # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM A6E6..A6EF ; Bamum # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM
A6F0..A6F1 ; Bamum # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A6F0..A6F1 ; Bamum # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
A6F2..A6F7 ; Bamum # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK A6F2..A6F7 ; Bamum # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK
16800..16A38 ; Bamum # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
# Total code points: 88 # Total code points: 657
# ================================================ # ================================================
...@@ -1969,4 +2004,40 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI ...@@ -1969,4 +2004,40 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
# Total code points: 66 # Total code points: 66
# ================================================
1BC0..1BE5 ; Batak # Lo [38] BATAK LETTER A..BATAK LETTER U
1BE6 ; Batak # Mn BATAK SIGN TOMPI
1BE7 ; Batak # Mc BATAK VOWEL SIGN E
1BE8..1BE9 ; Batak # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE
1BEA..1BEC ; Batak # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O
1BED ; Batak # Mn BATAK VOWEL SIGN KARO O
1BEE ; Batak # Mc BATAK VOWEL SIGN U
1BEF..1BF1 ; Batak # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H
1BF2..1BF3 ; Batak # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN
1BFC..1BFF ; Batak # Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT
# Total code points: 56
# ================================================
11000 ; Brahmi # Mc BRAHMI SIGN CANDRABINDU
11001 ; Brahmi # Mn BRAHMI SIGN ANUSVARA
11002 ; Brahmi # Mc BRAHMI SIGN VISARGA
11003..11037 ; Brahmi # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA
11038..11046 ; Brahmi # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
11047..1104D ; Brahmi # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS
11052..11065 ; Brahmi # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND
11066..1106F ; Brahmi # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE
# Total code points: 108
# ================================================
0840..0858 ; Mandaic # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
0859..085B ; Mandaic # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
085E ; Mandaic # Po MANDAIC PUNCTUATION
# Total code points: 29
# EOF # EOF
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册