Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
openanolis
dragonwell11
提交
86e1c7e2
D
dragonwell11
项目概览
openanolis
/
dragonwell11
通知
7
Star
2
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
dragonwell11
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
86e1c7e2
编写于
6月 30, 2010
作者:
M
martin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
6934271: Better handling of longer utf-8 sequences
Summary: Various cleanups, including clever bit-twiddling Reviewed-by: sherman
上级
8a9beae4
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
51 addition
and
25 deletion
+51
-25
jdk/src/share/classes/sun/nio/cs/UTF_8.java
jdk/src/share/classes/sun/nio/cs/UTF_8.java
+51
-25
未找到文件。
jdk/src/share/classes/sun/nio/cs/UTF_8.java
浏览文件 @
86e1c7e2
...
...
@@ -207,15 +207,15 @@ class UTF_8 extends Unicode
// ASCII only loop
while
(
dp
<
dlASCII
&&
sa
[
sp
]
>=
0
)
da
[
dp
++]
=
(
char
)
sa
[
sp
++];
da
[
dp
++]
=
(
char
)
sa
[
sp
++];
while
(
sp
<
sl
)
{
int
b1
=
sa
[
sp
];
if
(
b1
>=
0
)
{
if
(
b1
>=
0
)
{
// 1 byte, 7 bits: 0xxxxxxx
if
(
dp
>=
dl
)
return
xflow
(
src
,
sp
,
sl
,
dst
,
dp
,
1
);
da
[
dp
++]
=
(
char
)
b1
;
da
[
dp
++]
=
(
char
)
b1
;
sp
++;
}
else
if
((
b1
>>
5
)
==
-
2
)
{
// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
...
...
@@ -224,7 +224,10 @@ class UTF_8 extends Unicode
int
b2
=
sa
[
sp
+
1
];
if
(
isMalformed2
(
b1
,
b2
))
return
malformed
(
src
,
sp
,
dst
,
dp
,
2
);
da
[
dp
++]
=
(
char
)
(((
b1
<<
6
)
^
b2
)
^
0x0f80
);
da
[
dp
++]
=
(
char
)
(((
b1
<<
6
)
^
b2
)
^
(((
byte
)
0xC0
<<
6
)
^
((
byte
)
0x80
<<
0
)));
sp
+=
2
;
}
else
if
((
b1
>>
4
)
==
-
2
)
{
// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
...
...
@@ -234,7 +237,13 @@ class UTF_8 extends Unicode
int
b3
=
sa
[
sp
+
2
];
if
(
isMalformed3
(
b1
,
b2
,
b3
))
return
malformed
(
src
,
sp
,
dst
,
dp
,
3
);
da
[
dp
++]
=
(
char
)
(((
b1
<<
12
)
^
(
b2
<<
6
)
^
b3
)
^
0x1f80
);
da
[
dp
++]
=
(
char
)
((
b1
<<
12
)
^
(
b2
<<
6
)
^
(
b3
^
(((
byte
)
0xE0
<<
12
)
^
((
byte
)
0x80
<<
6
)
^
((
byte
)
0x80
<<
0
))));
sp
+=
3
;
}
else
if
((
b1
>>
3
)
==
-
2
)
{
// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
...
...
@@ -243,10 +252,14 @@ class UTF_8 extends Unicode
int
b2
=
sa
[
sp
+
1
];
int
b3
=
sa
[
sp
+
2
];
int
b4
=
sa
[
sp
+
3
];
int
uc
=
((
b1
&
0x07
)
<<
18
)
|
((
b2
&
0x3f
)
<<
12
)
|
((
b3
&
0x3f
)
<<
06
)
|
(
b4
&
0x3f
);
int
uc
=
((
b1
<<
18
)
^
(
b2
<<
12
)
^
(
b3
<<
6
)
^
(
b4
^
(((
byte
)
0xF0
<<
18
)
^
((
byte
)
0x80
<<
12
)
^
((
byte
)
0x80
<<
6
)
^
((
byte
)
0x80
<<
0
))));
if
(
isMalformed4
(
b2
,
b3
,
b4
)
||
// shortest form check
!
Character
.
isSupplementaryCodePoint
(
uc
))
{
...
...
@@ -271,8 +284,8 @@ class UTF_8 extends Unicode
if
(
b1
>=
0
)
{
// 1 byte, 7 bits: 0xxxxxxx
if
(
dst
.
remaining
()
<
1
)
return
xflow
(
src
,
mark
,
1
);
//
overflow
dst
.
put
((
char
)
b1
);
return
xflow
(
src
,
mark
,
1
);
//
overflow
dst
.
put
((
char
)
b1
);
mark
++;
}
else
if
((
b1
>>
5
)
==
-
2
)
{
// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
...
...
@@ -281,7 +294,10 @@ class UTF_8 extends Unicode
int
b2
=
src
.
get
();
if
(
isMalformed2
(
b1
,
b2
))
return
malformed
(
src
,
mark
,
2
);
dst
.
put
((
char
)
(((
b1
<<
6
)
^
b2
)
^
0x0f80
));
dst
.
put
((
char
)
(((
b1
<<
6
)
^
b2
)
^
(((
byte
)
0xC0
<<
6
)
^
((
byte
)
0x80
<<
0
))));
mark
+=
2
;
}
else
if
((
b1
>>
4
)
==
-
2
)
{
// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
...
...
@@ -291,7 +307,13 @@ class UTF_8 extends Unicode
int
b3
=
src
.
get
();
if
(
isMalformed3
(
b1
,
b2
,
b3
))
return
malformed
(
src
,
mark
,
3
);
dst
.
put
((
char
)
(((
b1
<<
12
)
^
(
b2
<<
6
)
^
b3
)
^
0x1f80
));
dst
.
put
((
char
)
((
b1
<<
12
)
^
(
b2
<<
6
)
^
(
b3
^
(((
byte
)
0xE0
<<
12
)
^
((
byte
)
0x80
<<
6
)
^
((
byte
)
0x80
<<
0
)))));
mark
+=
3
;
}
else
if
((
b1
>>
3
)
==
-
2
)
{
// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
...
...
@@ -300,10 +322,14 @@ class UTF_8 extends Unicode
int
b2
=
src
.
get
();
int
b3
=
src
.
get
();
int
b4
=
src
.
get
();
int
uc
=
((
b1
&
0x07
)
<<
18
)
|
((
b2
&
0x3f
)
<<
12
)
|
((
b3
&
0x3f
)
<<
06
)
|
(
b4
&
0x3f
);
int
uc
=
((
b1
<<
18
)
^
(
b2
<<
12
)
^
(
b3
<<
6
)
^
(
b4
^
(((
byte
)
0xF0
<<
18
)
^
((
byte
)
0x80
<<
12
)
^
((
byte
)
0x80
<<
6
)
^
((
byte
)
0x80
<<
0
))));
if
(
isMalformed4
(
b2
,
b3
,
b4
)
||
// shortest form check
!
Character
.
isSupplementaryCodePoint
(
uc
))
{
...
...
@@ -368,7 +394,7 @@ class UTF_8 extends Unicode
int
dl
=
dst
.
arrayOffset
()
+
dst
.
limit
();
int
dlASCII
=
dp
+
Math
.
min
(
sl
-
sp
,
dl
-
dp
);
//ASCII only loop
//
ASCII only loop
while
(
dp
<
dlASCII
&&
sa
[
sp
]
<
'\u0080'
)
da
[
dp
++]
=
(
byte
)
sa
[
sp
++];
while
(
sp
<
sl
)
{
...
...
@@ -382,7 +408,7 @@ class UTF_8 extends Unicode
// 2 bytes, 11 bits
if
(
dl
-
dp
<
2
)
return
overflow
(
src
,
sp
,
dst
,
dp
);
da
[
dp
++]
=
(
byte
)(
0xc0
|
(
(
c
>>
06
)
));
da
[
dp
++]
=
(
byte
)(
0xc0
|
(
c
>>
6
));
da
[
dp
++]
=
(
byte
)(
0x80
|
(
c
&
0x3f
));
}
else
if
(
Character
.
isSurrogate
(
c
))
{
// Have a surrogate pair
...
...
@@ -397,7 +423,7 @@ class UTF_8 extends Unicode
return
overflow
(
src
,
sp
,
dst
,
dp
);
da
[
dp
++]
=
(
byte
)(
0xf0
|
((
uc
>>
18
)));
da
[
dp
++]
=
(
byte
)(
0x80
|
((
uc
>>
12
)
&
0x3f
));
da
[
dp
++]
=
(
byte
)(
0x80
|
((
uc
>>
0
6
)
&
0x3f
));
da
[
dp
++]
=
(
byte
)(
0x80
|
((
uc
>>
6
)
&
0x3f
));
da
[
dp
++]
=
(
byte
)(
0x80
|
(
uc
&
0x3f
));
sp
++;
// 2 chars
}
else
{
...
...
@@ -405,7 +431,7 @@ class UTF_8 extends Unicode
if
(
dl
-
dp
<
3
)
return
overflow
(
src
,
sp
,
dst
,
dp
);
da
[
dp
++]
=
(
byte
)(
0xe0
|
((
c
>>
12
)));
da
[
dp
++]
=
(
byte
)(
0x80
|
((
c
>>
0
6
)
&
0x3f
));
da
[
dp
++]
=
(
byte
)(
0x80
|
((
c
>>
6
)
&
0x3f
));
da
[
dp
++]
=
(
byte
)(
0x80
|
(
c
&
0x3f
));
}
sp
++;
...
...
@@ -429,7 +455,7 @@ class UTF_8 extends Unicode
// 2 bytes, 11 bits
if
(
dst
.
remaining
()
<
2
)
return
overflow
(
src
,
mark
);
dst
.
put
((
byte
)(
0xc0
|
(
(
c
>>
06
)
)));
dst
.
put
((
byte
)(
0xc0
|
(
c
>>
6
)));
dst
.
put
((
byte
)(
0x80
|
(
c
&
0x3f
)));
}
else
if
(
Character
.
isSurrogate
(
c
))
{
// Have a surrogate pair
...
...
@@ -444,15 +470,15 @@ class UTF_8 extends Unicode
return
overflow
(
src
,
mark
);
dst
.
put
((
byte
)(
0xf0
|
((
uc
>>
18
))));
dst
.
put
((
byte
)(
0x80
|
((
uc
>>
12
)
&
0x3f
)));
dst
.
put
((
byte
)(
0x80
|
((
uc
>>
0
6
)
&
0x3f
)));
dst
.
put
((
byte
)(
0x80
|
((
uc
>>
6
)
&
0x3f
)));
dst
.
put
((
byte
)(
0x80
|
(
uc
&
0x3f
)));
mark
++;
//2 chars
mark
++;
//
2 chars
}
else
{
// 3 bytes, 16 bits
if
(
dst
.
remaining
()
<
3
)
return
overflow
(
src
,
mark
);
dst
.
put
((
byte
)(
0xe0
|
((
c
>>
12
))));
dst
.
put
((
byte
)(
0x80
|
((
c
>>
0
6
)
&
0x3f
)));
dst
.
put
((
byte
)(
0x80
|
((
c
>>
6
)
&
0x3f
)));
dst
.
put
((
byte
)(
0x80
|
(
c
&
0x3f
)));
}
mark
++;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录