Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
LinuxSuRen
Dragonwell11
提交
75be47d5
D
Dragonwell11
项目概览
LinuxSuRen
/
Dragonwell11
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
Dragonwell11
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
75be47d5
编写于
4月 16, 2019
作者:
J
joehw
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
8222415: Xerces 2.12.0: Parsing Configuration
Reviewed-by: lancea
上级
d987d2d6
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
229 addition
and
247 deletion
+229
-247
src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java
...sun/org/apache/xerces/internal/impl/XMLEntityManager.java
+206
-243
test/jaxp/javax/xml/jaxp/unittest/parsers/BaseParsingTest.java
...jaxp/javax/xml/jaxp/unittest/parsers/BaseParsingTest.java
+23
-4
未找到文件。
src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java
浏览文件 @
75be47d5
/*
* Copyright (c) 2009, 201
8
, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2009, 201
9
, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
...
...
@@ -22,6 +22,7 @@ package com.sun.org.apache.xerces.internal.impl ;
import
com.sun.org.apache.xerces.internal.impl.io.ASCIIReader
;
import
com.sun.org.apache.xerces.internal.impl.io.UCSReader
;
import
com.sun.org.apache.xerces.internal.impl.io.UTF16Reader
;
import
com.sun.org.apache.xerces.internal.impl.io.UTF8Reader
;
import
com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter
;
import
com.sun.org.apache.xerces.internal.impl.validation.ValidationManager
;
...
...
@@ -89,7 +90,7 @@ import org.xml.sax.InputSource;
* @author K.Venugopal SUN Microsystems
* @author Neeraj Bajaj SUN Microsystems
* @author Sunitha Reddy SUN Microsystems
* @LastModified:
Nov 2018
* @LastModified:
Apr 2019
*/
public
class
XMLEntityManager
implements
XMLComponent
,
XMLEntityResolver
{
...
...
@@ -412,9 +413,6 @@ public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
/** Augmentations for entities. */
private
final
Augmentations
fEntityAugs
=
new
AugmentationsImpl
();
/** Pool of character buffers. */
private
CharacterBufferPool
fBufferPool
=
new
CharacterBufferPool
(
fBufferSize
,
DEFAULT_INTERNAL_BUFFER_SIZE
);
/** indicate whether Catalog should be used for resolving external resources */
private
boolean
fUseCatalog
=
true
;
CatalogFeatures
fCatalogFeatures
;
...
...
@@ -694,7 +692,8 @@ public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
}
// wrap this stream in RewindableInputStream
stream
=
new
RewindableInputStream
(
stream
);
RewindableInputStream
rewindableStream
=
new
RewindableInputStream
(
stream
);
stream
=
rewindableStream
;
// perform auto-detect of encoding if necessary
if
(
encoding
==
null
)
{
...
...
@@ -702,27 +701,30 @@ public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
final
byte
[]
b4
=
new
byte
[
4
];
int
count
=
0
;
for
(;
count
<
4
;
count
++
)
{
b4
[
count
]
=
(
byte
)
stream
.
read
();
b4
[
count
]
=
(
byte
)
rewindableStream
.
readAndBuffer
();
}
if
(
count
==
4
)
{
Object
[]
encodingDesc
=
getEncodingName
(
b4
,
count
);
encoding
=
(
String
)(
encodingDesc
[
0
])
;
isBigEndian
=
(
Boolean
)(
encodingDesc
[
1
])
;
final
EncodingInfo
info
=
getEncodingInfo
(
b4
,
count
);
encoding
=
info
.
autoDetectedEncoding
;
final
String
readerEncoding
=
info
.
readerEncoding
;
isBigEndian
=
info
.
isBigEndian
;
stream
.
reset
();
// Special case UTF-8 files with BOM created by Microsoft
// tools. It's more efficient to consume the BOM than make
// the reader perform extra checks. -Ac
if
(
count
>
2
&&
encoding
.
equals
(
"UTF-8"
))
{
int
b0
=
b4
[
0
]
&
0xFF
;
int
b1
=
b4
[
1
]
&
0xFF
;
int
b2
=
b4
[
2
]
&
0xFF
;
if
(
b0
==
0xEF
&&
b1
==
0xBB
&&
b2
==
0xBF
)
{
// ignore first three bytes...
if
(
info
.
hasBOM
)
{
// Special case UTF-8 files with BOM created by Microsoft
// tools. It's more efficient to consume the BOM than make
// the reader perform extra checks. -Ac
if
(
EncodingInfo
.
STR_UTF8
.
equals
(
readerEncoding
))
{
// UTF-8 BOM: 0xEF 0xBB 0xBF
stream
.
skip
(
3
);
}
// It's also more efficient to consume the UTF-16 BOM.
else
if
(
EncodingInfo
.
STR_UTF16
.
equals
(
readerEncoding
))
{
// UTF-16 BE BOM: 0xFE 0xFF
// UTF-16 LE BOM: 0xFF 0xFE
stream
.
skip
(
2
);
}
}
reader
=
createReader
(
stream
,
e
ncoding
,
isBigEndian
);
reader
=
createReader
(
stream
,
readerE
ncoding
,
isBigEndian
);
}
else
{
reader
=
createReader
(
stream
,
encoding
,
isBigEndian
);
}
...
...
@@ -733,11 +735,11 @@ public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
encoding
=
encoding
.
toUpperCase
(
Locale
.
ENGLISH
);
// If encoding is UTF-8, consume BOM if one is present.
if
(
encoding
.
equals
(
"UTF-8"
))
{
if
(
EncodingInfo
.
STR_UTF8
.
equals
(
encoding
))
{
final
int
[]
b3
=
new
int
[
3
];
int
count
=
0
;
for
(;
count
<
3
;
++
count
)
{
b3
[
count
]
=
stream
.
read
();
b3
[
count
]
=
rewindableStream
.
readAndBuffer
();
if
(
b3
[
count
]
==
-
1
)
break
;
}
...
...
@@ -750,56 +752,51 @@ public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
stream
.
reset
();
}
}
// If encoding is UTF-16, we still need to read the first
four bytes
// in order to discover the byte order.
else
if
(
encoding
.
equals
(
"UTF-16"
))
{
// If encoding is UTF-16, we still need to read the first
//
four bytes,
in order to discover the byte order.
else
if
(
EncodingInfo
.
STR_UTF16
.
equals
(
encoding
))
{
final
int
[]
b4
=
new
int
[
4
];
int
count
=
0
;
for
(;
count
<
4
;
++
count
)
{
b4
[
count
]
=
stream
.
read
();
b4
[
count
]
=
rewindableStream
.
readAndBuffer
();
if
(
b4
[
count
]
==
-
1
)
break
;
}
stream
.
reset
();
String
utf16Encoding
=
"UTF-16"
;
if
(
count
>=
2
)
{
final
int
b0
=
b4
[
0
];
final
int
b1
=
b4
[
1
];
if
(
b0
==
0xFE
&&
b1
==
0xFF
)
{
// UTF-16, big-endian
utf16Encoding
=
"UTF-16BE"
;
isBigEndian
=
Boolean
.
TRUE
;
stream
.
skip
(
2
);
}
else
if
(
b0
==
0xFF
&&
b1
==
0xFE
)
{
// UTF-16, little-endian
utf16Encoding
=
"UTF-16LE"
;
isBigEndian
=
Boolean
.
FALSE
;
stream
.
skip
(
2
);
}
else
if
(
count
==
4
)
{
final
int
b2
=
b4
[
2
];
final
int
b3
=
b4
[
3
];
if
(
b0
==
0x00
&&
b1
==
0x3C
&&
b2
==
0x00
&&
b3
==
0x3F
)
{
// UTF-16, big-endian, no BOM
utf16Encoding
=
"UTF-16BE"
;
isBigEndian
=
Boolean
.
TRUE
;
}
if
(
b0
==
0x3C
&&
b1
==
0x00
&&
b2
==
0x3F
&&
b3
==
0x00
)
{
// UTF-16, little-endian, no BOM
utf16Encoding
=
"UTF-16LE"
;
isBigEndian
=
Boolean
.
FALSE
;
}
}
}
reader
=
createReader
(
stream
,
utf16Encoding
,
isBigEndian
);
}
// If encoding is UCS-4, we still need to read the first four bytes
// in order to discover the byte order.
else
if
(
encoding
.
equals
(
"ISO-10646-UCS-4"
))
{
else
if
(
EncodingInfo
.
STR_UCS4
.
equals
(
encoding
))
{
final
int
[]
b4
=
new
int
[
4
];
int
count
=
0
;
for
(;
count
<
4
;
++
count
)
{
b4
[
count
]
=
stream
.
read
();
b4
[
count
]
=
rewindableStream
.
readAndBuffer
();
if
(
b4
[
count
]
==
-
1
)
break
;
}
...
...
@@ -819,11 +816,11 @@ public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
}
// If encoding is UCS-2, we still need to read the first four bytes
// in order to discover the byte order.
else
if
(
encoding
.
equals
(
"ISO-10646-UCS-2"
))
{
else
if
(
EncodingInfo
.
STR_UCS2
.
equals
(
encoding
))
{
final
int
[]
b4
=
new
int
[
4
];
int
count
=
0
;
for
(;
count
<
4
;
++
count
)
{
b4
[
count
]
=
stream
.
read
();
b4
[
count
]
=
rewindableStream
.
readAndBuffer
();
if
(
b4
[
count
]
==
-
1
)
break
;
}
...
...
@@ -1798,7 +1795,6 @@ public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
bufferSize
.
intValue
()
>
DEFAULT_XMLDECL_BUFFER_SIZE
)
{
fBufferSize
=
bufferSize
.
intValue
();
fEntityScanner
.
setBufferSize
(
fBufferSize
);
fBufferPool
.
setExternalBufferSize
(
fBufferSize
);
}
}
if
(
suffixLength
==
Constants
.
SECURITY_MANAGER_PROPERTY
.
length
()
&&
...
...
@@ -2425,14 +2421,12 @@ public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
*
* @param b4 The first four bytes of the input.
* @param count The number of bytes actually read.
* @return a 2-element array: the first element, an IANA-encoding string,
* the second element a Boolean which is true iff the document is big endian, false
* if it's little-endian, and null if the distinction isn't relevant.
* @return an instance of EncodingInfo which represents the auto-detected encoding.
*/
protected
Object
[]
getEncodingName
(
byte
[]
b4
,
int
count
)
{
protected
EncodingInfo
getEncodingInfo
(
byte
[]
b4
,
int
count
)
{
if
(
count
<
2
)
{
return
defaultEncoding
;
return
EncodingInfo
.
UTF_8
;
}
// UTF-16, with BOM
...
...
@@ -2440,69 +2434,70 @@ public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
int
b1
=
b4
[
1
]
&
0xFF
;
if
(
b0
==
0xFE
&&
b1
==
0xFF
)
{
// UTF-16, big-endian
return
new
Object
[]
{
"UTF-16BE"
,
true
}
;
return
EncodingInfo
.
UTF_16_BIG_ENDIAN_WITH_BOM
;
}
if
(
b0
==
0xFF
&&
b1
==
0xFE
)
{
// UTF-16, little-endian
return
new
Object
[]
{
"UTF-16LE"
,
false
}
;
return
EncodingInfo
.
UTF_16_LITTLE_ENDIAN_WITH_BOM
;
}
// default to UTF-8 if we don't have enough bytes to make a
// good determination of the encoding
if
(
count
<
3
)
{
return
defaultEncoding
;
return
EncodingInfo
.
UTF_8
;
}
// UTF-8 with a BOM
int
b2
=
b4
[
2
]
&
0xFF
;
if
(
b0
==
0xEF
&&
b1
==
0xBB
&&
b2
==
0xBF
)
{
return
defaultEncoding
;
return
EncodingInfo
.
UTF_8_WITH_BOM
;
}
// default to UTF-8 if we don't have enough bytes to make a
// good determination of the encoding
if
(
count
<
4
)
{
return
defaultEncoding
;
return
EncodingInfo
.
UTF_8
;
}
// other encodings
int
b3
=
b4
[
3
]
&
0xFF
;
if
(
b0
==
0x00
&&
b1
==
0x00
&&
b2
==
0x00
&&
b3
==
0x3C
)
{
// UCS-4, big endian (1234)
return
new
Object
[]
{
"ISO-10646-UCS-4"
,
true
}
;
return
EncodingInfo
.
UCS_4_BIG_ENDIAN
;
}
if
(
b0
==
0x3C
&&
b1
==
0x00
&&
b2
==
0x00
&&
b3
==
0x00
)
{
// UCS-4, little endian (4321)
return
new
Object
[]
{
"ISO-10646-UCS-4"
,
false
}
;
return
EncodingInfo
.
UCS_4_LITTLE_ENDIAN
;
}
if
(
b0
==
0x00
&&
b1
==
0x00
&&
b2
==
0x3C
&&
b3
==
0x00
)
{
// UCS-4, unusual octet order (2143)
// REVISIT: What should this be?
return
new
Object
[]
{
"ISO-10646-UCS-4"
,
null
}
;
return
EncodingInfo
.
UCS_4_UNUSUAL_BYTE_ORDER
;
}
if
(
b0
==
0x00
&&
b1
==
0x3C
&&
b2
==
0x00
&&
b3
==
0x00
)
{
// UCS-4, unusual octect order (3412)
// REVISIT: What should this be?
return
new
Object
[]
{
"ISO-10646-UCS-4"
,
null
}
;
return
EncodingInfo
.
UCS_4_UNUSUAL_BYTE_ORDER
;
}
if
(
b0
==
0x00
&&
b1
==
0x3C
&&
b2
==
0x00
&&
b3
==
0x3F
)
{
// UTF-16, big-endian, no BOM
// (or could turn out to be UCS-2...
// REVISIT: What should this be?
return
new
Object
[]
{
"UTF-16BE"
,
true
}
;
return
EncodingInfo
.
UTF_16_BIG_ENDIAN
;
}
if
(
b0
==
0x3C
&&
b1
==
0x00
&&
b2
==
0x3F
&&
b3
==
0x00
)
{
// UTF-16, little-endian, no BOM
// (or could turn out to be UCS-2...
return
new
Object
[]
{
"UTF-16LE"
,
false
}
;
return
EncodingInfo
.
UTF_16_LITTLE_ENDIAN
;
}
if
(
b0
==
0x4C
&&
b1
==
0x6F
&&
b2
==
0xA7
&&
b3
==
0x94
)
{
// EBCDIC
// a la xerces1, return CP037 instead of EBCDIC here
return
new
Object
[]
{
"CP037"
,
null
}
;
return
EncodingInfo
.
EBCDIC
;
}
return
defaultEncoding
;
// default encoding
return
EncodingInfo
.
UTF_8
;
}
// getEncodingName(byte[],int):Object[]
...
...
@@ -2517,95 +2512,95 @@ public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
* encoding name may be a Java encoding name;
* otherwise, it is an ianaEncoding name.
* @param isBigEndian For encodings (like uCS-4), whose names cannot
* specify a byte order, this tells whether the order
is bigEndian. null menas
*
unknown or not
relevant.
* specify a byte order, this tells whether the order
*
is bigEndian. null if unknown or ir
relevant.
*
* @return Returns a reader.
*/
protected
Reader
createReader
(
InputStream
inputStream
,
String
encoding
,
Boolean
isBigEndian
)
throws
IOException
{
// normalize encoding name
if
(
encoding
==
null
)
{
encoding
=
"UTF-8"
;
}
throws
IOException
{
// try to use an optimized reader
String
ENCODING
=
encoding
.
toUpperCase
(
Locale
.
ENGLISH
);
if
(
ENCODING
.
equals
(
"UTF-8"
))
{
if
(
DEBUG_ENCODINGS
)
{
System
.
out
.
println
(
"$$$ creating UTF8Reader"
);
}
return
new
UTF8Reader
(
inputStream
,
fBufferSize
,
fErrorReporter
.
getMessageFormatter
(
XMLMessageFormatter
.
XML_DOMAIN
),
fErrorReporter
.
getLocale
()
);
}
if
(
ENCODING
.
equals
(
"US-ASCII"
))
{
if
(
DEBUG_ENCODINGS
)
{
System
.
out
.
println
(
"$$$ creating ASCIIReader"
);
}
return
new
ASCIIReader
(
inputStream
,
fBufferSize
,
fErrorReporter
.
getMessageFormatter
(
XMLMessageFormatter
.
XML_DOMAIN
),
fErrorReporter
.
getLocale
());
}
if
(
ENCODING
.
equals
(
"ISO-10646-UCS-4"
))
{
if
(
isBigEndian
!=
null
)
{
boolean
isBE
=
isBigEndian
.
booleanValue
();
if
(
isBE
)
{
return
new
UCSReader
(
inputStream
,
UCSReader
.
UCS4BE
);
String
enc
=
(
encoding
!=
null
)
?
encoding
:
EncodingInfo
.
STR_UTF8
;
enc
=
enc
.
toUpperCase
(
Locale
.
ENGLISH
);
MessageFormatter
f
=
fErrorReporter
.
getMessageFormatter
(
XMLMessageFormatter
.
XML_DOMAIN
);
Locale
l
=
fErrorReporter
.
getLocale
();
switch
(
enc
)
{
case
EncodingInfo
.
STR_UTF8
:
return
new
UTF8Reader
(
inputStream
,
fBufferSize
,
f
,
l
);
case
EncodingInfo
.
STR_UTF16
:
if
(
isBigEndian
!=
null
)
{
return
new
UTF16Reader
(
inputStream
,
fBufferSize
,
isBigEndian
,
f
,
l
);
}
break
;
case
EncodingInfo
.
STR_UTF16BE
:
return
new
UTF16Reader
(
inputStream
,
fBufferSize
,
true
,
f
,
l
);
case
EncodingInfo
.
STR_UTF16LE
:
return
new
UTF16Reader
(
inputStream
,
fBufferSize
,
false
,
f
,
l
);
case
EncodingInfo
.
STR_UCS4
:
if
(
isBigEndian
!=
null
)
{
if
(
isBigEndian
)
{
return
new
UCSReader
(
inputStream
,
UCSReader
.
UCS4BE
);
}
else
{
return
new
UCSReader
(
inputStream
,
UCSReader
.
UCS4LE
);
}
}
else
{
return
new
UCSReader
(
inputStream
,
UCSReader
.
UCS4LE
);
fErrorReporter
.
reportError
(
this
.
getEntityScanner
(),
XMLMessageFormatter
.
XML_DOMAIN
,
"EncodingByteOrderUnsupported"
,
new
Object
[]
{
encoding
},
XMLErrorReporter
.
SEVERITY_FATAL_ERROR
);
}
}
else
{
fErrorReporter
.
reportError
(
this
.
getEntityScanner
(),
XMLMessageFormatter
.
XML_DOMAIN
,
"EncodingByteOrderUnsupported"
,
new
Object
[]
{
encoding
},
XMLErrorReporter
.
SEVERITY_FATAL_ERROR
);
}
}
if
(
ENCODING
.
equals
(
"ISO-10646-UCS-2"
))
{
if
(
isBigEndian
!=
null
)
{
// sould never happen with this encoding...
boolean
isBE
=
isBigEndian
.
booleanValue
();
if
(
isBE
)
{
return
new
UCSReader
(
inputStream
,
UCSReader
.
UCS2BE
);
break
;
case
EncodingInfo
.
STR_UCS2
:
if
(
isBigEndian
!=
null
)
{
if
(
isBigEndian
)
{
return
new
UCSReader
(
inputStream
,
UCSReader
.
UCS2BE
);
}
else
{
return
new
UCSReader
(
inputStream
,
UCSReader
.
UCS2LE
);
}
}
else
{
return
new
UCSReader
(
inputStream
,
UCSReader
.
UCS2LE
);
fErrorReporter
.
reportError
(
this
.
getEntityScanner
(),
XMLMessageFormatter
.
XML_DOMAIN
,
"EncodingByteOrderUnsupported"
,
new
Object
[]
{
encoding
},
XMLErrorReporter
.
SEVERITY_FATAL_ERROR
);
}
}
else
{
fErrorReporter
.
reportError
(
this
.
getEntityScanner
(),
XMLMessageFormatter
.
XML_DOMAIN
,
"EncodingByteOrderUnsupported"
,
new
Object
[]
{
encoding
},
XMLErrorReporter
.
SEVERITY_FATAL_ERROR
);
}
break
;
}
// check for valid name
boolean
validIANA
=
XMLChar
.
isValidIANAEncoding
(
encoding
);
boolean
validJava
=
XMLChar
.
isValidJavaEncoding
(
encoding
);
if
(!
validIANA
||
(
fAllowJavaEncodings
&&
!
validJava
))
{
fErrorReporter
.
reportError
(
this
.
getEntityScanner
(),
XMLMessageFormatter
.
XML_DOMAIN
,
fErrorReporter
.
reportError
(
this
.
getEntityScanner
(),
XMLMessageFormatter
.
XML_DOMAIN
,
"EncodingDeclInvalid"
,
new
Object
[]
{
encoding
},
XMLErrorReporter
.
SEVERITY_FATAL_ERROR
);
// NOTE: AndyH suggested that, on failure, we use ISO Latin 1
// because every byte is a valid ISO Latin 1 character.
// It may not translate correctly but if we failed on
// the encoding anyway, then we're expecting the content
// of the document to be bad. This will just prevent an
// invalid UTF-8 sequence to be detected. This is only
// important when continue-after-fatal-error is turned
// on. -Ac
// NOTE: AndyH suggested that, on failure, we use ISO Latin 1
// because every byte is a valid ISO Latin 1 character.
// It may not translate correctly but if we failed on
// the encoding anyway, then we're expecting the content
// of the document to be bad. This will just prevent an
// invalid UTF-8 sequence to be detected. This is only
// important when continue-after-fatal-error is turned
// on. -Ac
encoding
=
"ISO-8859-1"
;
}
// try to use a Java reader
String
javaEncoding
=
EncodingMap
.
getIANA2JavaMapping
(
ENCODING
);
String
javaEncoding
=
EncodingMap
.
getIANA2JavaMapping
(
enc
);
if
(
javaEncoding
==
null
)
{
if
(
fAllowJavaEncodings
)
{
if
(
fAllowJavaEncodings
)
{
javaEncoding
=
encoding
;
}
else
{
fErrorReporter
.
reportError
(
this
.
getEntityScanner
(),
XMLMessageFormatter
.
XML_DOMAIN
,
fErrorReporter
.
reportError
(
this
.
getEntityScanner
(),
XMLMessageFormatter
.
XML_DOMAIN
,
"EncodingDeclInvalid"
,
new
Object
[]
{
encoding
},
XMLErrorReporter
.
SEVERITY_FATAL_ERROR
);
// see comment above.
javaEncoding
=
"ISO8859_1"
;
// see comment above.
javaEncoding
=
"ISO8859_1"
;
}
}
if
(
DEBUG_ENCODINGS
)
{
...
...
@@ -2898,108 +2893,78 @@ public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
}
// print()
/**
* Buffer used in entity manager to reuse character arrays instead
* of creating new ones every time.
*
* @xerces.internal
*
* @author Ankit Pasricha, IBM
*/
private
static
class
CharacterBuffer
{
/** character buffer */
private
char
[]
ch
;
/** whether the buffer is for an external or internal scanned entity */
private
boolean
isExternal
;
public
CharacterBuffer
(
boolean
isExternal
,
int
size
)
{
this
.
isExternal
=
isExternal
;
ch
=
new
char
[
size
];
}
}
/**
* Stores a number of character buffers and provides it to the entity
* manager to use when an entity is seen.
* Information about auto-detectable encodings.
*
* @xerces.internal
*
* @author
Ankit Pasricha
, IBM
* @author
Michael Glavassevich
, IBM
*/
private
static
class
CharacterBufferPool
{
private
static
final
int
DEFAULT_POOL_SIZE
=
3
;
private
CharacterBuffer
[]
fInternalBufferPool
;
private
CharacterBuffer
[]
fExternalBufferPool
;
private
int
fExternalBufferSize
;
private
int
fInternalBufferSize
;
private
int
poolSize
;
private
int
fInternalTop
;
private
int
fExternalTop
;
public
CharacterBufferPool
(
int
externalBufferSize
,
int
internalBufferSize
)
{
this
(
DEFAULT_POOL_SIZE
,
externalBufferSize
,
internalBufferSize
);
}
public
CharacterBufferPool
(
int
poolSize
,
int
externalBufferSize
,
int
internalBufferSize
)
{
fExternalBufferSize
=
externalBufferSize
;
fInternalBufferSize
=
internalBufferSize
;
this
.
poolSize
=
poolSize
;
init
();
}
/** Initializes buffer pool. **/
private
void
init
()
{
fInternalBufferPool
=
new
CharacterBuffer
[
poolSize
];
fExternalBufferPool
=
new
CharacterBuffer
[
poolSize
];
fInternalTop
=
-
1
;
fExternalTop
=
-
1
;
}
/** Retrieves buffer from pool. **/
public
CharacterBuffer
getBuffer
(
boolean
external
)
{
if
(
external
)
{
if
(
fExternalTop
>
-
1
)
{
return
fExternalBufferPool
[
fExternalTop
--];
}
else
{
return
new
CharacterBuffer
(
true
,
fExternalBufferSize
);
}
}
else
{
if
(
fInternalTop
>
-
1
)
{
return
fInternalBufferPool
[
fInternalTop
--];
}
else
{
return
new
CharacterBuffer
(
false
,
fInternalBufferSize
);
}
}
}
/** Returns buffer to pool. **/
public
void
returnToPool
(
CharacterBuffer
buffer
)
{
if
(
buffer
.
isExternal
)
{
if
(
fExternalTop
<
fExternalBufferPool
.
length
-
1
)
{
fExternalBufferPool
[++
fExternalTop
]
=
buffer
;
}
}
else
if
(
fInternalTop
<
fInternalBufferPool
.
length
-
1
)
{
fInternalBufferPool
[++
fInternalTop
]
=
buffer
;
}
}
/** Sets the size of external buffers and dumps the old pool. **/
public
void
setExternalBufferSize
(
int
bufferSize
)
{
fExternalBufferSize
=
bufferSize
;
fExternalBufferPool
=
new
CharacterBuffer
[
poolSize
];
fExternalTop
=
-
1
;
}
}
private
static
class
EncodingInfo
{
public
static
final
String
STR_UTF8
=
"UTF-8"
;
public
static
final
String
STR_UTF16
=
"UTF-16"
;
public
static
final
String
STR_UTF16BE
=
"UTF-16BE"
;
public
static
final
String
STR_UTF16LE
=
"UTF-16LE"
;
public
static
final
String
STR_UCS4
=
"ISO-10646-UCS-4"
;
public
static
final
String
STR_UCS2
=
"ISO-10646-UCS-2"
;
public
static
final
String
STR_CP037
=
"CP037"
;
/** UTF-8 **/
public
static
final
EncodingInfo
UTF_8
=
new
EncodingInfo
(
STR_UTF8
,
null
,
false
);
/** UTF-8, with BOM **/
public
static
final
EncodingInfo
UTF_8_WITH_BOM
=
new
EncodingInfo
(
STR_UTF8
,
null
,
true
);
/** UTF-16, big-endian **/
public
static
final
EncodingInfo
UTF_16_BIG_ENDIAN
=
new
EncodingInfo
(
STR_UTF16BE
,
STR_UTF16
,
Boolean
.
TRUE
,
false
);
/** UTF-16, big-endian with BOM **/
public
static
final
EncodingInfo
UTF_16_BIG_ENDIAN_WITH_BOM
=
new
EncodingInfo
(
STR_UTF16BE
,
STR_UTF16
,
Boolean
.
TRUE
,
true
);
/** UTF-16, little-endian **/
public
static
final
EncodingInfo
UTF_16_LITTLE_ENDIAN
=
new
EncodingInfo
(
STR_UTF16LE
,
STR_UTF16
,
Boolean
.
FALSE
,
false
);
/** UTF-16, little-endian with BOM **/
public
static
final
EncodingInfo
UTF_16_LITTLE_ENDIAN_WITH_BOM
=
new
EncodingInfo
(
STR_UTF16LE
,
STR_UTF16
,
Boolean
.
FALSE
,
true
);
/** UCS-4, big-endian **/
public
static
final
EncodingInfo
UCS_4_BIG_ENDIAN
=
new
EncodingInfo
(
STR_UCS4
,
Boolean
.
TRUE
,
false
);
/** UCS-4, little-endian **/
public
static
final
EncodingInfo
UCS_4_LITTLE_ENDIAN
=
new
EncodingInfo
(
STR_UCS4
,
Boolean
.
FALSE
,
false
);
/** UCS-4, unusual byte-order (2143) or (3412) **/
public
static
final
EncodingInfo
UCS_4_UNUSUAL_BYTE_ORDER
=
new
EncodingInfo
(
STR_UCS4
,
null
,
false
);
/** EBCDIC **/
public
static
final
EncodingInfo
EBCDIC
=
new
EncodingInfo
(
STR_CP037
,
null
,
false
);
public
final
String
autoDetectedEncoding
;
public
final
String
readerEncoding
;
public
final
Boolean
isBigEndian
;
public
final
boolean
hasBOM
;
private
EncodingInfo
(
String
autoDetectedEncoding
,
Boolean
isBigEndian
,
boolean
hasBOM
)
{
this
(
autoDetectedEncoding
,
autoDetectedEncoding
,
isBigEndian
,
hasBOM
);
}
// <init>(String,Boolean,boolean)
private
EncodingInfo
(
String
autoDetectedEncoding
,
String
readerEncoding
,
Boolean
isBigEndian
,
boolean
hasBOM
)
{
this
.
autoDetectedEncoding
=
autoDetectedEncoding
;
this
.
readerEncoding
=
readerEncoding
;
this
.
isBigEndian
=
isBigEndian
;
this
.
hasBOM
=
hasBOM
;
}
// <init>(String,String,Boolean,boolean)
}
// class EncodingInfo
/**
* This class wraps the byte inputstreams we're presented with.
...
...
@@ -3052,20 +3017,13 @@ public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
fOffset
=
fStartOffset
;
}
public
int
read
()
throws
IOException
{
int
b
=
0
;
if
(
fOffset
<
fLength
)
{
return
fData
[
fOffset
++]
&
0xff
;
}
if
(
fOffset
==
fEndOffset
)
{
return
-
1
;
}
public
int
readAndBuffer
()
throws
IOException
{
if
(
fOffset
==
fData
.
length
)
{
byte
[]
newData
=
new
byte
[
fOffset
<<
1
];
System
.
arraycopy
(
fData
,
0
,
newData
,
0
,
fOffset
);
fData
=
newData
;
}
b
=
fInputStream
.
read
();
final
int
b
=
fInputStream
.
read
();
if
(
b
==
-
1
)
{
fEndOffset
=
fOffset
;
return
-
1
;
...
...
@@ -3075,18 +3033,27 @@ public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
return
b
&
0xff
;
}
public
int
read
()
throws
IOException
{
if
(
fOffset
<
fLength
)
{
return
fData
[
fOffset
++]
&
0xff
;
}
if
(
fOffset
==
fEndOffset
)
{
return
-
1
;
}
if
(
fCurrentEntity
.
mayReadChunks
)
{
return
fInputStream
.
read
();
}
return
readAndBuffer
();
}
public
int
read
(
byte
[]
b
,
int
off
,
int
len
)
throws
IOException
{
int
bytesLeft
=
fLength
-
fOffset
;
final
int
bytesLeft
=
fLength
-
fOffset
;
if
(
bytesLeft
==
0
)
{
if
(
fOffset
==
fEndOffset
)
{
return
-
1
;
}
/**
* //System.out.println("fCurrentEntitty = " + fCurrentEntity );
* //System.out.println("fInputStream = " + fInputStream );
* // better get some more for the voracious reader... */
// read a block of data as requested
if
(
fCurrentEntity
.
mayReadChunks
||
!
fCurrentEntity
.
xmlDeclChunkRead
)
{
if
(!
fCurrentEntity
.
xmlDeclChunkRead
)
...
...
@@ -3096,15 +3063,13 @@ public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
}
return
fInputStream
.
read
(
b
,
off
,
len
);
}
int
returnedVal
=
read
();
if
(
returnedVal
==
-
1
)
{
fEndOffset
=
fOffset
;
return
-
1
;
int
returnedVal
=
readAndBuffer
();
if
(
returnedVal
==
-
1
)
{
fEndOffset
=
fOffset
;
return
-
1
;
}
b
[
off
]
=
(
byte
)
returnedVal
;
return
1
;
}
if
(
len
<
bytesLeft
)
{
if
(
len
<=
0
)
{
...
...
@@ -3120,8 +3085,7 @@ public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
return
len
;
}
public
long
skip
(
long
n
)
throws
IOException
{
public
long
skip
(
long
n
)
throws
IOException
{
int
bytesLeft
;
if
(
n
<=
0
)
{
return
0
;
...
...
@@ -3142,7 +3106,7 @@ public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
return
bytesLeft
;
}
n
-=
bytesLeft
;
/*
/*
* In a manner of speaking, when this class isn't permitting more
* than one byte at a time to be read, it is "blocking". The
* available() method should indicate how much can be read without
...
...
@@ -3154,13 +3118,13 @@ public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
}
public
int
available
()
throws
IOException
{
int
bytesLeft
=
fLength
-
fOffset
;
final
int
bytesLeft
=
fLength
-
fOffset
;
if
(
bytesLeft
==
0
)
{
if
(
fOffset
==
fEndOffset
)
{
return
-
1
;
}
return
fCurrentEntity
.
mayReadChunks
?
fInputStream
.
available
()
:
0
;
:
0
;
}
return
bytesLeft
;
}
...
...
@@ -3171,7 +3135,6 @@ public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
public
void
reset
()
{
fOffset
=
fMark
;
//test();
}
public
boolean
markSupported
()
{
...
...
test/jaxp/javax/xml/jaxp/unittest/parsers/BaseParsingTest.java
浏览文件 @
75be47d5
/*
* Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2017,
2019,
Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
...
...
@@ -22,19 +22,22 @@
*/
package
parsers
;
import
java.io.ByteArrayInputStream
;
import
java.io.StringReader
;
import
javax.xml.parsers.DocumentBuilder
;
import
javax.xml.parsers.DocumentBuilderFactory
;
import
javax.xml.stream.XMLInputFactory
;
import
javax.xml.stream.XMLStreamReader
;
import
static
org
.
testng
.
Assert
.
assertEquals
;
import
org.testng.annotations.DataProvider
;
import
org.testng.annotations.Listeners
;
import
org.testng.annotations.Test
;
import
org.w3c.dom.Document
;
import
org.xml.sax.InputSource
;
/**
* @test
* @bug 8169450
* @bug 8169450
8222415
* @library /javax/xml/jaxp/libs /javax/xml/jaxp/unittest
* @run testng/othervm -DrunSecMngr=true parsers.BaseParsingTest
* @run testng/othervm parsers.BaseParsingTest
...
...
@@ -143,7 +146,7 @@ public class BaseParsingTest {
* @bug 8169450
* This particular issue does not appear in DOM parsing since the spaces are
* normalized during version detection. This test case then serves as a guard
* against such an issue from occuring in the version detection.
* against such an issue from occur
r
ing in the version detection.
*
* @param xml the test xml
* @throws Exception if the parser fails to parse the xml
...
...
@@ -151,8 +154,24 @@ public class BaseParsingTest {
@Test
(
dataProvider
=
"xmlDeclarations"
)
public
void
testWithDOM
(
String
xml
)
throws
Exception
{
DocumentBuilderFactory
dbf
=
DocumentBuilderFactory
.
newInstance
();
dbf
.
setValidating
(
true
);
DocumentBuilder
db
=
dbf
.
newDocumentBuilder
();
db
.
parse
(
new
InputSource
(
new
StringReader
(
xml
)));
}
/**
* @bug 8222415
* Verifies that the parser is configured properly for UTF-16BE or LE.
* @throws Exception
*/
@Test
public
void
testEncoding
()
throws
Exception
{
ByteArrayInputStream
bis
=
new
ByteArrayInputStream
(
"<?xml version=\"1.0\" encoding=\"UTF-16\"?> <a/>"
.
getBytes
(
"UnicodeLittle"
));
InputSource
is
=
new
InputSource
(
bis
);
DocumentBuilderFactory
dbf
=
DocumentBuilderFactory
.
newInstance
();
DocumentBuilder
db
=
dbf
.
newDocumentBuilder
();
Document
doc
=
db
.
parse
(
is
);
assertEquals
(
"UTF-16LE"
,
doc
.
getInputEncoding
());
}
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录