Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
xiaoxuan_i809
AndroidUtilCode
提交
6bc10a75
A
AndroidUtilCode
项目概览
xiaoxuan_i809
/
AndroidUtilCode
与 Fork 源项目一致
从无法访问的项目Fork
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
A
AndroidUtilCode
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
6bc10a75
编写于
11月 24, 2019
作者:
Jack---Jiao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat: FileUtils.getFileCharsetSimple()区分UTF-8无BOM和GBK编码
上级
3513c74c
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
96 addition
and
80 deletion
+96
-80
lib/utilcode/src/main/java/com/blankj/utilcode/util/FileUtils.java
...ode/src/main/java/com/blankj/utilcode/util/FileUtils.java
+94
-79
lib/utilcode/src/test/res/file/GBK.txt
lib/utilcode/src/test/res/file/GBK.txt
+2
-1
未找到文件。
lib/utilcode/src/main/java/com/blankj/utilcode/util/FileUtils.java
浏览文件 @
6bc10a75
...
...
@@ -18,6 +18,7 @@ import java.security.DigestInputStream;
import
java.security.MessageDigest
;
import
java.security.NoSuchAlgorithmException
;
import
java.util.ArrayList
;
import
java.util.BitSet
;
import
java.util.Collections
;
import
java.util.Comparator
;
import
java.util.List
;
...
...
@@ -37,6 +38,8 @@ public final class FileUtils {
// Line separator string taken from the "line.separator" system property.
private
static
final
String
LINE_SEP
=
System
.
getProperty
(
"line.separator"
);
// Number of bits inspected per byte; used as the loop bound by the
// bit-level helpers in this class (getCountOfSequential, convert2BitSet).
private
static
final
int
BYTE_SIZE
=
8
;
private
FileUtils
()
{
throw
new
UnsupportedOperationException
(
"u can't instantiate me..."
);
}
...
...
@@ -903,109 +906,121 @@ public final class FileUtils {
case
0xfeff
:
return
"UTF-16BE"
;
default
:
return
"GBK"
;
try
{
if
(
isUtf8
(
file
))
{
return
"UTF-8"
;
}
else
{
return
"GBK"
;
}
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
return
"GBK"
;
}
}
}
/**
* Return whether the charset of file is utf8.
*
* @param file
Path The path of
file.
* @param file
The
file.
* @return {@code true}: yes<br>{@code false}: no
*/
public
static
boolean
isUtf8
(
final
String
filePath
)
{
return
isUtf8
(
getFileByPath
(
filePath
));
/**
 * Return whether every byte sequence read from the file has the structure
 * of UTF-8: a byte whose highest bit is set is treated as the lead byte of
 * a multi-byte sequence and the bytes that follow it are validated by
 * {@code checkMultiByte}; bytes whose highest bit is clear are accepted as is.
 *
 * <p>The stream is always closed before this method returns or throws.
 *
 * @param file The file.
 * @return {@code true}: yes<br>{@code false}: no
 * @throws Exception if the file cannot be opened or read
 */
private static boolean isUtf8(File file) throws Exception {
    BufferedInputStream bis = new BufferedInputStream(new FileInputStream(file));
    try {
        // Read the first byte.
        // NOTE(review): for an empty file this is -1 and is still handed to
        // convert2BitSet below (do/while runs once) — confirm the intended result.
        int code = bis.read();
        do {
            BitSet bitSet = convert2BitSet(code);
            if (bitSet.get(0)) {
                // Highest bit set: multi-byte sequence, so read and check
                // the bytes that follow the lead byte.
                if (!checkMultiByte(bis, bitSet)) {
                    return false;
                }
            }
            // Single-byte case needs no check; move on to the next byte.
            code = bis.read();
        } while (code != -1);
        return true;
    } finally {
        // Close on every path, including when a read or a check throws.
        bis.close();
    }
}
/**
* Return whether the charset of file is utf8.
*
* @param file The file.
* @return {@code true}: yes<br>{@code false}: no
* 检测多字节,判断是否符合utf8编码
*/
public
static
boolean
isUtf8
(
final
File
file
)
{
if
(
file
==
null
)
return
false
;
InputStream
is
=
null
;
try
{
byte
[]
bytes
=
new
byte
[
24
];
is
=
new
BufferedInputStream
(
new
FileInputStream
(
file
));
int
read
=
is
.
read
(
bytes
);
if
(
read
!=
-
1
)
{
byte
[]
readArr
=
new
byte
[
read
];
System
.
arraycopy
(
bytes
,
0
,
readArr
,
0
,
read
);
return
isUtf8
(
readArr
)
==
100
;
}
else
{
private
static
boolean
checkMultiByte
(
BufferedInputStream
bis
,
BitSet
bitSet
)
throws
Exception
{
int
count
=
getCountOfSequential
(
bitSet
);
// 已经读取了一个字节,不能再读取
byte
[]
bytes
=
new
byte
[
count
-
1
];
bis
.
read
(
bytes
);
for
(
byte
b
:
bytes
)
{
if
(!
checkUtf8Byte
(
b
))
{
return
false
;
}
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
}
finally
{
try
{
if
(
is
!=
null
)
{
is
.
close
();
}
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
}
}
return
fals
e
;
return
tru
e
;
}
private
static
int
isUtf8
(
byte
[]
raw
)
{
int
i
,
len
;
int
utf8
=
0
,
ascii
=
0
;
if
(
raw
.
length
>
3
)
{
if
((
raw
[
0
]
==
(
byte
)
0xEF
)
&&
(
raw
[
1
]
==
(
byte
)
0xBB
)
&&
(
raw
[
2
]
==
(
byte
)
0xBF
))
{
return
100
;
/**
* 检测bitSet中从开始有多少个连续的1
*/
private
static
int
getCountOfSequential
(
BitSet
bitSet
)
{
int
count
=
0
;
for
(
int
i
=
0
;
i
<
BYTE_SIZE
;
i
++)
{
if
(
bitSet
.
get
(
i
))
{
count
++;
}
else
{
break
;
}
}
len
=
raw
.
length
;
int
child
=
0
;
for
(
i
=
0
;
i
<
len
;
)
{
if
((
raw
[
i
]
&
(
byte
)
0xFF
)
==
(
byte
)
0xFF
||
(
raw
[
i
]
&
(
byte
)
0xFE
)
==
(
byte
)
0xFE
)
{
return
0
;
}
if
(
child
==
0
)
{
if
((
raw
[
i
]
&
(
byte
)
0x7F
)
==
raw
[
i
]
&&
raw
[
i
]
!=
0
)
{
ascii
++;
}
else
if
((
raw
[
i
]
&
(
byte
)
0xC0
)
==
(
byte
)
0xC0
)
{
for
(
int
bit
=
0
;
bit
<
8
;
bit
++)
{
if
((((
byte
)
(
0x80
>>
bit
))
&
raw
[
i
])
==
((
byte
)
(
0x80
>>
bit
)))
{
child
=
bit
;
}
else
{
break
;
}
}
utf8
++;
}
i
++;
}
else
{
child
=
(
raw
.
length
-
i
>
child
)
?
child
:
(
raw
.
length
-
i
);
boolean
currentNotUtf8
=
false
;
for
(
int
children
=
0
;
children
<
child
;
children
++)
{
if
((
raw
[
i
+
children
]
&
((
byte
)
0x80
))
!=
((
byte
)
0x80
))
{
if
((
raw
[
i
+
children
]
&
(
byte
)
0x7F
)
==
raw
[
i
+
children
]
&&
raw
[
i
]
!=
0
)
{
ascii
++;
}
currentNotUtf8
=
true
;
}
}
if
(
currentNotUtf8
)
{
utf8
--;
i
++;
}
else
{
utf8
+=
child
;
i
+=
child
;
}
child
=
0
;
return
count
;
}
/**
 * Check a single byte and return whether it has the form of a UTF-8
 * continuation byte, i.e. its highest bit is set and its second-highest
 * bit is clear ({@code 10xxxxxx}).
 *
 * @param b The byte to check.
 * @return {@code true}: yes<br>{@code false}: no
 */
private static boolean checkUtf8Byte(byte b) throws Exception {
    // Keep only the two highest bits; 0x80 means "1" followed by "0".
    final int topTwoBits = b & 0xC0;
    return topTwoBits == 0x80;
}
/**
* 将整形转为BitSet
*/
private
static
BitSet
convert2BitSet
(
int
code
)
{
BitSet
bitSet
=
new
BitSet
(
BYTE_SIZE
);
for
(
int
i
=
0
;
i
<
BYTE_SIZE
;
i
++)
{
int
tmp3
=
code
>>
(
BYTE_SIZE
-
i
-
1
);
int
tmp2
=
0x1
&
tmp3
;
if
(
tmp2
==
1
)
{
bitSet
.
set
(
i
);
}
}
if
(
ascii
==
len
)
{
return
100
;
return
bitSet
;
}
/**
* Return whether the charset of file is utf8.
*
* @param filePath The path of file.
* @return {@code true}: yes<br>{@code false}: no
*/
public
static
boolean
isUtf8
(
final
String
filePath
)
{
try
{
return
isUtf8
(
getFileByPath
(
filePath
));
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
return
false
;
}
return
(
int
)
(
100
*
((
float
)
(
utf8
+
ascii
)
/
(
float
)
len
));
}
/**
* Return the number of lines of file.
*
...
...
lib/utilcode/src/test/res/file/GBK.txt
浏览文件 @
6bc10a75
GBK
\ No newline at end of file
GBK
Ұй
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录