提交 3d28a3fc 编写于 作者: R Raph Levien

Implement grapheme cluster breaking

This patch includes an implementation of grapheme cluster breaking,
which is especially useful for repositioning the cursor for left and
right arrow key presses. The implementation is closely based on Unicode
TR29, and uses the ICU grapheme cluster break property, but is tailored
to more closely match the existing implementation and expected behavior.

Part of a fix for b/15653110 Improve behavior of arrow keys in EditText

Change-Id: I8eb742f77039c9ab7b2838285018cf8a8fc88343
上级 bb601b67
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINIKIN_GRAPHEME_BREAK_H
#define MINIKIN_GRAPHEME_BREAK_H
namespace android {
// Static helpers for finding grapheme cluster boundaries in UTF-16 text.
// All functions operate on a run of `count` code units beginning at index
// `start` of `buf`; `offset` is an index into `buf` (not relative to `start`).
class GraphemeBreak {
public:
// Cursor movement modes accepted by getTextRunCursor().
// These values must be kept in sync with CURSOR_AFTER etc in Paint.java
enum MoveOpt {
// Step one code unit forward (unless already at the end of the run), then
// continue forward to the next grapheme break.
AFTER = 0,
// Move forward to the nearest grapheme break at or after the offset.
AT_OR_AFTER = 1,
// Step one code unit backward (unless already at the start of the run),
// then continue backward to the previous grapheme break.
BEFORE = 2,
// Move backward to the nearest grapheme break at or before the offset.
AT_OR_BEFORE = 3,
// Do not move; only test whether the offset itself is a grapheme break.
AT = 4
};
// Determine whether the given offset is a grapheme break.
// This implementation generally follows Unicode TR29 extended
// grapheme break, but with some tweaks to more closely match
// existing implementations.
// Offsets at or before `start`, and at or after `start + count`, are always
// reported as breaks.
static bool isGraphemeBreak(const uint16_t* buf, size_t start, size_t count, size_t offset);
// Matches Android's Java API. Returns the cursor offset that results from
// applying `opt` to `offset`. For AT, the return value is (size_t)-1 when
// the offset is not a grapheme break; the return type is unsigned, so this
// sentinel stands in for a negative result.
static size_t getTextRunCursor(const uint16_t* buf, size_t start, size_t count,
size_t offset, MoveOpt opt);
};
} // namespace android
#endif // MINIKIN_GRAPHEME_BREAK_H
\ No newline at end of file
......@@ -23,6 +23,7 @@ LOCAL_SRC_FILES := \
CssParse.cpp \
FontCollection.cpp \
FontFamily.cpp \
GraphemeBreak.cpp \
Layout.cpp \
MinikinInternal.cpp \
MinikinRefCounted.cpp \
......
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdint.h>
#include <unicode/uchar.h>
#include <unicode/utf16.h>
#include <minikin/GraphemeBreak.h>
namespace android {
// Determine whether `offset` is a grapheme cluster boundary within the run of
// `count` UTF-16 code units that begins at index `start` of `buf`.
//
// buf    - UTF-16 text; indices are absolute positions in this buffer.
// start  - index of the first code unit of the run.
// count  - number of code units in the run.
// offset - absolute index being tested.
//
// Returns true if a break is allowed at `offset`, false if `offset` falls
// inside a cluster. Offsets at or outside either end of the run are breaks.
bool GraphemeBreak::isGraphemeBreak(const uint16_t* buf, size_t start, size_t count,
        size_t offset) {
    // This implementation closely follows Unicode Standard Annex #29 on
    // Unicode Text Segmentation (http://www.unicode.org/reports/tr29/),
    // implementing a tailored version of extended grapheme clusters.
    // The GB rules refer to section 3.1.1, Grapheme Cluster Boundary Rules.

    const size_t end = start + count;

    // Rule GB1, sot /; Rule GB2, / eot
    if (offset <= start || offset >= end) {
        return true;
    }
    if (U16_IS_TRAIL(buf[offset])) {
        // Don't break a surrogate pair
        return false;
    }

    // Decode the code point on each side of the candidate boundary.
    uint32_t c1 = 0;
    uint32_t c2 = 0;
    size_t offset_back = offset;
    size_t offset_forward = offset;
    U16_PREV(buf, start, offset_back, c1);
    // The limit passed to U16_NEXT is an absolute index, so it must be the end
    // of the run (start + count) rather than the run length; otherwise a
    // surrogate pair may be decoded incorrectly whenever start != 0.
    U16_NEXT(buf, offset_forward, end, c2);

    const int32_t p1 = u_getIntPropertyValue(c1, UCHAR_GRAPHEME_CLUSTER_BREAK);
    const int32_t p2 = u_getIntPropertyValue(c2, UCHAR_GRAPHEME_CLUSTER_BREAK);

    // Rule GB3, CR x LF
    if (p1 == U_GCB_CR && p2 == U_GCB_LF) {
        return false;
    }
    // Rule GB4, (Control | CR | LF) /
    if (p1 == U_GCB_CONTROL || p1 == U_GCB_CR || p1 == U_GCB_LF) {
        return true;
    }
    // Rule GB5, / (Control | CR | LF)
    if (p2 == U_GCB_CONTROL || p2 == U_GCB_CR || p2 == U_GCB_LF) {
        // exclude zero-width control characters from breaking (tailoring of TR29)
        if (c2 == 0x00ad
                || (c2 >= 0x200b && c2 <= 0x200f)
                || (c2 >= 0x2028 && c2 <= 0x202e)
                || (c2 >= 0x2060 && c2 <= 0x206f)) {
            return false;
        }
        return true;
    }
    // Rule GB6, L x ( L | V | LV | LVT )
    if (p1 == U_GCB_L && (p2 == U_GCB_L || p2 == U_GCB_V || p2 == U_GCB_LV || p2 == U_GCB_LVT)) {
        return false;
    }
    // Rule GB7, ( LV | V ) x ( V | T )
    if ((p1 == U_GCB_LV || p1 == U_GCB_V) && (p2 == U_GCB_V || p2 == U_GCB_T)) {
        return false;
    }
    // Rule GB8, ( LVT | T ) x T
    if ((p1 == U_GCB_LVT || p1 == U_GCB_T) && p2 == U_GCB_T) {
        return false;
    }
    // Rule GB8a, Regional_Indicator x Regional_Indicator
    if (p1 == U_GCB_REGIONAL_INDICATOR && p2 == U_GCB_REGIONAL_INDICATOR) {
        return false;
    }
    // Rule GB9, x Extend; Rule GB9a, x SpacingMark
    if (p2 == U_GCB_EXTEND || p2 == U_GCB_SPACING_MARK) {
        if (c2 == 0xe33) {
            // most other implementations break THAI CHARACTER SARA AM
            // (tailoring of TR29)
            return true;
        }
        return false;
    }
    // Cluster indic syllables together (tailoring of TR29)
    if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9  // virama
            && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) {
        return false;
    }
    // Rule GB10, Any / Any
    return true;
}
// Compute the cursor position that results from applying `opt` to `offset`
// within the run of `count` code units starting at index `start` of `buf`.
//
// AFTER / BEFORE first step one code unit in their direction (unless already
// at the corresponding end of the run) and then keep moving until a grapheme
// break is reached. AT_OR_AFTER / AT_OR_BEFORE only perform the second part.
// AT leaves the offset alone and yields (size_t)-1 when it is not a break.
// Any other value of `opt` returns `offset` unchanged.
size_t GraphemeBreak::getTextRunCursor(const uint16_t* buf, size_t start, size_t count,
        size_t offset, MoveOpt opt) {
    if (opt == AT) {
        const bool atBreak = isGraphemeBreak(buf, start, count, offset);
        return atBreak ? offset : static_cast<size_t>(-1);
    }

    if (opt == AFTER || opt == AT_OR_AFTER) {
        // AFTER must make progress, so take one step before scanning.
        if (opt == AFTER && offset < start + count) {
            ++offset;
        }
        for (; !isGraphemeBreak(buf, start, count, offset); ++offset) {
        }
        return offset;
    }

    if (opt == BEFORE || opt == AT_OR_BEFORE) {
        // BEFORE must make progress, so take one step before scanning.
        if (opt == BEFORE && offset > start) {
            --offset;
        }
        for (; !isGraphemeBreak(buf, start, count, offset); --offset) {
        }
        return offset;
    }

    return offset;
}
} // namespace android
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册