提交 9402eda0 编写于 作者: R Raph Levien 提交者: android-build-merger

Add penalty for breaks in URLs and email addresses

am: c88ef135

* commit 'c88ef135':
  Add penalty for breaks in URLs and email addresses
...@@ -50,6 +50,8 @@ public: ...@@ -50,6 +50,8 @@ public:
ssize_t wordEnd() const; ssize_t wordEnd() const;
int breakBadness() const;
void finish(); void finish();
private: private:
......
...@@ -207,7 +207,8 @@ float LineBreaker::addStyleRun(MinikinPaint* paint, const FontCollection* typefa ...@@ -207,7 +207,8 @@ float LineBreaker::addStyleRun(MinikinPaint* paint, const FontCollection* typefa
// Skip break for zero-width characters inside replacement span // Skip break for zero-width characters inside replacement span
if (paint != nullptr || current == end || mCharWidths[current] > 0) { if (paint != nullptr || current == end || mCharWidths[current] > 0) {
addWordBreak(current, mWidth, postBreak, 0.0, 0); float penalty = hyphenPenalty * mWordBreaker.breakBadness();
addWordBreak(current, mWidth, postBreak, penalty, 0);
} }
lastBreak = current; lastBreak = current;
lastBreakWidth = mWidth; lastBreakWidth = mWidth;
......
...@@ -193,6 +193,10 @@ ssize_t WordBreaker::wordEnd() const { ...@@ -193,6 +193,10 @@ ssize_t WordBreaker::wordEnd() const {
return result; return result;
} }
int WordBreaker::breakBadness() const {
return (mInEmailOrUrl && mCurrent < mScanOffset) ? 1 : 0;
}
void WordBreaker::finish() { void WordBreaker::finish() {
mText = nullptr; mText = nullptr;
// Note: calling utext_close multiply is safe // Note: calling utext_close multiply is safe
......
...@@ -42,10 +42,12 @@ TEST_F(WordBreakerTest, basic) { ...@@ -42,10 +42,12 @@ TEST_F(WordBreakerTest, basic) {
EXPECT_EQ(6, breaker.next()); // after "hello " EXPECT_EQ(6, breaker.next()); // after "hello "
EXPECT_EQ(0, breaker.wordStart()); // "hello" EXPECT_EQ(0, breaker.wordStart()); // "hello"
EXPECT_EQ(5, breaker.wordEnd()); EXPECT_EQ(5, breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
EXPECT_EQ(6, breaker.current()); EXPECT_EQ(6, breaker.current());
EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end
EXPECT_EQ(6, breaker.wordStart()); // "world" EXPECT_EQ(6, breaker.wordStart()); // "world"
EXPECT_EQ(11, breaker.wordEnd()); EXPECT_EQ(11, breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
EXPECT_EQ(11, breaker.current()); EXPECT_EQ(11, breaker.current());
} }
...@@ -58,9 +60,11 @@ TEST_F(WordBreakerTest, softHyphen) { ...@@ -58,9 +60,11 @@ TEST_F(WordBreakerTest, softHyphen) {
EXPECT_EQ(7, breaker.next()); // after "hel{SOFT HYPHEN}lo " EXPECT_EQ(7, breaker.next()); // after "hel{SOFT HYPHEN}lo "
EXPECT_EQ(0, breaker.wordStart()); // "hel{SOFT HYPHEN}lo" EXPECT_EQ(0, breaker.wordStart()); // "hel{SOFT HYPHEN}lo"
EXPECT_EQ(6, breaker.wordEnd()); EXPECT_EQ(6, breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end
EXPECT_EQ(7, breaker.wordStart()); // "world" EXPECT_EQ(7, breaker.wordStart()); // "world"
EXPECT_EQ(12, breaker.wordEnd()); EXPECT_EQ(12, breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
} }
TEST_F(WordBreakerTest, punct) { TEST_F(WordBreakerTest, punct) {
...@@ -73,9 +77,11 @@ TEST_F(WordBreakerTest, punct) { ...@@ -73,9 +77,11 @@ TEST_F(WordBreakerTest, punct) {
EXPECT_EQ(9, breaker.next()); // after "¡¡hello, " EXPECT_EQ(9, breaker.next()); // after "¡¡hello, "
EXPECT_EQ(2, breaker.wordStart()); // "hello" EXPECT_EQ(2, breaker.wordStart()); // "hello"
EXPECT_EQ(7, breaker.wordEnd()); EXPECT_EQ(7, breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end
EXPECT_EQ(9, breaker.wordStart()); // "world" EXPECT_EQ(9, breaker.wordStart()); // "world"
EXPECT_EQ(14, breaker.wordEnd()); EXPECT_EQ(14, breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
} }
TEST_F(WordBreakerTest, email) { TEST_F(WordBreakerTest, email) {
...@@ -87,11 +93,14 @@ TEST_F(WordBreakerTest, email) { ...@@ -87,11 +93,14 @@ TEST_F(WordBreakerTest, email) {
EXPECT_EQ(0, breaker.current()); EXPECT_EQ(0, breaker.current());
EXPECT_EQ(11, breaker.next()); // after "foo@example" EXPECT_EQ(11, breaker.next()); // after "foo@example"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(16, breaker.next()); // after ".com " EXPECT_EQ(16, breaker.next()); // after ".com "
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end
EXPECT_EQ(16, breaker.wordStart()); // "x" EXPECT_EQ(16, breaker.wordStart()); // "x"
EXPECT_EQ(17, breaker.wordEnd()); EXPECT_EQ(17, breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
} }
TEST_F(WordBreakerTest, mailto) { TEST_F(WordBreakerTest, mailto) {
...@@ -103,13 +112,17 @@ TEST_F(WordBreakerTest, mailto) { ...@@ -103,13 +112,17 @@ TEST_F(WordBreakerTest, mailto) {
EXPECT_EQ(0, breaker.current()); EXPECT_EQ(0, breaker.current());
EXPECT_EQ(7, breaker.next()); // after "mailto:" EXPECT_EQ(7, breaker.next()); // after "mailto:"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(18, breaker.next()); // after "foo@example" EXPECT_EQ(18, breaker.next()); // after "foo@example"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(23, breaker.next()); // after ".com " EXPECT_EQ(23, breaker.next()); // after ".com "
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end
EXPECT_EQ(23, breaker.wordStart()); // "x" EXPECT_EQ(23, breaker.wordStart()); // "x"
EXPECT_EQ(24, breaker.wordEnd()); EXPECT_EQ(24, breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
} }
// The current logic always places a line break after a detected email address or URL // The current logic always places a line break after a detected email address or URL
...@@ -123,11 +136,14 @@ TEST_F(WordBreakerTest, emailNonAscii) { ...@@ -123,11 +136,14 @@ TEST_F(WordBreakerTest, emailNonAscii) {
EXPECT_EQ(0, breaker.current()); EXPECT_EQ(0, breaker.current());
EXPECT_EQ(11, breaker.next()); // after "foo@example" EXPECT_EQ(11, breaker.next()); // after "foo@example"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(15, breaker.next()); // after ".com" EXPECT_EQ(15, breaker.next()); // after ".com"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end
EXPECT_EQ(15, breaker.wordStart()); // "一" EXPECT_EQ(15, breaker.wordStart()); // "一"
EXPECT_EQ(16, breaker.wordEnd()); EXPECT_EQ(16, breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
} }
TEST_F(WordBreakerTest, emailCombining) { TEST_F(WordBreakerTest, emailCombining) {
...@@ -139,11 +155,14 @@ TEST_F(WordBreakerTest, emailCombining) { ...@@ -139,11 +155,14 @@ TEST_F(WordBreakerTest, emailCombining) {
EXPECT_EQ(0, breaker.current()); EXPECT_EQ(0, breaker.current());
EXPECT_EQ(11, breaker.next()); // after "foo@example" EXPECT_EQ(11, breaker.next()); // after "foo@example"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(17, breaker.next()); // after ".com̃ " EXPECT_EQ(17, breaker.next()); // after ".com̃ "
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end
EXPECT_EQ(17, breaker.wordStart()); // "x" EXPECT_EQ(17, breaker.wordStart()); // "x"
EXPECT_EQ(18, breaker.wordEnd()); EXPECT_EQ(18, breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
} }
TEST_F(WordBreakerTest, lonelyAt) { TEST_F(WordBreakerTest, lonelyAt) {
...@@ -155,11 +174,14 @@ TEST_F(WordBreakerTest, lonelyAt) { ...@@ -155,11 +174,14 @@ TEST_F(WordBreakerTest, lonelyAt) {
EXPECT_EQ(2, breaker.next()); // after "a " EXPECT_EQ(2, breaker.next()); // after "a "
EXPECT_EQ(0, breaker.wordStart()); // "a" EXPECT_EQ(0, breaker.wordStart()); // "a"
EXPECT_EQ(1, breaker.wordEnd()); EXPECT_EQ(1, breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
EXPECT_EQ(4, breaker.next()); // after "@ " EXPECT_EQ(4, breaker.next()); // after "@ "
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end
EXPECT_EQ(4, breaker.wordStart()); // "b" EXPECT_EQ(4, breaker.wordStart()); // "b"
EXPECT_EQ(5, breaker.wordEnd()); EXPECT_EQ(5, breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
} }
TEST_F(WordBreakerTest, url) { TEST_F(WordBreakerTest, url) {
...@@ -171,15 +193,20 @@ TEST_F(WordBreakerTest, url) { ...@@ -171,15 +193,20 @@ TEST_F(WordBreakerTest, url) {
EXPECT_EQ(0, breaker.current()); EXPECT_EQ(0, breaker.current());
EXPECT_EQ(5, breaker.next()); // after "http:" EXPECT_EQ(5, breaker.next()); // after "http:"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(7, breaker.next()); // after "//" EXPECT_EQ(7, breaker.next()); // after "//"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(14, breaker.next()); // after "example" EXPECT_EQ(14, breaker.next()); // after "example"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(19, breaker.next()); // after ".com " EXPECT_EQ(19, breaker.next()); // after ".com "
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end
EXPECT_EQ(19, breaker.wordStart()); // "x" EXPECT_EQ(19, breaker.wordStart()); // "x"
EXPECT_EQ(20, breaker.wordEnd()); EXPECT_EQ(20, breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
} }
// Breaks according to section 14.12 of Chicago Manual of Style, *URLs or DOIs and line breaks* // Breaks according to section 14.12 of Chicago Manual of Style, *URLs or DOIs and line breaks*
...@@ -192,38 +219,55 @@ TEST_F(WordBreakerTest, urlBreakChars) { ...@@ -192,38 +219,55 @@ TEST_F(WordBreakerTest, urlBreakChars) {
EXPECT_EQ(0, breaker.current()); EXPECT_EQ(0, breaker.current());
EXPECT_EQ(5, breaker.next()); // after "http:" EXPECT_EQ(5, breaker.next()); // after "http:"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(7, breaker.next()); // after "//" EXPECT_EQ(7, breaker.next()); // after "//"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(8, breaker.next()); // after "a" EXPECT_EQ(8, breaker.next()); // after "a"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(10, breaker.next()); // after ".b" EXPECT_EQ(10, breaker.next()); // after ".b"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(11, breaker.next()); // after "/" EXPECT_EQ(11, breaker.next()); // after "/"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(13, breaker.next()); // after "~c" EXPECT_EQ(13, breaker.next()); // after "~c"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(15, breaker.next()); // after ",d" EXPECT_EQ(15, breaker.next()); // after ",d"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(17, breaker.next()); // after "-e" EXPECT_EQ(17, breaker.next()); // after "-e"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(19, breaker.next()); // after "?f" EXPECT_EQ(19, breaker.next()); // after "?f"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(20, breaker.next()); // after "=" EXPECT_EQ(20, breaker.next()); // after "="
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(21, breaker.next()); // after "g" EXPECT_EQ(21, breaker.next()); // after "g"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(22, breaker.next()); // after "&" EXPECT_EQ(22, breaker.next()); // after "&"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(23, breaker.next()); // after "h" EXPECT_EQ(23, breaker.next()); // after "h"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(25, breaker.next()); // after "#i" EXPECT_EQ(25, breaker.next()); // after "#i"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(27, breaker.next()); // after "%j" EXPECT_EQ(27, breaker.next()); // after "%j"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ(29, breaker.next()); // after "_k" EXPECT_EQ(29, breaker.next()); // after "_k"
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(1, breaker.breakBadness());
EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd());
EXPECT_EQ(0, breaker.breakBadness());
} }
TEST_F(WordBreakerTest, urlNoHyphenBreak) { TEST_F(WordBreakerTest, urlNoHyphenBreak) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册