LineBreaker.cpp 20.1 KB
Newer Older
R
Raph Levien 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define VERBOSE_DEBUG 0

19 20
#define LOG_TAG "Minikin"

R
Raph Levien 已提交
21 22
#include <limits>

23
#include <log/log.h>
R
Raph Levien 已提交
24

25
#include "LayoutUtils.h"
R
Raph Levien 已提交
26 27 28 29 30
#include <minikin/Layout.h>
#include <minikin/LineBreaker.h>

using std::vector;

S
Seigo Nonaka 已提交
31
namespace minikin {
R
Raph Levien 已提交
32 33 34 35 36 37 38 39 40

const int CHAR_TAB = 0x0009;

// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
// constants are larger than any reasonable actual width score.
const float SCORE_INFTY = std::numeric_limits<float>::max();
const float SCORE_OVERFULL = 1e12f;
const float SCORE_DESPERATE = 1e10f;

41 42 43 44 45 46 47
// Multiplier for hyphen penalty on last line.
const float LAST_LINE_PENALTY_MULTIPLIER = 4.0f;
// Penalty assigned to each line break (to try to minimize number of lines)
// TODO: when we implement full justification (so spaces can shrink and stretch), this is
// probably not the most appropriate method.
const float LINE_PENALTY_MULTIPLIER = 2.0f;

48 49 50
// Penalty assigned to shrinking the whitepsace.
const float SHRINK_PENALTY_MULTIPLIER = 4.0f;

51 52 53 54 55 56
// Very long words trigger O(n^2) behavior in hyphenation, so we disable hyphenation for
// unreasonably long words. This is somewhat of a heuristic because extremely long words
// are possible in some languages. This does mean that very long real words can get
// broken by desperate breaks, with no hyphens.
const size_t LONGEST_HYPHENATED_WORD = 45;

R
Raph Levien 已提交
57 58 59 60
// When the text buffer is within this limit, capacity of vectors is retained at finish(),
// to avoid allocation.
const size_t MAX_TEXT_BUF_RETAIN = 32678;

61 62 63
// Maximum amount that spaces can shrink, in justified text.
const float SHRINKABILITY = 1.0 / 3.0;

64
void LineBreaker::setLocale(const icu::Locale& locale, Hyphenator* hyphenator) {
65
    mWordBreaker.setLocale(locale);
66
    mLocale = locale;
67 68 69
    mHyphenator = hyphenator;
}

R
Raph Levien 已提交
70
void LineBreaker::setText() {
71
    mWordBreaker.setText(mTextBuf.data(), mTextBuf.size());
R
Raph Levien 已提交
72 73

    // handle initial break here because addStyleRun may never be called
74
    mWordBreaker.next();
R
Raph Levien 已提交
75
    mCandidates.clear();
76
    Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0, HyphenationType::DONT_BREAK};
R
Raph Levien 已提交
77 78 79 80 81 82 83 84 85 86
    mCandidates.push_back(cand);

    // reset greedy breaker state
    mBreaks.clear();
    mWidths.clear();
    mFlags.clear();
    mLastBreak = 0;
    mBestBreak = 0;
    mBestScore = SCORE_INFTY;
    mPreBreak = 0;
87
    mLastHyphenation = HyphenEdit::NO_EDIT;
R
Raph Levien 已提交
88
    mFirstTabIndex = INT_MAX;
89
    mSpaceCount = 0;
R
Raph Levien 已提交
90 91 92 93 94 95
}

void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
    mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth);
}

96

R
Raph Levien 已提交
97 98
void LineBreaker::setIndents(const std::vector<float>& indents) {
    mLineWidths.setIndents(indents);
99 100
}

R
Raph Levien 已提交
101
// This function determines whether a character is a space that disappears at end of line.
102 103
// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]],
// plus '\n'.
R
Raph Levien 已提交
104 105
// Note: all such characters are in the BMP, so it's ok to use code units for this.
static bool isLineEndSpace(uint16_t c) {
106 107
    return c == '\n' || c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) ||
            c == 0x205F || c == 0x3000;
R
Raph Levien 已提交
108 109 110 111 112 113 114
}

// Ordinarily, this method measures the text in the range given. However, when paint
// is nullptr, it assumes the widths have already been calculated and stored in the
// width buffer.
// This method finds the candidate word breaks (using the ICU break iterator) and sends them
// to addCandidate.
115
float LineBreaker::addStyleRun(MinikinPaint* paint, const std::shared_ptr<FontCollection>& typeface,
R
Raph Levien 已提交
116 117 118 119
        FontStyle style, size_t start, size_t end, bool isRtl) {
    float width = 0.0f;
    int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR;

120
    float hyphenPenalty = 0.0;
R
Raph Levien 已提交
121
    if (paint != nullptr) {
122 123
        width = Layout::measureText(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags,
                style, *paint, typeface, mCharWidths.data() + start);
124 125 126

        // a heuristic that seems to perform well
        hyphenPenalty = 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0);
127 128 129
        if (mHyphenationFrequency == kHyphenationFrequency_Normal) {
            hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing
        }
130

131 132 133 134 135 136 137 138
        if (mJustified) {
            // Make hyphenation more aggressive for fully justified text (so that "normal" in
            // justified mode is the same as "full" in ragged-right).
            hyphenPenalty *= 0.25;
        } else {
            // Line penalty is zero for justified text.
            mLinePenalty = std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER);
        }
R
Raph Levien 已提交
139 140
    }

141 142
    size_t current = (size_t)mWordBreaker.current();
    size_t afterWord = start;
143 144 145
    size_t lastBreak = start;
    ParaWidth lastBreakWidth = mWidth;
    ParaWidth postBreak = mWidth;
146
    size_t postSpaceCount = mSpaceCount;
R
Raph Levien 已提交
147 148 149 150 151 152 153 154 155 156
    for (size_t i = start; i < end; i++) {
        uint16_t c = mTextBuf[i];
        if (c == CHAR_TAB) {
            mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak);
            if (mFirstTabIndex == INT_MAX) {
                mFirstTabIndex = (int)i;
            }
            // fall back to greedy; other modes don't know how to deal with tabs
            mStrategy = kBreakStrategy_Greedy;
        } else {
157
            if (isWordSpace(c)) mSpaceCount += 1;
R
Raph Levien 已提交
158 159 160
            mWidth += mCharWidths[i];
            if (!isLineEndSpace(c)) {
                postBreak = mWidth;
161
                postSpaceCount = mSpaceCount;
162
                afterWord = i + 1;
R
Raph Levien 已提交
163 164 165
            }
        }
        if (i + 1 == current) {
166 167 168 169
            size_t wordStart = mWordBreaker.wordStart();
            size_t wordEnd = mWordBreaker.wordEnd();
            if (paint != nullptr && mHyphenator != nullptr &&
                    mHyphenationFrequency != kHyphenationFrequency_None &&
170 171
                    wordStart >= start && wordEnd > wordStart &&
                    wordEnd - wordStart <= LONGEST_HYPHENATED_WORD) {
172 173 174 175
                mHyphenator->hyphenate(&mHyphBuf,
                        &mTextBuf[wordStart],
                        wordEnd - wordStart,
                        mLocale);
176 177 178
#if VERBOSE_DEBUG
                std::string hyphenatedString;
                for (size_t j = wordStart; j < wordEnd; j++) {
179 180 181
                    if (mHyphBuf[j - wordStart] == HyphenationType::BREAK_AND_INSERT_HYPHEN) {
                        hyphenatedString.push_back('-');
                    }
182 183
                    // Note: only works with ASCII, should do UTF-8 conversion here
                    hyphenatedString.push_back(buffer()[j]);
184
                }
185 186
                ALOGD("hyphenated string: %s", hyphenatedString.c_str());
#endif
187

188 189
                // measure hyphenated substrings
                for (size_t j = wordStart; j < wordEnd; j++) {
190 191 192
                    HyphenationType hyph = mHyphBuf[j - wordStart];
                    if (hyph != HyphenationType::DONT_BREAK) {
                        paint->hyphenEdit = HyphenEdit::editForThisLine(hyph);
193 194 195 196 197
                        const float firstPartWidth = Layout::measureText(mTextBuf.data(),
                                lastBreak, j - lastBreak, mTextBuf.size(), bidiFlags, style,
                                *paint, typeface, nullptr);
                        ParaWidth hyphPostBreak = lastBreakWidth + firstPartWidth;

198
                        paint->hyphenEdit = HyphenEdit::editForNextLine(hyph);
199
                        const float secondPartWidth = Layout::measureText(mTextBuf.data(), j,
200 201
                                afterWord - j, mTextBuf.size(), bidiFlags, style, *paint,
                                typeface, nullptr);
202
                        ParaWidth hyphPreBreak = postBreak - secondPartWidth;
203

204 205
                        addWordBreak(j, hyphPreBreak, hyphPostBreak, postSpaceCount, postSpaceCount,
                                hyphenPenalty, hyph);
206 207

                        paint->hyphenEdit = HyphenEdit::NO_EDIT;
208
                    }
209
                }
R
Raph Levien 已提交
210
            }
211 212 213

            // Skip break for zero-width characters inside replacement span
            if (paint != nullptr || current == end || mCharWidths[current] > 0) {
214
                float penalty = hyphenPenalty * mWordBreaker.breakBadness();
215 216
                addWordBreak(current, mWidth, postBreak, mSpaceCount, postSpaceCount, penalty,
                        HyphenationType::DONT_BREAK);
217 218 219 220
            }
            lastBreak = current;
            lastBreakWidth = mWidth;
            current = (size_t)mWordBreaker.next();
R
Raph Levien 已提交
221 222 223 224 225 226 227 228 229
        }
    }

    return width;
}

// add a word break (possibly for a hyphenated fragment), and add desperate breaks if
// needed (ie when word exceeds current line width)
void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak,
230
        size_t preSpaceCount, size_t postSpaceCount, float penalty, HyphenationType hyph) {
R
Raph Levien 已提交
231 232 233 234 235 236 237 238 239 240 241 242 243 244
    Candidate cand;
    ParaWidth width = mCandidates.back().preBreak;
    if (postBreak - width > currentLineWidth()) {
        // Add desperate breaks.
        // Note: these breaks are based on the shaping of the (non-broken) original text; they
        // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping.
        size_t i = mCandidates.back().offset;
        width += mCharWidths[i++];
        for (; i < offset; i++) {
            float w = mCharWidths[i];
            if (w > 0) {
                cand.offset = i;
                cand.preBreak = width;
                cand.postBreak = width;
245 246 247
                // postSpaceCount doesn't include trailing spaces
                cand.preSpaceCount = postSpaceCount;
                cand.postSpaceCount = postSpaceCount;
R
Raph Levien 已提交
248
                cand.penalty = SCORE_DESPERATE;
249
                cand.hyphenType = HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN;
R
Raph Levien 已提交
250
#if VERBOSE_DEBUG
251
                ALOGD("desperate cand: %zd %g:%g",
R
Raph Levien 已提交
252 253 254 255 256 257 258 259 260 261 262 263
                        mCandidates.size(), cand.postBreak, cand.preBreak);
#endif
                addCandidate(cand);
                width += w;
            }
        }
    }

    cand.offset = offset;
    cand.preBreak = preBreak;
    cand.postBreak = postBreak;
    cand.penalty = penalty;
264 265
    cand.preSpaceCount = preSpaceCount;
    cand.postSpaceCount = postSpaceCount;
266
    cand.hyphenType = hyph;
R
Raph Levien 已提交
267
#if VERBOSE_DEBUG
268
    ALOGD("cand: %zd %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak);
R
Raph Levien 已提交
269 270 271 272
#endif
    addCandidate(cand);
}

273 274 275 276 277 278 279 280 281 282 283 284 285 286
// Helper method for addCandidate()
void LineBreaker::pushGreedyBreak() {
    const Candidate& bestCandidate = mCandidates[mBestBreak];
    pushBreak(bestCandidate.offset, bestCandidate.postBreak - mPreBreak,
            mLastHyphenation | HyphenEdit::editForThisLine(bestCandidate.hyphenType));
    mBestScore = SCORE_INFTY;
#if VERBOSE_DEBUG
    ALOGD("break: %d %g", mBreaks.back(), mWidths.back());
#endif
    mLastBreak = mBestBreak;
    mPreBreak = bestCandidate.preBreak;
    mLastHyphenation = HyphenEdit::editForNextLine(bestCandidate.hyphenType);
}

R
Raph Levien 已提交
287 288
// TODO performance: could avoid populating mCandidates if greedy only
void LineBreaker::addCandidate(Candidate cand) {
289
    const size_t candIndex = mCandidates.size();
R
Raph Levien 已提交
290
    mCandidates.push_back(cand);
291 292 293 294

    // mLastBreak is the index of the last line break we decided to do in mCandidates,
    // and mPreBreak is its preBreak value. mBestBreak is the index of the best line breaking candidate
    // we have found since then, and mBestScore is its penalty.
R
Raph Levien 已提交
295 296 297
    if (cand.postBreak - mPreBreak > currentLineWidth()) {
        // This break would create an overfull line, pick the best break and break there (greedy)
        if (mBestBreak == mLastBreak) {
298
            // No good break has been found since last break. Break here.
R
Raph Levien 已提交
299 300
            mBestBreak = candIndex;
        }
301
        pushGreedyBreak();
R
Raph Levien 已提交
302
    }
303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325

    while (mLastBreak != candIndex && cand.postBreak - mPreBreak > currentLineWidth()) {
        // We should rarely come here. But if we are here, we have broken the line, but the
        // remaining part still doesn't fit. We now need to break at the second best place after the
        // last break, but we have not kept that information, so we need to go back and find it.
        //
        // In some really rare cases, postBreak - preBreak of a candidate itself may be over the
        // current line width. We protect ourselves against an infinite loop in that case by
        // checking that we have not broken the line at this candidate already.
        for (size_t i = mLastBreak + 1; i < candIndex; i++) {
            const float penalty = mCandidates[i].penalty;
            if (penalty <= mBestScore) {
                mBestBreak = i;
                mBestScore = penalty;
            }
        }
        if (mBestBreak == mLastBreak) {
            // We didn't find anything good. Break here.
            mBestBreak = candIndex;
        }
        pushGreedyBreak();
    }

R
Raph Levien 已提交
326 327 328 329 330 331
    if (cand.penalty <= mBestScore) {
        mBestBreak = candIndex;
        mBestScore = cand.penalty;
    }
}

332
void LineBreaker::pushBreak(int offset, float width, uint8_t hyphenEdit) {
333 334 335
    mBreaks.push_back(offset);
    mWidths.push_back(width);
    int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift;
336
    flags |= hyphenEdit;
337 338 339 340
    mFlags.push_back(flags);
    mFirstTabIndex = INT_MAX;
}

R
Raph Levien 已提交
341 342 343 344 345 346
void LineBreaker::addReplacement(size_t start, size_t end, float width) {
    mCharWidths[start] = width;
    std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f);
    addStyleRun(nullptr, nullptr, FontStyle(), start, end, false);
}

347 348 349 350 351 352 353 354 355 356 357 358
// Get the width of a space. May return 0 if there are no spaces.
// Note: if there are multiple different widths for spaces (for example, because of mixing of
// fonts), it's only guaranteed to pick one.
float LineBreaker::getSpaceWidth() const {
    for (size_t i = 0; i < mTextBuf.size(); i++) {
        if (isWordSpace(mTextBuf[i])) {
            return mCharWidths[i];
        }
    }
    return 0.0f;
}

R
Raph Levien 已提交
359 360 361 362 363 364 365 366
float LineBreaker::currentLineWidth() const {
    return mLineWidths.getLineWidth(mBreaks.size());
}

void LineBreaker::computeBreaksGreedy() {
    // All breaks but the last have been added in addCandidate already.
    size_t nCand = mCandidates.size();
    if (nCand == 1 || mLastBreak != nCand - 1) {
367
        pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak,
368
                mLastHyphenation);
369
        // don't need to update mBestScore, because we're done
R
Raph Levien 已提交
370 371 372 373 374 375
#if VERBOSE_DEBUG
        ALOGD("final break: %d %g", mBreaks.back(), mWidths.back());
#endif
    }
}

376 377
// Follow "prev" links in mCandidates array, and copy to result arrays.
void LineBreaker::finishBreaksOptimal() {
R
Raph Levien 已提交
378 379 380 381
    // clear existing greedy break result
    mBreaks.clear();
    mWidths.clear();
    mFlags.clear();
382 383 384 385 386 387
    size_t nCand = mCandidates.size();
    size_t prev;
    for (size_t i = nCand - 1; i > 0; i = prev) {
        prev = mCandidates[i].prev;
        mBreaks.push_back(mCandidates[i].offset);
        mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak);
388 389 390 391 392
        int flags = HyphenEdit::editForThisLine(mCandidates[i].hyphenType);
        if (prev > 0) {
            flags |= HyphenEdit::editForNextLine(mCandidates[prev].hyphenType);
        }
        mFlags.push_back(flags);
393 394 395 396 397 398
    }
    std::reverse(mBreaks.begin(), mBreaks.end());
    std::reverse(mWidths.begin(), mWidths.end());
    std::reverse(mFlags.begin(), mFlags.end());
}

399
void LineBreaker::computeBreaksOptimal(bool isRectangle) {
400 401
    size_t active = 0;
    size_t nCand = mCandidates.size();
402
    float width = mLineWidths.getLineWidth(0);
403 404 405 406
    float shortLineFactor = mJustified ? 0.75f : 0.5f;
    float maxShrink = mJustified ? SHRINKABILITY * getSpaceWidth() : 0.0f;

    // "i" iterates through candidates for the end of the line.
407 408 409 410
    for (size_t i = 1; i < nCand; i++) {
        bool atEnd = i == nCand - 1;
        float best = SCORE_INFTY;
        size_t bestPrev = 0;
411
        size_t lineNumberLast = 0;
412

413 414 415 416
        if (!isRectangle) {
            size_t lineNumberLast = mCandidates[active].lineNumber;
            width = mLineWidths.getLineWidth(lineNumberLast);
        }
417 418 419
        ParaWidth leftEdge = mCandidates[i].postBreak - width;
        float bestHope = 0;

420
        // "j" iterates through candidates for the beginning of the line.
421
        for (size_t j = active; j < i; j++) {
422 423 424 425 426 427 428 429 430 431
            if (!isRectangle) {
                size_t lineNumber = mCandidates[j].lineNumber;
                if (lineNumber != lineNumberLast) {
                    float widthNew = mLineWidths.getLineWidth(lineNumber);
                    if (widthNew != width) {
                        leftEdge = mCandidates[i].postBreak - width;
                        bestHope = 0;
                        width = widthNew;
                    }
                    lineNumberLast = lineNumber;
432 433 434 435 436 437
                }
            }
            float jScore = mCandidates[j].score;
            if (jScore + bestHope >= best) continue;
            float delta = mCandidates[j].preBreak - leftEdge;

438
            // compute width score for line
439 440 441 442

            // Note: the "bestHope" optimization makes the assumption that, when delta is
            // non-negative, widthScore will increase monotonically as successive candidate
            // breaks are considered.
443
            float widthScore = 0.0f;
444
            float additionalPenalty = 0.0f;
445
            if ((atEnd || !mJustified) && delta < 0) {
446 447 448
                widthScore = SCORE_OVERFULL;
            } else if (atEnd && mStrategy != kBreakStrategy_Balanced) {
                // increase penalty for hyphen on last line
449
                additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty;
450 451 452
                // Penalize very short (< 1 - shortLineFactor of total width) lines.
                float underfill = delta - shortLineFactor * width;
                widthScore = underfill > 0 ? underfill * underfill : 0;
453
            } else {
454
                widthScore = delta * delta;
455 456 457 458 459 460 461 462
                if (delta < 0) {
                    if (-delta < maxShrink *
                            (mCandidates[i].postSpaceCount - mCandidates[j].preSpaceCount)) {
                        widthScore *= SHRINK_PENALTY_MULTIPLIER;
                    } else {
                        widthScore = SCORE_OVERFULL;
                    }
                }
463
            }
R
Raph Levien 已提交
464 465 466 467 468 469 470

            if (delta < 0) {
                active = j + 1;
            } else {
                bestHope = widthScore;
            }

471
            float score = jScore + widthScore + additionalPenalty;
R
Raph Levien 已提交
472 473 474 475 476
            if (score <= best) {
                best = score;
                bestPrev = j;
            }
        }
477
        mCandidates[i].score = best + mCandidates[i].penalty + mLinePenalty;
R
Raph Levien 已提交
478
        mCandidates[i].prev = bestPrev;
479
        mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1;
480
#if VERBOSE_DEBUG
481
        ALOGD("break %zd: score=%g, prev=%zd", i, mCandidates[i].score, mCandidates[i].prev);
482
#endif
R
Raph Levien 已提交
483
    }
484
    finishBreaksOptimal();
R
Raph Levien 已提交
485 486 487 488 489 490
}

size_t LineBreaker::computeBreaks() {
    if (mStrategy == kBreakStrategy_Greedy) {
        computeBreaksGreedy();
    } else {
491
        computeBreaksOptimal(mLineWidths.isConstant());
R
Raph Levien 已提交
492 493 494 495 496
    }
    return mBreaks.size();
}

void LineBreaker::finish() {
497
    mWordBreaker.finish();
R
Raph Levien 已提交
498
    mWidth = 0;
499
    mLineWidths.clear();
R
Raph Levien 已提交
500 501 502 503 504 505 506 507 508
    mCandidates.clear();
    mBreaks.clear();
    mWidths.clear();
    mFlags.clear();
    if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) {
        mTextBuf.clear();
        mTextBuf.shrink_to_fit();
        mCharWidths.clear();
        mCharWidths.shrink_to_fit();
509 510
        mHyphBuf.clear();
        mHyphBuf.shrink_to_fit();
R
Raph Levien 已提交
511 512 513 514 515 516
        mCandidates.shrink_to_fit();
        mBreaks.shrink_to_fit();
        mWidths.shrink_to_fit();
        mFlags.shrink_to_fit();
    }
    mStrategy = kBreakStrategy_Greedy;
517
    mHyphenationFrequency = kHyphenationFrequency_Normal;
518
    mLinePenalty = 0.0f;
519
    mJustified = false;
R
Raph Levien 已提交
520 521
}

S
Seigo Nonaka 已提交
522
}  // namespace minikin