LineBreaker.h 8.1 KB
Newer Older
R
Raph Levien 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A module for breaking paragraphs into lines, supporting high quality
 * hyphenation and justification.
 */

#ifndef MINIKIN_LINE_BREAKER_H
#define MINIKIN_LINE_BREAKER_H

#include "unicode/brkiter.h"
#include "unicode/locid.h"
#include <cmath>
#include <vector>
29
#include "minikin/Hyphenator.h"
30
#include "minikin/WordBreaker.h"
R
Raph Levien 已提交
31

S
Seigo Nonaka 已提交
32
namespace minikin {
R
Raph Levien 已提交
33 34 35 36 37 38 39

enum BreakStrategy {
    kBreakStrategy_Greedy = 0,
    kBreakStrategy_HighQuality = 1,
    kBreakStrategy_Balanced = 2
};

40 41 42 43 44 45
enum HyphenationFrequency {
    kHyphenationFrequency_None = 0,
    kHyphenationFrequency_Normal = 1,
    kHyphenationFrequency_Full = 2
};

R
Raph Levien 已提交
46 47 48 49 50 51 52 53
// TODO: want to generalize to be able to handle array of line widths
class LineWidths {
    public:
        void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
            mFirstWidth = firstWidth;
            mFirstWidthLineCount = firstWidthLineCount;
            mRestWidth = restWidth;
        }
R
Raph Levien 已提交
54 55
        void setIndents(const std::vector<float>& indents) {
            mIndents = indents;
56
        }
57 58
        bool isConstant() const {
            // technically mFirstWidthLineCount == 0 would count too, but doesn't actually happen
R
Raph Levien 已提交
59
            return mRestWidth == mFirstWidth && mIndents.empty();
60
        }
R
Raph Levien 已提交
61
        float getLineWidth(int line) const {
62
            float width = (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth;
R
Raph Levien 已提交
63 64 65
            if (!mIndents.empty()) {
                if ((size_t)line < mIndents.size()) {
                    width -= mIndents[line];
66
                } else {
R
Raph Levien 已提交
67
                    width -= mIndents.back();
68 69 70
                }
            }
            return width;
R
Raph Levien 已提交
71
        }
72 73 74
        void clear() {
            mIndents.clear();
        }
R
Raph Levien 已提交
75 76 77 78
    private:
        float mFirstWidth;
        int mFirstWidthLineCount;
        float mRestWidth;
R
Raph Levien 已提交
79
        std::vector<float> mIndents;
R
Raph Levien 已提交
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
};

class TabStops {
    public:
        void set(const int* stops, size_t nStops, int tabWidth) {
            if (stops != nullptr) {
                mStops.assign(stops, stops + nStops);
            } else {
                mStops.clear();
            }
            mTabWidth = tabWidth;
        }
        float nextTab(float widthSoFar) const {
            for (size_t i = 0; i < mStops.size(); i++) {
                if (mStops[i] > widthSoFar) {
                    return mStops[i];
                }
            }
            return floor(widthSoFar / mTabWidth + 1) * mTabWidth;
        }
    private:
        std::vector<int> mStops;
        int mTabWidth;
};

class LineBreaker {
    public:
107 108
        const static int kTab_Shift = 29;  // keep synchronized with TAB_MASK in StaticLayout.java

R
Raph Levien 已提交
109 110 111 112 113 114
        // Note: Locale persists across multiple invocations (it is not cleaned up by finish()),
        // explicitly to avoid the cost of creating ICU BreakIterator objects. It should always
        // be set on the first invocation, but callers are encouraged not to call again unless
        // locale has actually changed.
        // That logic could be here but it's better for performance that it's upstream because of
        // the cost of constructing and comparing the ICU Locale object.
115 116
        // Note: caller is responsible for managing lifetime of hyphenator
        void setLocale(const icu::Locale& locale, Hyphenator* hyphenator);
R
Raph Levien 已提交
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139

        void resize(size_t size) {
            mTextBuf.resize(size);
            mCharWidths.resize(size);
        }

        size_t size() const {
            return mTextBuf.size();
        }

        uint16_t* buffer() {
            return mTextBuf.data();
        }

        float* charWidths() {
            return mCharWidths.data();
        }

        // set text to current contents of buffer
        void setText();

        void setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth);

R
Raph Levien 已提交
140
        void setIndents(const std::vector<float>& indents);
141

R
Raph Levien 已提交
142 143 144 145 146 147 148 149
        void setTabStops(const int* stops, size_t nStops, int tabWidth) {
            mTabStops.set(stops, nStops, tabWidth);
        }

        BreakStrategy getStrategy() const { return mStrategy; }

        void setStrategy(BreakStrategy strategy) { mStrategy = strategy; }

150 151 152 153 154 155
        HyphenationFrequency getHyphenationFrequency() const { return mHyphenationFrequency; }

        void setHyphenationFrequency(HyphenationFrequency frequency) {
            mHyphenationFrequency = frequency;
        }

R
Raph Levien 已提交
156 157 158 159
        // TODO: this class is actually fairly close to being general and not tied to using
        // Minikin to do the shaping of the strings. The main thing that would need to be changed
        // is having some kind of callback (or virtual class, or maybe even template), which could
        // easily be instantiated with Minikin's Layout. Future work for when needed.
160 161
        float addStyleRun(MinikinPaint* paint, const FontCollection* typeface, FontStyle style,
                size_t start, size_t end, bool isRtl);
R
Raph Levien 已提交
162 163 164 165 166 167 168 169 170 171 172 173 174

        void addReplacement(size_t start, size_t end, float width);

        size_t computeBreaks();

        const int* getBreaks() const {
            return mBreaks.data();
        }

        const float* getWidths() const {
            return mWidths.data();
        }

175
        const int* getFlags() const {
R
Raph Levien 已提交
176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
            return mFlags.data();
        }

        void finish();

    private:
        // ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for
        // very large paragraphs, accuracy could degrade using only 32-bit float. Note however
        // that float is used extensively on the Java side for this. This is a typedef so that
        // we can easily change it based on performance/accuracy tradeoff.
        typedef double ParaWidth;

        // A single candidate break
        struct Candidate {
            size_t offset;  // offset to text buffer, in code units
            size_t prev;  // index to previous break
            ParaWidth preBreak;
            ParaWidth postBreak;
            float penalty;  // penalty of this break (for example, hyphen penalty)
            float score;  // best score found for this break
196 197
            size_t lineNumber;  // only updated for non-constant line widths
            uint8_t hyphenEdit;
R
Raph Levien 已提交
198 199 200 201
        };

        float currentLineWidth() const;

202 203
        void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty,
                uint8_t hyph);
R
Raph Levien 已提交
204 205 206

        void addCandidate(Candidate cand);

207 208 209
        // push an actual break to the output. Takes care of setting flags for tab
        void pushBreak(int offset, float width, uint8_t hyph);

R
Raph Levien 已提交
210 211
        void computeBreaksGreedy();

212
        void computeBreaksOptimal(bool isRectangular);
213 214

        void finishBreaksOptimal();
R
Raph Levien 已提交
215

216
        WordBreaker mWordBreaker;
R
Raph Levien 已提交
217 218 219
        std::vector<uint16_t>mTextBuf;
        std::vector<float>mCharWidths;

220 221 222
        Hyphenator* mHyphenator;
        std::vector<uint8_t> mHyphBuf;

R
Raph Levien 已提交
223 224
        // layout parameters
        BreakStrategy mStrategy = kBreakStrategy_Greedy;
225
        HyphenationFrequency mHyphenationFrequency = kHyphenationFrequency_Normal;
R
Raph Levien 已提交
226 227 228 229 230 231
        LineWidths mLineWidths;
        TabStops mTabStops;

        // result of line breaking
        std::vector<int> mBreaks;
        std::vector<float> mWidths;
232
        std::vector<int> mFlags;
R
Raph Levien 已提交
233 234 235

        ParaWidth mWidth = 0;
        std::vector<Candidate> mCandidates;
236
        float mLinePenalty = 0.0f;
R
Raph Levien 已提交
237 238 239 240 241 242 243 244 245

        // the following are state for greedy breaker (updated while adding style runs)
        size_t mLastBreak;
        size_t mBestBreak;
        float mBestScore;
        ParaWidth mPreBreak;  // prebreak of last break
        int mFirstTabIndex;
};

S
Seigo Nonaka 已提交
246
}  // namespace minikin
R
Raph Levien 已提交
247 248

#endif  // MINIKIN_LINE_BREAKER_H