FontLanguage.cpp 4.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "Minikin"

#include "FontLanguage.h"

#include <hb.h>
#include <unicode/uloc.h>

namespace android {

// Packs four characters into a single 32-bit tag, with the first character in
// the most significant byte and the last character in the least significant.
#define SCRIPT_TAG(c1, c2, c3, c4)              \
        ((static_cast<uint32_t>(c1) << 24) |    \
         (static_cast<uint32_t>(c2) << 16) |    \
         (static_cast<uint32_t>(c3) << 8) |     \
         static_cast<uint32_t>(c4))

// Parse a BCP 47 language identifier into the internal structure.
//
// buf/length describe the identifier; only the first `length` bytes are read,
// so buf does not have to be NUL-terminated. Subtags are separated by '-' or
// '_'.
//
// Only two pieces of the identifier are retained:
//  - The language: the first subtag, accepted only when it is two or three
//    bytes long. It is packed into mLanguage with the first letter in the
//    lowest byte. For any other length mLanguage is set to 0 and the rest of
//    the identifier is not parsed.
//  - The script: any later subtag that is exactly four bytes long and starts
//    with an upper-case ASCII letter. If several subtags qualify, the last
//    one wins.
// mSubScriptBits is then derived from the parsed script.
FontLanguage::FontLanguage(const char* buf, size_t length) : FontLanguage() {
    // Read one byte as an unsigned value. Going through uint8_t prevents a
    // byte >= 0x80 from being sign-extended (where plain char is signed) and
    // smearing set bits over the other packed letters.
    auto byteAt = [buf](size_t index) -> uint32_t {
        return static_cast<uint8_t>(buf[index]);
    };

    // Find the end of the first subtag.
    size_t i;
    for (i = 0; i < length; i++) {
        char c = buf[i];
        if (c == '-' || c == '_') break;
    }
    if (i == 2 || i == 3) {  // only accept two or three letter language code.
        mLanguage = byteAt(0) | (byteAt(1) << 8) | ((i == 3) ? (byteAt(2) << 16) : 0u);
    } else {
        // We don't understand anything other than two-letter or three-letter
        // language codes, so we skip parsing the rest of the string.
        mLanguage = 0ul;
        return;
    }

    // Walk the remaining subtags; [i, next) is the current one.
    size_t next;
    for (i++; i < length; i = next + 1) {
        for (next = i; next < length; next++) {
            char c = buf[next];
            if (c == '-' || c == '_') break;
        }
        if (next - i == 4 && 'A' <= buf[i] && buf[i] <= 'Z') {
            mScript = SCRIPT_TAG(byteAt(i), byteAt(i + 1), byteAt(i + 2), byteAt(i + 3));
        }
    }

    mSubScriptBits = scriptToSubScriptBits(mScript);
}

//static
uint8_t FontLanguage::scriptToSubScriptBits(uint32_t script) {
    uint8_t subScriptBits = 0u;
    switch (script) {
S
Seigo Nonaka 已提交
64 65 66
        case SCRIPT_TAG('B', 'o', 'p', 'o'):
            subScriptBits = kBopomofoFlag;
            break;
67 68 69
        case SCRIPT_TAG('H', 'a', 'n', 'g'):
            subScriptBits = kHangulFlag;
            break;
S
Seigo Nonaka 已提交
70 71 72 73
        case SCRIPT_TAG('H', 'a', 'n', 'b'):
            // Bopomofo is almost exclusively used in Taiwan.
            subScriptBits = kHanFlag | kBopomofoFlag;
            break;
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
        case SCRIPT_TAG('H', 'a', 'n', 'i'):
            subScriptBits = kHanFlag;
            break;
        case SCRIPT_TAG('H', 'a', 'n', 's'):
            subScriptBits = kHanFlag | kSimplifiedChineseFlag;
            break;
        case SCRIPT_TAG('H', 'a', 'n', 't'):
            subScriptBits = kHanFlag | kTraditionalChineseFlag;
            break;
        case SCRIPT_TAG('H', 'i', 'r', 'a'):
            subScriptBits = kHiraganaFlag;
            break;
        case SCRIPT_TAG('H', 'r', 'k', 't'):
            subScriptBits = kKatakanaFlag | kHiraganaFlag;
            break;
        case SCRIPT_TAG('J', 'p', 'a', 'n'):
            subScriptBits = kHanFlag | kKatakanaFlag | kHiraganaFlag;
            break;
        case SCRIPT_TAG('K', 'a', 'n', 'a'):
            subScriptBits = kKatakanaFlag;
            break;
        case SCRIPT_TAG('K', 'o', 'r', 'e'):
            subScriptBits = kHanFlag | kHangulFlag;
            break;
S
Seigo Nonaka 已提交
98
        case SCRIPT_TAG('Z', 's', 'y', 'e'):
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
            subScriptBits = kEmojiFlag;
            break;
    }
    return subScriptBits;
}

// Rebuilds the textual form of this language: the two- or three-letter
// language code, followed by "-" and the four-letter script when a script is
// set. Returns "und" when no language is set.
std::string FontLanguage::getString() const {
    if (mLanguage == 0ul) {
        return "und";
    }

    std::string result;
    result.reserve(8);  // Longest output is "lll-Ssss".

    // The language letters are stored lowest byte first.
    result.push_back(static_cast<char>(mLanguage & 0xFF));
    result.push_back(static_cast<char>((mLanguage >> 8) & 0xFF));
    const char thirdLetter = (mLanguage >> 16) & 0xFF;
    if (thirdLetter != 0) {
        result.push_back(thirdLetter);
    }

    if (mScript != 0) {
        result.push_back('-');
        // The script letters are stored highest byte first.
        for (int shift = 24; shift >= 0; shift -= 8) {
            result.push_back(static_cast<char>((mScript >> shift) & 0xFFu));
        }
    }
    return result;
}

125
bool FontLanguage::isEqualScript(const FontLanguage& other) const {
126 127 128
    return other.mScript == mScript;
}

129 130 131 132
// Returns true when every bit in requestedBits is also set in this language's
// sub-script bits. An empty request (0) is never considered supported.
bool FontLanguage::supportsScript(uint8_t requestedBits) const {
    if (requestedBits == 0) {
        return false;
    }
    const auto matchedBits = mSubScriptBits & requestedBits;
    return matchedBits == requestedBits;
}

133 134 135 136
// Returns true when this language can cover the given HarfBuzz script: either
// the script tag is identical to this language's script, or all sub-scripts
// implied by `script` are contained in this language's sub-script bits.
bool FontLanguage::supportsHbScript(hb_script_t script) const {
    // `script` is compared with mScript and passed to scriptToSubScriptBits()
    // directly, which is only valid while both encodings agree.
    static_assert(SCRIPT_TAG('J', 'p', 'a', 'n') == HB_TAG('J', 'p', 'a', 'n'),
                  "The Minikin script and HarfBuzz hb_script_t have different encodings.");
    return script == mScript || supportsScript(scriptToSubScriptBits(script));
}

140 141 142 143 144 145 146 147
// Scores how well this language matches `other`:
//   0 - either language is unsupported, or the scripts do not match;
//   1 - the scripts match but the language codes differ;
//   2 - both the script and the language code match.
// Scripts match when the script tags are equal, or when this language's
// sub-script bits contain all of `other`'s sub-script bits.
int FontLanguage::getScoreFor(const FontLanguage other) const {
    if (isUnsupported() || other.isUnsupported()) {
        return 0;
    }
    const bool scriptMatches = isEqualScript(other) || supportsScript(other.mSubScriptBits);
    if (!scriptMatches) {
        return 0;
    }
    return mLanguage == other.mLanguage ? 2 : 1;
}

#undef SCRIPT_TAG
}  // namespace android