adaptions.cpp 3.5 KB
Newer Older
T
tmbdev 已提交
1 2 3
/**********************************************************************
 * File:        adaptions.cpp  (Formerly adaptions.c)
 * Description: Functions used to adapt to blobs already confidently
S
Stefan Weil 已提交
4 5 6
 *              identified
 * Author:      Chris Newton
 * Created:     Thu Oct  7 10:17:28 BST 1993
T
tmbdev 已提交
7 8 9 10 11 12 13 14 15 16 17 18 19 20
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

S
Stefan Weil 已提交
21
#include <ctype.h>
22
#include <cstring>
S
Stefan Weil 已提交
23 24 25 26 27 28 29
#include "tessbox.h"
#include "tessvars.h"
#include "memry.h"
#include "reject.h"
#include "control.h"
#include "stopper.h"
#include "tesseractclass.h"
T
tmbdev 已提交
30

31 32 33 34 35
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif

T
theraysmith 已提交
36
namespace tesseract {
A
Alexander Zaitsev 已提交
37 38 39
bool Tesseract::word_adaptable(  //should we adapt?
        WERD_RES* word,
        uint16_t mode) {
T
theraysmith 已提交
40 41
  if (tessedit_adaption_debug) {
    tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n",
S
Stefan Weil 已提交
42
          word->best_choice == nullptr ? "" :
T
theraysmith 已提交
43 44 45
          word->best_choice->unichar_string().string(),
          word->best_choice->rating(), word->best_choice->certainty());
  }
T
tmbdev 已提交
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63

  BOOL8 status = FALSE;
  BITS16 flags(mode);

  enum MODES
  {
    ADAPTABLE_WERD,
    ACCEPTABLE_WERD,
    CHECK_DAWGS,
    CHECK_SPACES,
    CHECK_ONE_ELL_CONFLICT,
    CHECK_AMBIG_WERD
  };

  /*
  0: NO adaption
  */
  if (mode == 0) {
T
theraysmith 已提交
64
    if (tessedit_adaption_debug) tprintf("adaption disabled\n");
A
Alexander Zaitsev 已提交
65
    return false;
T
tmbdev 已提交
66 67
  }

T
theraysmith 已提交
68 69 70 71 72 73
  if (flags.bit (ADAPTABLE_WERD)) {
    status |= word->tess_would_adapt;  // result of Classify::AdaptableWord()
    if (tessedit_adaption_debug && !status) {
      tprintf("tess_would_adapt bit is false\n");
    }
  }
T
tmbdev 已提交
74

T
theraysmith 已提交
75
  if (flags.bit (ACCEPTABLE_WERD)) {
T
tmbdev 已提交
76
    status |= word->tess_accepted;
T
theraysmith 已提交
77 78 79 80
    if (tessedit_adaption_debug && !status) {
      tprintf("tess_accepted bit is false\n");
    }
  }
T
tmbdev 已提交
81

T
theraysmith 已提交
82
  if (!status) {                  // If not set then
A
Alexander Zaitsev 已提交
83
    return false;                // ignore other checks
T
theraysmith 已提交
84
  }
T
tmbdev 已提交
85 86 87 88 89

  if (flags.bit (CHECK_DAWGS) &&
    (word->best_choice->permuter () != SYSTEM_DAWG_PERM) &&
    (word->best_choice->permuter () != FREQ_DAWG_PERM) &&
    (word->best_choice->permuter () != USER_DAWG_PERM) &&
T
theraysmith 已提交
90 91
    (word->best_choice->permuter () != NUMBER_PERM)) {
    if (tessedit_adaption_debug) tprintf("word not in dawgs\n");
A
Alexander Zaitsev 已提交
92
    return false;
T
theraysmith 已提交
93
  }
T
tmbdev 已提交
94

95
  if (flags.bit (CHECK_ONE_ELL_CONFLICT) && one_ell_conflict (word, false)) {
T
theraysmith 已提交
96
    if (tessedit_adaption_debug) tprintf("word has ell conflict\n");
A
Alexander Zaitsev 已提交
97
    return false;
T
theraysmith 已提交
98
  }
T
tmbdev 已提交
99 100

  if (flags.bit (CHECK_SPACES) &&
S
Stefan Weil 已提交
101
    (strchr(word->best_choice->unichar_string().string(), ' ') != nullptr)) {
T
theraysmith 已提交
102
    if (tessedit_adaption_debug) tprintf("word contains spaces\n");
A
Alexander Zaitsev 已提交
103
    return false;
T
theraysmith 已提交
104
  }
T
tmbdev 已提交
105 106

  if (flags.bit (CHECK_AMBIG_WERD) &&
107
      word->best_choice->dangerous_ambig_found()) {
T
theraysmith 已提交
108
    if (tessedit_adaption_debug) tprintf("word is ambiguous\n");
A
Alexander Zaitsev 已提交
109
    return false;
T
theraysmith 已提交
110
  }
T
tmbdev 已提交
111

T
theraysmith 已提交
112 113 114
  if (tessedit_adaption_debug) {
    tprintf("returning status %d\n", status);
  }
T
tmbdev 已提交
115 116 117
  return status;
}

T
theraysmith 已提交
118
}  // namespace tesseract