adaptions.cpp 3.7 KB
Newer Older
T
tmbdev 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
/**********************************************************************
 * File:        adaptions.cpp  (Formerly adaptions.c)
 * Description: Functions used to adapt to blobs already confidently
 *					identified
 * Author:		Chris Newton
 * Created:		Thu Oct  7 10:17:28 BST 1993
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#ifdef __UNIX__
#include          <assert.h>
#endif
#include          <ctype.h>
#include          <string.h>
#include          "tessbox.h"
#include          "tessvars.h"
#include          "memry.h"
#include          "reject.h"
#include          "control.h"
#include          "stopper.h"
T
theraysmith 已提交
32
#include          "tesseractclass.h"
T
tmbdev 已提交
33

34 35 36 37 38
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif

T
theraysmith 已提交
39 40 41
namespace tesseract {

/**
 * Decide whether a recognised word may be used for adaption.
 *
 * The decision is driven by the bits of @p mode, whose positions are given by
 * the local MODES enum below:
 *  - ADAPTABLE_WERD / ACCEPTABLE_WERD are "enabling" bits: at least one of the
 *    corresponding word flags (tess_would_adapt, tess_accepted) must be set,
 *    otherwise the word is rejected before any further check runs.
 *  - CHECK_DAWGS, CHECK_ONE_ELL_CONFLICT, CHECK_SPACES and CHECK_AMBIG_WERD
 *    are "veto" bits: each one that is set can reject an otherwise adaptable
 *    word.
 *
 * When tessedit_adaption_debug is set, the reason for each decision is
 * reported through tprintf().
 *
 * @param word  Word result to examine. word->best_choice may be null for the
 *              initial debug report only.
 *              NOTE(review): the veto checks dereference best_choice without a
 *              null test; presumably it is always set once tess_would_adapt or
 *              tess_accepted is true - confirm against the callers.
 * @param mode  Bit mask of MODES values; 0 disables adaption entirely.
 * @return TRUE if the word passed every requested test, FALSE otherwise.
 */
BOOL8 Tesseract::word_adaptable(  //should we adapt?
                                WERD_RES *word,
                                uint16_t mode) {
  if (tessedit_adaption_debug) {
    // The original code only guarded the string argument against a null
    // best_choice and then dereferenced it for rating/certainty anyway.
    // Guard all three arguments with the same test.
    const bool has_choice = word->best_choice != nullptr;
    tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n",
            has_choice ? word->best_choice->unichar_string().string() : "",
            has_choice ? word->best_choice->rating() : 0.0f,
            has_choice ? word->best_choice->certainty() : 0.0f);
  }

  BOOL8 status = FALSE;
  BITS16 flags(mode);

  // Bit positions within mode. The enumerator order defines the bit numbers,
  // so it must not be rearranged.
  enum MODES
  {
    ADAPTABLE_WERD,
    ACCEPTABLE_WERD,
    CHECK_DAWGS,
    CHECK_SPACES,
    CHECK_ONE_ELL_CONFLICT,
    CHECK_AMBIG_WERD
  };

  /*
  0: NO adaption
  */
  if (mode == 0) {
    if (tessedit_adaption_debug) tprintf("adaption disabled\n");
    return FALSE;
  }

  // Enabling bits: either one is enough to make the word a candidate.
  if (flags.bit (ADAPTABLE_WERD)) {
    status |= word->tess_would_adapt;  // result of Classify::AdaptableWord()
    if (tessedit_adaption_debug && !status) {
      tprintf("tess_would_adapt bit is false\n");
    }
  }

  if (flags.bit (ACCEPTABLE_WERD)) {
    status |= word->tess_accepted;
    if (tessedit_adaption_debug && !status) {
      tprintf("tess_accepted bit is false\n");
    }
  }

  if (!status) {                  // If not set then
    return FALSE;                // ignore other checks
  }

  // Veto bits: from here on every check can only reject the word.

  // Reject words whose best choice did not come from one of the dawg
  // permuters or the number permuter.
  if (flags.bit (CHECK_DAWGS) &&
    (word->best_choice->permuter () != SYSTEM_DAWG_PERM) &&
    (word->best_choice->permuter () != FREQ_DAWG_PERM) &&
    (word->best_choice->permuter () != USER_DAWG_PERM) &&
    (word->best_choice->permuter () != NUMBER_PERM)) {
    if (tessedit_adaption_debug) tprintf("word not in dawgs\n");
    return FALSE;
  }

  if (flags.bit (CHECK_ONE_ELL_CONFLICT) && one_ell_conflict (word, FALSE)) {
    if (tessedit_adaption_debug) tprintf("word has ell conflict\n");
    return FALSE;
  }

  // Reject words whose recognised text contains a space character.
  if (flags.bit (CHECK_SPACES) &&
    (strchr(word->best_choice->unichar_string().string(), ' ') != nullptr)) {
    if (tessedit_adaption_debug) tprintf("word contains spaces\n");
    return FALSE;
  }

  if (flags.bit (CHECK_AMBIG_WERD) &&
      word->best_choice->dangerous_ambig_found()) {
    if (tessedit_adaption_debug) tprintf("word is ambiguous\n");
    return FALSE;
  }

  if (tessedit_adaption_debug) {
    tprintf("returning status %d\n", status);
  }
  return status;
}

}  // namespace tesseract