diff --git a/.github/workflows/unittest-macos.yml b/.github/workflows/unittest-macos.yml index 11e35f622ac2b74fa7f4f0e0a298e21b4f4c023d..3b8f16c91e44733a077de9f52d027a040a2e288f 100644 --- a/.github/workflows/unittest-macos.yml +++ b/.github/workflows/unittest-macos.yml @@ -25,7 +25,7 @@ jobs: run: | brew install autoconf automake libarchive brew install leptonica cairo pango - brew install cabextract abseil + brew install cabextract - name: Setup run: | diff --git a/.gitmodules b/.gitmodules index a20e2768fa4f8c1beed612999d0a82e480588a53..d64de322f2481c1a926cacdb62ebc38e96c32aa4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ -[submodule "abseil"] - path = abseil - url = https://github.com/abseil/abseil-cpp.git [submodule "googletest"] path = googletest url = https://github.com/google/googletest.git diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2cc8f1d071a95c7e0c2b26143eb5b9df56df8db3..394f8397b2521f6d1276f8d29d2fbf496025e2ae 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -69,7 +69,7 @@ your question has been asked (and has been answered) many times before... You should always make sure your changes build and run successfully. -For that, your clone needs to have all submodules (`abseil`, `googletest`, `test`) included. To do so, either specify `--recurse-submodules` during the initial clone, or run `git submodule update --init --recursive NAME` for each `NAME` later. If `configure` already created those directories (blocking the clone), remove them first (or `make distclean`), then clone and reconfigure. +For that, your clone needs to have all submodules (`googletest`, `test`) included. To do so, either specify `--recurse-submodules` during the initial clone, or run `git submodule update --init --recursive NAME` for each `NAME` later. If `configure` already created those directories (blocking the clone), remove them first (or `make distclean`), then clone and reconfigure. Have a look at [the README](./README.md) and [testing README](./test/testing/README.md) and the [documentation](https://tesseract-ocr.github.io/tessdoc/Compiling-%E2%80%93-GitInstallation.html#unit-test-builds) on installation. diff --git a/Makefile.am b/Makefile.am index 07a154665710900741107b9ad3a072a719a7e268..b487b2cac27d6752337f25a6f30ef95b4b7b3722 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1156,7 +1156,6 @@ unittest_CPPFLAGS += $(pangocairo_CFLAGS) endif # ENABLE_TRAINING unittest_CPPFLAGS += -I$(top_srcdir)/src/viewer unittest_CPPFLAGS += -I$(top_srcdir)/src/wordrec -unittest_CPPFLAGS += -I$(top_srcdir)/abseil if TENSORFLOW unittest_CPPFLAGS += -DINCLUDE_TENSORFLOW unittest_CPPFLAGS += -I$(top_srcdir)/unittest @@ -1170,31 +1169,6 @@ libgtest_la_CPPFLAGS = -I$(top_srcdir)/googletest/googletest/include -I$(top_src libgtest_main_la_SOURCES = googletest/googletest/src/gtest_main.cc libgtest_main_la_CPPFLAGS = $(libgtest_la_CPPFLAGS) -# Build Abseil (needed for some unit tests). -check_LTLIBRARIES += libabseil.la -libabseil_la_SOURCES = -libabseil_la_SOURCES += abseil/absl/base/internal/raw_logging.cc -libabseil_la_SOURCES += abseil/absl/base/internal/spinlock.cc -libabseil_la_SOURCES += abseil/absl/base/internal/spinlock_wait.cc -libabseil_la_SOURCES += abseil/absl/base/internal/sysinfo.cc -libabseil_la_SOURCES += abseil/absl/base/internal/throw_delegate.cc -libabseil_la_SOURCES += abseil/absl/numeric/int128.cc -libabseil_la_SOURCES += abseil/absl/strings/ascii.cc -libabseil_la_SOURCES += abseil/absl/strings/charconv.cc -libabseil_la_SOURCES += abseil/absl/strings/internal/charconv_bigint.cc -libabseil_la_SOURCES += abseil/absl/strings/internal/charconv_parse.cc -libabseil_la_SOURCES += abseil/absl/strings/internal/memutil.cc -libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/arg.cc -libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/bind.cc -libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/extension.cc -libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/float_conversion.cc -libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/output.cc -libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/parser.cc -libabseil_la_SOURCES += abseil/absl/strings/match.cc -libabseil_la_SOURCES += abseil/absl/strings/numbers.cc -libabseil_la_SOURCES += abseil/absl/strings/string_view.cc -libabseil_la_CPPFLAGS = -I$(top_srcdir)/abseil - GMOCK_INCLUDES = -I$(top_srcdir)/googletest/googlemock/include \ -I$(top_srcdir)/googletest/googlemock \ -I$(top_srcdir)/googletest/googletest/include \ @@ -1208,7 +1182,6 @@ libgmock_main_la_CPPFLAGS = $(GMOCK_INCLUDES) \ -pthread # Build unittests -ABSEIL_LIBS = libabseil.la GTEST_LIBS = libgtest.la libgtest_main.la -lpthread GMOCK_LIBS = libgmock.la libgmock_main.la TESS_LIBS = $(GTEST_LIBS) @@ -1330,12 +1303,11 @@ endif # !DISABLED_LEGACY_ENGINE baseapi_test_SOURCES = unittest/baseapi_test.cc baseapi_test_CPPFLAGS = $(unittest_CPPFLAGS) -baseapi_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS) +baseapi_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS) baseapi_thread_test_SOURCES = unittest/baseapi_thread_test.cc baseapi_thread_test_CPPFLAGS = $(unittest_CPPFLAGS) -baseapi_thread_test_LDADD = $(ABSEIL_LIBS) -baseapi_thread_test_LDADD += $(TESS_LIBS) $(LEPTONICA_LIBS) +baseapi_thread_test_LDADD = $(TESS_LIBS) $(LEPTONICA_LIBS) if !DISABLED_LEGACY_ENGINE bitvector_test_SOURCES = unittest/bitvector_test.cc @@ -1432,19 +1404,19 @@ loadlang_test_LDADD = $(TESS_LIBS) $(LEPTONICA_LIBS) lstm_recode_test_SOURCES = unittest/lstm_recode_test.cc lstm_recode_test_CPPFLAGS = $(unittest_CPPFLAGS) -lstm_recode_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) +lstm_recode_test_LDADD = $(TRAINING_LIBS) lstm_squashed_test_SOURCES = unittest/lstm_squashed_test.cc lstm_squashed_test_CPPFLAGS = $(unittest_CPPFLAGS) -lstm_squashed_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) +lstm_squashed_test_LDADD = $(TRAINING_LIBS) lstm_test_SOURCES = unittest/lstm_test.cc lstm_test_CPPFLAGS = $(unittest_CPPFLAGS) -lstm_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) +lstm_test_LDADD = $(TRAINING_LIBS) lstmtrainer_test_SOURCES = unittest/lstmtrainer_test.cc lstmtrainer_test_CPPFLAGS = $(unittest_CPPFLAGS) -lstmtrainer_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS) +lstmtrainer_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS) if !DISABLED_LEGACY_ENGINE mastertrainer_test_SOURCES = unittest/mastertrainer_test.cc @@ -1489,7 +1461,7 @@ pango_font_info_test_SOURCES += unittest/util/utf8/unicodetext.cc pango_font_info_test_SOURCES += unittest/util/utf8/unilib.cc endif # TENSORFLOW pango_font_info_test_CPPFLAGS = $(unittest_CPPFLAGS) -pango_font_info_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS) +pango_font_info_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS) pango_font_info_test_LDADD += $(ICU_I18N_LIBS) pango_font_info_test_LDADD += $(pangocairo_LIBS) pango_font_info_test_LDADD += $(pangoft2_LIBS) @@ -1511,7 +1483,7 @@ progress_test_LDADD = $(GTEST_LIBS) $(GMOCK_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS) qrsequence_test_SOURCES = unittest/qrsequence_test.cc qrsequence_test_CPPFLAGS = $(unittest_CPPFLAGS) -qrsequence_test_LDADD = $(ABSEIL_LIBS) $(TESS_LIBS) +qrsequence_test_LDADD = $(TESS_LIBS) recodebeam_test_SOURCES = unittest/recodebeam_test.cc recodebeam_test_CPPFLAGS = $(unittest_CPPFLAGS) diff --git a/abseil b/abseil deleted file mode 160000 index e1d388e7e74803050423d035e4374131b9b57919..0000000000000000000000000000000000000000 --- a/abseil +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e1d388e7e74803050423d035e4374131b9b57919 diff --git a/sw.cpp b/sw.cpp index 117072d307a2386fb3fecd7fc349c1fd3de05575..3d115f0658894f24bd57e18d2a3cd81772ca9cd6 100644 --- a/sw.cpp +++ b/sw.cpp @@ -222,7 +222,6 @@ void build(Solution &s) t += pango_training; t += "org.sw.demo.google.googletest.gmock.main"_dep; t += "org.sw.demo.google.googletest.gtest.main"_dep; - t += "org.sw.demo.google.abseil"_dep; if (t.getCompilerType() == CompilerType::MSVC) t.CompileOptions.push_back("-utf-8"); diff --git a/unittest/README.md b/unittest/README.md index f047f028cc5e5d320c8a893054d14f024a7dbdcd..39153923bb5c144d85d871d8781177c28d679353 100644 --- a/unittest/README.md +++ b/unittest/README.md @@ -60,7 +60,7 @@ │   └── script │   └── Latin.traineddata └── tesseract - ├── abseil + ├── googletest ... ├── test ├── unittest diff --git a/unittest/baseapi_test.cc b/unittest/baseapi_test.cc index d8165f04cd1846eaa9970a672daeb4e171822285..7c70b13dd5401463c963d788f5e71829de91d208 100644 --- a/unittest/baseapi_test.cc +++ b/unittest/baseapi_test.cc @@ -19,7 +19,6 @@ #include #include -#include "absl/strings/ascii.h" #include "gmock/gmock-matchers.h" #include @@ -48,7 +47,7 @@ std::string GetCleanedTextResult(tesseract::TessBaseAPI *tess, Image pix) { char *result = tess->GetUTF8Text(); std::string ocr_result = result; delete[] result; - absl::StripAsciiWhitespace(&ocr_result); + trim(ocr_result); return ocr_result; } @@ -80,7 +79,7 @@ TEST_F(TesseractTest, BasicTesseractTest) { ocr_text = GetCleanedTextResult(&api, src_pix); CHECK_OK( file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults())); - absl::StripAsciiWhitespace(&truth_text); + trim(truth_text); EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str()); src_pix.destroy(); } else { @@ -201,7 +200,7 @@ TEST_F(TesseractTest, AdaptToWordStrTest) { Image src_pix = pixRead(TestDataNameToPath(kTestPages[i]).c_str()); CHECK(src_pix); ocr_text = GetCleanedTextResult(&api, src_pix); - absl::StripAsciiWhitespace(&truth_text); + trim(truth_text); EXPECT_STREQ(kTestText[i], ocr_text.c_str()); src_pix.destroy(); } @@ -223,7 +222,7 @@ TEST_F(TesseractTest, BasicLSTMTest) { ocr_text = GetCleanedTextResult(&api, src_pix); CHECK_OK( file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults())); - absl::StripAsciiWhitespace(&truth_text); + trim(truth_text); EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str()); src_pix.destroy(); } diff --git a/unittest/baseapi_thread_test.cc b/unittest/baseapi_thread_test.cc index 94557463b0b90f1d2ae58af8628108b459da04db..d3121d48dadf6b7597feec62ebd247557ce08426 100644 --- a/unittest/baseapi_thread_test.cc +++ b/unittest/baseapi_thread_test.cc @@ -28,7 +28,6 @@ #endif #include #include -#include "absl/strings/ascii.h" // for absl::StripAsciiWhitespace #include "commandlineflags.h" #include "include_gunit.h" #include "log.h" @@ -148,12 +147,12 @@ static void InitTessInstance(TessBaseAPI *tess, const std::string &lang) { EXPECT_EQ(0, tess->Init(TESSDATA_DIR, lang.c_str())); } -static void GetCleanedText(TessBaseAPI *tess, Image pix, std::string *ocr_text) { +static void GetCleanedText(TessBaseAPI *tess, Image pix, std::string &ocr_text) { tess->SetImage(pix); char *result = tess->GetUTF8Text(); - *ocr_text = result; + ocr_text = result; delete[] result; - absl::StripAsciiWhitespace(ocr_text); + trim(ocr_text); } static void VerifyTextResult(TessBaseAPI *tess, Image pix, const std::string &lang, @@ -166,7 +165,7 @@ static void VerifyTextResult(TessBaseAPI *tess, Image pix, const std::string &la InitTessInstance(tess_local, lang); } std::string ocr_text; - GetCleanedText(tess_local, pix, &ocr_text); + GetCleanedText(tess_local, pix, ocr_text); EXPECT_STREQ(expected_text.c_str(), ocr_text.c_str()); if (tess_local != tess) { delete tess_local; @@ -180,7 +179,7 @@ TEST_F(BaseapiThreadTest, TestBasicSanity) { TessBaseAPI tess; InitTessInstance(&tess, langs_[i]); std::string ocr_text; - GetCleanedText(&tess, pix_[i], &ocr_text); + GetCleanedText(&tess, pix_[i], ocr_text); CHECK(strcmp(gt_text_[i].c_str(), ocr_text.c_str()) == 0) << "Failed with lang = " << langs_[i]; } } diff --git a/unittest/include_gunit.h b/unittest/include_gunit.h index 47914a0405a808743375db2411659c68c8155a2f..eddeb194b91bc79f5b40ea4fa5cd2f80b18ca2f7 100644 --- a/unittest/include_gunit.h +++ b/unittest/include_gunit.h @@ -20,6 +20,19 @@ const char *FLAGS_test_tmpdir = "./tmp"; +namespace tesseract { + +void trim(std::string &s) { + s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) { + return !std::isspace(ch); + })); + s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { + return !std::isspace(ch); + }).base(), s.end()); +} + +} // namespace tesseract + class file : public tesseract::File { public: static void MakeTmpdir() {