Review notes (text ahead of the first "diff --git" line is ignored by
git-apply and patch):
- This patch had been collapsed onto two lines; line breaks and indentation
  below are reconstructed and should be checked against the tree.
- The header names on the #include lines and the <STRING> template argument
  were missing from the text. They are restored here as assumptions -- TODO
  confirm before applying.
- addAvailableLanguages was revised in review (explicit ANSI Win32 calls,
  S_ISDIR, suffix length via strlen, documentation). The hunk ranges are
  updated accordingly; the post-image blob id on the index line predates
  this revision.

diff --git a/api/baseapi.cpp b/api/baseapi.cpp
index b0849665b1675bcea8edb3ac04209317ab8d35f7..d68249a9dbeef5f553bed1a5e2bd3795d678c996 100644
--- a/api/baseapi.cpp
+++ b/api/baseapi.cpp
@@ -39,6 +39,9 @@
 #include <dirent.h>
 #include <libgen.h>
 #include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
 #endif  // _WIN32
 
 #include <iostream>
@@ -109,6 +112,84 @@ const int kMaxCredibleResolution = 2400;
 /** Default resolution. */
 const int kDefaultResolution = 300;
 
+/**
+ * Recursively collects the languages available below a data directory.
+ *
+ * Every file named "<lang>." + kTrainedDataSuffix found in datadir + base or
+ * in one of its subdirectories is appended to langs as its path relative to
+ * datadir with the extension removed (for example "sub/eng"). Entries whose
+ * name starts with '.' are skipped, and directories that cannot be opened
+ * are ignored.
+ *
+ * @param datadir Root data directory. It is concatenated with base as is, so
+ *                it is expected to end with a path separator.
+ * @param base    Directory to scan, relative to datadir and without a
+ *                trailing '/'; "" scans datadir itself.
+ * @param langs   Receives the language names; existing entries are kept.
+ */
+static void addAvailableLanguages(const STRING &datadir, const STRING &base,
+                                  GenericVector<STRING>* langs)
+{
+  const STRING base2 = (base.string()[0] == '\0') ? base : base + "/";
+  // Length of "." plus the suffix. strlen is used because sizeof would only
+  // give this value if kTrainedDataSuffix is declared as a char array.
+  const size_t extlen = strlen(kTrainedDataSuffix) + 1;
+#ifdef _WIN32
+  // Use the ANSI variants explicitly: the names are handled as char strings,
+  // which does not compile with the generic macros when UNICODE is defined.
+  WIN32_FIND_DATAA data;
+  HANDLE handle = FindFirstFileA((datadir + base2 + "*").string(), &data);
+  if (handle != INVALID_HANDLE_VALUE) {
+    BOOL result = TRUE;
+    for (; result;) {
+      char *name = data.cFileName;
+      // Skip '.', '..', and hidden files
+      if (name[0] != '.') {
+        if ((data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) ==
+            FILE_ATTRIBUTE_DIRECTORY) {
+          addAvailableLanguages(datadir, base2 + name, langs);
+        } else {
+          size_t len = strlen(name);
+          if (len > extlen && name[len - extlen] == '.' &&
+              strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
+            name[len - extlen] = '\0';
+            langs->push_back(base2 + name);
+          }
+        }
+      }
+      result = FindNextFileA(handle, &data);
+    }
+    FindClose(handle);
+  }
+#else  // _WIN32
+  DIR* dir = opendir((datadir + base).string());
+  if (dir != NULL) {
+    dirent *de;
+    while ((de = readdir(dir))) {
+      char *name = de->d_name;
+      // Skip '.', '..', and hidden files
+      if (name[0] != '.') {
+        struct stat st;
+        // S_ISDIR compares the whole file type field; testing only the
+        // S_IFDIR bits can also match other types such as block devices.
+        if (stat((datadir + base2 + name).string(), &st) == 0 &&
+            S_ISDIR(st.st_mode)) {
+          addAvailableLanguages(datadir, base2 + name, langs);
+        } else {
+          size_t len = strlen(name);
+          if (len > extlen && name[len - extlen] == '.' &&
+              strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
+            name[len - extlen] = '\0';
+            langs->push_back(base2 + name);
+          }
+        }
+      }
+    }
+    closedir(dir);
+  }
+#endif
+}
+
 TessBaseAPI::TessBaseAPI()
     : tesseract_(nullptr),
       osd_tesseract_(nullptr),
@@ -393,45 +474,7 @@ void TessBaseAPI::GetAvailableLanguagesAsVector(
     GenericVector<STRING>* langs) const {
   langs->clear();
   if (tesseract_ != NULL) {
-#ifdef _WIN32
-    STRING pattern = tesseract_->datadir + "/*." + kTrainedDataSuffix;
-    char fname[_MAX_FNAME];
-    WIN32_FIND_DATA data;
-    BOOL result = TRUE;
-    HANDLE handle = FindFirstFile(pattern.string(), &data);
-    if (handle != INVALID_HANDLE_VALUE) {
-      for (; result; result = FindNextFile(handle, &data)) {
-        _splitpath(data.cFileName, NULL, NULL, fname, NULL);
-        langs->push_back(STRING(fname));
-      }
-      FindClose(handle);
-    }
-#else  // _WIN32
-    DIR *dir;
-    struct dirent *dirent;
-    char *dot;
-
-    STRING extension = STRING(".") + kTrainedDataSuffix;
-
-    dir = opendir(tesseract_->datadir.string());
-    if (dir != NULL) {
-      while ((dirent = readdir(dir))) {
-        // Skip '.', '..', and hidden files
-        if (dirent->d_name[0] != '.') {
-          if (strstr(dirent->d_name, extension.string()) != NULL) {
-            dot = strrchr(dirent->d_name, '.');
-            // This ensures that .traineddata is at the end of the file name
-            if (strncmp(dot, extension.string(),
-                        strlen(extension.string())) == 0) {
-              *dot = '\0';
-              langs->push_back(STRING(dirent->d_name));
-            }
-          }
-        }
-      }
-      closedir(dir);
-    }
-#endif
+    addAvailableLanguages(tesseract_->datadir, "", langs);
   }
 }
 