From 4e8018d013e3cefa55f138c7446264ca8931861a Mon Sep 17 00:00:00 2001 From: Ray Smith Date: Wed, 19 Jul 2017 17:04:06 -0700 Subject: [PATCH] Important fix to RTL languages saves last space on each line, which was previously lost --- ccmain/resultiterator.cpp | 13 +++++++++++-- ccmain/resultiterator.h | 4 ++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/ccmain/resultiterator.cpp b/ccmain/resultiterator.cpp index 77514a6a..4b654944 100644 --- a/ccmain/resultiterator.cpp +++ b/ccmain/resultiterator.cpp @@ -549,6 +549,12 @@ bool ResultIterator::IsAtFinalElement(PageIteratorLevel level, return true; } +// Returns the number of blanks before the current word. +int ResultIterator::BlanksBeforeWord() const { + if (CurrentParagraphIsLtr()) return LTRResultIterator::BlanksBeforeWord(); + return IsAtBeginningOf(RIL_TEXTLINE) ? 0 : 1; +} + /** * Returns the null terminated UTF-8 encoded text string for the current * object at the given level. Use delete [] to free after use. @@ -585,7 +591,7 @@ char* ResultIterator::GetUTF8Text(PageIteratorLevel level) const { if (at_beginning_of_minor_run_) { text += reading_direction_is_ltr ? 
kLRM : kRLM; } - text = it_->word()->BestUTF8(blob_index_, !reading_direction_is_ltr); + text = it_->word()->BestUTF8(blob_index_, false); if (IsAtFinalSymbolOfWord()) AppendSuffixMarks(&text); } break; @@ -608,7 +614,7 @@ void ResultIterator::AppendUTF8WordText(STRING *text) const { GenericVector<int> blob_order; CalculateBlobOrder(&blob_order); for (int i = 0; i < blob_order.size(); i++) { - *text += it_->word()->BestUTF8(blob_order[i], !reading_direction_is_ltr); + *text += it_->word()->BestUTF8(blob_order[i], false); } AppendSuffixMarks(text); } @@ -643,6 +649,9 @@ void ResultIterator::IterateAndAppendUTF8TextlineText(STRING *text) { } AppendUTF8WordText(text); words_appended++; + if (BidiDebug(2)) { + tprintf("Num spaces=%d, text=%s\n", numSpaces, text->string()); + } } while (Next(RIL_WORD) && !IsAtBeginningOf(RIL_TEXTLINE)); if (BidiDebug(1)) { tprintf("%d words printed\n", words_appended); diff --git a/ccmain/resultiterator.h b/ccmain/resultiterator.h index e5516836..9651cab9 100644 --- a/ccmain/resultiterator.h +++ b/ccmain/resultiterator.h @@ -82,6 +82,10 @@ class TESS_API ResultIterator : public LTRResultIterator { virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const; + // ============= Functions that refer to words only ============. + // Returns the number of blanks before the current word. + int BlanksBeforeWord() const; + // ============= Accessing data ==============. /** -- GitLab