提交 75a9a8fa 编写于 作者: D david.eger@gmail.com

Address "RIL_PARA doesn't work" comment in issue 622.

http://code.google.com/p/tesseract-ocr/issues/detail?id=622

The core of the problem is that in PSM_SINGLE_BLOCK mode, Tesseract
doesn't run paragraph detection, so no paragraphs get generated.  Here,
we make sure that even when Tesseract runs in a mode where no paragraphs
get generated, each block is treated as its own paragraph.



git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@696 d0cd1f9f-072b-0410-8dd7-cf729c803f20
上级 8cc34e85
......@@ -244,7 +244,6 @@ bool PageIterator::BoundingBoxInternal(PageIteratorLevel level,
break;
case RIL_PARA:
para = it_->row()->row->para();
if (para == NULL) return false;
// explicit fall-through.
case RIL_TEXTLINE:
box = it_->row()->row->bounding_box();
......@@ -262,7 +261,9 @@ bool PageIterator::BoundingBoxInternal(PageIteratorLevel level,
PageIterator other = *this;
other.Begin();
do {
if (other.it_->row() && other.it_->row()->row &&
if (other.it_->block() &&
other.it_->block()->block == it_->block()->block &&
other.it_->row() && other.it_->row()->row &&
other.it_->row()->row->para() == para) {
box = box.bounding_union(other.it_->row()->row->bounding_box());
}
......@@ -347,13 +348,25 @@ Pix* PageIterator::GetBinaryImage(PageIteratorLevel level) const {
Pix* pix = NULL;
switch (level) {
case RIL_BLOCK:
case RIL_PARA:
int bleft, btop, bright, bbottom;
BoundingBoxInternal(RIL_BLOCK, &bleft, &btop, &bright, &bbottom);
pix = it_->block()->block->render_mask();
// AND the mask and the image.
pixRasterop(pix, 0, 0, pixGetWidth(pix), pixGetHeight(pix),
PIX_SRC & PIX_DST, tesseract_->pix_binary(),
left, top);
bleft, btop);
if (level == RIL_PARA) {
// RIL_PARA needs further attention:
// clip the paragraph from the block mask.
Box* box = boxCreate(left - bleft, top - btop,
right - left, bottom - top);
Pix* pix2 = pixClipRectangle(pix, box, NULL);
boxDestroy(&box);
pixDestroy(&pix);
pix = pix2;
}
break;
case RIL_PARA:
case RIL_TEXTLINE:
case RIL_WORD:
case RIL_SYMBOL:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册