提交 16fd1439 编写于 作者: Stefan Weil

Write image filename in ALTO output

Signed-off-by: Stefan Weil <sw@weilnetz.de>
上级 5f10fed5
...@@ -189,6 +189,9 @@ protected: ...@@ -189,6 +189,9 @@ protected:
bool BeginDocumentHandler() override; bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override; bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override; bool EndDocumentHandler() override;
private:
bool begin_document;
}; };
/** /**
......
...@@ -55,6 +55,16 @@ static void AddBoxToAlto(const ResultIterator *it, PageIteratorLevel level, ...@@ -55,6 +55,16 @@ static void AddBoxToAlto(const ResultIterator *it, PageIteratorLevel level,
/// Append the ALTO XML for the beginning of the document /// Append the ALTO XML for the beginning of the document
/// ///
bool TessAltoRenderer::BeginDocumentHandler() { bool TessAltoRenderer::BeginDocumentHandler() {
// Delay the XML output because we need the name of the image file.
begin_document = true;
return true;
}
///
/// Append the ALTO XML for the layout of the image
///
bool TessAltoRenderer::AddImageHandler(TessBaseAPI *api) {
if (begin_document) {
AppendString( AppendString(
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"<alto xmlns=\"http://www.loc.gov/standards/alto/ns-v3#\" " "<alto xmlns=\"http://www.loc.gov/standards/alto/ns-v3#\" "
...@@ -67,7 +77,7 @@ bool TessAltoRenderer::BeginDocumentHandler() { ...@@ -67,7 +77,7 @@ bool TessAltoRenderer::BeginDocumentHandler() {
"\t\t<sourceImageInformation>\n" "\t\t<sourceImageInformation>\n"
"\t\t\t<fileName>"); "\t\t\t<fileName>");
AppendString(title()); AppendString(api->GetInputName());
AppendString( AppendString(
"</fileName>\n" "</fileName>\n"
...@@ -84,14 +94,9 @@ bool TessAltoRenderer::BeginDocumentHandler() { ...@@ -84,14 +94,9 @@ bool TessAltoRenderer::BeginDocumentHandler() {
"\t\t</OCRProcessing>\n" "\t\t</OCRProcessing>\n"
"\t</Description>\n" "\t</Description>\n"
"\t<Layout>\n"); "\t<Layout>\n");
begin_document = false;
}
return true;
}
///
/// Append the ALTO XML for the layout of the image
///
bool TessAltoRenderer::AddImageHandler(TessBaseAPI *api) {
const std::unique_ptr<const char[]> text(api->GetAltoText(imagenum())); const std::unique_ptr<const char[]> text(api->GetAltoText(imagenum()));
if (text == nullptr) { if (text == nullptr) {
return false; return false;
...@@ -112,7 +117,8 @@ bool TessAltoRenderer::EndDocumentHandler() { ...@@ -112,7 +117,8 @@ bool TessAltoRenderer::EndDocumentHandler() {
} }
TessAltoRenderer::TessAltoRenderer(const char *outputbase) TessAltoRenderer::TessAltoRenderer(const char *outputbase)
: TessResultRenderer(outputbase, "xml") {} : TessResultRenderer(outputbase, "xml"),
begin_document(false) {}
/// ///
/// Make an XML-formatted string with ALTO markup from the internal /// Make an XML-formatted string with ALTO markup from the internal
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册