提交 1aacb142 编写于 作者: E Eliot Jones

Add test for a multi-page PDF generated by LibreOffice

上级 21be34a9
namespace UglyToad.Pdf.Tests.Integration
{
using System;
using System.IO;
using Content;
using Xunit;
/// <summary>
/// Integration tests which open the "Two Page Text Only - from libre office.pdf" document
/// and check its page count, page sizes and the leading text of each page.
/// </summary>
public class TwoPageTextOnlyLibreOfficeTests
{
    /// <summary>
    /// Builds the absolute path of the test document by walking three levels up from the
    /// application base directory and into the Integration/Documents folder.
    /// </summary>
    /// <returns>The full path of the PDF file used by these tests.</returns>
    private static string GetFilename()
    {
        var baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
        var relativeFolder = Path.Combine(baseDirectory, "..", "..", "..", "Integration", "Documents");
        var absoluteFolder = Path.GetFullPath(relativeFolder);

        return Path.Combine(absoluteFolder, "Two Page Text Only - from libre office.pdf");
    }

    /// <summary>
    /// Opens the document from its raw bytes and checks that it reports two pages.
    /// </summary>
    [Fact]
    public void HasCorrectNumberOfPages()
    {
        var bytes = File.ReadAllBytes(GetFilename());

        using (var pdf = PdfDocument.Open(bytes))
        {
            Assert.Equal(2, pdf.NumberOfPages);
        }
    }

    /// <summary>
    /// Checks that page 1 and then page 2 both report the A4 page size.
    /// </summary>
    [Fact]
    public void HasCorrectPageSize()
    {
        using (var pdf = PdfDocument.Open(GetFilename()))
        {
            // Pages are requested by number, in ascending order: 1 then 2.
            for (var pageNumber = 1; pageNumber <= 2; pageNumber++)
            {
                var current = pdf.GetPage(pageNumber);

                Assert.Equal(PageSize.A4, current.Size);
            }
        }
    }

    /// <summary>
    /// Checks the text each page begins with.
    /// </summary>
    [Fact]
    public void PagesStartWithCorrectText()
    {
        using (var pdf = PdfDocument.Open(GetFilename()))
        {
            var firstPage = pdf.GetPage(1);
            Assert.StartsWith("Apache License", firstPage.Text);

            var secondPage = pdf.GetPage(2);
            Assert.StartsWith("2. Grant of Copyright", secondPage.Text);
        }
    }
}
}
......@@ -16,6 +16,7 @@
<None Remove="Integration\Documents\Single Page Non Latin - from acrobat distiller.pdf" />
<None Remove="Integration\Documents\Single Page Simple - from google drive.pdf" />
<None Remove="Integration\Documents\Single Page Simple - from open office.pdf" />
<None Remove="Integration\Documents\Two Page Text Only - from libre office.pdf" />
</ItemGroup>
<ItemGroup>
......@@ -43,6 +44,9 @@
<Content Include="Integration\Documents\Single Page Simple - from open office.pdf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="Integration\Documents\Two Page Text Only - from libre office.pdf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>
<ItemGroup>
......
......@@ -2,6 +2,7 @@
{
using System;
using System.Collections.Generic;
using System.Linq;
public class Page
{
......@@ -18,6 +19,8 @@
public IReadOnlyList<Letter> Letters => Content?.Letters ?? new Letter[0];
public string Text { get; }
/// <summary>
/// Gets the width of the page in points.
/// </summary>
......@@ -44,11 +47,22 @@
MediaBox = mediaBox;
CropBox = cropBox;
Content = content;
Text = GetText(content);
Width = mediaBox.Bounds.Width;
Height = mediaBox.Bounds.Height;
Size = mediaBox.Bounds.GetPageSize();
}
/// <summary>
/// Concatenates the value of every letter in the page content into a single string.
/// </summary>
/// <param name="content">The page content; may be null.</param>
/// <returns>
/// The joined letter values with no separator, or an empty string when the content
/// or its letters are null.
/// </returns>
private static string GetText(PageContent content)
{
    var letters = content?.Letters;

    return letters == null
        ? string.Empty
        : string.Join(string.Empty, letters.Select(letter => letter.Value));
}
}
}
\ No newline at end of file
......@@ -91,6 +91,7 @@
bool found = pageNumber == soughtPageNumber;
locatedPages[pageNumber] = currentPageDictionary;
pageNumbersObserved.Add(pageNumber);
return found;
}
......@@ -117,6 +118,7 @@
if (thisPageMatches)
{
childFound = true;
break;
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册