提交 29ff9339 编写于 作者: R roo00

仅支持 Windows

上级 d3ad7e02
......@@ -31,7 +31,7 @@ public class Person extends ConfigObject {
public static final String ICON_UNKOWN = "iVBORw0KGgoAAAANSUhEUgAAAJAAAACQCAYAAADnRuK4AAAAAXNSR0IArs4c6QAAFlZJREFUeAHtXXuMFdUZH5aF5bHLYxFcXgXchV0kuzxNqqVaMca2VBNNFFNTommjVm0Tq8YYTayvNDGFmiY+mqKNYLRR/6hWErEWGjRaElgEsuCyGNgHCKwswrIisEB/v+md69y78zgzc+bMOcueZO6ZO3Me3/fNb77zne88ZpB1gYWWlpaynp6emnPnztWdP3++FuxX4xiD84pBgwZVMMZ/93kvrnfjerc7RppuHJ241oyymktKSprnz5/fhv/ncf2CCYP6M6fbt28fe+bMmR/h4S/Gg52NuBbxdMQlKfF9EuW3oPxmlL8DxwYcmxYtWnQmpfoyL7ZfAaipqan89OnTV549e3YJJHs1Hua8FMEi9PBAQw8Sfox4A4718+bNa0R8ViizAYmMBxBAU3ny5MllkPXPcXwfR6nmcj8GAL0PYK9ZuHDhOpz3ak5vIHlGAgigGQrQ/BTCXw7uluJhDA3kUtOboP8wSHsD9K9GM9eoKZmBZBkFoM2bN9dD6HdD4NQ44wI5M+9mE3hbPWzYsFVz5szpMoV8IwC0devWy2DXPAoB3wDwGEFzAgCwt/fi0KFDV9bX1x9KUI6SrFo/jMbGxivRRX4MkrhWiTT0qoQ9upcBpGcBpHa9SPuOGi0BtGXLliXQNE+AzMXfkXrBntEFsBrHk7CT2nSTglYA+uyzzyajqVoJ8Nyim6A0oOcb0PAUjhU6+ZW0ABAAUwo757dorn4PAdETPBB8JIBmbRfkdS9ARCdl5iFzAKFnxWbqBRz1mUvDLAJeHz58+APosR3MkuzMALR3795hR44c+RPeqLvwRmVGR5bCl1D3cYzB3bdgwYI1EsqKVUQmD27btm21vb29bwI4DbGoHshUIAG8hK9MmDDhvqlTp54suKHgj3IAoYd1G/h6CeApV8DfhVQFHZE3Y3hkl0qmlQGovb19+KFDh/4M5n6lksELqS4AqCdnYL+qim8lAIKh/D0w9B6OAUNZzZNdBU30awAq9YHa1AEE384c+HbW4c2YrEZ2A7XkJLAW8S3o7tN/lFpIa2KVTTB8O1cAPB8NgCe15xdU8FJooA853SUoUdJ7qWkgGMs/A3DeBIHDkxI5kD++BACinaWlpdfNnTu3I34p/jlTARDAczvA81dUq/vkLn/J9K877QDSdWn00KQDKAeev/Uv+ZvPDQDEyWs/AIj2yORGqg2Ua7aoeQaCZhJAizABxwewiapkkiZNA+UM5g9B3IDNI/MJSS4LmmgbgHQVemfHZBQtRQOxq46RdPp5BsAj46mkWAbAMxcgeofr42RUk9jIpZMw5+cZK4OgrMoYPHiwhfnIFmYAWhigtA8I2wJv9oHlQtapU6csXjM9UAMdP36ck/lvBpgSLTFK1ITlhic2QaDGeZgJloqKCmvUqFFWeXm5ha5uKC4IHoLoxIkTVnd3t4WHYGFQODSfrgkAnj/CqH4oCX2JAATtswqV/zIJASrzUrNUVlZa48ePt0aMGJG4agLq66+/tr766isbTIkLVFwAAAQWzl8Pe4he61ghNoDQ47oNlb8Wq1bFmQicqqoqC1MeLDZVaQRqJsxvssGE5dRpVJFWmUfKysrmx524HwtAufk8mwEg7adkEDQTJ04UaqJkPCHaTF9++aV1+PBhY+wlaKJPMCntKsSR2+PIAOJMwq6urk0Aj9aTwWjjTJ8+3Ro5cqQMXEQu49tvv7Xa2tpsWyly5gwyADzPwh56OGrVkQEEu+dFVHJ31IpUpqfWmTx5st2TUlmvV1142WwgUTPpHGgPgb6fAETrotAZCUAAD7dJ2QjtEylfFIKSpKWtQ60zdqxeHgWs47f27Nlj0RWgc8CzbcWzvTTKFBBhRyIKZj/3BV3BM2TIEKuurk478BAwWD1h05ZVcy
oKWjzbaQDRo6LpmU5Yk2CZ8e/gbV4RpXBVadGLsGbOnGkx1jlAfta+ffuso0ePaksmAHQaPrEGTP/gJlmhQUgDccVobtFfaIGqE9BzPGvWLO3BQ7mwiZ0xY4Y1evRo1WISrg9aaCico8+LZhACEAzAlSiwQrRQVeno06mpqbGHH1TVmbQevOHWJZdcIsWRmZQWv/wA0TVocW71u+++HgogOAy50YGWa9X5NtO+MC04moixrgEtzkr4+0J9IKEcADxP6Mgku+o6NwVhMqOfatq0aWHJsrw/ES3PXWEEBAKI+/OggMVhhai+zx4X/TymB47L8dA1QAs9GDbtIxBAKOAxHZnTxUkoQzbkhXaRpmEiZh3cEUSbL4C4rRwyarczGLvqOr+1QcL2usde5EUXXeR1S4trMGEexuE718UXQGj/IjmUVHFL20fjNzaWGHR+IQCe6Zx54ceYJ4AwZMHdUG/wy5TldZ2FHVcu9FDTrtM4PAIgeWLFUzUBPNxKV7uGmbMHRWYOJnkQnG1ITzG+p2FxXg9nHNLfRHcBe33jxo2TPqeIGnXMmDFWZ2dnEtLTzFsLk+ZqVPDv4kr6ACi3ifey4oQ6/OfU07QCZxYeOHDA4sBncUBnwgYTp7Du37/fnpjGOUYy/TgEp8YAsiADbureB0B91BJ3gEdCLTfxTsNpSA3zxRdf2IcXeLzAdPDgQWvnzp2eYCtOL/o/Dd5E6xZJBy15k5djsQ+AkJBI0zLIFjInfe3atcue1xyVYU5hbW5utpu6qHm90rM3ltZ0W6/6ol6DSVOOl+3G4nwFAMrt5LC0OJEO/9lcyBxtJ3gIgCRzdDhJDI62RGW4ZUvvtM4BIOqjXAoABBW+DIm0/HCJbOMZU3OlLMkhiFpbW6U8d90BhNaJg6yT3MwWAAg3+MkkLYNMg5XG6jffyNt3icY1e21Jg+ZdeS4SIF4KRunzAPr88885XYPf29IysKsrI0AI9qoJGWW5yzh2LPlSc51tIIdXyK9gdCIPIDRfP0SiPt16J2PWsSwNxO562Lot1kWHJcepROdX03+UNJgAIPC4GI7mvNczDxi05UuSCiDN/PTFyAhc/BcU2BviJDV3j49NHpfoBAURF0BQft4zAUDQQOWgk+Okn5DmvAbCudYACtMaZEYkBNkqbCarq6sLwMMyuRRaZg/Qj05ZWtavfFnXAaI8VmwAsfsO4c2VVUEa5dDhR/slaQjSZFz+7LdmPmxFhYyHL4O/pPIRyQ8ZFgIIPpGrQLwNJpECskrDLnPS4AcENlkcnvALQcBjHt274H58xbx+OVcoM68DGhrQ2ockTj+HOXxPos+ALJsnTnT36+kRPEFNH8uWMU5nigYCu/xQziLy7RjRdfyje+DwgV8TI0o7Nc3s2bPtgUunPE7oCnJU0lEYZoNxND1pMAhAfNlmg9+PbQCB8NqkzKvIz+EHGYE9LZE51QQNlySHOR05zcTda4tLo4wmOm7dUfM5mCnhpGmgaXrUArJIT42hMoiAh83epEkF3v3YJJoEIPBtK50SvF0zgSbHForNvIqMsjSQCK2cVBameVgOe25+hrlIPe407GkaFGwAlQL1RjRfFKxKAIU5HKl52AxefPHF0p65SQCC0plBj3Sp05ZJk0KKBVHFU8hBBq+M6tnr4iaafoHgobeato/MEGaoy6xLQlm0n6vZdNVIKExZETKGDMKIZZc9yO/DlSGywUOaZLgpwniTeR8vUg0BlLz/KZOqkLJUNGNBtg+1j8xmy82uYRqIXfmxBJB2u264hVp8rgJAQVqOe0unMW+H4AnSesVy0OE/6C0vgQ1kFICCHq4soQZpIAIojaDaRSGDB2igihL+yChMVRkqduQIakpkddmL5cWpHGl3DorrTPqfyocaKL3FVkkpLMpP2yMt+8OpCvIInCstw+Ps1OWOWS57drSxTAlGaSC+obI8vkEPKEj7cMpGmlqC2k3njRY85FZBP5ARTRibLhlzbjyE0OeS30
NMEzwOEVw6rfMKVYfOXFxOZxD95/agatFNrf4mHYUXZYYDrVnuHEY+2YyxKdU9gMZeGtH+LleNOEij66wRe3lSCB5TeCV2aEQbAaC8hC+AE4LIhADsnCglikxQl0HGbRrC5jovfnWHjktOV+V0VxUuBPKimte48jNKA6l0tBE8nAvEMTEO4DLmfxmLB8MeFgeLTfFI2wDiTxhTOtxX4YF2+KTm8Qp+173Sxr2mks+4NDr5aP5wLMwIAHF4QVVT6zfe5nfdEaiMOGzyvow6ZJVhayAUpu2+am5GqdaD5ui40yY991ui43c9aX3u/NyowZSAF/owu/FCX2XRgSl+3FZF8Fsf5nddFk1svlS9JDJohmN1dwnebGMAxHnKKgTM3hbHpTi0wCEUxvyfZi+MzXPY+nsZD11WGVA8PQ0NDftLMTzQbIrVT+bZG+JHVmSswwoSJsGSJmDcdbOnx++Iydjhw11umucA/G6A6HzJ/Pnz21BR361J06w9QdkEOzfF3L17t9RNLhOQFDsrtQ6/7rxjx45Y+zTGrlhCRsf0oQ10HkeLhDKVFsGmDJtiGQ2ijo4Oq7293fY1KRWenMps04fdeHaPjbGD3LxTG6nwzbjrlHVOxyi1j6khr4FyDOwwlREVRnUasjGpu+7FP2zn7bxuayDEG7wSmXCNrn/TlsNQrkHzrnWXO7RPJz7Ku5N0OgDaRJ50J9yPPpO8tw4PJtLs0I54A21n/rcBhA/Nn8H5x7xgYjCp+0v5UmuaNOblgYn1zjVHA3G6aP6ic9OU2DQ7yDR6i3HgxkoeQFBJxgKIb7PK6R7FAo36n1sNGxw64DvMu33yAJo3b14jmEq+W3ZGkunq6sqo5mjV0vVgMoCgaAo6XHkA4cZZHO9HE4c+qTnQSs+u7oFAN2noyEOea93X8gDiRTyANe6bJp2zK6+7FiLA+a0xg8Nx7Ezyrpv+AgAtXLhwHbSQse5ReqV1frsJcJNsNTdQcudvYZfbgnHTAgABPFwj9oZHRiMu8eHo+oZzxJ1jXyYH9L76tFAFACJzULOrTWaSANLRy8tBU/p/TA1QLvvQ+9pYTH8fAMGpyN5YU3FCU/7TzuDcGsa6BI57he25qAutfnRAnq8BRH2E2gdALAAJjdZC9AvpZFCb3nQRE5i+2qf54nVPAGHy+CrcM2K1BpnwCpz+qkNgc2r4sAUVynvwE+72kqcngObMmdOFTC96ZTDlmi7jY6YPW+Se9zN+z90TQEyMXSpWIirosvkVouN19np06NKbskzZ7xlCkayHe+e/fvd9AVRfX38ImV/2y2jCdYIo66ADDUlkAOP56aD8vgBiJmihZxFxqoeRAX6LzOnGS5g5DXEJAO2foldeMPZVXFaghKGF2pFhdXEmU/5zTVfWwWQAQfv42j6OXAMBlEv0JGLjZivqAB7KTxc6cs9SOALwN0L7FAycemUOBRAKaUPGp7wy63xNxX6GIvxzyzwDQy+Af48I3aEAyhWyAojcJVKgLmnS2o43Kn8mAgjP+jn4fYRGI4QABC10Bu3hvVGFl2V6VZtyhvGoCx1hdLrud8CR/ITrf+CpEIBYQs4afz2wNI1u6vLg2JSq2BZGlujRc70fjuQTouUJA4gFoll4AJH2G9jQcE3rmxaignWnU7VJg7vOOOdoutYtWLDg7Sh5IwEIyDwIhN4XpYIs0nKjcB18QA7v3DzcgNAFM+XOqHRGAhALB0LXAKmvRK1IZfrx48errC60Lhr0OmlEL4LxTG/P9bi9bvteiwwgloR5sdRCQla6b80p3eDXBMvKylIqPX6xU6ZMiZ85/ZwrMN71zzjVxAIQ58UCsTfj6IlTaVp5aKyKfA8+rfqDyqVRT3BrGLis/ZG4dMUCECsDYnfp1LUHmO2dy3SyfYofCrVQeXl58eXM/kNmRyGvZXTTxCUiNoBYISp+FREnn2Ua2OuaNWuWpUvX3U8YBHl1dbUWdIKWc6BzOWzaVj96Ra4nHiqGFirdsm
XLP1DZUpEKZaehn2XmzJlaPBRR3ji53tkJXzSP7HQA0N1oRf6StNxEGoiVgxAuNbgF8adJiYmanxqnrq7OKPCQR4KeGjNDm+hxGeAhL4k1EAthaGpqqsRO7h9BI136/yvp/QKsVlVVlf0BFJ6bHLhOntv7qpq5CHm9APBIG5aSKv1t27ZNgSA+wQOdmtZDHTVqlIVeoFHDA2Gy4KzFAwcOqNgz8W2AZxlARPtHSpAKIFIEe2g2ov9AE0nts3Kzb3bRdXfIJXkq/BYHgZTSipJ/YUjletiLp5LQWJxXOoBYAUBUAwB9gNMZxRVG/U+Nw+aqPwOnWCZcBsQVtgQS5Fh8O87/twCeX8gGDwlJBUAsGDZRFd6o9yGAufwfJdCXw/EjDknoMq8nCv2y0nLHEW4FzK1r4k7Op82DrvpvZDZbbv5SAxAr2bx582gQ/g5AdJW7Ur9z9qo4EFpZWWnsVFA/3pJc5/IkaiMCKeJ6t8fhq3sySd1heVMFECtvaWkpw9rwNwCiG4OIYRNFJ5upc4iDeJN5j702fuohKOS0zT2yuuqBdQXdlHUP4Bnc2Nj4B5T3IM59QcuvFfOT26bMn5ElH5FyIDfbLuIeSDz3CwAP13QvB3je80sj87rvw5RZiVMWmjR6q1/FEThBhvYPx410mRjv0J9VzPX1ra2tItvWbOLYVtLhiSh8KgUQCcOXaabCOPw73qIrgggleCZNmmTbRHirgpL223sc8qDG6ezsDNQ6OQGsQPxIkoHROILM5MkAPKVo0rho7SGcB9LAKRrURhdSs0ajmb0vduUFel/cCON2NFmx5vPEAY07T+DDcydM4xz+oh+j3JcAomlh5dPI5icn+7M/CHKw9zWiM1Hk+x8AzjrkuRNapy1MfmndzxRAZAp20QgI4lGc0sAOXYXHrj4di/xiYX9p2qhl2ExR6wiOiXXA1rkftk6kCfBpgChzADlMYRytFm3+8wDRNc61oJjTVjmaTZ+RqcY2hy4IHPp3BLei6cVL8xzXbUVZehMkx6T3tAGQwwhso1sBohU4JjnXgmJqIdpHBBJjerF1DtQwdApyz8Qom4GCz41cbiy6YlSVDLQDEBmHNhoJtX4X3soH8XeiqDDohGTTxvEzHrpoJm4/zI026QSM+qE5AOdTvEzPwM5ZKyoHlem0BJAjAHqxsUXcHRDgwzimO9dFY47gE0ich0zbSRWgaADze2Dc3o6AibO5OICzHjw/DeBsEOU3i3RaA8gRCATJabO34T9XD9Q616PGtJsIJB4858YHjOMACzTZBi/BwoMj6GySeCTZDxrAoQf5GXTLfbeVi8p3mumNAJAjADy0kq1bt16Npm05BH0T/ktZ4kC7iWBiE8hz98G6aeA6B3tMBIhIN9uhOywGL/vAy2sA8hq/3VDDysjqvlEAcguJdhIe5I0QPMF0DcHlvm/AOfcYeAtgXcMd4MGD/wCXxswYCyC3TNFzY4+NvbdrES9GLEUzueuQdN4BoNCmWQsXxLvFHy6RVIfSYvoFgNwSg2NyCJqiywCiJWh2luDe5TiGudOoOgdYOlEXAbMemma9+0t/qmhIu55+B6Bige3du3cYfC6L8DBnA1S1iGmE1+J8BuLS4vRx/qPMHpS3G3Ez8jczBmC289PYODeyaRKVQ78HkJ8gqKlwrxoPuAbHWGircsQVAEIFY9zjUY7//JJjN+Juxq5zbsJ0GNquuaGhYT+u92uggFfP8D85spfwanUgIQAAAABJRU5ErkJggg==";
public static final String REGULAREXPRESSION_SCRIPT = "^\\((.+?)\\)$";
public static final String DEFAULT_PASSWORD = "(person.getMobile())";
public static final String DEFAULT_PASSWORD = "(return person.getMobile())";
public static final Integer DEFAULT_PASSWORDPERIOD = 0;
public Person() {
......
package com.x.base.core.project.config;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.tika.utils.SystemUtils;
import org.quartz.CronExpression;
import com.x.base.core.project.annotation.FieldDescribe;
......@@ -82,7 +81,7 @@ public class Query extends ConfigObject {
}
/**
 * Reports whether text extraction from images is enabled.
 *
 * NOTE(review): this span is a rendered diff hunk, so both the previous and the
 * replacement return statement are shown back to back. Presumably only the
 * second one is the current code; it additionally requires
 * SystemUtils.IS_OS_WINDOWS to be true before honouring the extractImage
 * setting. Confirm against the actual file.
 *
 * @return per the second return statement, true only when running on Windows
 *         and the extractImage setting is true
 */
public Boolean getExtractImage() {
return BooleanUtils.isTrue(extractImage);
return SystemUtils.IS_OS_WINDOWS && BooleanUtils.isTrue(extractImage);
}
public String getTessLanguage() {
......
package com.x.base.core.project.tools;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.util.List;
import javax.imageio.ImageIO;
import org.apache.commons.collections4.list.UnmodifiableList;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.tika.Tika;
import org.junit.Test;
import com.x.base.core.project.config.Config;
import com.x.base.core.project.logger.Logger;
import com.x.base.core.project.logger.LoggerFactory;
import com.x.base.core.project.tools.DefaultCharset;
import com.x.base.core.project.tools.ListTools;
import net.sourceforge.tess4j.Tesseract;
/**
 * Static helpers that turn the raw bytes of an attachment into plain text.
 * <p>
 * Office documents are parsed with Tika, PDF files with PDFBox, text files are
 * decoded with {@link DefaultCharset#charset} and images are run through
 * Tesseract OCR. Every extractor logs failures and returns {@code null} instead
 * of propagating the exception.
 */
public class ExtractTextTools {

    private static final Logger logger = LoggerFactory.getLogger(ExtractTextTools.class);

    /** Upper bound (32 MiB) used by {@link #available(byte[])}. */
    public static final Integer MAXLENGTH = 1024 * 1024 * 32;

    /**
     * Exclusive upper bound (10 MiB) applied by
     * {@link #extract(byte[], String, Boolean, Boolean, Boolean, Boolean)}.
     * NOTE(review): this differs from {@link #MAXLENGTH}; kept as in the original
     * code, confirm whether the two limits are meant to differ.
     */
    private static final int EXTRACT_MAXLENGTH = 1024 * 1024 * 10;

    /** Lower-case extensions (with leading dot) accepted by {@link #support(String)}. */
    public static final List<String> SUPPORT_TYPES = UnmodifiableList.unmodifiableList(ListTools.toList(".doc", ".docx",
            ".pdf", ".xls", ".xlsx", ".txt", ".bmp", ".jpg", ".png", ".gif", ".jpeg", ".jpe"));

    /** Lower-case extensions (with leading dot) accepted by {@link #supportImage(String)}. */
    public static final List<String> SUPPORT_IMAGE_TYPES = UnmodifiableList
            .unmodifiableList(ListTools.toList(".bmp", ".jpg", ".png", ".gif", ".jpeg", ".jpe"));

    // volatile is required for the double-checked lazy initialisation below to be safe.
    private static volatile Tesseract tesseract = null;

    private static volatile Tika tika = null;

    /**
     * Tells whether the extension of {@code name} is listed in {@link #SUPPORT_TYPES}.
     *
     * @param name file name; may be {@code null}
     * @return {@code true} when the text after the last '.' (case-insensitive) is a supported type
     */
    public static boolean support(String name) {
        return extensionIn(name, SUPPORT_TYPES);
    }

    /**
     * Tells whether the extension of {@code name} is listed in {@link #SUPPORT_IMAGE_TYPES}.
     *
     * @param name file name; may be {@code null}
     * @return {@code true} when the text after the last '.' (case-insensitive) is a supported image type
     */
    public static boolean supportImage(String name) {
        return extensionIn(name, SUPPORT_IMAGE_TYPES);
    }

    /**
     * Checks that the content is present and not larger than {@link #MAXLENGTH}.
     *
     * @param bytes content to check; may be {@code null}
     * @return {@code false} for {@code null}, empty or oversized content, {@code true} otherwise
     */
    public static boolean available(byte[] bytes) {
        return null != bytes && bytes.length > 0 && bytes.length <= MAXLENGTH;
    }

    /**
     * Extracts text from {@code bytes}, choosing the extractor from the suffix of
     * {@code name} and only considering the categories whose flag is {@code true}.
     *
     * @param bytes  file content; {@code null}, empty or 10 MiB and larger yields {@code null}
     * @param name   file name whose suffix selects the extractor (case-insensitive)
     * @param office enables .doc/.docx/.xls/.xlsx; {@code null} is treated as {@code false}
     * @param pdf    enables .pdf; {@code null} is treated as {@code false}
     * @param txt    enables .txt; {@code null} is treated as {@code false}
     * @param image  enables .jpg/.png/.gif/.bmp/.jpeg/.jpe; {@code null} is treated as {@code false}
     * @return the extracted text, or {@code null} when nothing matched or extraction failed
     */
    public static String extract(byte[] bytes, String name, Boolean office, Boolean pdf, Boolean txt, Boolean image) {
        if (null == bytes || bytes.length == 0 || bytes.length >= EXTRACT_MAXLENGTH) {
            return null;
        }
        // Boolean.TRUE.equals(...) avoids a NullPointerException from unboxing a null flag.
        if (Boolean.TRUE.equals(office)) {
            if (hasSuffix(name, ".doc", ".docx")) {
                return word(bytes);
            }
            if (hasSuffix(name, ".xls", ".xlsx")) {
                return excel(bytes);
            }
        }
        if (Boolean.TRUE.equals(pdf) && hasSuffix(name, ".pdf")) {
            return pdf(bytes);
        }
        if (Boolean.TRUE.equals(txt) && hasSuffix(name, ".txt")) {
            return text(bytes);
        }
        if (Boolean.TRUE.equals(image) && hasSuffix(name, ".jpg", ".png", ".gif", ".bmp", ".jpeg", ".jpe")) {
            return image(bytes);
        }
        return null;
    }

    /**
     * Extracts the text of all pages of a PDF document.
     *
     * @param bytes PDF content
     * @return the text, or {@code null} when parsing fails (the failure is logged)
     */
    public static String pdf(byte[] bytes) {
        try {
            PDFParser parser = new PDFParser(new RandomAccessBuffer(bytes));
            parser.parse();
            try (COSDocument cos = parser.getDocument(); PDDocument pd = new PDDocument(cos)) {
                PDFTextStripper stripper = new PDFTextStripper();
                stripper.setStartPage(1);
                stripper.setEndPage(pd.getNumberOfPages());
                return stripper.getText(pd);
            }
        } catch (Exception e) {
            logger.error(e);
        }
        return null;
    }

    /**
     * Extracts text from a word-processing document via Tika.
     *
     * @param bytes document content
     * @return the text, or {@code null} when parsing fails (the failure is logged)
     */
    public static String word(byte[] bytes) {
        return tikaParse(bytes);
    }

    /**
     * Extracts text from a spreadsheet via Tika.
     *
     * @param bytes document content
     * @return the text, or {@code null} when parsing fails (the failure is logged)
     */
    public static String excel(byte[] bytes) {
        return tikaParse(bytes);
    }

    /**
     * Decodes the bytes as text using {@link DefaultCharset#charset}.
     *
     * @param bytes text content
     * @return the decoded string
     */
    public static String text(byte[] bytes) {
        return new String(bytes, DefaultCharset.charset);
    }

    /**
     * Runs OCR on an image.
     *
     * @param bytes encoded image content
     * @return the recognised text, or {@code null} when the image cannot be decoded
     *         or OCR fails (the failure is logged)
     */
    public static String image(byte[] bytes) {
        try (ByteArrayInputStream in = new ByteArrayInputStream(bytes)) {
            BufferedImage decoded = ImageIO.read(in);
            if (null == decoded) {
                // ImageIO.read returns null when no registered reader understands the data.
                logger.error(new IllegalArgumentException("image data can not be decoded."));
                return null;
            }
            return tesseractInstance().doOCR(decoded);
        } catch (Exception e) {
            logger.error(e);
        }
        return null;
    }

    /** Returns true when the lower-cased extension of name (with leading dot) is contained in types. */
    private static boolean extensionIn(String name, List<String> types) {
        String ext = StringUtils.substringAfterLast(name, ".");
        if (StringUtils.isEmpty(ext)) {
            return false;
        }
        return types.contains("." + StringUtils.lowerCase(ext));
    }

    /** Returns true when name ends, ignoring case, with any of the given suffixes. */
    private static boolean hasSuffix(String name, String... suffixes) {
        for (String suffix : suffixes) {
            if (StringUtils.endsWithIgnoreCase(name, suffix)) {
                return true;
            }
        }
        return false;
    }

    /** Parses the bytes with the shared Tika instance; logs and returns null on failure. */
    private static String tikaParse(byte[] bytes) {
        try (ByteArrayInputStream in = new ByteArrayInputStream(bytes)) {
            return tikaInstance().parseToString(in);
        } catch (Exception e) {
            logger.error(e);
        }
        return null;
    }

    /**
     * Lazily creates the shared Tesseract instance. The instance is configured in a
     * local variable and only published once fully set up, so other threads never
     * observe an unconfigured engine and a failing Config call leaves the field null.
     */
    private static Tesseract tesseractInstance() throws Exception {
        Tesseract instance = tesseract;
        if (null == instance) {
            synchronized (ExtractTextTools.class) {
                instance = tesseract;
                if (null == instance) {
                    instance = new Tesseract();
                    // location of the trained data
                    instance.setDatapath(Config.dir_commons_tess4j_tessdata().getAbsolutePath());
                    // recognition language taken from the query configuration
                    instance.setLanguage(Config.query().getTessLanguage());
                    tesseract = instance;
                }
            }
        }
        return instance;
    }

    /** Lazily creates the shared Tika instance. */
    private static Tika tikaInstance() {
        Tika instance = tika;
        if (null == instance) {
            synchronized (ExtractTextTools.class) {
                instance = tika;
                if (null == instance) {
                    instance = new Tika();
                    tika = instance;
                }
            }
        }
        return instance;
    }

    /**
     * Ad-hoc manual check that prints the Tika output for a local file.
     * NOTE(review): depends on a hard-coded local path and lives in a non-test
     * class; consider moving it to the test sources.
     */
    @Test
    public void test1() throws Exception {
        System.out.println(word(FileUtils.readFileToByteArray(new File("d:/1.html"))));
    }
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册