From 20f4084e795a5f6895372501127140dc80ecb288 Mon Sep 17 00:00:00 2001 From: zhourui Date: Tue, 6 Jul 2021 16:12:13 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0pickPersonWithName=E8=AE=BE?= =?UTF-8?q?=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/project/config/Organization.java | 11 +- .../nlp/NaturalLanguageProcessing.java | 154 ++++++++++++++++++ .../express/factory/PersonFactory.java | 8 +- 3 files changed, 169 insertions(+), 4 deletions(-) create mode 100644 o2server/x_base_core_project/src/main/java/com/x/base/core/project/nlp/NaturalLanguageProcessing.java diff --git a/o2server/x_base_core_project/src/main/java/com/x/base/core/project/config/Organization.java b/o2server/x_base_core_project/src/main/java/com/x/base/core/project/config/Organization.java index e0403ddf2f..67c7085f19 100644 --- a/o2server/x_base_core_project/src/main/java/com/x/base/core/project/config/Organization.java +++ b/o2server/x_base_core_project/src/main/java/com/x/base/core/project/config/Organization.java @@ -3,6 +3,7 @@ package com.x.base.core.project.config; import java.io.File; import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.BooleanUtils; import com.x.base.core.project.annotation.FieldDescribe; import com.x.base.core.project.gson.XGsonBuilder; @@ -14,7 +15,8 @@ public class Organization extends ConfigObject { private static final long serialVersionUID = -2193428649985413384L; - public final static Integer DEFAULT_UNITLEVELORDERNUMBERDIGITS = 10; + public static final Integer DEFAULT_UNITLEVELORDERNUMBERDIGITS = 10; + /* Default for pickPersonWithName: the setting is off (false) unless the configuration overrides it. */ public static final Boolean DEFAULT_PICKPERSONWITHNAME = false; public static Organization defaultInstance() { return new Organization(); @@ -23,11 +25,18 @@ public class Organization extends ConfigObject { @FieldDescribe("unit中unitLevelOrderNumber扩充位数,<=0不扩充.") private Integer unitLevelOrderNumberDigits = DEFAULT_UNITLEVELORDERNUMBERDIGITS; + 
@FieldDescribe("人员识别过程中过程为先查找 distinguishedName 再查找中间的 unique 如果还是没有查找到是否要通过那么进行查找.") + /* Per the description above: person identification first looks up by distinguishedName, then by the unique part; this flag decides whether a further lookup is attempted when both find nothing. NOTE(review): the word 那么 in the description looks like an input-method slip for name - confirm before changing the string. */ private Boolean pickPersonWithName = DEFAULT_PICKPERSONWITHNAME; + public Integer getUnitLevelOrderNumberDigits() { return NumberTools.nullOrLessThan(this.unitLevelOrderNumberDigits, 1) ? DEFAULT_UNITLEVELORDERNUMBERDIGITS : this.unitLevelOrderNumberDigits; } + /* Returns the pickPersonWithName flag passed through BooleanUtils.isTrue, so an unset (null) value is reported as false. */ public Boolean getPickPersonWithName() { + return BooleanUtils.isTrue(this.pickPersonWithName); + } + public void save() throws Exception { File file = new File(Config.base(), Config.PATH_CONFIG_ORGANIZATION); FileUtils.write(file, XGsonBuilder.toJson(this), DefaultCharset.charset); diff --git a/o2server/x_base_core_project/src/main/java/com/x/base/core/project/nlp/NaturalLanguageProcessing.java b/o2server/x_base_core_project/src/main/java/com/x/base/core/project/nlp/NaturalLanguageProcessing.java new file mode 100644 index 0000000000..dc1e4a2a10 --- /dev/null +++ b/o2server/x_base_core_project/src/main/java/com/x/base/core/project/nlp/NaturalLanguageProcessing.java @@ -0,0 +1,154 @@ +package com.x.base.core.project.nlp; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.math.NumberUtils; + +import com.hankcs.hanlp.HanLP; +import com.hankcs.hanlp.seg.common.Term; +import com.x.base.core.project.gson.GsonPropertyObject; + +public class NaturalLanguageProcessing { /* Segments text with HanLP and reports the retained words together with how often each occurs. */ + + /* Symbols tested by skip(): a token whose value starts with any of these is dropped. NOTE(review): public, non-final and mutable array - confirm nothing outside relies on reassigning it. */ public static String[] SKIP_START_WITH = new String[] { "~", "!", "#", "$", "%", "^", "&", "*", "(", ")", "<", ">", + "[", "]", "{", "}", "\\", "?" }; + + /* Symbols tested by skip(): a token whose value ends with any of these is dropped. Same contents as SKIP_START_WITH. */ public static String[] SKIP_END_WITH = new String[] { "~", "!", "#", "$", "%", "^", "&", "*", "(", ")", "<", ">", + "[", "]", "{", "}", "\\", "?" 
}; + + /* Splits content into terms via HanLP.segment, wraps each term in an Item (label = term nature, value = trimmed word), discards items rejected by skip(), groups the remaining items by equality (Item equality uses the value only), stores each group size in the count of the group key, and returns those keys ordered by count, highest first. Returns an empty list when content is blank. */ public List word(String content) { + List items = new ArrayList<>(); + if (StringUtils.isNotBlank(content)) { + for (Term t : HanLP.segment(content)) { + Item item = new Item(); + item.setLabel(t.nature.toString()); + /* Remove Chinese (full-width) spaces and ordinary spaces. NOTE(review): both string arguments of replace render identically here; presumably the search string was a full-width space before whitespace normalisation - confirm against the repository. */ + item.setValue(StringUtils.trimToEmpty(StringUtils.replace(t.word, " ", " "))); + if (!skip(item)) { + items.add(item); + } + } + } + /* + * Label prefixes excluded by skip(): b distinguishing word, c conjunction, d adverb, e interjection, f locality word, h prefix, k suffix, o onomatopoeia, p preposition, q measure word, r pronoun, u auxiliary word (the original note reads 组词, presumably 助词 - confirm), w punctuation. The stream step below only copies the list: its filter is commented out and the same tests are applied earlier through skip(). + */ + items = items.stream() +// .filter(o -> (StringUtils.length(o.getValue()) > 1) +// && (!StringUtils.startsWithAny(o.getValue(), SKIP_START_WITH)) +// && (!StringUtils.endsWithAny(o.getValue(), SKIP_END_WITH)) +// && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "b")) +// && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "c")) +// && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "d")) +// && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "e")) +// && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "f")) +// && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "h")) +// && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "k")) +// && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "o")) +// && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "p")) +// && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "q")) +// && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "r")) +// && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "u")) +// && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "w")) && (!label_skip_m(o))) + .collect(Collectors.toList()); + Map map = items.stream().collect(Collectors.groupingBy(Function.identity(), Collectors.counting())); + List list = new ArrayList<>(); + map.entrySet().stream().sorted(Map.Entry.comparingByValue().reversed()).forEach(o -> { + Item t = o.getKey(); + t.setCount(o.getValue()); + list.add(t); + }); + return list; + } + + /* Decides whether an item is discarded. Returns false (keep) only when the value is longer than one character, neither starts with a SKIP_START_WITH symbol nor ends with a SKIP_END_WITH symbol, the label does not start (ignoring case) with b, c, d, e, f, h, k, o, p, q, r, u or w, and label_skip_m(o) is false; returns true (skip) in every other case. */ private boolean skip(Item o) { + if ((StringUtils.length(o.getValue()) > 1) && (!StringUtils.startsWithAny(o.getValue(), SKIP_START_WITH)) + && 
(!StringUtils.endsWithAny(o.getValue(), SKIP_END_WITH)) + && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "b")) + && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "c")) + && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "d")) + && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "e")) + && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "f")) + && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "h")) + && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "k")) + && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "o")) + && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "p")) + && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "q")) + && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "r")) + && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "u")) + && (!StringUtils.startsWithIgnoreCase(o.getLabel(), "w")) && (!label_skip_m(o))) { + return false; + } + return true; + } + + /* Returns true only when the label starts with m (ignoring case) and NumberUtils.isParsable accepts the value; m-labelled items whose value is not a parsable number are therefore kept by skip(). */ private boolean label_skip_m(Item item) { + if (!StringUtils.startsWithIgnoreCase(item.getLabel(), "m")) { + return false; + } else { + return NumberUtils.isParsable(item.getValue()); + } + } + + /* One segmented word: value is the word text, label is the string form of the term nature, count is the occurrence total written by word(). */ public static class Item extends GsonPropertyObject { + + private String value; + + private String label; + + private Long count; + + public String getValue() { + return value; + } + + public void setValue(String value) { + this.value = value; + } + + public String getLabel() { + return label; + } + + public void setLabel(String label) { + this.label = label; + } + + public Long getCount() { + return count; + } + + public void setCount(Long count) { + this.count = count; + } + + /* The hash is derived from value alone (0 for a null value); label and count do not take part, matching equals. */ @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((value == null) ? 
0 : value.hashCode()); + return result; + } + + /* Two items are equal when they have the same runtime class and the same value (two null values count as equal); label and count are ignored, so identical words with different labels fall into one group in word(). */ @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Item other = (Item) obj; + if (value == null) { + if (other.value != null) + return false; + } else if (!value.equals(other.value)) + return false; + return true; + } + } +} \ No newline at end of file diff --git a/o2server/x_organization_assemble_express/src/main/java/com/x/organization/assemble/express/factory/PersonFactory.java b/o2server/x_organization_assemble_express/src/main/java/com/x/organization/assemble/express/factory/PersonFactory.java index 47f57c2ecf..305492e687 100644 --- a/o2server/x_organization_assemble_express/src/main/java/com/x/organization/assemble/express/factory/PersonFactory.java +++ b/o2server/x_organization_assemble_express/src/main/java/com/x/organization/assemble/express/factory/PersonFactory.java @@ -13,12 +13,14 @@ import javax.persistence.criteria.CriteriaQuery; import javax.persistence.criteria.Predicate; import javax.persistence.criteria.Root; +import org.apache.commons.lang3.BooleanUtils; import org.apache.commons.lang3.StringUtils; import com.x.base.core.entity.JpaObject; import com.x.base.core.project.cache.Cache.CacheCategory; import com.x.base.core.project.cache.Cache.CacheKey; import com.x.base.core.project.cache.CacheManager; +import com.x.base.core.project.config.Config; import com.x.base.core.project.tools.ListTools; import com.x.organization.assemble.express.AbstractFactory; import com.x.organization.assemble.express.Business; @@ -69,7 +71,7 @@ public class PersonFactory extends AbstractFactory { this.entityManagerContainer().get(Person.class).detach(o); } } - if (null == o) { + /* The extra query below now runs only when nothing was found so far AND the organization config enables pickPersonWithName. NOTE(review): presumably this is the name-based lookup, but the query predicate lies outside this hunk - confirm. The getter already applies BooleanUtils.isTrue, so the wrapper here is redundant but harmless. */ if ((null == o) && BooleanUtils.isTrue(Config.organization().getPickPersonWithName())) { EntityManager em = this.entityManagerContainer().get(Person.class); CriteriaBuilder cb = em.getCriteriaBuilder(); CriteriaQuery cq = 
cb.createQuery(Person.class); @@ -169,8 +171,8 @@ public class PersonFactory extends AbstractFactory { CriteriaQuery cq = cb.createQuery(String.class); Root root = cq.from(Person.class); Predicate p = cb.equal(root.get(Person_.superior), person.getId()); - list = em.createQuery(cq.select(root.get(Person_.id)).where(p)) - .getResultList().stream().distinct().collect(Collectors.toList()); + list = em.createQuery(cq.select(root.get(Person_.id)).where(p)).getResultList().stream().distinct() + .collect(Collectors.toList()); return list; } -- GitLab