From fdea6dafd413a71fa879f45a674b9219bafd8360 Mon Sep 17 00:00:00 2001 From: lroyia Date: Tue, 24 Oct 2023 10:43:53 +0800 Subject: [PATCH] =?UTF-8?q?=E6=95=B0=E6=8D=AE=E8=AF=BB=E5=8F=96=E4=B8=8E?= =?UTF-8?q?=E5=BD=92=E4=B8=80=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pom.xml | 26 +++ .../{ApplicationRun.java => KnnRun.java} | 2 +- src/main/java/io/lroyia/bean/EntCalInfo.java | 58 +++++++ src/main/java/io/lroyia/bean/EntInfo.java | 93 +++++++++++ src/main/java/io/lroyia/util/DataUtil.java | 153 ++++++++++++++++++ .../resources/市场主体测试数据.csv | 0 6 files changed, 331 insertions(+), 1 deletion(-) rename src/main/java/io/lroyia/{ApplicationRun.java => KnnRun.java} (83%) create mode 100644 src/main/java/io/lroyia/bean/EntCalInfo.java create mode 100644 src/main/java/io/lroyia/bean/EntInfo.java create mode 100644 src/main/java/io/lroyia/util/DataUtil.java rename 市场主体测试数据.csv => src/main/resources/市场主体测试数据.csv (100%) diff --git a/pom.xml b/pom.xml index 2664a0b..97d8dda 100644 --- a/pom.xml +++ b/pom.xml @@ -19,8 +19,34 @@ 21 21 false + 1.0.0-M2.1 + nd4j-native + + + org.nd4j + ${nd4j.backend} + ${dl4j-master.version} + + + + + org.apache.commons + commons-csv + 1.10.0 + + + + + org.projectlombok + lombok + 1.18.30 + provided + + + + diff --git a/src/main/java/io/lroyia/ApplicationRun.java b/src/main/java/io/lroyia/KnnRun.java similarity index 83% rename from src/main/java/io/lroyia/ApplicationRun.java rename to src/main/java/io/lroyia/KnnRun.java index cec7267..7151d84 100644 --- a/src/main/java/io/lroyia/ApplicationRun.java +++ b/src/main/java/io/lroyia/KnnRun.java @@ -6,7 +6,7 @@ package io.lroyia; * @author lroyia * @since 2023/10/20 9:33 **/ -public class ApplicationRun { +public class KnnRun { public static void main(String[] args) { diff --git a/src/main/java/io/lroyia/bean/EntCalInfo.java b/src/main/java/io/lroyia/bean/EntCalInfo.java new file mode 100644 index 0000000..6fc6953 --- /dev/null +++ 
b/src/main/java/io/lroyia/bean/EntCalInfo.java @@ -0,0 +1,58 @@ +package io.lroyia.bean; + +import lombok.Data; +import lombok.experimental.Accessors; + +import java.io.Serializable; + +/** + * Numeric form of an entity's fields, used for classification calculations + * @author lroyia + * @since 2023/10/24 9:48 + **/ +@Data +@Accessors(chain = true) +public class EntCalInfo implements Serializable { + + private static final long serialVersionUID = 1L; + + /** + * Entity id + */ + private String pripid; + + /** + * Enterprise name + */ + private String entName; + + /** + * Establishment time + */ + private double estDate; + + /** + * Enterprise type + */ + private double entType; + + /** + * Registered capital + */ + private double regCap; + + /** + * Industry category + */ + private String industryPhy; + + /** + * Industry code + */ + private double industryCo; + + /** + * Registration status + */ + private double regState; +} diff --git a/src/main/java/io/lroyia/bean/EntInfo.java b/src/main/java/io/lroyia/bean/EntInfo.java new file mode 100644 index 0000000..cf1d384 --- /dev/null +++ b/src/main/java/io/lroyia/bean/EntInfo.java @@ -0,0 +1,93 @@ +package io.lroyia.bean; + +import lombok.Data; +import lombok.experimental.Accessors; +import org.apache.commons.lang3.StringUtils; + +import java.io.Serializable; +import java.math.BigDecimal; +import java.time.LocalDate; + +/** + * Entity information + * + * @author lroyia + * @since 2023/10/24 9:19 + **/ +@Data +@Accessors(chain = true) +public class EntInfo implements Serializable { + + private static final long serialVersionUID = 1L; + + /** + * Entity id + */ + private String pripid; + + /** + * Enterprise name + */ + private String entName; + + /** + * Establishment date + */ + private LocalDate estDate; + + /** + * Enterprise type + */ + private String entType; + + /** + * Registered capital + */ + private BigDecimal regCap; + + /** + * Industry category + */ + private String industryPhy; + + /** + * Industry code + */ + private String industryCo; + + /** + * Registration status + */ + private String regState; + + /** + * Converts this entity to its calculation bean; blank or null fields are left at the bean's defaults + * + * @return the conversion result + * @author lroyia + * @since 2023-10-24 09:54:11 + */ + public EntCalInfo toCalInfo() { + EntCalInfo result = new
EntCalInfo(); + result.setPripid(pripid); + result.setEntName(entName); + result.setIndustryPhy(industryPhy); + if (estDate != null) { + LocalDate now = LocalDate.now(); + result.setEstDate(now.getYear() - estDate.getYear()); + } + if (StringUtils.isNotBlank(entType)) { + result.setEntType(Double.parseDouble(entType)); + } + if (regCap != null) { + result.setRegCap(regCap.doubleValue()); + } + if (StringUtils.isNotBlank(industryCo)) { + result.setIndustryCo(Double.parseDouble(industryCo)); + } + if (StringUtils.isNotBlank(regState)) { + result.setRegState(Double.parseDouble(regState)); + } + return result; + } +} diff --git a/src/main/java/io/lroyia/util/DataUtil.java b/src/main/java/io/lroyia/util/DataUtil.java new file mode 100644 index 0000000..09e8e3e --- /dev/null +++ b/src/main/java/io/lroyia/util/DataUtil.java @@ -0,0 +1,153 @@ +package io.lroyia.util; + +import io.lroyia.bean.EntCalInfo; +import io.lroyia.bean.EntInfo; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVRecord; +import org.apache.commons.lang3.StringUtils; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.math.BigDecimal; +import java.time.LocalDate; +import java.util.ArrayList; +import java.util.List; + +/** + * Data manipulation utilities + * + * @author lroyia + * @since 2023/10/24 9:26 + **/ +public abstract class DataUtil { + + /** + * Loads every record of the test CSV on the classpath as an EntInfo + * + * @return the parsed test records + * @author lroyia + * @since 2023-10-24 09:43:48 + */ + public static List<EntInfo> getAllTestEntInfo() { + CSVFormat format = CSVFormat.Builder.create() + .setHeader() // read the header row as the CSV keys; otherwise CSVRecord.get(headerName) throws + .setSkipHeaderRecord(true) // skip the first row of column names; if the column names live in a separate file, look up how to build the CSVFormat yourself + .build(); + try (InputStream resourceAsStream = DataUtil.class.getResourceAsStream("/市场主体测试数据.csv"); + InputStreamReader reader = new InputStreamReader(resourceAsStream)) { + CSVParser parse = format.parse(reader); + List<EntInfo> result = new
ArrayList<>(); + for (CSVRecord each : parse) { + EntInfo atom = new EntInfo(); + result.add(atom); + atom.setPripid(each.get("PRIPID")); + atom.setEntName(each.get("ENTNAME")); + String estDateStr = each.get("ESTDATE"); + if (StringUtils.isNotBlank(estDateStr)) { + String[] dateArr = estDateStr.split(" ")[0].split("-"); + atom.setEstDate(LocalDate.of(Integer.parseInt(dateArr[0]), Integer.parseInt(dateArr[1]), Integer.parseInt(dateArr[2]))); + } + String entType = each.get("SUBENTTYPE"); + if (StringUtils.isBlank(entType)) { + entType = each.get("ENTTYPE"); + } + atom.setEntType(entType); + String regCap = each.get("REGCAP"); + if (StringUtils.isNotBlank(regCap)) { + atom.setRegCap(new BigDecimal(regCap)); + } + atom.setIndustryPhy(each.get("INDUSTRYPHY")); + atom.setIndustryCo(each.get("INDUSTRYCO")); + atom.setRegState(each.get("ENTSTATE")); + } + return result; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** + * Loads all test records and converts each one to its calculation bean + * + * @return the list of calculation beans + * @author lroyia + * @since 2023-10-24 09:57:15 + */ + public static List<EntCalInfo> getAllCalInfo() { + List<EntInfo> allTestEntInfo = getAllTestEntInfo(); + List<EntCalInfo> result = new ArrayList<>(); + for (EntInfo each : allTestEntInfo) { + result.add(each.toCalInfo()); + } + return result; + } + + /** + * Min-max normalizes every numeric field across the list, column by column, updating the beans in place + * + * @param list beans to normalize; modified in place + * @return the same list instance, holding the normalized values + * @author lroyia + * @since 2023-10-24 10:36:08 + */ + public static List<EntCalInfo> toOne(List<EntCalInfo> list) { + List<Double> estDateList = new ArrayList<>(list.size()); + List<Double> entTypeList = new ArrayList<>(list.size()); + List<Double> regCapList = new ArrayList<>(list.size()); + List<Double> industryCoList = new ArrayList<>(list.size()); + List<Double> regStateList = new ArrayList<>(list.size()); + for (EntCalInfo each : list) { + estDateList.add(each.getEstDate()); + entTypeList.add(each.getEntType()); + regCapList.add(each.getRegCap()); + industryCoList.add(each.getIndustryCo()); + regStateList.add(each.getRegState()); + } + estDateList = columnToOne(estDateList); + entTypeList = columnToOne(entTypeList); +
regCapList = columnToOne(regCapList); + industryCoList = columnToOne(industryCoList); + regStateList = columnToOne(regStateList); + for (int i = 0; i < list.size(); i++) { + EntCalInfo each = list.get(i); + each.setEstDate(estDateList.get(i)) + .setEntType(entTypeList.get(i)) + .setRegCap(regCapList.get(i)) + .setIndustryCo(industryCoList.get(i)) + .setRegState(regStateList.get(i)); + } + return list; + } + + /** + * Min-max normalizes one column: each value becomes (value - min) / (max - min); a constant column (max == min) maps to 0.0 rather than NaN + * + * @param list the column data + * @return the normalized column, in the same order as the input + * @author lroyia + * @since 2023-10-24 10:27:05 + */ + private static List<Double> columnToOne(List<Double> list) { + double max = Double.NEGATIVE_INFINITY; + double min = Double.POSITIVE_INFINITY; + for (Double each : list) { + max = Math.max(each, max); + min = Math.min(each, min); + } + double divisor = max - min; + List<Double> result = new ArrayList<>(list.size()); + for (Double each : list) { + result.add(divisor == 0 ? 0.0 : (each - min) / divisor); + } + return result; + } + + public static void main(String[] args) { + List<EntCalInfo> one = toOne(getAllCalInfo()); + for (EntCalInfo each : one) { + System.out.println(each); + } + } +} diff --git a/市场主体测试数据.csv b/src/main/resources/市场主体测试数据.csv similarity index 100% rename from 市场主体测试数据.csv rename to src/main/resources/市场主体测试数据.csv