diff --git a/pom.xml b/pom.xml
index 2664a0b..97d8dda 100644
--- a/pom.xml
+++ b/pom.xml
@@ -19,8 +19,34 @@
21
21
false
+ 1.0.0-M2.1
+ nd4j-native
+
+
+ org.nd4j
+ ${nd4j.backend}
+ ${dl4j-master.version}
+
+
+
+
+ org.apache.commons
+ commons-csv
+ 1.10.0
+
+
+
+
+ org.projectlombok
+ lombok
+ 1.18.30
+ provided
+
+
+
+
diff --git a/src/main/java/io/lroyia/ApplicationRun.java b/src/main/java/io/lroyia/KnnRun.java
similarity index 83%
rename from src/main/java/io/lroyia/ApplicationRun.java
rename to src/main/java/io/lroyia/KnnRun.java
index cec7267..7151d84 100644
--- a/src/main/java/io/lroyia/ApplicationRun.java
+++ b/src/main/java/io/lroyia/KnnRun.java
@@ -6,7 +6,7 @@ package io.lroyia;
* @author lroyia
* @since 2023/10/20 9:33
**/
-public class ApplicationRun {
+public class KnnRun {
public static void main(String[] args) {
diff --git a/src/main/java/io/lroyia/bean/EntCalInfo.java b/src/main/java/io/lroyia/bean/EntCalInfo.java
new file mode 100644
index 0000000..6fc6953
--- /dev/null
+++ b/src/main/java/io/lroyia/bean/EntCalInfo.java
@@ -0,0 +1,58 @@
+package io.lroyia.bean;
+
+import lombok.Data;
+import lombok.experimental.Accessors;
+
+import java.io.Serializable;
+
+/**
+ * Numeric, calculation-ready form of a market entity's attributes, used for classification
+ * @author lroyia
+ * @since 2023/10/24 9:48
+ **/
+@Data
+@Accessors(chain = true)
+public class EntCalInfo implements Serializable {
+
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Entity id
+ */
+ private String pripid;
+
+ /**
+ * Enterprise name
+ */
+ private String entName;
+
+ /**
+ * Establishment time as a number (EntInfo.toCalInfo stores the difference in calendar years from today)
+ */
+ private double estDate;
+
+ /**
+ * Enterprise type as a number
+ */
+ private double entType;
+
+ /**
+ * Registered capital
+ */
+ private double regCap;
+
+ /**
+ * Industry category
+ */
+ private String industryPhy;
+
+ /**
+ * Industry code as a number
+ */
+ private double industryCo;
+
+ /**
+ * Registration status as a number
+ */
+ private double regState;
+}
diff --git a/src/main/java/io/lroyia/bean/EntInfo.java b/src/main/java/io/lroyia/bean/EntInfo.java
new file mode 100644
index 0000000..cf1d384
--- /dev/null
+++ b/src/main/java/io/lroyia/bean/EntInfo.java
@@ -0,0 +1,93 @@
+package io.lroyia.bean;
+
+import lombok.Data;
+import lombok.experimental.Accessors;
+import org.apache.commons.lang3.StringUtils;
+
+import java.io.Serializable;
+import java.math.BigDecimal;
+import java.time.LocalDate;
+
+/**
+ * Market entity information held in its source (non-numeric) types.
+ *
+ * @author lroyia
+ * @since 2023/10/24 9:19
+ **/
+@Data
+@Accessors(chain = true)
+public class EntInfo implements Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Entity id
+     */
+    private String pripid;
+
+    /**
+     * Enterprise name
+     */
+    private String entName;
+
+    /**
+     * Establishment date
+     */
+    private LocalDate estDate;
+
+    /**
+     * Enterprise type
+     */
+    private String entType;
+
+    /**
+     * Registered capital
+     */
+    private BigDecimal regCap;
+
+    /**
+     * Industry category
+     */
+    private String industryPhy;
+
+    /**
+     * Industry code
+     */
+    private String industryCo;
+
+    /**
+     * Registration status
+     */
+    private String regState;
+
+    /**
+     * Converts this record into its numeric calculation bean; null or blank values keep the target's default.
+     *
+     * @return the converted bean, with the establishment date expressed as a calendar-year difference from today
+     * @throws NumberFormatException if a non-blank type, industry code or status is not a parsable double
+     * @author lroyia
+     * @since 2023-10-24 09:54:11
+     */
+    public EntCalInfo toCalInfo() {
+        EntCalInfo cal = new EntCalInfo()
+                .setPripid(pripid)
+                .setEntName(entName)
+                .setIndustryPhy(industryPhy);
+        if (estDate != null) {
+            cal.setEstDate(LocalDate.now().getYear() - estDate.getYear());
+        }
+        if (!StringUtils.isBlank(entType)) {
+            cal.setEntType(Double.parseDouble(entType));
+        }
+        if (regCap != null) {
+            cal.setRegCap(regCap.doubleValue());
+        }
+        if (!StringUtils.isBlank(industryCo)) {
+            cal.setIndustryCo(Double.parseDouble(industryCo));
+        }
+        if (!StringUtils.isBlank(regState)) {
+            cal.setRegState(Double.parseDouble(regState));
+        }
+        return cal;
+    }
+}
diff --git a/src/main/java/io/lroyia/util/DataUtil.java b/src/main/java/io/lroyia/util/DataUtil.java
new file mode 100644
index 0000000..09e8e3e
--- /dev/null
+++ b/src/main/java/io/lroyia/util/DataUtil.java
@@ -0,0 +1,153 @@
+package io.lroyia.util;
+
+import io.lroyia.bean.EntCalInfo;
+import io.lroyia.bean.EntInfo;
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.csv.CSVRecord;
+import org.apache.commons.lang3.StringUtils;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.math.BigDecimal;
+import java.time.LocalDate;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Static helpers that load the bundled market-entity test CSV and min-max normalize its numeric columns.
+ *
+ * @author lroyia
+ * @since 2023/10/24 9:26
+ **/
+public abstract class DataUtil {
+
+    /**
+     * Loads every data row of the bundled test CSV from the classpath.
+     *
+     * @return one {@link EntInfo} per data row, in file order
+     * @throws RuntimeException a NullPointerException if the resource is missing, or a wrapper around the IOException if reading fails
+     * @author lroyia
+     * @since 2023-10-24 09:43:48
+     */
+    public static List<EntInfo> getAllTestEntInfo() {
+        CSVFormat format = CSVFormat.Builder.create()
+                .setHeader() // read the header row as column names; otherwise CSVRecord.get(headerName) fails
+                .setSkipHeaderRecord(true) // skip the header row itself; for headers kept in a separate file see the CSVFormat builder
+                .build();
+        // requireNonNull names the missing resource; UTF-8 is pinned instead of the platform default (NOTE(review): confirm the CSV is UTF-8).
+        try (InputStream resourceAsStream = DataUtil.class.getResourceAsStream("/市场主体测试数据.csv");
+             InputStreamReader reader = new InputStreamReader(
+                     java.util.Objects.requireNonNull(resourceAsStream, "classpath resource /市场主体测试数据.csv not found"),
+                     java.nio.charset.StandardCharsets.UTF_8);
+             CSVParser parse = format.parse(reader)) {
+            List<EntInfo> result = new ArrayList<>();
+            for (CSVRecord each : parse) {
+                EntInfo atom = new EntInfo().setPripid(each.get("PRIPID")).setEntName(each.get("ENTNAME"));
+                result.add(atom);
+                String estDateStr = each.get("ESTDATE");
+                if (StringUtils.isNotBlank(estDateStr)) {
+                    String[] dateArr = estDateStr.split(" ")[0].split("-");
+                    atom.setEstDate(LocalDate.of(Integer.parseInt(dateArr[0]), Integer.parseInt(dateArr[1]), Integer.parseInt(dateArr[2])));
+                }
+                String entType = each.get("SUBENTTYPE");
+                if (StringUtils.isBlank(entType)) {
+                    entType = each.get("ENTTYPE");
+                }
+                atom.setEntType(entType);
+                String regCap = each.get("REGCAP");
+                if (StringUtils.isNotBlank(regCap)) {
+                    atom.setRegCap(new BigDecimal(regCap));
+                }
+                atom.setIndustryPhy(each.get("INDUSTRYPHY"));
+                atom.setIndustryCo(each.get("INDUSTRYCO"));
+                atom.setRegState(each.get("ENTSTATE"));
+            }
+            return result;
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Loads the test data and converts every record to its numeric calculation bean.
+     *
+     * @return one {@link EntCalInfo} per data row, in file order
+     * @author lroyia
+     * @since 2023-10-24 09:57:15
+     */
+    public static List<EntCalInfo> getAllCalInfo() {
+        List<EntCalInfo> result = new ArrayList<>();
+        for (EntInfo each : getAllTestEntInfo()) {
+            result.add(each.toCalInfo());
+        }
+        return result;
+    }
+
+    /**
+     * Min-max normalizes the five numeric columns of the given beans in place.
+     *
+     * @param list beans to normalize; every element is modified
+     * @return the same {@code list} instance that was passed in
+     * @author lroyia
+     * @since 2023-10-24 10:36:08
+     */
+    public static List<EntCalInfo> toOne(List<EntCalInfo> list) {
+        List<Double> estDateList = new ArrayList<>(list.size());
+        List<Double> entTypeList = new ArrayList<>(list.size());
+        List<Double> regCapList = new ArrayList<>(list.size());
+        List<Double> industryCoList = new ArrayList<>(list.size());
+        List<Double> regStateList = new ArrayList<>(list.size());
+        for (EntCalInfo each : list) {
+            estDateList.add(each.getEstDate());
+            entTypeList.add(each.getEntType());
+            regCapList.add(each.getRegCap());
+            industryCoList.add(each.getIndustryCo());
+            regStateList.add(each.getRegState());
+        }
+        estDateList = columnToOne(estDateList);
+        entTypeList = columnToOne(entTypeList);
+        regCapList = columnToOne(regCapList);
+        industryCoList = columnToOne(industryCoList);
+        regStateList = columnToOne(regStateList);
+        for (int i = 0; i < list.size(); i++) {
+            list.get(i).setEstDate(estDateList.get(i))
+                    .setEntType(entTypeList.get(i))
+                    .setRegCap(regCapList.get(i))
+                    .setIndustryCo(industryCoList.get(i))
+                    .setRegState(regStateList.get(i));
+        }
+        return list;
+    }
+
+    /**
+     * Min-max normalizes one column: each value becomes {@code (value - min) / (max - min)}.
+     *
+     * @param list column values
+     * @return normalized values in input order; all {@code 0.0} when every value is equal, empty for an empty input
+     * @author lroyia
+     * @since 2023-10-24 10:27:05
+     */
+    private static List<Double> columnToOne(List<Double> list) {
+        // Seed with the infinities: Double.MIN_VALUE is the smallest positive double, not the most negative one.
+        double max = Double.NEGATIVE_INFINITY;
+        double min = Double.POSITIVE_INFINITY;
+        for (double each : list) {
+            max = Math.max(each, max);
+            min = Math.min(each, min);
+        }
+        double divisor = max - min;
+        List<Double> result = new ArrayList<>(list.size());
+        for (double each : list) {
+            // A constant column has a zero range; emit 0.0 rather than the NaN that 0/0 would produce.
+            result.add(divisor == 0 ? 0.0 : (each - min) / divisor);
+        }
+        return result;
+    }
+
+    /** Prints every normalized test record to standard output, one per line. */
+    public static void main(String[] args) {
+        toOne(getAllCalInfo()).forEach(System.out::println);
+    }
+}
diff --git a/市场主体测试数据.csv b/src/main/resources/市场主体测试数据.csv
similarity index 100%
rename from 市场主体测试数据.csv
rename to src/main/resources/市场主体测试数据.csv