Browse Source

Revert "①修复Excel模块若干问题;②修复jdbc模块一些问题;③实现通过Intellij IDEA对HData进行断点调试"

Jayer 7 years ago
parent
commit
5024cb1577

+ 1 - 6
.gitignore

@@ -1,7 +1,2 @@
 target
-/build/
-*.iml
-*.bak
-.idea
-META-INF
-out
+/build/

+ 2 - 2
bin/hdata

@@ -40,8 +40,8 @@ if [ ! -z "$HDATA_CLASSPATH_APPEND" ]; then
     HDATA_CLASSPATH="${HDATA_CLASSPATH}:$HDATA_CLASSPATH_APPEND";
 fi
 
-JAVA_OPTS="$JAVA_OPTS -Xss1024k"
-JAVA_OPTS="$JAVA_OPTS -Xms4G -Xmx4G -Xmn2048M"
+JAVA_OPTS="$JAVA_OPTS -Xss256k"
+JAVA_OPTS="$JAVA_OPTS -Xms1G -Xmx1G -Xmn512M"
 JAVA_OPTS="$JAVA_OPTS -XX:+UseParNewGC"
 JAVA_OPTS="$JAVA_OPTS -XX:+UseConcMarkSweepGC"
 JAVA_OPTS="$JAVA_OPTS -XX:+CMSClassUnloadingEnabled"

+ 2 - 2
bin/hdata.bat

@@ -24,8 +24,8 @@ set HDATA_CLASSPATH=.;%HDATA_LIB_DIR%\*
 ::)
 echo %HDATA_CLASSPATH% 
 
-set JAVA_OPTS=%JAVA_OPTS% -Xss1024k
-set JAVA_OPTS=%JAVA_OPTS% -Xms4G -Xmx4G -Xmn2048M
+set JAVA_OPTS=%JAVA_OPTS% -Xss256k
+set JAVA_OPTS=%JAVA_OPTS% -Xms1G -Xmx1G -Xmn512M
 set JAVA_OPTS=%JAVA_OPTS% -XX:+UseParNewGC
 set JAVA_OPTS=%JAVA_OPTS% -XX:+UseConcMarkSweepGC
 set JAVA_OPTS=%JAVA_OPTS% -XX:+CMSClassUnloadingEnabled

+ 0 - 21
bin/job.xml

@@ -1,21 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<job id="job_example">
-    <reader name="jdbc">
-        <url>jdbc:mysql://127.0.0.1:3306/testdb?useSSL=true</url>
-        <driver>com.mysql.jdbc.Driver</driver>
-        <table>testtable</table>
-        <username>username</username>
-        <password>password</password>
-        <parallelism>3</parallelism>
-    </reader>
-
-    <writer name="hive">
-        <metastore.uris>thrift://127.0.0.1:9083</metastore.uris>
-        <hdfs.conf.path>/path/to/hdfs-site.xml</hdfs.conf.path>
-        <database>default</database>
-        <table>testtable</table>
-        <hadoop.user>hadoop</hadoop.user>
-        <parallelism>2</parallelism>
-    </writer>
-</job>

+ 1 - 1
conf/hdata.xml

@@ -3,7 +3,7 @@
 <configuration>
 	<property>
 		<name>hdata.storage.default.buffer.size</name>
-		<value>65536</value>
+		<value>16384</value>
 		<description>默认storage缓冲区大小,值必须为2^n</description>
 	</property>
 	<property>

BIN
doc/关于使用Intellij IDEA调试代码的配置/1.png


BIN
doc/关于使用Intellij IDEA调试代码的配置/2.png


+ 0 - 21
doc/关于使用Intellij IDEA调试代码的配置/run_debug_config.txt

@@ -1,21 +0,0 @@
-请将所有的 "D:\Dev\GitRepository\HData" 替换为你自己的 Hdata 项目路径
-===================================================================
-
-Main class:
-com.github.stuxuhai.hdata.CliDriver
-
-
-VM options:
--Xss256k -Xms1G -Xmx1G -Xmn512M -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:+CMSParallelRemarkEnabled -XX:+DisableExplicitGC -XX:CMSInitiatingOccupancyFraction=75 -XX:+UseCMSInitiatingOccupancyOnly -XX:+HeapDumpOnOutOfMemoryError -XX:SoftRefLRUPolicyMSPerMB=0 -Dhdata.conf.dir="D:\Dev\GitRepository\HData\conf" -Dlog4j.configurationFile=D:\Dev\GitRepository\HData\conf\log4j2.xml
-
-
-Program arguments:
--f D:\Dev\GitRepository\HData\bin\job.xml
-
-
-Working directory:
-D:\Dev\GitRepository\HData
-
-
-Use classpath of modules:
-hdata-core

+ 1 - 7
hdata-core/src/main/java/com/github/stuxuhai/hdata/util/PluginUtils.java

@@ -26,13 +26,7 @@ public class PluginUtils {
         File file = new File(PluginUtils.class.getProtectionDomain().getCodeSource().getLocation().getPath().replaceAll("/lib/.*\\.jar", "")
                 + "/plugins/" + pluginName);
         if (!file.exists()) {
-            String jarFilePath = System.getProperty("user.dir").replace("\\", "/") + "/out/artifacts/hdata_" + pluginName + "_jar";
-            file = new File(jarFilePath);
-            if (!file.exists()) {
-                System.err.println("未找到" + pluginName + "模块的jar包所在目录,\n" +
-                        "您可以使用Intellij IDEA自动编译" + pluginName + "模块的jar包至以下目录:\n" + jarFilePath);
-                throw new HDataException("Plugin not found: " + pluginName);
-            }
+            throw new HDataException("Plugin not found: " + pluginName);
         }
 
         File[] jars = file.listFiles();

+ 0 - 57
hdata-excel/src/main/java/com/github/stuxuhai/hdata/plugin/excel/ExcelUtils.java

@@ -1,57 +0,0 @@
-package com.github.stuxuhai.hdata.plugin.excel;
-
-import org.apache.poi.ss.usermodel.Cell;
-
-public class ExcelUtils {
-
-    /**
-     * 获取对象的String值
-     *
-     * @param value Object
-     * @return String
-     */
-    public static String getValueFromRecord(Object value) {
-
-        if (value instanceof Cell) {/*如果Record是一个Excel单元格*/
-            Cell cell = (Cell) value;
-            int cellType = cell.getCellType();
-
-            switch (cellType) {
-                case Cell.CELL_TYPE_NUMERIC:
-                    value = String.valueOf(cell.getNumericCellValue());
-                    break;
-                case Cell.CELL_TYPE_STRING:
-                    value = cell.getRichStringCellValue();
-                    break;
-                case Cell.CELL_TYPE_FORMULA:
-                    value = cell.getCellFormula();
-                    break;
-                case Cell.CELL_TYPE_BLANK:
-                    value = "";
-                    break;
-                case Cell.CELL_TYPE_BOOLEAN:
-                    value = String.valueOf(cell.getBooleanCellValue());
-                    break;
-                case Cell.CELL_TYPE_ERROR:
-                    value = String.valueOf(cell.getErrorCellValue());
-                    break;
-            }
-        } else {
-            // 如果是其他类型 ...
-        }
-
-        return String.valueOf(value);
-        
-    }
-
-    /**
-     * 获取Excel单元格的String值;
-     *
-     * @param cell ExcelCell
-     * @return String
-     */
-    public static String getValueFromCell(Cell cell) {
-        return getValueFromRecord(cell);
-    }
-
-}

+ 22 - 22
hdata-excel/src/main/java/com/github/stuxuhai/hdata/plugin/excel/reader/ExcelReader.java

@@ -1,21 +1,28 @@
 package com.github.stuxuhai.hdata.plugin.excel.reader;
 
-import com.github.stuxuhai.hdata.api.*;
-import com.github.stuxuhai.hdata.exception.HDataException;
-import com.github.stuxuhai.hdata.plugin.excel.ExcelProperties;
-import com.github.stuxuhai.hdata.plugin.excel.ExcelUtils;
-import com.google.common.base.Preconditions;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.ss.usermodel.Cell;
 import org.apache.poi.ss.usermodel.Row;
 import org.apache.poi.ss.usermodel.Sheet;
 import org.apache.poi.ss.usermodel.Workbook;
 import org.apache.poi.xssf.usermodel.XSSFWorkbook;
 
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
+import com.github.stuxuhai.hdata.api.DefaultRecord;
+import com.github.stuxuhai.hdata.api.Fields;
+import com.github.stuxuhai.hdata.api.JobContext;
+import com.github.stuxuhai.hdata.api.OutputFieldsDeclarer;
+import com.github.stuxuhai.hdata.api.PluginConfig;
+import com.github.stuxuhai.hdata.api.Reader;
+import com.github.stuxuhai.hdata.api.Record;
+import com.github.stuxuhai.hdata.api.RecordCollector;
+import com.github.stuxuhai.hdata.api.Splitter;
+import com.github.stuxuhai.hdata.exception.HDataException;
+import com.github.stuxuhai.hdata.plugin.excel.ExcelProperties;
+import com.google.common.base.Preconditions;
 
 public class ExcelReader extends Reader {
 
@@ -46,16 +53,10 @@ public class ExcelReader extends Reader {
 		if (workbook.getNumberOfSheets() > 0) {
 			Sheet sheet = workbook.getSheetAt(0);
 
-            int cellLength = 0;
-            if (sheet.getPhysicalNumberOfRows() > 0) {
-                // 先根据第一行判断列的宽度,此处不推荐使用getPhysicalNumberOfCells方法
-                cellLength = sheet.getRow(0).getLastCellNum();
-            }
-
 			if (includeColumnNames && sheet.getPhysicalNumberOfRows() > 0) {
 				Row row = sheet.getRow(0);
-				cellLength = row.getPhysicalNumberOfCells();
-				for (int cellIndex = row.getFirstCellNum(); cellIndex < cellLength; cellIndex++) {
+				for (int cellIndex = row.getFirstCellNum(), cellLength = row
+						.getPhysicalNumberOfCells(); cellIndex < cellLength; cellIndex++) {
 					fields.add(row.getCell(cellIndex).toString());
 				}
 			}
@@ -64,11 +65,10 @@ public class ExcelReader extends Reader {
 			for (int rowIndex = startRow, rowLength = sheet
 					.getPhysicalNumberOfRows(); rowIndex < rowLength; rowIndex++) {
 				Row row = sheet.getRow(rowIndex);
-				Record record = new DefaultRecord(cellLength);
-				for (int cellIndex = row.getFirstCellNum(); cellIndex < cellLength; cellIndex++) {
-					Cell cell = row.getCell(cellIndex, Row.MissingCellPolicy.CREATE_NULL_AS_BLANK);
-					record.add(ExcelUtils.getValueFromCell(cell));
-
+				Record record = new DefaultRecord(row.getPhysicalNumberOfCells());
+				for (int cellIndex = row.getFirstCellNum(), cellLength = row
+						.getPhysicalNumberOfCells(); cellIndex < cellLength; cellIndex++) {
+					record.add(row.getCell(cellIndex).toString());
 				}
 
 				recordCollector.send(record);

+ 12 - 8
hdata-excel/src/main/java/com/github/stuxuhai/hdata/plugin/excel/writer/ExcelWriter.java

@@ -1,10 +1,8 @@
 package com.github.stuxuhai.hdata.plugin.excel.writer;
 
-import com.github.stuxuhai.hdata.api.*;
-import com.github.stuxuhai.hdata.exception.HDataException;
-import com.github.stuxuhai.hdata.plugin.excel.ExcelProperties;
-import com.github.stuxuhai.hdata.plugin.excel.ExcelUtils;
-import com.google.common.base.Preconditions;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
 import org.apache.poi.ss.usermodel.Cell;
 import org.apache.poi.ss.usermodel.Row;
 import org.apache.poi.ss.usermodel.Sheet;
@@ -13,8 +11,14 @@ import org.apache.poi.xssf.usermodel.XSSFCell;
 import org.apache.poi.xssf.usermodel.XSSFRichTextString;
 import org.apache.poi.xssf.usermodel.XSSFWorkbook;
 
-import java.io.FileOutputStream;
-import java.io.IOException;
+import com.github.stuxuhai.hdata.api.Fields;
+import com.github.stuxuhai.hdata.api.JobContext;
+import com.github.stuxuhai.hdata.api.PluginConfig;
+import com.github.stuxuhai.hdata.api.Record;
+import com.github.stuxuhai.hdata.api.Writer;
+import com.github.stuxuhai.hdata.exception.HDataException;
+import com.github.stuxuhai.hdata.plugin.excel.ExcelProperties;
+import com.google.common.base.Preconditions;
 
 public class ExcelWriter extends Writer {
 
@@ -55,7 +59,7 @@ public class ExcelWriter extends Writer {
 		for (int i = 0, len = record.size(); i < len; i++) {
 			Cell cell = row.createCell(i);
 			cell.setCellType(XSSFCell.CELL_TYPE_STRING);
-            Object value = ExcelUtils.getValueFromRecord(record.get(i));
+			Object value = record.get(i);
 			XSSFRichTextString content = new XSSFRichTextString(value != null ? value.toString() : null);
 			cell.setCellValue(content);
 		}

+ 4 - 13
hdata-jdbc/src/main/java/com/github/stuxuhai/hdata/plugin/writer/jdbc/JDBCWriter.java

@@ -117,7 +117,7 @@ public class JDBCWriter extends Writer {
     private String buildInsertSql(String table, List<String> columns, List<String> upsertColumns) {
         String[] placeholder = new String[columns.size()];
         Arrays.fill(placeholder, "?");
-        String sql = String.format("INSERT INTO `%s`(%s) VALUES(%s)",
+        String sql = String.format("INSERT INTO %s(%s) VALUES(%s)",
                 table,
                 keywordEscaper + Joiner.on(keywordEscaper + ", " + keywordEscaper).join(columns) + keywordEscaper,
                 Joiner.on(", ").join(placeholder));
@@ -147,7 +147,7 @@ public class JDBCWriter extends Writer {
     private String buildInsertSql(String table, int columnSize, List<String> upsertColumns) {
         String[] placeholder = new String[columnSize];
         Arrays.fill(placeholder, "?");
-        String sql = String.format("INSERT INTO `%s` VALUES(%s)", table, Joiner.on(", ").join(placeholder));
+        String sql = String.format("INSERT INTO %s VALUES(%s)", table, Joiner.on(", ").join(placeholder));
         // TODO: Upsert only support mysql for now
         return appendMysqlUpsertTail(sql, upsertColumns);
     }
@@ -177,12 +177,7 @@ public class JDBCWriter extends Writer {
                 connection.commit();
             }
         } catch (SQLException e) {
-            if(e.getMessage().contains("Duplicate entry")){
-                // TODO 忽略唯一键的约束,继续执行导入操作,此处可以将重复数据导出到日志
-            }else{
-                throw new HDataException(e);
-            }
-
+            throw new HDataException(e);
         }
     }
 
@@ -198,11 +193,7 @@ public class JDBCWriter extends Writer {
                 statement.close();
             }
         } catch (SQLException e) {
-            if(e.getMessage().contains("Duplicate entry")){
-                // TODO 忽略唯一键的约束,继续执行导入操作,此处可以将重复数据导出到日志
-            }else{
-                throw new HDataException(e);
-            }
+            throw new HDataException(e);
         } finally {
             DbUtils.closeQuietly(connection);
         }