stuxuhai 11 years ago
commit
50ff4568fe
90 changed files with 4,562 additions and 0 deletions
  1. +19 -0  README.md
  2. +46 -0  bin/hdata
  3. +38 -0  bin/hdata.bat
  4. +19 -0  conf/hdata.xml
  5. +14 -0  conf/log4j2.xml
  6. +67 -0  conf/plugins.xml
  7. +23 -0  job-examples/ftp-ftp.xml
  8. +18 -0  job-examples/hbase-console.xml
  9. +20 -0  job-examples/hdfs-hive.xml
  10. +19 -0  job-examples/hdfs-jdbc.xml
  11. +20 -0  job-examples/hive-jdbc.xml
  12. +26 -0  job-examples/jdbc-hbase.xml
  13. +23 -0  job-examples/jdbc-hdfs.xml
  14. +26 -0  job-examples/jdbc-hive.xml
  15. +27 -0  job-examples/jdbc-jdbc.xml
  16. +23 -0  job-examples/jdbc-mongodb.xml
  17. +27 -0  job-examples/job.xml
  18. +13 -0  job-examples/mongodb-console.xml
  19. +166 -0  pom.xml
  20. +108 -0  src/main/java/opensource/hdata/CliDriver.java
  21. +13 -0  src/main/java/opensource/hdata/common/Constants.java
  22. +11 -0  src/main/java/opensource/hdata/common/HDataConfigConstants.java
  23. +66 -0  src/main/java/opensource/hdata/config/Configuration.java
  24. +36 -0  src/main/java/opensource/hdata/config/EngineConfig.java
  25. +112 -0  src/main/java/opensource/hdata/config/JobConfig.java
  26. +20 -0  src/main/java/opensource/hdata/config/PluginConfig.java
  27. +45 -0  src/main/java/opensource/hdata/core/DefaultRecord.java
  28. +20 -0  src/main/java/opensource/hdata/core/Fields.java
  29. +142 -0  src/main/java/opensource/hdata/core/HData.java
  30. +73 -0  src/main/java/opensource/hdata/core/JobContext.java
  31. +62 -0  src/main/java/opensource/hdata/core/Metric.java
  32. +14 -0  src/main/java/opensource/hdata/core/OutputFieldsDeclarer.java
  33. +56 -0  src/main/java/opensource/hdata/core/PluginLoader.java
  34. +27 -0  src/main/java/opensource/hdata/core/ReaderWorker.java
  35. +26 -0  src/main/java/opensource/hdata/core/RecordEvent.java
  36. +56 -0  src/main/java/opensource/hdata/core/RecordWorkHandler.java
  37. +49 -0  src/main/java/opensource/hdata/core/Storage.java
  38. +34 -0  src/main/java/opensource/hdata/core/WaitStrategyFactory.java
  39. +15 -0  src/main/java/opensource/hdata/core/plugin/AbstractPlugin.java
  40. +8 -0  src/main/java/opensource/hdata/core/plugin/Pluginable.java
  41. +20 -0  src/main/java/opensource/hdata/core/plugin/Reader.java
  42. +24 -0  src/main/java/opensource/hdata/core/plugin/ReaderPlugin.java
  43. +12 -0  src/main/java/opensource/hdata/core/plugin/Record.java
  44. +24 -0  src/main/java/opensource/hdata/core/plugin/RecordCollector.java
  45. +11 -0  src/main/java/opensource/hdata/core/plugin/Splitter.java
  46. +16 -0  src/main/java/opensource/hdata/core/plugin/Writer.java
  47. +15 -0  src/main/java/opensource/hdata/core/plugin/WriterPlugin.java
  48. +43 -0  src/main/java/opensource/hdata/exception/HDataException.java
  49. +99 -0  src/main/java/opensource/hdata/plugin/reader/ftp/FTPReader.java
  50. +16 -0  src/main/java/opensource/hdata/plugin/reader/ftp/FTPReaderProperties.java
  51. +60 -0  src/main/java/opensource/hdata/plugin/reader/ftp/FTPSplitter.java
  52. +106 -0  src/main/java/opensource/hdata/plugin/reader/hbase/HBaseReader.java
  53. +12 -0  src/main/java/opensource/hdata/plugin/reader/hbase/HBaseReaderProperties.java
  54. +118 -0  src/main/java/opensource/hdata/plugin/reader/hbase/HBaseSplitter.java
  55. +86 -0  src/main/java/opensource/hdata/plugin/reader/hdfs/HDFSReader.java
  56. +11 -0  src/main/java/opensource/hdata/plugin/reader/hdfs/HDFSReaderProperties.java
  57. +69 -0  src/main/java/opensource/hdata/plugin/reader/hdfs/HDFSSplitter.java
  58. +108 -0  src/main/java/opensource/hdata/plugin/reader/hive/HiveReader.java
  59. +14 -0  src/main/java/opensource/hdata/plugin/reader/hive/HiveReaderProperties.java
  60. +118 -0  src/main/java/opensource/hdata/plugin/reader/hive/HiveSplitter.java
  61. +15 -0  src/main/java/opensource/hdata/plugin/reader/jdbc/JBDCReaderProperties.java
  62. +87 -0  src/main/java/opensource/hdata/plugin/reader/jdbc/JDBCReader.java
  63. +164 -0  src/main/java/opensource/hdata/plugin/reader/jdbc/JDBCSplitter.java
  64. +78 -0  src/main/java/opensource/hdata/plugin/reader/mongodb/MongoDBReader.java
  65. +7 -0  src/main/java/opensource/hdata/plugin/reader/mongodb/MongoDBReaderProperties.java
  66. +95 -0  src/main/java/opensource/hdata/plugin/reader/mongodb/MongoDBSplitter.java
  67. +12 -0  src/main/java/opensource/hdata/plugin/writer/console/ConsoleWriter.java
  68. +116 -0  src/main/java/opensource/hdata/plugin/writer/ftp/FTPWriter.java
  69. +13 -0  src/main/java/opensource/hdata/plugin/writer/ftp/FTPWriterProperties.java
  70. +91 -0  src/main/java/opensource/hdata/plugin/writer/hbase/HBaseWriter.java
  71. +9 -0  src/main/java/opensource/hdata/plugin/writer/hbase/HBaseWriterProperties.java
  72. +117 -0  src/main/java/opensource/hdata/plugin/writer/hdfs/HDFSWriter.java
  73. +10 -0  src/main/java/opensource/hdata/plugin/writer/hdfs/HDFSWriterProperties.java
  74. +19 -0  src/main/java/opensource/hdata/plugin/writer/hive/HiveRecordWritable.java
  75. +211 -0  src/main/java/opensource/hdata/plugin/writer/hive/HiveWriter.java
  76. +11 -0  src/main/java/opensource/hdata/plugin/writer/hive/HiveWriterProperties.java
  77. +13 -0  src/main/java/opensource/hdata/plugin/writer/jdbc/JBDCWriterProperties.java
  78. +124 -0  src/main/java/opensource/hdata/plugin/writer/jdbc/JDBCWriter.java
  79. +67 -0  src/main/java/opensource/hdata/plugin/writer/mongodb/MongoDBWriter.java
  80. +6 -0  src/main/java/opensource/hdata/plugin/writer/mongodb/MongoDBWriterProperties.java
  81. +76 -0  src/main/java/opensource/hdata/tool/SQLExecuteTool.java
  82. +55 -0  src/main/java/opensource/hdata/util/EscaperUtils.java
  83. +71 -0  src/main/java/opensource/hdata/util/FTPUtils.java
  84. +60 -0  src/main/java/opensource/hdata/util/HiveMetaStoreUtils.java
  85. +45 -0  src/main/java/opensource/hdata/util/HiveTypeUtils.java
  86. +199 -0  src/main/java/opensource/hdata/util/JDBCUtils.java
  87. +11 -0  src/main/java/opensource/hdata/util/LoggerUtils.java
  88. +43 -0  src/main/java/opensource/hdata/util/TypeConvertUtils.java
  89. +89 -0  src/main/java/opensource/hdata/util/Utils.java
  90. +39 -0  src/main/java/opensource/hdata/util/XMLUtils.java

+ 19 - 0
README.md

@@ -0,0 +1,19 @@
+HData
+=======
+
+HData is a heterogeneous data transfer tool. Its goal is to solve data exchange between different data sources (RDBMS, Hive, HDFS, HBase, MongoDB, FTP, etc.) with a single tool. The design draws on the open-source Sqoop and DataX but is implemented differently: HData uses a "framework + plugins" structure with good extensibility, where the framework acts as a data buffer and plugins provide access to the individual data sources.<br>
+ 
+【HData Features】<br>
+1. High-speed data transfer between heterogeneous data sources;<br>
+2. Cross-platform, standalone operation;<br>
+3. The whole transfer runs in memory, with no disk reads or writes;<br>
+4. Plugin-based extensibility.<br>
+
+【HData Design】<br>
+●Configuration file: XML format; configures the Reader and Writer parameters (e.g. parallelism, database connection URL, username, password);<br>
+●Reader: data-reading module; reads data from the source and writes it into the RingBuffer;<br>
+●Splitter: builds the ReaderConfig objects for the Reader according to the Reader parallelism in the configuration file, so data can be read in parallel;<br>
+●RingBuffer: the high-performance ring buffer from Disruptor, an asynchronous, lock-free, event-driven implementation optimized for CPU caches; here it is used to exchange data between Reader and Writer;<br>
+●Writer: data-writing module; reads data from the RingBuffer and writes it to the target data source.<br>
+
+The HData framework handles the common parts of data transfer (configuration loading and parsing, the RingBuffer, thread-pool wrapping) in one place, and exposes Reader, Splitter and Writer plugin interfaces on which plugins for all kinds of data sources can be conveniently built.
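
The plugin model described above can be sketched concretely. The class below is not part of this commit; it is a hypothetical reader written against the API introduced here (Reader, OutputFieldsDeclarer, Fields, DefaultRecord). The package name, the DemoReader name, the "rows" parameter and the recordCollector.send(Record) call are illustrative assumptions (RecordCollector is truncated at the end of this excerpt), so treat this as a sketch rather than project code.

package opensource.hdata.plugin.reader.demo;

import opensource.hdata.config.PluginConfig;
import opensource.hdata.core.DefaultRecord;
import opensource.hdata.core.Fields;
import opensource.hdata.core.JobContext;
import opensource.hdata.core.OutputFieldsDeclarer;
import opensource.hdata.core.plugin.Reader;
import opensource.hdata.core.plugin.RecordCollector;

/* Hypothetical reader that emits a fixed number of rows, for illustration only. */
public class DemoReader extends Reader {

    private int rows;

    @Override
    public void prepare(JobContext context, PluginConfig readerConfig) {
        // Plugin parameters come from the <reader> element of the job XML.
        rows = readerConfig.getInt("rows", 10);
    }

    @Override
    public void execute(RecordCollector recordCollector) {
        for (int i = 0; i < rows; i++) {
            DefaultRecord record = new DefaultRecord(2);
            record.addField(i);
            record.addField("row-" + i);
            // Assumption: RecordCollector exposes send(Record), publishing into the ring buffer.
            recordCollector.send(record);
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("id", "name"));
    }
}

Such a class would be registered under a <reader> entry in conf/plugins.xml (shown below) and referenced from a job XML by its plugin name.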

+ 46 - 0
bin/hdata

@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+
+
+CDPATH=""
+SCRIPT="$0"
+
+while [ -h "$SCRIPT" ] ; do
+  ls=`ls -ld "$SCRIPT"`
+  link=`expr "$ls" : '.*-> \(.*\)$'`
+  if expr "$link" : '/.*' > /dev/null; then
+    SCRIPT="$link"
+  else
+    SCRIPT=`dirname "$SCRIPT"`/"$link"
+  fi
+done
+
+HDATA_HOME=`dirname "$SCRIPT"`/..
+HDATA_HOME=`cd "$HDATA_HOME"; pwd`
+HDATA_LIB_DIR=$HDATA_HOME/lib
+HDATA_CONF_DIR=$HDATA_HOME/conf
+
+if [ -x "$JAVA_HOME/bin/java" ]; then
+    JAVA="$JAVA_HOME/bin/java"
+else
+    JAVA=`which java`
+fi
+
+if [ ! -x "$JAVA" ]; then
+    echo "Could not find any executable java binary. Please install java in your PATH or set JAVA_HOME"
+    exit 1
+fi
+
+HDATA_CLASSPATH='.'
+for f in $HDATA_LIB_DIR/*.jar; do
+    HDATA_CLASSPATH=${HDATA_CLASSPATH}:$f;
+done
+
+JAVA_OPTS="$JAVA_OPTS -Dhdata.conf.dir=$HDATA_CONF_DIR"
+JAVA_OPTS="$JAVA_OPTS -Dlog4j.configurationFile=file:///$HDATA_CONF_DIR/log4j2.xml"
+
+MAIN_CLASS="opensource.hdata.CliDriver"
+if [ "$1" = "execute-sql" ]; then
+    MAIN_CLASS="opensource.hdata.tool.SQLExecuteTool"
+fi
+
+exec "$JAVA" $JAVA_OPTS -cp "$HDATA_CLASSPATH" $MAIN_CLASS "$@"

+ 38 - 0
bin/hdata.bat

@@ -0,0 +1,38 @@
+@echo off
+
+SETLOCAL
+
+if NOT DEFINED JAVA_HOME goto err
+
+set SCRIPT_DIR=%~dp0
+for %%I in ("%SCRIPT_DIR%..") do set HDATA_HOME=%%~dpfI
+
+set MAIN_CLASSPATH=.;%HDATA_HOME%\lib\*
+set HDATA_CONF_DIR=%HDATA_HOME%\conf
+
+set JAVA_OPTS=%JAVA_OPTS% -Xss256k
+set JAVA_OPTS=%JAVA_OPTS% -XX:+UseParNewGC
+set JAVA_OPTS=%JAVA_OPTS% -XX:+UseConcMarkSweepGC
+
+set JAVA_OPTS=%JAVA_OPTS% -XX:CMSInitiatingOccupancyFraction=75
+set JAVA_OPTS=%JAVA_OPTS% -XX:+UseCMSInitiatingOccupancyOnly
+set JAVA_OPTS=%JAVA_OPTS% -XX:+HeapDumpOnOutOfMemoryError
+set JAVA_OPTS=%JAVA_OPTS% -Dhdata.conf.dir="%HDATA_CONF_DIR%"
+set JAVA_OPTS=%JAVA_OPTS% -Dlog4j.configurationFile="file:///%HDATA_CONF_DIR%/log4j2.xml"
+
+set FIRST_ARG=%1
+set MAIN_CLASS="opensource.hdata.CliDriver"
+if "%FIRST_ARG%"=="execute-sql" (set MAIN_CLASS="opensource.hdata.tool.SQLExecuteTool")
+
+"%JAVA_HOME%\bin\java" %JAVA_OPTS% -cp "%MAIN_CLASSPATH%" %MAIN_CLASS% %*
+
+goto finally
+
+:err
+echo JAVA_HOME environment variable must be set!
+pause
+
+
+:finally
+
+ENDLOCAL

+ 19 - 0
conf/hdata.xml

@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<configuration>
+	<property>
+		<name>hdata.storage.default.buffer.size</name>
+		<value>16384</value>
+		<description>Default storage buffer size; the value must be a power of two (2^n)</description>
+	</property>
+	<property>
+		<name>hdata.storage.disruptor.wait.strategy</name>
+		<value>BlockingWaitStrategy</value>
+		<description>Thread wait strategy; options: BlockingWaitStrategy, BusySpinWaitStrategy, SleepingWaitStrategy, YieldingWaitStrategy</description>
+	</property>
+	<property>
+		<name>hdata.hive.writer.tmp.dir</name>
+		<value>/tmp</value>
+		<description>Temporary directory used by the Hive writer when writing HDFS files</description>
+	</property>
+</configuration>
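
These engine properties are read through EngineConfig/Configuration (added later in this commit). A minimal sketch of picking them up and enforcing the "2^n" note on the buffer size, assuming hdata.conf.dir points at this conf directory (EngineConfig.create() loads hdata.xml from there); the power-of-two check mirrors what the Disruptor ring buffer requires:

import opensource.hdata.config.EngineConfig;

public class EngineConfigCheck {
    public static void main(String[] args) {
        EngineConfig conf = EngineConfig.create();
        int bufferSize = conf.getInt("hdata.storage.default.buffer.size", 16384);
        // A ring buffer size must be a power of two, hence the 2^n requirement above.
        if (Integer.bitCount(bufferSize) != 1) {
            throw new IllegalArgumentException("Buffer size must be a power of two: " + bufferSize);
        }
        String strategy = conf.getString("hdata.storage.disruptor.wait.strategy", "BlockingWaitStrategy");
        System.out.println("bufferSize=" + bufferSize + ", waitStrategy=" + strategy);
    }
}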

+ 14 - 0
conf/log4j2.xml

@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<configuration status="off">
+	<appenders>
+		<Console name="Console" target="SYSTEM_OUT">
+			<PatternLayout
+				pattern="%d{yyyy-MM-dd HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n" />
+		</Console>
+	</appenders>
+	<loggers>
+		<root level="info">
+			<appender-ref ref="Console" />
+		</root>
+	</loggers>
+</configuration>

+ 67 - 0
conf/plugins.xml

@@ -0,0 +1,67 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<plugins>
+	<readers>
+		<reader>
+			<name>jdbc</name>
+			<class>opensource.hdata.plugin.reader.jdbc.JDBCReader</class>
+			<splitter>opensource.hdata.plugin.reader.jdbc.JDBCSplitter</splitter>
+		</reader>
+		<reader>
+			<name>hive</name>
+			<class>opensource.hdata.plugin.reader.hive.HiveReader</class>
+			<splitter>opensource.hdata.plugin.reader.hive.HiveSplitter</splitter>
+		</reader>
+		<reader>
+			<name>hdfs</name>
+			<class>opensource.hdata.plugin.reader.hdfs.HDFSReader</class>
+			<splitter>opensource.hdata.plugin.reader.hdfs.HDFSSplitter</splitter>
+		</reader>
+		<reader>
+			<name>ftp</name>
+			<class>opensource.hdata.plugin.reader.ftp.FTPReader</class>
+			<splitter>opensource.hdata.plugin.reader.ftp.FTPSplitter</splitter>
+		</reader>
+		<reader>
+			<name>mongodb</name>
+			<class>opensource.hdata.plugin.reader.mongodb.MongoDBReader</class>
+			<splitter>opensource.hdata.plugin.reader.mongodb.MongoDBSplitter</splitter>
+		</reader>
+		<reader>
+			<name>hbase</name>
+			<class>opensource.hdata.plugin.reader.hbase.HBaseReader</class>
+			<splitter>opensource.hdata.plugin.reader.hbase.HBaseSplitter</splitter>
+		</reader>
+	</readers>
+
+	<writers>
+		<writer>
+			<name>console</name>
+			<class>opensource.hdata.plugin.writer.console.ConsoleWriter</class>
+		</writer>
+		<writer>
+			<name>jdbc</name>
+			<class>opensource.hdata.plugin.writer.jdbc.JDBCWriter</class>
+		</writer>
+		<writer>
+			<name>hive</name>
+			<class>opensource.hdata.plugin.writer.hive.HiveWriter</class>
+		</writer>
+		<writer>
+			<name>hdfs</name>
+			<class>opensource.hdata.plugin.writer.hdfs.HDFSWriter</class>
+		</writer>
+		<writer>
+			<name>ftp</name>
+			<class>opensource.hdata.plugin.writer.ftp.FTPWriter</class>
+		</writer>
+		<writer>
+			<name>mongodb</name>
+			<class>opensource.hdata.plugin.writer.mongodb.MongoDBWriter</class>
+		</writer>
+		<writer>
+			<name>hbase</name>
+			<class>opensource.hdata.plugin.writer.hbase.HBaseWriter</class>
+		</writer>
+	</writers>
+</plugins>
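
At class-load time PluginLoader (further down in this commit) parses this file into name-to-class mappings, and JobConfig instantiates plugins reflectively from them. A short sketch of that lookup, assuming hdata.conf.dir points at this conf directory so plugins.xml can be found; the demo wrapper class itself is hypothetical:

import opensource.hdata.core.PluginLoader;
import opensource.hdata.core.plugin.Reader;
import opensource.hdata.core.plugin.ReaderPlugin;

public class PluginLookupDemo {
    public static void main(String[] args) throws Exception {
        // "jdbc" is the <name> registered above.
        ReaderPlugin plugin = PluginLoader.getReaderPlugin("jdbc");
        System.out.println(plugin.getClassName());         // opensource.hdata.plugin.reader.jdbc.JDBCReader
        System.out.println(plugin.getSplitterClassName()); // opensource.hdata.plugin.reader.jdbc.JDBCSplitter

        // JobConfig.newReader() does essentially this:
        Reader reader = (Reader) Class.forName(plugin.getClassName()).newInstance();
        System.out.println(reader.getClass().getSimpleName()); // JDBCReader
    }
}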

+ 23 - 0
job-examples/ftp-ftp.xml

@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<job id="">
+    <reader name="ftp">
+    	<host>192.168.130.161</host>
+    	<username>1</username>
+    	<password>1@1</password>
+    	<dir>/etldata/input/sa_log/151_125</dir>
+    	<recursive></recursive>
+    	<filename>serv11-saIntf-pageTime-access-20140407_00.0.log</filename>
+    	<fieldsSeparator>|</fieldsSeparator>
+    	<encoding></encoding>
+		<parallelism>1</parallelism>
+	</reader>
+
+	<writer name="ftp">
+    	<host>localhost</host>
+    	<username>1</username>
+    	<password>1</password>
+    	<path>/ftp/tmp/1.txt</path>
+		<parallelism>1</parallelism>
+	</writer>
+</job>

+ 18 - 0
job-examples/hbase-console.xml

@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<job id="">
+    <reader name="hbase">
+        <zookeeperQuorum>192.168.142.16,192.168.142.17,192.168.142.18</zookeeperQuorum>
+		<zookeeperClientPort>2181</zookeeperClientPort>
+		<table>ip_address</table>
+		<columns>:rowkey,cf:start_ip,cf:end_ip,cf:start_ip_num,cf:end_ip_num,cf:country,cf:area,cf:province,cf:city,cf:isp</columns>
+		<schema>id,start_ip,end_ip,start_ip_num,end_ip_num,country,area,province,city,isp</schema>
+		<startRowkey>958200</startRowkey>
+		<endRowkey></endRowkey>
+		<parallelism>2</parallelism>
+    </reader>
+
+	<writer name="console">
+		<parallelism>1</parallelism>
+	</writer>
+</job>

+ 20 - 0
job-examples/hdfs-hive.xml

@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<job id="">
+    <reader name="hdfs">
+    	<dir>hdfs://192.168.142.21:8020/tmp/hdata_test</dir>
+    	<filename>.*\.csv</filename>
+    	<fieldsSeparator>,</fieldsSeparator>
+    	<encoding>gb18030</encoding>
+    	<hadoopUser>bigdata</hadoopUser>
+		<parallelism>1</parallelism>
+	</reader>
+
+	<writer name="hive">
+		<metastoreUris>thrift://192.168.142.21:9083</metastoreUris>
+		<database>default</database>
+		<table>tmp_hdata_rcfile_test</table>
+		<hadoopUser>bigdata</hadoopUser>
+		<parallelism>1</parallelism>
+	</writer>
+</job>

+ 19 - 0
job-examples/hdfs-jdbc.xml

@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<job id="">
+	<reader name="hdfs">
+		<dir>hdfs://192.168.142.21:8020/tmp/hdata_test</dir>
+		<filename>hdfs.test</filename>
+    	<hadoopUser>bigdata</hadoopUser>
+		<parallelism>1</parallelism>
+	</reader>
+	
+	<writer name="jdbc">
+		<driver>org.postgresql.Driver</driver>
+		<url>jdbc:postgresql://localhost:5432/ip</url>
+		<username>postgres</username>
+		<password>toor</password>
+		<table>tmp</table>
+		<parallelism>3</parallelism>
+	</writer>
+</job>

+ 20 - 0
job-examples/hive-jdbc.xml

@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<job id="">
+    <reader name="hive">
+        <metastoreUris>thrift://192.168.142.21:9083</metastoreUris>
+        <database>bi_td</database>
+        <table>tdm_common_td</table>
+        <partitions></partitions>
+        <parallelism>1</parallelism>
+    </reader>
+
+	<writer name="jdbc">
+		<driver>org.postgresql.Driver</driver>
+		<url>jdbc:postgresql://localhost:5432/tmp</url>
+		<username>postgres</username>
+		<password>toor</password>
+		<table>tdm_common_td</table>
+		<parallelism>3</parallelism>
+	</writer>
+</job>

+ 26 - 0
job-examples/jdbc-hbase.xml

@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<job id="">
+    <reader name="jdbc">
+		<driver>org.postgresql.Driver</driver>
+		<url>jdbc:postgresql://10.22.8.140:5432/ip</url>
+		<username>postgres</username>
+		<password>toor</password>
+		<table>ip_address</table>
+		<columns></columns>
+		<excludeColumns></excludeColumns>
+		<sql></sql>
+		<where></where>
+		<splitBy></splitBy>
+		<parallelism>1</parallelism>
+	</reader>
+
+	<writer name="hbase">
+		<zookeeperQuorum>192.168.142.16,192.168.142.17,192.168.142.18,192.168.142.19,192.168.142.20,192.168.142.21,192.168.142.23,192.168.142.24,192.168.142.25,192.168.142.26,192.168.142.27</zookeeperQuorum>
+		<zookeeperClientPort>2181</zookeeperClientPort>
+		<table>ip_address</table>
+		<columns>:rowkey,cf:start_ip,cf:end_ip,cf:start_ip_num,cf:end_ip_num,cf:country,cf:area,cf:province,cf:city,cf:isp</columns>
+		<batchInsertSize>10000</batchInsertSize>
+		<parallelism>1</parallelism>
+	</writer>
+</job>

+ 23 - 0
job-examples/jdbc-hdfs.xml

@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<job id="">
+    <reader name="jdbc">
+		<driver>org.postgresql.Driver</driver>
+		<url>jdbc:postgresql://10.22.8.140:5432/ip</url>
+		<username>postgres</username>
+		<password>toor</password>
+		<table>ip_address</table>
+		<columns></columns>
+		<excludeColumns></excludeColumns>
+		<sql></sql>
+		<where></where>
+		<splitBy></splitBy>
+		<parallelism>3</parallelism>
+	</reader>
+
+	<writer name="hdfs">
+		<path>hdfs://192.168.142.21:8020/tmp/hdata_test/hdfs.test</path>
+    	<hadoopUser>bigdata</hadoopUser>
+		<parallelism>1</parallelism>
+	</writer>
+</job>

+ 26 - 0
job-examples/jdbc-hive.xml

@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<job id="">
+    <reader name="jdbc">
+		<driver>org.postgresql.Driver</driver>
+		<url>jdbc:postgresql://localhost:5432/ip</url>
+		<username>postgres</username>
+		<password>toor</password>
+		<table>ip_address</table>
+		<columns></columns>
+		<excludeColumns></excludeColumns>
+		<sql></sql>
+		<where></where>
+		<splitBy></splitBy>
+		<parallelism>3</parallelism>
+	</reader>
+
+	<writer name="hive">
+		<metastoreUris>thrift://192.168.142.21:9083</metastoreUris>
+		<database>default</database>
+		<table>tmp_hdata_rcfile_test_p</table>
+		<partitions>p=20140407</partitions>
+		<hadoopUser>bigdata</hadoopUser>
+		<parallelism>3</parallelism>
+	</writer>
+</job>

+ 27 - 0
job-examples/jdbc-jdbc.xml

@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<job id="">
+	<reader name="jdbc">
+		<driver>org.postgresql.Driver</driver>
+		<url>jdbc:postgresql://localhost:5432/ip</url>
+		<username>postgres</username>
+		<password>toor</password>
+		<table>ip_address</table>
+		<columns></columns>
+		<excludeColumns></excludeColumns>
+		<sql></sql>
+		<where></where>
+		<splitBy></splitBy>
+		<parallelism>3</parallelism>
+	</reader>
+
+	<writer name="jdbc">
+		<driver>org.postgresql.Driver</driver>
+		<url>jdbc:postgresql://localhost:5432/ip</url>
+		<username>postgres</username>
+		<password>toor</password>
+		<table>tmp</table>
+		<batchInsertSize>10000</batchInsertSize>
+		<parallelism>3</parallelism>
+	</writer>
+</job>

+ 23 - 0
job-examples/jdbc-mongodb.xml

@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<job id="">
+    <reader name="jdbc">
+    	<driver>org.postgresql.Driver</driver>
+		<url>jdbc:postgresql://localhost:5432/ip</url>
+		<username>postgres</username>
+		<password>toor</password>
+		<table>ip_address</table>
+		<columns></columns>
+		<excludeColumns></excludeColumns>
+		<sql></sql>
+		<where></where>
+		<splitBy></splitBy>
+		<parallelism>3</parallelism>
+	</reader>
+
+	<writer name="mongodb">
+		<uri>mongodb://localhost/test.ip</uri>
+		<where></where>
+		<parallelism>3</parallelism>
+	</writer>
+</job>

+ 27 - 0
job-examples/job.xml

@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<job id="">
+	<reader name="jdbc">
+		<driver>com.mysql.jdbc.Driver</driver>
+        <url>jdbc:mysql://localhost:3306/test</url>
+        <username>root</username>
+        <password>toor</password>
+        <table>ip_address</table>
+        <columns></columns>
+		<excludeColumns></excludeColumns>
+		<sql></sql>
+		<where></where>
+		<splitBy></splitBy>
+		<parallelism>7</parallelism>
+	</reader>
+
+	<writer name="jdbc">
+		<driver>com.mysql.jdbc.Driver</driver>
+        <url>jdbc:mysql://localhost:3306/test?useUnicode=true&amp;characterEncoding=UTF-8</url>
+        <username>root</username>
+        <password>toor</password>
+        <table>tmp</table>
+		<batchInsertSize>10000</batchInsertSize>
+		<parallelism>3</parallelism>
+	</writer>
+</job>

+ 13 - 0
job-examples/mongodb-console.xml

@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<job id="">
+    <reader name="mongodb">
+        <uri>mongodb://localhost/test.ip</uri>
+        <query>{"city":"南京市"}</query>
+        <parallelism>1</parallelism>
+    </reader>
+
+	<writer name="console">
+		<parallelism>1</parallelism>
+	</writer>
+</job>
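
Each of the job files above is normally run through bin/hdata and CliDriver (next in this commit), which parse the XML and hand it to the HData engine. Programmatically the same flow is just two calls; a sketch, assuming hdata.conf.dir is set and the job file path exists:

import opensource.hdata.config.JobConfig;
import opensource.hdata.core.HData;

public class RunJobDemo {
    public static void main(String[] args) {
        // The same sequence CliDriver.main() performs after option parsing.
        JobConfig jobConfig = new JobConfig("job-examples/jdbc-jdbc.xml");
        new HData().start(jobConfig);
    }
}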

+ 166 - 0
pom.xml

@@ -0,0 +1,166 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+	<groupId>opensource</groupId>
+	<artifactId>hdata</artifactId>
+	<name>hdata</name>
+	<version>0.1</version>
+
+	<properties>
+		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+		<hadoopVersion>1.2.1</hadoopVersion>
+		<hiveVersion>0.12.0</hiveVersion>
+		<hbaseVersion>0.94.16</hbaseVersion>
+	</properties>
+
+	<inceptionYear>2014</inceptionYear>
+	<developers>
+		<developer>
+			<name>Jayer</name>
+			<email>dczxxuhai@gmail.com</email>
+		</developer>
+	</developers>
+
+	<dependencies>
+		<dependency>
+			<groupId>org.apache.logging.log4j</groupId>
+			<artifactId>log4j-api</artifactId>
+			<version>2.0-rc1</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.logging.log4j</groupId>
+			<artifactId>log4j-core</artifactId>
+			<version>2.0-rc1</version>
+		</dependency>
+		<dependency>
+			<groupId>com.google.guava</groupId>
+			<artifactId>guava</artifactId>
+			<version>16.0.1</version>
+		</dependency>
+		<dependency>
+			<groupId>com.lmax</groupId>
+			<artifactId>disruptor</artifactId>
+			<version>3.2.1</version>
+		</dependency>
+		<dependency>
+			<groupId>commons-cli</groupId>
+			<artifactId>commons-cli</artifactId>
+			<version>1.2</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.commons</groupId>
+			<artifactId>commons-lang3</artifactId>
+			<version>3.3.2</version>
+		</dependency>
+		<dependency>
+			<groupId>commons-cli</groupId>
+			<artifactId>commons-cli</artifactId>
+			<version>1.2</version>
+		</dependency>
+		<dependency>
+			<groupId>org.jdom</groupId>
+			<artifactId>jdom2</artifactId>
+			<version>2.0.5</version>
+		</dependency>
+		<dependency>
+			<groupId>javassist</groupId>
+			<artifactId>javassist</artifactId>
+			<version>3.18.1-GA</version>
+		</dependency>
+		<dependency>
+			<groupId>org.antlr</groupId>
+			<artifactId>antlr-runtime</artifactId>
+			<version>3.4</version>
+		</dependency>
+		<dependency>
+			<groupId>commons-configuration</groupId>
+			<artifactId>commons-configuration</artifactId>
+			<version>1.9</version>
+		</dependency>
+		<dependency>
+			<groupId>commons-lang</groupId>
+			<artifactId>commons-lang</artifactId>
+			<version>2.6</version>
+		</dependency>
+		<dependency>
+			<groupId>commons-logging</groupId>
+			<artifactId>commons-logging</artifactId>
+			<version>1.1.1</version>
+		</dependency>
+		<dependency>
+			<groupId>commons-net</groupId>
+			<artifactId>commons-net</artifactId>
+			<version>3.3</version>
+		</dependency>
+		<dependency>
+			<groupId>log4j</groupId>
+			<artifactId>log4j</artifactId>
+			<version>1.2.17</version>
+		</dependency>
+		<dependency>
+			<groupId>org.slf4j</groupId>
+			<artifactId>slf4j-api</artifactId>
+			<version>1.7.6</version>
+		</dependency>
+		<dependency>
+			<groupId>org.slf4j</groupId>
+			<artifactId>slf4j-log4j12</artifactId>
+			<version>1.7.6</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hive</groupId>
+			<artifactId>hive-exec</artifactId>
+			<version>${hiveVersion}</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hive</groupId>
+			<artifactId>hive-metastore</artifactId>
+			<version>${hiveVersion}</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-core</artifactId>
+			<version>${hadoopVersion}</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hbase</groupId>
+			<artifactId>hbase</artifactId>
+			<version>${hbaseVersion}</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.zookeeper</groupId>
+			<artifactId>zookeeper</artifactId>
+			<version>3.4.6</version>
+		</dependency>
+		<dependency>
+			<groupId>org.mongodb</groupId>
+			<artifactId>mongo-java-driver</artifactId>
+			<version>2.12.0</version>
+		</dependency>
+		<dependency>
+			<groupId>javax.jdo</groupId>
+			<artifactId>jdo-api</artifactId>
+			<version>3.0.1</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.thrift</groupId>
+			<artifactId>libfb303</artifactId>
+			<version>0.9.0</version>
+		</dependency>
+		<dependency>
+			<groupId>org.datanucleus</groupId>
+			<artifactId>datanucleus-api-jdo</artifactId>
+			<version>3.2.1</version>
+		</dependency>
+		<dependency>
+			<groupId>org.datanucleus</groupId>
+			<artifactId>datanucleus-core</artifactId>
+			<version>3.2.2</version>
+		</dependency>
+		<dependency>
+			<groupId>org.datanucleus</groupId>
+			<artifactId>datanucleus-rdbms</artifactId>
+			<version>3.2.1</version>
+		</dependency>
+	</dependencies>
+</project>

+ 108 - 0
src/main/java/opensource/hdata/CliDriver.java

@@ -0,0 +1,108 @@
+package opensource.hdata;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+import opensource.hdata.config.JobConfig;
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.HData;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+
+public class CliDriver {
+
+    private static final String XML_FILE = "f";
+    private static final String HDATA_VARS = "var";
+
+    /**
+     * Creates the command-line options.
+     * 
+     * @return
+     */
+    public Options createOptions() {
+        Options options = new Options();
+        options.addOption(XML_FILE, null, true, "job xml path");
+        OptionBuilder.withValueSeparator();
+        OptionBuilder.hasArgs(2);
+        OptionBuilder.withArgName("property=value");
+        OptionBuilder.withLongOpt(HDATA_VARS);
+        options.addOption(OptionBuilder.create());
+        return options;
+    }
+
+    /**
+     * Prints the command-line help message.
+     * 
+     * @param options
+     */
+    public void printHelp(Options options) {
+        HelpFormatter formatter = new HelpFormatter();
+        formatter.printHelp(" ", options);
+    }
+
+    /**
+     * Replaces command-line variables (${name}) in the plugin configuration.
+     * 
+     * @param config
+     * @param vars
+     */
+    public void replaceConfigVars(PluginConfig config, Map<String, String> vars) {
+        for (Entry<Object, Object> confEntry : config.entrySet()) {
+            if (confEntry.getKey().getClass() == String.class && confEntry.getValue().getClass() == String.class) {
+                for (Entry<String, String> varEntry : vars.entrySet()) {
+                    String replaceVar = "${" + varEntry.getKey() + "}";
+                    if (confEntry.getValue().toString().contains(replaceVar)) {
+                        config.put(confEntry.getKey(), confEntry.getValue().toString().replace(replaceVar, varEntry.getValue()));
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Main entry point.
+     * 
+     * @param args
+     */
+    public static void main(String[] args) {
+        CliDriver cliDriver = new CliDriver();
+        Options options = cliDriver.createOptions();
+        if (args.length < 1) {
+            cliDriver.printHelp(options);
+            System.exit(-1);
+        }
+
+        CommandLineParser parser = new PosixParser();
+        CommandLine cmd = null;
+        try {
+            cmd = parser.parse(options, args);
+            String jobXmlPath = cmd.getOptionValue(XML_FILE);
+            JobConfig jobConfig = new JobConfig(jobXmlPath);
+            Map<String, String> vars = new HashMap<String, String>();
+            Properties properties = cmd.getOptionProperties(HDATA_VARS);
+            for (String key : properties.stringPropertyNames()) {
+                vars.put(key, properties.getProperty(key));
+            }
+
+            final PluginConfig readerConfig = jobConfig.getReaderConfig();
+            final PluginConfig writerConfig = jobConfig.getWriterConfig();
+
+            cliDriver.replaceConfigVars(readerConfig, vars);
+            cliDriver.replaceConfigVars(writerConfig, vars);
+
+            HData hData = new HData();
+            hData.start(jobConfig);
+        } catch (ParseException e) {
+            cliDriver.printHelp(options);
+            System.exit(-1);
+        }
+    }
+}
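
The --var option together with replaceConfigVars() gives simple ${name} templating over job XML values. A small illustration of the substitution behaviour; the keys and values are made up for the example:

import java.util.HashMap;
import java.util.Map;

import opensource.hdata.CliDriver;
import opensource.hdata.config.PluginConfig;

public class VarSubstitutionDemo {
    public static void main(String[] args) {
        PluginConfig readerConfig = new PluginConfig();
        readerConfig.setString("url", "jdbc:mysql://${host}:3306/test");

        Map<String, String> vars = new HashMap<String, String>();
        vars.put("host", "localhost"); // roughly what --var host=localhost provides

        new CliDriver().replaceConfigVars(readerConfig, vars);
        System.out.println(readerConfig.getString("url")); // jdbc:mysql://localhost:3306/test
    }
}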

+ 13 - 0
src/main/java/opensource/hdata/common/Constants.java

@@ -0,0 +1,13 @@
+package opensource.hdata.common;
+
+public class Constants {
+
+    public static final String HDATA_XML = "hdata.xml";
+    public static final String PLUGINS_XML = "plugins.xml";
+    public static final String LOG4J2_XML = "log4j2.xml";
+    public static final String DATE_FORMAT_STRING = "yyyy-MM-dd HH:mm:ss";
+    public static final String COLUMNS_SPLIT_REGEX = "\\s*,\\s*";
+
+    private Constants() {
+    }
+}

+ 11 - 0
src/main/java/opensource/hdata/common/HDataConfigConstants.java

@@ -0,0 +1,11 @@
+package opensource.hdata.common;
+
+public class HDataConfigConstants {
+
+    public static final String STORAGE_BUFFER_SIZE = "hdata.storage.default.buffer.size";
+    public static final String HDATA_STORAGE_DISRUPTOR_WAIT_STRATEGY = "hdata.storage.disruptor.wait.strategy";
+    public static final String HDATA_SLEEP_MILLIS = "hdata.sleep.millis";
+
+    private HDataConfigConstants() {
+    }
+}

+ 66 - 0
src/main/java/opensource/hdata/config/Configuration.java

@@ -0,0 +1,66 @@
+package opensource.hdata.config;
+
+import java.util.Properties;
+
+public abstract class Configuration extends Properties {
+
+    private static final long serialVersionUID = 8606831740240321865L;
+
+    public String getString(String key, String defaultValue) {
+        String value = getProperty(key);
+        return value != null ? value : defaultValue;
+    }
+
+    public String getString(String key) {
+        return getProperty(key);
+    }
+
+    public void setString(String key, String value) {
+        setProperty(key, value);
+    }
+
+    public int getInt(String key, int defaultValue) {
+        String value = getProperty(key);
+        return value != null ? Integer.parseInt(value) : defaultValue;
+    }
+
+    public void setInt(String key, int value) {
+        setString(key, Integer.toString(value));
+    }
+
+    public long getLong(String key, long defaultValue) {
+        String value = getProperty(key);
+        return value != null ? Long.parseLong(value) : defaultValue;
+    }
+
+    public void setLong(String key, long value) {
+        setString(key, Long.toString(value));
+    }
+
+    public double getDouble(String key, double defaultValue) {
+        String value = getProperty(key);
+        return value != null ? Double.parseDouble(value) : defaultValue;
+    }
+
+    public void setDouble(String key, double value) {
+        setString(key, Double.toString(value));
+    }
+
+    public boolean getBoolean(String key, boolean defaultValue) {
+        String value = getProperty(key);
+        return value != null ? Boolean.parseBoolean(value) : defaultValue;
+    }
+
+    public void setBoolean(String key, boolean value) {
+        setString(key, Boolean.toString(value));
+    }
+
+    public float getFloat(String key, float defaultValue) {
+        String value = getProperty(key);
+        return value != null ? Float.parseFloat(value) : defaultValue;
+    }
+
+    public void setFloat(String key, float value) {
+        setString(key, Float.toString(value));
+    }
+}
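
Configuration is plain java.util.Properties plus typed accessors with defaults; EngineConfig, JobConfig and PluginConfig below all extend it. A brief usage sketch against the concrete PluginConfig subclass:

import opensource.hdata.config.PluginConfig;

public class ConfigurationDemo {
    public static void main(String[] args) {
        PluginConfig conf = new PluginConfig();
        conf.setString("parallelism", "3");
        conf.setBoolean("recursive", true);

        System.out.println(conf.getInt("parallelism", 1));           // 3
        System.out.println(conf.getBoolean("recursive", false));     // true
        System.out.println(conf.getLong("batchInsertSize", 10000L)); // falls back to the default
        System.out.println(conf.getParallelism());                   // 3, via the PluginConfig helper
    }
}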

+ 36 - 0
src/main/java/opensource/hdata/config/EngineConfig.java

@@ -0,0 +1,36 @@
+package opensource.hdata.config;
+
+import java.util.List;
+
+import opensource.hdata.common.Constants;
+import opensource.hdata.exception.HDataException;
+import opensource.hdata.util.Utils;
+import opensource.hdata.util.XMLUtils;
+
+import org.jdom2.Element;
+
+public class EngineConfig extends Configuration {
+
+    private static final long serialVersionUID = -4751544524691015405L;
+
+    private EngineConfig() {
+        super();
+    }
+
+    public static EngineConfig create() {
+        EngineConfig conf = new EngineConfig();
+        Element root = null;
+        try {
+            root = XMLUtils.load(Utils.getConfigDir() + Constants.HDATA_XML);
+        } catch (Exception e) {
+            throw new HDataException("Init EngineConf error!", e);
+        }
+        List<Element> list = root.getChildren("property");
+
+        for (Element element : list) {
+            conf.setString(element.getChildText("name"), element.getChildText("value"));
+        }
+        return conf;
+    }
+
+}

+ 112 - 0
src/main/java/opensource/hdata/config/JobConfig.java

@@ -0,0 +1,112 @@
+package opensource.hdata.config;
+
+import opensource.hdata.core.PluginLoader;
+import opensource.hdata.core.plugin.Reader;
+import opensource.hdata.core.plugin.Splitter;
+import opensource.hdata.core.plugin.Writer;
+import opensource.hdata.exception.HDataException;
+import opensource.hdata.util.XMLUtils;
+
+import org.jdom2.Element;
+
+public class JobConfig extends Configuration {
+
+    private Element root;
+    private PluginConfig readerConfig;
+    private PluginConfig writerConfig;
+    private static final long serialVersionUID = -106497323171420503L;
+
+    public JobConfig() {
+        super();
+    }
+
+    public JobConfig(String jobXmlPath) {
+        this();
+        try {
+            root = XMLUtils.load(jobXmlPath);
+        } catch (Exception e) {
+            throw new HDataException("Can not load job xml file: " + jobXmlPath, e);
+        }
+    }
+
+    public PluginConfig getReaderConfig() {
+        if (readerConfig == null) {
+            readerConfig = new PluginConfig();
+            for (Element e : root.getChild("reader").getChildren()) {
+                if (!e.getValue().trim().isEmpty()) {
+                    readerConfig.setProperty(e.getName(), e.getValue());
+                }
+            }
+        }
+
+        return readerConfig;
+    }
+
+    public PluginConfig getWriterConfig() {
+        if (writerConfig == null) {
+            writerConfig = new PluginConfig();
+            for (Element e : root.getChild("writer").getChildren()) {
+                if (!e.getValue().trim().isEmpty()) {
+                    writerConfig.setProperty(e.getName(), e.getValue());
+                }
+            }
+        }
+        return writerConfig;
+    }
+
+    public String getReaderName() {
+        return root.getChild("reader").getAttributeValue("name");
+    }
+
+    public String getReaderClassName() {
+        return PluginLoader.getReaderPlugin(getReaderName()).getClassName();
+    }
+
+    public Reader newReader() {
+        String readerClassName = getReaderClassName();
+        if (readerClassName == null) {
+            throw new HDataException("Can not find class for reader: " + getReaderName());
+        }
+
+        try {
+            return (Reader) Class.forName(readerClassName).newInstance();
+        } catch (Exception e) {
+            throw new HDataException("Can not create new reader instance for: " + getReaderName(), e);
+        }
+    }
+
+    public Splitter newSplitter() {
+        String splitterClassName = PluginLoader.getReaderPlugin(getReaderName()).getSplitterClassName();
+
+        if (splitterClassName == null) {
+            return null;
+        }
+
+        try {
+            return (Splitter) Class.forName(splitterClassName.trim()).newInstance();
+        } catch (Exception e) {
+            throw new HDataException("Can not find splitter for reader: " + getReaderName(), e);
+        }
+    }
+
+    public String getWriterName() {
+        return root.getChild("writer").getAttributeValue("name");
+    }
+
+    public String getWriterClassName() {
+        return PluginLoader.getWriterPlugin(getWriterName()).getClassName();
+    }
+
+    public Writer newWriter() {
+        String writerClassName = getWriterClassName();
+        if (writerClassName == null) {
+            throw new HDataException("Can not find class for writer: " + getWriterName());
+        }
+
+        try {
+            return (Writer) Class.forName(getWriterClassName()).newInstance();
+        } catch (Exception e) {
+            throw new HDataException("Can not create new writer instance for: " + getWriterName(), e);
+        }
+    }
+}

+ 20 - 0
src/main/java/opensource/hdata/config/PluginConfig.java

@@ -0,0 +1,20 @@
+package opensource.hdata.config;
+
+public class PluginConfig extends Configuration {
+
+    private static final String PARALLELISM_KEY = "parallelism";
+    private static final int DEFAULT_PARALLELISM = 1;
+    private static final long serialVersionUID = 3311331304791946068L;
+
+    public PluginConfig() {
+        super();
+    }
+
+    public int getParallelism() {
+        int parallelism = getInt(PARALLELISM_KEY, DEFAULT_PARALLELISM);
+        if (parallelism < 1) {
+            throw new IllegalArgumentException("Reader and Writer parallelism must be >= 1.");
+        }
+        return parallelism;
+    }
+}

+ 45 - 0
src/main/java/opensource/hdata/core/DefaultRecord.java

@@ -0,0 +1,45 @@
+package opensource.hdata.core;
+
+import opensource.hdata.core.plugin.Record;
+
+public class DefaultRecord implements Record {
+
+    private Object[] fields;
+    private int cursor;
+
+    public DefaultRecord(int fieldCount) {
+        fields = new Object[fieldCount];
+    }
+
+    public void addField(int index, Object field) {
+        fields[index] = field;
+        this.cursor++;
+    }
+
+    public void addField(Object field) {
+        addField(cursor, field);
+    }
+
+    public Object getField(int index) {
+        return fields[index];
+    }
+
+    public int getFieldsCount() {
+        return fields.length;
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append("{");
+        for (int i = 0, len = fields.length; i < len; i++) {
+            if (i > 0) {
+                sb.append(", ");
+            }
+            sb.append(fields[i]);
+        }
+        sb.append("}");
+        return sb.toString();
+    }
+
+}
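
A DefaultRecord is a fixed-size Object[] plus a cursor: addField(Object) appends at the cursor, and addField(int, Object), as written above, fills a specific slot and also advances the cursor. A quick sketch of the behaviour:

import opensource.hdata.core.DefaultRecord;

public class DefaultRecordDemo {
    public static void main(String[] args) {
        DefaultRecord record = new DefaultRecord(3);
        record.addField("1.0.0.0");   // index 0
        record.addField("1.0.0.255"); // index 1
        record.addField(2, "CN");     // explicit index; the cursor advances as well

        System.out.println(record.getFieldsCount()); // 3
        System.out.println(record.getField(2));      // CN
        System.out.println(record);                  // {1.0.0.0, 1.0.0.255, CN}
    }
}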

+ 20 - 0
src/main/java/opensource/hdata/core/Fields.java

@@ -0,0 +1,20 @@
+package opensource.hdata.core;
+
+import java.util.ArrayList;
+
+public class Fields extends ArrayList<String> {
+
+    private static final long serialVersionUID = -174064216143075549L;
+
+    public Fields() {
+        super();
+    }
+
+    public Fields(String... fields) {
+        super();
+        for (String field : fields) {
+            this.add(field);
+        }
+    }
+
+}

+ 142 - 0
src/main/java/opensource/hdata/core/HData.java

@@ -0,0 +1,142 @@
+package opensource.hdata.core;
+
+import java.text.DecimalFormat;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+import opensource.hdata.common.HDataConfigConstants;
+import opensource.hdata.config.EngineConfig;
+import opensource.hdata.config.JobConfig;
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.plugin.Reader;
+import opensource.hdata.core.plugin.RecordCollector;
+import opensource.hdata.core.plugin.Splitter;
+import opensource.hdata.core.plugin.Writer;
+import opensource.hdata.exception.HDataException;
+import opensource.hdata.util.Utils;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import com.lmax.disruptor.WaitStrategy;
+import com.lmax.disruptor.dsl.Disruptor;
+import com.lmax.disruptor.dsl.ProducerType;
+
+public class HData {
+
+    private DecimalFormat df = new DecimalFormat("#0.00");
+    private static final Logger LOG = LogManager.getLogger(HData.class);
+
+    public void start(final JobConfig jobConfig) {
+        final PluginConfig readerConfig = jobConfig.getReaderConfig();
+        final PluginConfig writerConfig = jobConfig.getWriterConfig();
+
+        LOG.info("Reader: {}, Writer: {}", jobConfig.getReaderName(), jobConfig.getWriterName());
+        int writerParallelism = writerConfig.getParallelism();
+
+        final JobContext context = new JobContext();
+        context.setJobConfig(jobConfig);
+        final Metric metric = new Metric();
+        context.setMetric(metric);
+        final OutputFieldsDeclarer outputFieldsDeclarer = new OutputFieldsDeclarer(context);
+        context.setDeclarer(outputFieldsDeclarer);
+
+        final EngineConfig engineConfig = EngineConfig.create();
+        context.setEngineConfig(engineConfig);
+
+        long sleepMillis = engineConfig.getLong(HDataConfigConstants.HDATA_SLEEP_MILLIS, 3000);
+
+        List<PluginConfig> readerConfigList = null;
+        Splitter splitter = jobConfig.newSplitter();
+        if (splitter != null) {
+            LOG.info("Executing splitter for reader.");
+            readerConfigList = splitter.split(jobConfig);
+            if (readerConfigList == null || readerConfigList.size() == 0) {
+                LOG.info("Job Finished.");
+                System.exit(0);
+            }
+        } else if (readerConfig.getParallelism() > 1) {
+            throw new HDataException("Reader parallelism is " + readerConfig.getParallelism() + ", but can not find splitter.");
+        } else {
+            readerConfigList = new ArrayList<PluginConfig>();
+            readerConfigList.add(readerConfig);
+        }
+
+        Reader[] readers = new Reader[readerConfigList.size()];
+        for (int i = 0, len = readers.length; i < len; i++) {
+            readers[i] = jobConfig.newReader();
+        }
+
+        LOG.info("Reader parallelism: {}, Writer parallelism: {}", readers.length, writerParallelism);
+
+        final Writer[] writers = new Writer[writerParallelism];
+        final RecordWorkHandler[] handlers = new RecordWorkHandler[writerParallelism];
+        for (int i = 0; i < writerParallelism; i++) {
+            writers[i] = jobConfig.newWriter();
+            handlers[i] = new RecordWorkHandler(readers, writers[i], context, writerConfig);
+        }
+
+        int bufferSize = engineConfig.getInt(HDataConfigConstants.STORAGE_BUFFER_SIZE, 1024);
+        String waitStrategyName = engineConfig.getString(HDataConfigConstants.HDATA_STORAGE_DISRUPTOR_WAIT_STRATEGY, "BlockingWaitStrategy");
+
+        Storage storage = createStorage(bufferSize, waitStrategyName, readers.length, handlers);
+        context.setStorage(storage);
+        RecordCollector rc = new RecordCollector(storage, metric);
+
+        LOG.info("Transferring data from reader to writer...");
+        ExecutorService es = Executors.newFixedThreadPool(readers.length);
+        for (int i = 0, len = readerConfigList.size(); i < len; i++) {
+            es.submit(new ReaderWorker(readers[i], context, readerConfigList.get(i), rc));
+        }
+        es.shutdown();
+
+        metric.setReaderStartTime(System.currentTimeMillis());
+        metric.setWriterStartTime(System.currentTimeMillis());
+        while (!es.isTerminated()) {
+            Utils.sleep(sleepMillis);
+            LOG.info("Read: {}\tWrite: {}", metric.getReadCount().get(), metric.getWriteCount().get());
+        }
+        metric.setReaderEndTime(System.currentTimeMillis());
+
+        while (!storage.isEmpty()) {
+            if (context.isWriterError()) {
+                LOG.error("Write error.");
+                break;
+            }
+            Utils.sleep(sleepMillis);
+            LOG.info("Read Finished(total: {}), Write: {}", metric.getReadCount().get(), metric.getWriteCount().get());
+        }
+        storage.close();
+        LOG.info("Read Finished(total: {}), Write Finished(total: {})", metric.getReadCount().get(), metric.getWriteCount().get());
+
+        metric.setWriterEndTime(System.currentTimeMillis());
+        for (Writer writer : writers) {
+            writer.close();
+        }
+
+        double readSeconds = (metric.getReaderEndTime() - metric.getReaderStartTime()) / 1000d;
+        double writeSeconds = (metric.getWriterEndTime() - metric.getWriterStartTime()) / 1000d;
+        String readSpeed = df.format(metric.getReadCount().get() / readSeconds);
+        String writeSpeed = df.format(metric.getWriteCount().get() / writeSeconds);
+        LOG.info("Read spent time: {}s, Write spent time: {}s", df.format(readSeconds), df.format(writeSeconds));
+        LOG.info("Read records: {}/s, Write records: {}/s", readSpeed, writeSpeed);
+    }
+
+    private Storage createStorage(int bufferSize, String waitStrategyName, int producerCount, RecordWorkHandler[] handlers) {
+        WaitStrategy waitStrategy = WaitStrategyFactory.build(waitStrategyName);
+        ExecutorService executorService = Executors.newCachedThreadPool();
+        ProducerType producerType;
+        if (producerCount == 1) {
+            producerType = ProducerType.SINGLE;
+        } else {
+            producerType = ProducerType.MULTI;
+        }
+        Disruptor<RecordEvent> disruptor = new Disruptor<RecordEvent>(RecordEvent.FACTORY, bufferSize, executorService, producerType, waitStrategy);
+        Storage storage = new Storage(disruptor, handlers);
+        executorService.shutdown();
+        return storage;
+    }
+
+}

+ 73 - 0
src/main/java/opensource/hdata/core/JobContext.java

@@ -0,0 +1,73 @@
+package opensource.hdata.core;
+
+import opensource.hdata.config.Configuration;
+import opensource.hdata.config.EngineConfig;
+import opensource.hdata.config.JobConfig;
+
+public class JobContext {
+
+    private Fields fields;
+    private EngineConfig engineConfig;
+    private JobConfig jobConfig;
+    private OutputFieldsDeclarer declarer;
+    private Storage storage;
+    private Metric metric;
+    private boolean isWriterError;
+
+    public Fields getFields() {
+        return fields;
+    }
+
+    protected void setFields(Fields fields) {
+        this.fields = fields;
+    }
+
+    public Configuration getEngineConfig() {
+        return engineConfig;
+    }
+
+    public void setEngineConfig(EngineConfig engineConfig) {
+        this.engineConfig = engineConfig;
+    }
+
+    protected OutputFieldsDeclarer getDeclarer() {
+        return declarer;
+    }
+
+    protected void setDeclarer(OutputFieldsDeclarer declarer) {
+        this.declarer = declarer;
+    }
+
+    public Storage getStorage() {
+        return storage;
+    }
+
+    public void setStorage(Storage storage) {
+        this.storage = storage;
+    }
+
+    public Metric getMetric() {
+        return metric;
+    }
+
+    public void setMetric(Metric metric) {
+        this.metric = metric;
+    }
+
+    public JobConfig getJobConfig() {
+        return jobConfig;
+    }
+
+    public void setJobConfig(JobConfig jobConfig) {
+        this.jobConfig = jobConfig;
+    }
+
+    public boolean isWriterError() {
+        return isWriterError;
+    }
+
+    public void setWriterError(boolean isWriterError) {
+        this.isWriterError = isWriterError;
+    }
+
+}

+ 62 - 0
src/main/java/opensource/hdata/core/Metric.java

@@ -0,0 +1,62 @@
+package opensource.hdata.core;
+
+import java.util.concurrent.atomic.AtomicLong;
+
+public class Metric {
+
+    private AtomicLong readCount = new AtomicLong(0);
+    private AtomicLong writeCount = new AtomicLong(0);
+    private long readerStartTime;
+    private long readerEndTime;
+    private long writerStartTime;
+    private long writerEndTime;
+
+    public AtomicLong getReadCount() {
+        return readCount;
+    }
+
+    public void setReadCount(AtomicLong readCount) {
+        this.readCount = readCount;
+    }
+
+    public AtomicLong getWriteCount() {
+        return writeCount;
+    }
+
+    public void setWriteCount(AtomicLong writeCount) {
+        this.writeCount = writeCount;
+    }
+
+    public long getReaderStartTime() {
+        return readerStartTime;
+    }
+
+    public void setReaderStartTime(long readerStartTime) {
+        this.readerStartTime = readerStartTime;
+    }
+
+    public long getReaderEndTime() {
+        return readerEndTime;
+    }
+
+    public void setReaderEndTime(long readerEndTime) {
+        this.readerEndTime = readerEndTime;
+    }
+
+    public long getWriterStartTime() {
+        return writerStartTime;
+    }
+
+    public void setWriterStartTime(long writerStartTime) {
+        this.writerStartTime = writerStartTime;
+    }
+
+    public long getWriterEndTime() {
+        return writerEndTime;
+    }
+
+    public void setWriterEndTime(long writerEndTime) {
+        this.writerEndTime = writerEndTime;
+    }
+
+}

+ 14 - 0
src/main/java/opensource/hdata/core/OutputFieldsDeclarer.java

@@ -0,0 +1,14 @@
+package opensource.hdata.core;
+
+public class OutputFieldsDeclarer {
+
+    private JobContext context;
+
+    public OutputFieldsDeclarer(JobContext context) {
+        this.context = context;
+    }
+
+    public void declare(Fields fields) {
+        context.setFields(fields);
+    }
+}

+ 56 - 0
src/main/java/opensource/hdata/core/PluginLoader.java

@@ -0,0 +1,56 @@
+package opensource.hdata.core;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import opensource.hdata.common.Constants;
+import opensource.hdata.core.plugin.ReaderPlugin;
+import opensource.hdata.core.plugin.WriterPlugin;
+import opensource.hdata.exception.HDataException;
+import opensource.hdata.util.Utils;
+import opensource.hdata.util.XMLUtils;
+
+import org.jdom2.Element;
+
+public class PluginLoader {
+
+    private static Map<String, ReaderPlugin> readerMap;
+    private static Map<String, WriterPlugin> writerMap;
+
+    public static ReaderPlugin getReaderPlugin(String name) {
+        return readerMap.get(name);
+    }
+
+    public static WriterPlugin getWriterPlugin(String name) {
+        return writerMap.get(name);
+    }
+
+    static {
+        readerMap = new HashMap<String, ReaderPlugin>();
+        writerMap = new HashMap<String, WriterPlugin>();
+
+        Element root;
+        try {
+            root = XMLUtils.load(Utils.getConfigDir() + Constants.PLUGINS_XML);
+        } catch (Exception e) {
+            throw new HDataException(e);
+        }
+        List<Element> readers = root.getChild("readers").getChildren("reader");
+        for (Element e : readers) {
+            ReaderPlugin readerPlugin = new ReaderPlugin();
+            readerPlugin.setPluginName(e.getChildText("name"));
+            readerPlugin.setClassName(e.getChildText("class"));
+            readerPlugin.setSplitterClassName(e.getChildText("splitter"));
+            readerMap.put(readerPlugin.getPluginName(), readerPlugin);
+        }
+
+        List<Element> writers = root.getChild("writers").getChildren("writer");
+        for (Element e : writers) {
+            WriterPlugin writerPlugin = new WriterPlugin();
+            writerPlugin.setPluginName(e.getChildText("name"));
+            writerPlugin.setClassName(e.getChildText("class"));
+            writerMap.put(writerPlugin.getPluginName(), writerPlugin);
+        }
+    }
+}

+ 27 - 0
src/main/java/opensource/hdata/core/ReaderWorker.java

@@ -0,0 +1,27 @@
+package opensource.hdata.core;
+
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.plugin.Reader;
+import opensource.hdata.core.plugin.RecordCollector;
+
+public class ReaderWorker implements Runnable {
+
+    private Reader reader;
+    private JobContext context;
+    private PluginConfig readerConfig;
+    private RecordCollector rc;
+
+    public ReaderWorker(Reader reader, JobContext context, PluginConfig readerConfig, RecordCollector rc) {
+        this.reader = reader;
+        this.context = context;
+        this.readerConfig = readerConfig;
+        this.rc = rc;
+    }
+
+    public void run() {
+        reader.prepare(context, readerConfig);
+        reader.execute(rc);
+        reader.close();
+    }
+
+}

+ 26 - 0
src/main/java/opensource/hdata/core/RecordEvent.java

@@ -0,0 +1,26 @@
+package opensource.hdata.core;
+
+import opensource.hdata.core.plugin.Record;
+
+import com.lmax.disruptor.EventFactory;
+
+public class RecordEvent {
+
+    private Record record;
+
+    public Record getRecord() {
+        return record;
+    }
+
+    public void setRecord(Record record) {
+        this.record = record;
+    }
+
+    public static final EventFactory<RecordEvent> FACTORY = new EventFactory<RecordEvent>() {
+
+        public RecordEvent newInstance() {
+            return new RecordEvent();
+        }
+    };
+
+}

+ 56 - 0
src/main/java/opensource/hdata/core/RecordWorkHandler.java

@@ -0,0 +1,56 @@
+package opensource.hdata.core;
+
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.plugin.Reader;
+import opensource.hdata.core.plugin.Writer;
+
+import com.lmax.disruptor.WorkHandler;
+
+public class RecordWorkHandler implements WorkHandler<RecordEvent> {
+
+    private Reader[] readers;
+    private Writer writer;
+    private JobContext context;
+    private PluginConfig writerConfig;
+    private boolean writerPrepared;
+    private boolean isWriterError;
+    private Metric metric;
+
+    public RecordWorkHandler(Reader[] readers, Writer writer, JobContext context, PluginConfig writerConfig) {
+        this.readers = readers;
+        this.writer = writer;
+        this.context = context;
+        this.writerConfig = writerConfig;
+        this.metric = context.getMetric();
+    }
+
+    public void onEvent(RecordEvent event) {
+        if (!isWriterError) {
+            try {
+                if (!writerPrepared) {
+                    for (Reader reader : readers) {
+                        if (context.getFields() == null) {
+                            reader.declareOutputFields(context.getDeclarer());
+                        } else {
+                            break;
+                        }
+                    }
+                    writer.prepare(context, writerConfig);
+                    writerPrepared = true;
+
+                    if (metric.getWriterStartTime() == 0) {
+                        metric.setWriterStartTime(System.currentTimeMillis());
+                    }
+                }
+
+                writer.execute(event.getRecord());
+                metric.getWriteCount().incrementAndGet();
+            } catch (Exception e) {
+                this.isWriterError = true;
+                context.setWriterError(true);
+                e.printStackTrace();
+            }
+        }
+    }
+
+}

+ 49 - 0
src/main/java/opensource/hdata/core/Storage.java

@@ -0,0 +1,49 @@
+package opensource.hdata.core;
+
+import opensource.hdata.core.plugin.Record;
+
+import com.lmax.disruptor.EventTranslatorOneArg;
+import com.lmax.disruptor.RingBuffer;
+import com.lmax.disruptor.dsl.Disruptor;
+
+public class Storage {
+
+    private Disruptor<RecordEvent> disruptor;
+    private RingBuffer<RecordEvent> ringBuffer;
+
+    private static final EventTranslatorOneArg<RecordEvent, Record> TRANSLATOR = new EventTranslatorOneArg<RecordEvent, Record>() {
+
+        public void translateTo(RecordEvent event, long sequence, Record record) {
+            event.setRecord(record);
+        }
+    };
+
+    public Storage(Disruptor<RecordEvent> disruptor, RecordWorkHandler[] handlers) {
+        this.disruptor = disruptor;
+        disruptor.handleEventsWithWorkerPool(handlers);
+        ringBuffer = disruptor.start();
+    }
+
+    public void put(Record record) {
+        disruptor.publishEvent(TRANSLATOR, record);
+    }
+
+    public void put(Record[] records) {
+        for (Record record : records) {
+            put(record);
+        }
+    }
+
+    public boolean isEmpty() {
+        return ringBuffer.remainingCapacity() == ringBuffer.getBufferSize();
+    }
+
+    public int size() {
+        return ringBuffer.getBufferSize();
+    }
+
+    public void close() {
+        disruptor.shutdown();
+    }
+
+}

+ 34 - 0
src/main/java/opensource/hdata/core/WaitStrategyFactory.java

@@ -0,0 +1,34 @@
+package opensource.hdata.core;
+
+import opensource.hdata.exception.HDataException;
+
+import com.lmax.disruptor.BlockingWaitStrategy;
+import com.lmax.disruptor.BusySpinWaitStrategy;
+import com.lmax.disruptor.SleepingWaitStrategy;
+import com.lmax.disruptor.WaitStrategy;
+import com.lmax.disruptor.YieldingWaitStrategy;
+
+public class WaitStrategyFactory {
+
+    /**
+     * Builds the thread wait strategy for the Disruptor ring buffer.
+     * 
+     * @param name the wait strategy name, e.g. "BlockingWaitStrategy"
+     * @return the matching WaitStrategy instance
+     */
+    public static WaitStrategy build(String name) {
+        WaitStrategy waitStrategy = null;
+        if ("BlockingWaitStrategy".equals(name)) {
+            waitStrategy = new BlockingWaitStrategy();
+        } else if ("BusySpinWaitStrategy".equals(name)) {
+            waitStrategy = new BusySpinWaitStrategy();
+        } else if ("SleepingWaitStrategy".equals(name)) {
+            waitStrategy = new SleepingWaitStrategy();
+        } else if ("YieldingWaitStrategy".equals(name)) {
+            waitStrategy = new YieldingWaitStrategy();
+        } else {
+            throw new HDataException("Invalid wait strategy: " + name);
+        }
+        return waitStrategy;
+    }
+}
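
A small usage sketch for the factory above; the valid name is one of the four accepted strings, and the invalid name is made up to show the failure path.

    package opensource.hdata.core;

    import com.lmax.disruptor.WaitStrategy;

    import opensource.hdata.exception.HDataException;

    public class WaitStrategyFactoryDemo {

        public static void main(String[] args) {
            // A recognized name returns the matching Disruptor strategy.
            WaitStrategy strategy = WaitStrategyFactory.build("SleepingWaitStrategy");
            System.out.println(strategy.getClass().getSimpleName());

            // Any other name is rejected with an HDataException.
            try {
                WaitStrategyFactory.build("NoSuchStrategy");
            } catch (HDataException e) {
                System.out.println("Rejected: " + e.getMessage());
            }
        }
    }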

+ 15 - 0
src/main/java/opensource/hdata/core/plugin/AbstractPlugin.java

@@ -0,0 +1,15 @@
+package opensource.hdata.core.plugin;
+
+public abstract class AbstractPlugin implements Pluginable {
+
+    private String pluginName;
+
+    public String getPluginName() {
+        return this.pluginName;
+    }
+
+    public void setPluginName(String name) {
+        this.pluginName = name;
+    }
+
+}

+ 8 - 0
src/main/java/opensource/hdata/core/plugin/Pluginable.java

@@ -0,0 +1,8 @@
+package opensource.hdata.core.plugin;
+
+public interface Pluginable {
+
+    public String getPluginName();
+
+    public void setPluginName(String name);
+}

+ 20 - 0
src/main/java/opensource/hdata/core/plugin/Reader.java

@@ -0,0 +1,20 @@
+package opensource.hdata.core.plugin;
+
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.JobContext;
+import opensource.hdata.core.OutputFieldsDeclarer;
+
+public abstract class Reader extends AbstractPlugin {
+
+    public void prepare(JobContext context, PluginConfig readerConfig) {
+    }
+
+    public void execute(RecordCollector recordCollector) {
+    }
+
+    public void close() {
+    }
+
+    public void declareOutputFields(OutputFieldsDeclarer declarer) {
+    }
+}
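
A hypothetical reader built on this base class might look like the sketch below; the package name, the "rows" config key and the field names are invented for illustration.

    package opensource.hdata.plugin.reader.demo;

    import opensource.hdata.config.PluginConfig;
    import opensource.hdata.core.DefaultRecord;
    import opensource.hdata.core.Fields;
    import opensource.hdata.core.JobContext;
    import opensource.hdata.core.OutputFieldsDeclarer;
    import opensource.hdata.core.plugin.Reader;
    import opensource.hdata.core.plugin.Record;
    import opensource.hdata.core.plugin.RecordCollector;

    public class DemoReader extends Reader {

        private Fields fields;
        private int rows;

        @Override
        public void prepare(JobContext context, PluginConfig readerConfig) {
            // "rows" is a hypothetical config key, defaulting to 10.
            rows = readerConfig.getInt("rows", 10);
            fields = new Fields();
            fields.add("id");
            fields.add("value");
        }

        @Override
        public void execute(RecordCollector recordCollector) {
            for (int i = 0; i < rows; i++) {
                Record record = new DefaultRecord(fields.size());
                record.addField(i);
                record.addField("value-" + i);
                recordCollector.send(record);
            }
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(fields);
        }
    }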

+ 24 - 0
src/main/java/opensource/hdata/core/plugin/ReaderPlugin.java

@@ -0,0 +1,24 @@
+package opensource.hdata.core.plugin;
+
+public class ReaderPlugin extends AbstractPlugin {
+
+    private String className;
+    private String splitterClassName;
+
+    public String getClassName() {
+        return className;
+    }
+
+    public void setClassName(String className) {
+        this.className = className;
+    }
+
+    public String getSplitterClassName() {
+        return splitterClassName;
+    }
+
+    public void setSplitterClassName(String splitterClassName) {
+        this.splitterClassName = splitterClassName;
+    }
+
+}

+ 12 - 0
src/main/java/opensource/hdata/core/plugin/Record.java

@@ -0,0 +1,12 @@
+package opensource.hdata.core.plugin;
+
+public interface Record {
+
+    public void addField(Object field);
+
+    public void addField(int index, Object field);
+
+    public Object getField(int index);
+
+    public int getFieldsCount();
+}

+ 24 - 0
src/main/java/opensource/hdata/core/plugin/RecordCollector.java

@@ -0,0 +1,24 @@
+package opensource.hdata.core.plugin;
+
+import opensource.hdata.core.Metric;
+import opensource.hdata.core.Storage;
+
+public class RecordCollector {
+
+    private Storage storage;
+    private Metric metric;
+
+    public RecordCollector(Storage storage, Metric metric) {
+        this.storage = storage;
+        this.metric = metric;
+    }
+
+    public void send(Record record) {
+        storage.put(record);
+        metric.getReadCount().incrementAndGet();
+    }
+
+    public void send(Record[] records) {
+        storage.put(records);
+        metric.getReadCount().addAndGet(records.length);
+    }
+}

+ 11 - 0
src/main/java/opensource/hdata/core/plugin/Splitter.java

@@ -0,0 +1,11 @@
+package opensource.hdata.core.plugin;
+
+import java.util.List;
+
+import opensource.hdata.config.JobConfig;
+import opensource.hdata.config.PluginConfig;
+
+public abstract class Splitter extends AbstractPlugin {
+
+    public abstract List<PluginConfig> split(JobConfig jobConfig);
+}
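
A hypothetical splitter might look like the sketch below; it simply clones the reader config once per degree of parallelism (the package name is invented).

    package opensource.hdata.plugin.reader.demo;

    import java.util.ArrayList;
    import java.util.List;

    import opensource.hdata.config.JobConfig;
    import opensource.hdata.config.PluginConfig;
    import opensource.hdata.core.plugin.Splitter;

    public class DemoSplitter extends Splitter {

        @Override
        public List<PluginConfig> split(JobConfig jobConfig) {
            PluginConfig readerConfig = jobConfig.getReaderConfig();
            int parallelism = readerConfig.getParallelism();

            List<PluginConfig> list = new ArrayList<PluginConfig>();
            for (int i = 0; i < parallelism; i++) {
                // Each split gets its own copy so per-split keys can be set independently.
                list.add((PluginConfig) readerConfig.clone());
            }
            return list;
        }
    }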

+ 16 - 0
src/main/java/opensource/hdata/core/plugin/Writer.java

@@ -0,0 +1,16 @@
+package opensource.hdata.core.plugin;
+
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.JobContext;
+
+public abstract class Writer extends AbstractPlugin {
+
+    public void prepare(JobContext context, PluginConfig writerConfig) {
+    }
+
+    public void execute(Record record) {
+    }
+
+    public void close() {
+    }
+}

+ 15 - 0
src/main/java/opensource/hdata/core/plugin/WriterPlugin.java

@@ -0,0 +1,15 @@
+package opensource.hdata.core.plugin;
+
+public class WriterPlugin extends AbstractPlugin {
+
+    private String className;
+
+    public String getClassName() {
+        return className;
+    }
+
+    public void setClassName(String className) {
+        this.className = className;
+    }
+
+}

+ 43 - 0
src/main/java/opensource/hdata/exception/HDataException.java

@@ -0,0 +1,43 @@
+package opensource.hdata.exception;
+
+public class HDataException extends RuntimeException {
+
+    private static final long serialVersionUID = 2510267358921118998L;
+
+    private String message;
+
+    public HDataException() {
+        super();
+    }
+
+    public HDataException(final String message) {
+        super(message);
+    }
+
+    public HDataException(final Exception e) {
+        super(e);
+    }
+
+    public HDataException(Throwable cause) {
+        super(cause);
+    }
+
+    public HDataException(final String message, final Throwable cause) {
+        super(message, cause);
+    }
+
+    @Override
+    public String getMessage() {
+        return this.message == null ? super.getMessage() : this.message;
+    }
+
+    public void setMessage(String message) {
+        this.message = message;
+    }
+
+    @Override
+    public String toString() {
+        return getMessage();
+    }
+
+}

+ 99 - 0
src/main/java/opensource/hdata/plugin/reader/ftp/FTPReader.java

@@ -0,0 +1,99 @@
+package opensource.hdata.plugin.reader.ftp;
+
+import java.io.BufferedReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.zip.GZIPInputStream;
+
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.DefaultRecord;
+import opensource.hdata.core.Fields;
+import opensource.hdata.core.JobContext;
+import opensource.hdata.core.OutputFieldsDeclarer;
+import opensource.hdata.core.plugin.Reader;
+import opensource.hdata.core.plugin.Record;
+import opensource.hdata.core.plugin.RecordCollector;
+import opensource.hdata.exception.HDataException;
+import opensource.hdata.util.EscaperUtils;
+import opensource.hdata.util.FTPUtils;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.net.ftp.FTPClient;
+
+public class FTPReader extends Reader {
+
+    private Fields fields;
+    private String host;
+    private int port;
+    private String username;
+    private String password;
+    private String fieldsSeparator;
+    private String encoding;
+    private int fieldsCount;
+    private List<String> files = new ArrayList<String>();
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public void prepare(JobContext context, PluginConfig readerConfig) {
+        host = readerConfig.getString(FTPReaderProperties.HOST);
+        port = readerConfig.getInt(FTPReaderProperties.PORT, 21);
+        username = readerConfig.getString(FTPReaderProperties.USERNAME, "anonymous");
+        password = readerConfig.getString(FTPReaderProperties.PASSWORD, "");
+        fieldsSeparator = EscaperUtils.parse(readerConfig.getString(FTPReaderProperties.FIELDS_SEPARATOR, "\t"));
+        encoding = readerConfig.getString(FTPReaderProperties.ENCODING, "UTF-8");
+        files = (List<String>) readerConfig.get(FTPReaderProperties.FILES);
+        fieldsCount = readerConfig.getInt(FTPReaderProperties.FIELDS_COUNT_FILTER, 0);
+
+        if (readerConfig.containsKey(FTPReaderProperties.SCHEMA)) {
+            fields = new Fields();
+            String[] tokens = readerConfig.getString(FTPReaderProperties.SCHEMA).split("\\s*,\\s*");
+            for (String field : tokens) {
+                fields.add(field);
+            }
+        }
+    }
+
+    @Override
+    public void execute(RecordCollector recordCollector) {
+        FTPClient ftpClient = null;
+        try {
+            ftpClient = FTPUtils.getFtpClient(host, port, username, password);
+            for (String file : files) {
+                InputStream is = ftpClient.retrieveFileStream(file);
+                BufferedReader br = null;
+                if (file.endsWith(".gz")) {
+                    GZIPInputStream gzin = new GZIPInputStream(is);
+                    br = new BufferedReader(new InputStreamReader(gzin, encoding));
+                } else {
+                    br = new BufferedReader(new InputStreamReader(is, encoding));
+                }
+
+                String line = null;
+                while ((line = br.readLine()) != null) {
+                    String[] tokens = StringUtils.splitByWholeSeparator(line, fieldsSeparator);
+                    if (tokens.length >= fieldsCount) {
+                        Record record = new DefaultRecord(tokens.length);
+                        for (String field : tokens) {
+                            record.addField(field);
+                        }
+                        recordCollector.send(record);
+                    }
+                }
+                ftpClient.completePendingCommand();
+                br.close();
+                is.close();
+            }
+        } catch (Exception e) {
+            throw new HDataException(e);
+        } finally {
+            FTPUtils.closeFtpClient(ftpClient);
+        }
+    }
+
+    @Override
+    public void declareOutputFields(OutputFieldsDeclarer declarer) {
+        declarer.declare(fields);
+    }
+}

+ 16 - 0
src/main/java/opensource/hdata/plugin/reader/ftp/FTPReaderProperties.java

@@ -0,0 +1,16 @@
+package opensource.hdata.plugin.reader.ftp;
+
+public class FTPReaderProperties {
+    public static final String HOST = "host";
+    public static final String PORT = "port";
+    public static final String USERNAME = "username";
+    public static final String PASSWORD = "password";
+    public static final String DIR = "dir";
+    public static final String FILENAME = "filename";
+    public static final String RECURSIVE = "recursive";
+    public static final String ENCODING = "encoding";
+    public static final String FIELDS_SEPARATOR = "fieldsSeparator";
+    public static final String SCHEMA = "schema";
+    public static final String FIELDS_COUNT_FILTER = "fieldsCountFilter";
+    public static final String FILES = "reader.files";
+}

+ 60 - 0
src/main/java/opensource/hdata/plugin/reader/ftp/FTPSplitter.java

@@ -0,0 +1,60 @@
+package opensource.hdata.plugin.reader.ftp;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import opensource.hdata.config.JobConfig;
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.plugin.Splitter;
+import opensource.hdata.exception.HDataException;
+import opensource.hdata.util.FTPUtils;
+
+import org.apache.commons.net.ftp.FTPClient;
+
+public class FTPSplitter extends Splitter {
+
+    @Override
+    public List<PluginConfig> split(JobConfig jobConfig) {
+        List<PluginConfig> list = new ArrayList<PluginConfig>();
+        PluginConfig readerConfig = jobConfig.getReaderConfig();
+        String host = readerConfig.getString(FTPReaderProperties.HOST);
+        int port = readerConfig.getInt(FTPReaderProperties.PORT, 21);
+        String username = readerConfig.getString(FTPReaderProperties.USERNAME, "anonymous");
+        String password = readerConfig.getString(FTPReaderProperties.PASSWORD, "");
+        String dir = readerConfig.getString(FTPReaderProperties.DIR);
+        String filenameRegexp = readerConfig.getString(FTPReaderProperties.FILENAME);
+        boolean recursive = readerConfig.getBoolean(FTPReaderProperties.RECURSIVE, false);
+        int parallelism = readerConfig.getParallelism();
+
+        FTPClient ftpClient = null;
+        try {
+            ftpClient = FTPUtils.getFtpClient(host, port, username, password);
+            List<String> files = new ArrayList<String>();
+            FTPUtils.listFile(files, ftpClient, dir, filenameRegexp, recursive);
+            if (files.size() > 0) {
+                if (parallelism == 1) {
+                    readerConfig.put(FTPReaderProperties.FILES, files);
+                    list.add(readerConfig);
+                } else {
+                    double step = (double) files.size() / parallelism;
+                    for (int i = 0; i < parallelism; i++) {
+                        List<String> splitedFiles = new ArrayList<String>();
+                        for (int start = (int) Math.ceil(step * i), end = (int) Math.ceil(step * (i + 1)); start < end; start++) {
+                            splitedFiles.add(files.get(start));
+                        }
+                        PluginConfig pluginConfig = (PluginConfig) readerConfig.clone();
+                        pluginConfig.put(FTPReaderProperties.FILES, splitedFiles);
+                        list.add(pluginConfig);
+                    }
+                }
+            }
+        } catch (Exception e) {
+            throw new HDataException(e);
+        } finally {
+            FTPUtils.closeFtpClient(ftpClient);
+        }
+
+        return list;
+    }
+
+}
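
The loop above spreads the matched files across splits using Math.ceil boundaries, so split sizes differ by at most one. A standalone sketch of the same arithmetic, with made-up file names:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class SplitMathDemo {

        public static void main(String[] args) {
            List<String> files = Arrays.asList("a", "b", "c", "d", "e", "f", "g");
            int parallelism = 3;
            double step = (double) files.size() / parallelism;
            for (int i = 0; i < parallelism; i++) {
                List<String> chunk = new ArrayList<String>();
                for (int start = (int) Math.ceil(step * i), end = (int) Math.ceil(step * (i + 1)); start < end; start++) {
                    chunk.add(files.get(start));
                }
                // Prints: split 0: [a, b, c], split 1: [d, e], split 2: [f, g]
                System.out.println("split " + i + ": " + chunk);
            }
        }
    }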

+ 106 - 0
src/main/java/opensource/hdata/plugin/reader/hbase/HBaseReader.java

@@ -0,0 +1,106 @@
+package opensource.hdata.plugin.reader.hbase;
+
+import java.io.IOException;
+
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.DefaultRecord;
+import opensource.hdata.core.Fields;
+import opensource.hdata.core.JobContext;
+import opensource.hdata.core.OutputFieldsDeclarer;
+import opensource.hdata.core.plugin.Reader;
+import opensource.hdata.core.plugin.Record;
+import opensource.hdata.core.plugin.RecordCollector;
+import opensource.hdata.exception.HDataException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+
+public class HBaseReader extends Reader {
+
+    private Fields fields = new Fields();
+    private HTable table;
+    private byte[] startRowkey;
+    private byte[] endRowkey;
+    private String[] columns;
+    private int rowkeyIndex = -1;
+    private static final String ROWKEY = ":rowkey";
+
+    @Override
+    public void prepare(JobContext context, PluginConfig readerConfig) {
+        startRowkey = (byte[]) readerConfig.get(HBaseReaderProperties.START_ROWKWY);
+        endRowkey = (byte[]) readerConfig.get(HBaseReaderProperties.END_ROWKWY);
+
+        String[] schema = readerConfig.getString(HBaseReaderProperties.SCHEMA).split(",");
+        for (String field : schema) {
+            fields.add(field);
+        }
+
+        Configuration conf = HBaseConfiguration.create();
+        conf.set("hbase.zookeeper.quorum", readerConfig.getString(HBaseReaderProperties.ZOOKEEPER_QUORUM));
+        conf.set("hbase.zookeeper.property.clientPort", readerConfig.getString(HBaseReaderProperties.ZOOKEEPER_PROPERTY_CLIENTPORT, "2181"));
+        columns = readerConfig.getString(HBaseReaderProperties.COLUMNS).split("\\s*,\\s*");
+        for (int i = 0, len = columns.length; i < len; i++) {
+            if (ROWKEY.equalsIgnoreCase(columns[i])) {
+                rowkeyIndex = i;
+                break;
+            }
+        }
+
+        try {
+            table = new HTable(conf, readerConfig.getString(HBaseReaderProperties.TABLE));
+        } catch (IOException e) {
+            e.printStackTrace();
+            throw new HDataException(e);
+        }
+    }
+
+    @Override
+    public void execute(RecordCollector recordCollector) {
+        Scan scan = new Scan();
+        if (startRowkey.length > 0) {
+            scan.setStartRow(startRowkey);
+        }
+        if (endRowkey.length > 0) {
+            scan.setStopRow(endRowkey);
+        }
+
+        for (int i = 0, len = columns.length; i < len; i++) {
+            if (i != rowkeyIndex) {
+                String[] column = columns[i].split(":");
+                scan.addColumn(Bytes.toBytes(column[0]), Bytes.toBytes(column[1]));
+            }
+        }
+
+        try {
+            ResultScanner results = table.getScanner(scan);
+            for (Result result : results) {
+                Record record = new DefaultRecord(fields.size());
+                for (int i = 0, len = fields.size(); i < len; i++) {
+                    if (i == rowkeyIndex) {
+                        record.addField(Bytes.toString(result.getRow()));
+                    } else {
+                        String[] column = columns[i].split(":");
+                        record.addField(Bytes.toString(result.getValue(Bytes.toBytes(column[0]), Bytes.toBytes(column[1]))));
+                    }
+                }
+                recordCollector.send(record);
+            }
+
+            if (table != null) {
+                table.close();
+            }
+        } catch (IOException e) {
+            throw new HDataException(e);
+        }
+    }
+
+    @Override
+    public void declareOutputFields(OutputFieldsDeclarer declarer) {
+        declarer.declare(fields);
+    }
+}

+ 12 - 0
src/main/java/opensource/hdata/plugin/reader/hbase/HBaseReaderProperties.java

@@ -0,0 +1,12 @@
+package opensource.hdata.plugin.reader.hbase;
+
+public class HBaseReaderProperties {
+
+    public static final String ZOOKEEPER_QUORUM = "zookeeperQuorum";
+    public static final String ZOOKEEPER_PROPERTY_CLIENTPORT = "zookeeperClientPort";
+    public static final String TABLE = "table";
+    public static final String START_ROWKWY = "startRowkey";
+    public static final String END_ROWKWY = "endRowkey";
+    public static final String COLUMNS = "columns";
+    public static final String SCHEMA = "schema";
+}

+ 118 - 0
src/main/java/opensource/hdata/plugin/reader/hbase/HBaseSplitter.java

@@ -0,0 +1,118 @@
+package opensource.hdata.plugin.reader.hbase;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import opensource.hdata.config.JobConfig;
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.plugin.Splitter;
+import opensource.hdata.exception.HDataException;
+import opensource.hdata.plugin.writer.hbase.HBaseWriterProperties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+public class HBaseSplitter extends Splitter {
+
+    private static final Logger LOG = LogManager.getLogger(HBaseSplitter.class);
+
+    @Override
+    public List<PluginConfig> split(JobConfig jobConfig) {
+        List<PluginConfig> list = new ArrayList<PluginConfig>();
+        PluginConfig readerConfig = jobConfig.getReaderConfig();
+        int parallelism = readerConfig.getParallelism();
+
+        String startRowkey = readerConfig.getString(HBaseReaderProperties.START_ROWKWY, "");
+        String endRowkey = readerConfig.getString(HBaseReaderProperties.END_ROWKWY, "");
+        byte[] startRowkeyBytes = startRowkey.getBytes();
+        byte[] endRowkeyBytes = endRowkey.getBytes();
+
+        if (parallelism == 1) {
+            readerConfig.put(HBaseReaderProperties.START_ROWKWY, startRowkeyBytes);
+            readerConfig.put(HBaseReaderProperties.END_ROWKWY, endRowkeyBytes);
+            list.add(readerConfig);
+            return list;
+        } else {
+            Configuration conf = HBaseConfiguration.create();
+            conf.set("hbase.zookeeper.quorum", readerConfig.getString(HBaseReaderProperties.ZOOKEEPER_QUORUM));
+            conf.set("hbase.zookeeper.property.clientPort", readerConfig.getString(HBaseReaderProperties.ZOOKEEPER_PROPERTY_CLIENTPORT, "2181"));
+            try {
+                HTable table = new HTable(conf, readerConfig.getString(HBaseReaderProperties.TABLE));
+                Pair<byte[][], byte[][]> startEndKeysPair = table.getStartEndKeys();
+                table.close();
+                List<Pair<byte[], byte[]>> selectedPairList = new ArrayList<Pair<byte[], byte[]>>();
+                byte[][] startKeys = startEndKeysPair.getFirst();
+                byte[][] endKeys = startEndKeysPair.getSecond();
+
+                if (startKeys.length == 1) {
+                    Pair<byte[], byte[]> pair = new Pair<byte[], byte[]>();
+                    pair.setFirst(startRowkeyBytes);
+                    pair.setSecond(endRowkeyBytes);
+                    selectedPairList.add(pair);
+                } else {
+                    if (startRowkeyBytes.length == 0 && endRowkeyBytes.length == 0) {
+                        for (int i = 0, len = startKeys.length; i < len; i++) {
+                            Pair<byte[], byte[]> pair = new Pair<byte[], byte[]>();
+                            pair.setFirst(startKeys[i]);
+                            pair.setSecond(endKeys[i]);
+                            selectedPairList.add(pair);
+                        }
+                    } else if (endRowkeyBytes.length == 0) {
+                        for (int i = 0, len = startKeys.length; i < len; i++) {
+                            if (Bytes.compareTo(endKeys[i], startRowkeyBytes) >= 0) {
+                                Pair<byte[], byte[]> pair = new Pair<byte[], byte[]>();
+                                pair.setFirst(Bytes.compareTo(startKeys[i], startRowkeyBytes) >= 0 ? startKeys[i] : startRowkeyBytes);
+                                pair.setSecond(endKeys[i]);
+                                selectedPairList.add(pair);
+                            }
+                        }
+                    } else {
+                        for (int i = 0, len = startKeys.length; i < len; i++) {
+                            if (len == 1) {
+                                Pair<byte[], byte[]> pair = new Pair<byte[], byte[]>();
+                                pair.setFirst(startRowkeyBytes);
+                                pair.setSecond(endRowkeyBytes);
+                                selectedPairList.add(pair);
+                                break;
+                            } else if (Bytes.compareTo(endKeys[i], startRowkeyBytes) >= 0 && Bytes.compareTo(endRowkeyBytes, startKeys[i]) >= 0) {
+                                Pair<byte[], byte[]> pair = new Pair<byte[], byte[]>();
+                                pair.setFirst(Bytes.compareTo(startKeys[i], startRowkeyBytes) >= 0 ? startKeys[i] : startRowkeyBytes);
+                                pair.setSecond(Bytes.compareTo(endKeys[i], endRowkeyBytes) <= 0 ? endKeys[i] : endRowkeyBytes);
+                                selectedPairList.add(pair);
+                            }
+                        }
+                    }
+                }
+
+                if (parallelism > selectedPairList.size()) {
+                    LOG.info(
+                            "Parallelism {} is greater than the region count {} of table {}, so parallelism is reduced to the region count.",
+                            parallelism, selectedPairList.size(), Bytes.toString(table.getTableName()));
+                    parallelism = selectedPairList.size();
+                }
+
+                double step = (double) selectedPairList.size() / parallelism;
+                for (int i = 0; i < parallelism; i++) {
+                    List<Pair<byte[], byte[]>> splitedPairs = new ArrayList<Pair<byte[], byte[]>>();
+                    for (int start = (int) Math.ceil(step * i), end = (int) Math.ceil(step * (i + 1)); start < end; start++) {
+                        splitedPairs.add(selectedPairList.get(start));
+                    }
+                    PluginConfig pluginConfig = (PluginConfig) readerConfig.clone();
+                    pluginConfig.put(HBaseReaderProperties.START_ROWKWY, splitedPairs.get(0).getFirst());
+                    pluginConfig.put(HBaseReaderProperties.END_ROWKWY, splitedPairs.get(splitedPairs.size() - 1).getSecond());
+                    list.add(pluginConfig);
+                }
+            } catch (IOException e) {
+                throw new HDataException(e);
+            }
+
+            return list;
+        }
+    }
+}
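
The region selection above keeps a region when its key range overlaps the requested rowkey range, with an empty rowkey meaning unbounded. A simplified standalone sketch of that overlap test (it ignores the empty end key of a table's last region; the row values are made up):

    import org.apache.hadoop.hbase.util.Bytes;

    public class RangeOverlapDemo {

        // A region [regionStart, regionEnd) is kept when it overlaps [startRowkey, endRowkey],
        // where a zero-length startRowkey or endRowkey means "unbounded".
        static boolean overlaps(byte[] regionStart, byte[] regionEnd, byte[] startRowkey, byte[] endRowkey) {
            boolean startOk = startRowkey.length == 0 || Bytes.compareTo(regionEnd, startRowkey) >= 0;
            boolean endOk = endRowkey.length == 0 || Bytes.compareTo(endRowkey, regionStart) >= 0;
            return startOk && endOk;
        }

        public static void main(String[] args) {
            byte[] regionStart = Bytes.toBytes("row100");
            byte[] regionEnd = Bytes.toBytes("row200");
            System.out.println(overlaps(regionStart, regionEnd, Bytes.toBytes("row150"), new byte[0])); // true
            System.out.println(overlaps(regionStart, regionEnd, Bytes.toBytes("row300"), new byte[0])); // false
        }
    }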

+ 86 - 0
src/main/java/opensource/hdata/plugin/reader/hdfs/HDFSReader.java

@@ -0,0 +1,86 @@
+package opensource.hdata.plugin.reader.hdfs;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.DefaultRecord;
+import opensource.hdata.core.Fields;
+import opensource.hdata.core.JobContext;
+import opensource.hdata.core.OutputFieldsDeclarer;
+import opensource.hdata.core.plugin.Reader;
+import opensource.hdata.core.plugin.Record;
+import opensource.hdata.core.plugin.RecordCollector;
+import opensource.hdata.exception.HDataException;
+import opensource.hdata.util.EscaperUtils;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+
+public class HDFSReader extends Reader {
+
+    private Fields fields;
+    private String fieldsSeparator;
+    private String encoding;
+    private List<Path> files = new ArrayList<Path>();
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public void prepare(JobContext context, PluginConfig readerConfig) {
+        fieldsSeparator = EscaperUtils.parse(readerConfig.getString(HDFSReaderProperties.FIELDS_SEPARATOR, "\t"));
+        files = (List<Path>) readerConfig.get(HDFSReaderProperties.FILES);
+        encoding = readerConfig.getString(HDFSReaderProperties.ENCODING, "UTF-8");
+        if (readerConfig.containsKey(HDFSReaderProperties.SCHEMA)) {
+            fields = new Fields();
+            String[] tokens = readerConfig.getString(HDFSReaderProperties.SCHEMA).split("\\s*,\\s*");
+            for (String field : tokens) {
+                fields.add(field);
+            }
+        }
+    }
+
+    @Override
+    public void execute(RecordCollector recordCollector) {
+        Configuration conf = new Configuration();
+        CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
+        try {
+            for (Path file : files) {
+                FileSystem fs = file.getFileSystem(conf);
+                CompressionCodec codec = codecFactory.getCodec(file);
+                FSDataInputStream input = fs.open(file);
+                BufferedReader br;
+                String line = null;
+                if (codec == null) {
+                    br = new BufferedReader(new InputStreamReader(input, encoding));
+                } else {
+                    br = new BufferedReader(new InputStreamReader(codec.createInputStream(input), encoding));
+                }
+                while ((line = br.readLine()) != null) {
+                    String[] tokens = StringUtils.splitByWholeSeparator(line, fieldsSeparator);
+                    Record record = new DefaultRecord(tokens.length);
+                    for (String field : tokens) {
+                        record.addField(field);
+                    }
+                    recordCollector.send(record);
+                }
+                br.close();
+            }
+        } catch (IOException e) {
+            e.printStackTrace();
+            throw new HDataException(e);
+        }
+    }
+
+    @Override
+    public void declareOutputFields(OutputFieldsDeclarer declarer) {
+        declarer.declare(fields);
+    }
+}

+ 11 - 0
src/main/java/opensource/hdata/plugin/reader/hdfs/HDFSReaderProperties.java

@@ -0,0 +1,11 @@
+package opensource.hdata.plugin.reader.hdfs;
+
+public class HDFSReaderProperties {
+    public static final String DIR = "dir";
+    public static final String FILENAME_REGEXP = "filename";
+    public static final String SCHEMA = "schema";
+    public static final String FIELDS_SEPARATOR = "fieldsSeparator";
+    public static final String ENCODING = "encoding";
+    public static final String HADOOP_USER = "hadoopUser";
+    public static final String FILES = "reader.files";
+}

+ 69 - 0
src/main/java/opensource/hdata/plugin/reader/hdfs/HDFSSplitter.java

@@ -0,0 +1,69 @@
+package opensource.hdata.plugin.reader.hdfs;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import opensource.hdata.config.JobConfig;
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.plugin.Splitter;
+import opensource.hdata.exception.HDataException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class HDFSSplitter extends Splitter {
+
+    @Override
+    public List<PluginConfig> split(JobConfig jobConfig) {
+        List<PluginConfig> list = new ArrayList<PluginConfig>();
+        List<Path> matchedFiles = new ArrayList<Path>();
+        PluginConfig readerConfig = jobConfig.getReaderConfig();
+        Path dir = new Path(readerConfig.getString(HDFSReaderProperties.DIR));
+        int parallelism = readerConfig.getParallelism();
+
+        System.setProperty("HADOOP_USER_NAME", readerConfig.getString(HDFSReaderProperties.HADOOP_USER));
+        Configuration conf = new Configuration();
+        try {
+            FileSystem fs = dir.getFileSystem(conf);
+            Pattern filenamePattern = Pattern.compile(readerConfig.getString(HDFSReaderProperties.FILENAME_REGEXP));
+            if (fs.exists(dir)) {
+                for (FileStatus fileStatus : fs.listStatus(dir)) {
+                    Matcher m = filenamePattern.matcher(fileStatus.getPath().getName());
+                    if (m.matches()) {
+                        matchedFiles.add(fileStatus.getPath());
+                    }
+                }
+
+                if (matchedFiles.size() > 0) {
+                    if (parallelism == 1) {
+                        readerConfig.put(HDFSReaderProperties.FILES, matchedFiles);
+                        list.add(readerConfig);
+                    } else {
+                        double step = (double) matchedFiles.size() / parallelism;
+                        for (int i = 0; i < parallelism; i++) {
+                            List<Path> splitedFiles = new ArrayList<Path>();
+                            for (int start = (int) Math.ceil(step * i), end = (int) Math.ceil(step * (i + 1)); start < end; start++) {
+                                splitedFiles.add(matchedFiles.get(start));
+                            }
+                            PluginConfig pluginConfig = (PluginConfig) readerConfig.clone();
+                            pluginConfig.put(HDFSReaderProperties.FILES, splitedFiles);
+                            list.add(pluginConfig);
+                        }
+                    }
+                }
+
+            } else {
+                throw new HDataException(String.format("Path %s not found.", dir));
+            }
+        } catch (IOException e) {
+            throw new HDataException(e);
+        }
+
+        return list;
+    }
+}

+ 108 - 0
src/main/java/opensource/hdata/plugin/reader/hive/HiveReader.java

@@ -0,0 +1,108 @@
+package opensource.hdata.plugin.reader.hive;
+
+import java.util.List;
+
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.DefaultRecord;
+import opensource.hdata.core.Fields;
+import opensource.hdata.core.JobContext;
+import opensource.hdata.core.OutputFieldsDeclarer;
+import opensource.hdata.core.plugin.Reader;
+import opensource.hdata.core.plugin.Record;
+import opensource.hdata.core.plugin.RecordCollector;
+import opensource.hdata.exception.HDataException;
+import opensource.hdata.util.HiveTypeUtils;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+@SuppressWarnings("deprecation")
+public class HiveReader extends Reader {
+
+    private final Fields fields = new Fields();
+    private List<String> files;
+    private List<String> partitionValues;
+    private Class<? extends InputFormat<Writable, Writable>> inputFormat;
+    private StructObjectInspector oi;
+    private List<? extends StructField> structFields;
+
+    private Deserializer deserializer;
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public void prepare(JobContext context, PluginConfig readerConfig) {
+        inputFormat = (Class<? extends InputFormat<Writable, Writable>>) readerConfig.get(HiveReaderProperties.INPUT_FORMAT_CLASS);
+        deserializer = (Deserializer) readerConfig.get(HiveReaderProperties.DESERIALIZER);
+        files = (List<String>) readerConfig.get(HiveReaderProperties.TABLE_FILES);
+        partitionValues = (List<String>) readerConfig.get(HiveReaderProperties.PARTITION_VALUES);
+        List<FieldSchema> columns = (List<FieldSchema>) readerConfig.get(HiveReaderProperties.TABLE_COLUMNS);
+
+        for (FieldSchema fs : columns) {
+            fields.add(fs.getName());
+        }
+
+        try {
+            oi = (StructObjectInspector) deserializer.getObjectInspector();
+        } catch (SerDeException e) {
+            throw new HDataException(e);
+        }
+        structFields = oi.getAllStructFieldRefs();
+    }
+
+    @Override
+    public void execute(RecordCollector recordCollector) {
+
+        int columnsCount = fields.size();
+        int partitionValueCount = partitionValues == null ? 0 : partitionValues.size();
+
+        JobConf jobConf = new JobConf();
+        for (String file : files) {
+            Path path = new Path(file);
+            try {
+                FileSystem fs = path.getFileSystem(jobConf);
+                FileInputFormat<Writable, Writable> fileInputFormat = (FileInputFormat<Writable, Writable>) inputFormat.newInstance();
+                long filelen = fs.getFileStatus(path).getLen();
+                FileSplit split = new FileSplit(path, 0, filelen, (String[]) null);
+                RecordReader<Writable, Writable> reader = fileInputFormat.getRecordReader(split, jobConf, Reporter.NULL);
+                Writable key = reader.createKey();
+                Writable value = reader.createValue();
+                while (reader.next(key, value)) {
+                    Object row = deserializer.deserialize(value);
+                    Record record = new DefaultRecord(columnsCount);
+                    for (int i = 0, len = structFields.size(); i < len; i++) {
+                        Object fieldData = oi.getStructFieldData(row, structFields.get(i));
+                        Object standardData = ObjectInspectorUtils.copyToStandardJavaObject(fieldData, structFields.get(i).getFieldObjectInspector());
+                        record.addField(HiveTypeUtils.toJavaObject(standardData));
+                    }
+
+                    for (int i = 0, len = partitionValueCount; i < len; i++) {
+                        record.addField(partitionValues.get(i));
+                    }
+                    recordCollector.send(record);
+                }
+                reader.close();
+            } catch (Exception e) {
+                throw new HDataException(e);
+            }
+        }
+    }
+
+    @Override
+    public void declareOutputFields(OutputFieldsDeclarer declarer) {
+        declarer.declare(fields);
+    }
+
+}

+ 14 - 0
src/main/java/opensource/hdata/plugin/reader/hive/HiveReaderProperties.java

@@ -0,0 +1,14 @@
+package opensource.hdata.plugin.reader.hive;
+
+public class HiveReaderProperties {
+    public static final String METASTORE_URIS = "metastoreUris";
+    public static final String DATABASE = "database";
+    public static final String TABLE = "table";
+    public static final String SELECT_COLUMNS = "columns";
+    public static final String TABLE_COLUMNS = "reader.columns";
+    public static final String PARTITIONS = "partitions";
+    public static final String TABLE_FILES = "reader.table.files";
+    public static final String PARTITION_VALUES = "reader.partition.values";
+    public static final String INPUT_FORMAT_CLASS = "reader.input.format.class";
+    public static final String DESERIALIZER = "reader.deserializer";
+}

+ 118 - 0
src/main/java/opensource/hdata/plugin/reader/hive/HiveSplitter.java

@@ -0,0 +1,118 @@
+package opensource.hdata.plugin.reader.hive;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import opensource.hdata.config.JobConfig;
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.plugin.Splitter;
+import opensource.hdata.exception.HDataException;
+import opensource.hdata.util.LoggerUtils;
+import opensource.hdata.util.Utils;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+public class HiveSplitter extends Splitter {
+
+    private static final Logger LOG = LogManager.getLogger(HiveSplitter.class);
+
+    @Override
+    public List<PluginConfig> split(JobConfig jobConfig) {
+        List<PluginConfig> list = new ArrayList<PluginConfig>();
+        PluginConfig readerConfig = jobConfig.getReaderConfig();
+        String metastoreUris = readerConfig.getString(HiveReaderProperties.METASTORE_URIS);
+        String dbName = readerConfig.getString(HiveReaderProperties.DATABASE, "default");
+        String tableName = readerConfig.getString(HiveReaderProperties.TABLE);
+        int parallelism = readerConfig.getParallelism();
+        List<String> partitionValues = null;
+
+        HiveConf conf = new HiveConf();
+        conf.set(ConfVars.METASTOREURIS.varname, metastoreUris);
+
+        Hive hive;
+        Table table;
+        try {
+            hive = Hive.get(conf, true);
+            table = hive.getTable(dbName, tableName, false);
+        } catch (HiveException e) {
+            throw new HDataException(e);
+        }
+
+        if (table == null) {
+            throw new HDataException(String.format("Table %s.%s is not exist.", dbName, tableName));
+        }
+
+        readerConfig.put(HiveReaderProperties.TABLE_COLUMNS, table.getAllCols());
+        readerConfig.put(HiveReaderProperties.INPUT_FORMAT_CLASS, table.getInputFormatClass());
+        readerConfig.put(HiveReaderProperties.DESERIALIZER, table.getDeserializer());
+
+        String tableLocation = Utils.fixLocaltion(table.getDataLocation().toString(), metastoreUris);
+        if (readerConfig.containsKey(HiveReaderProperties.PARTITIONS)) {
+            String partitions = readerConfig.getString(HiveReaderProperties.PARTITIONS);
+            tableLocation += "/" + partitions.replaceAll("\\s*,\\s*", "/");
+            partitionValues = Utils.parsePartitionValue(partitions);
+            readerConfig.put(HiveReaderProperties.PARTITION_VALUES, partitionValues);
+        }
+
+        List<String> files = getTableFiles(tableLocation);
+        if (files == null || files.size() < 1) {
+            LOG.info("Can not find files on path {}", tableLocation);
+            return null;
+        }
+
+        if (parallelism > files.size()) {
+            parallelism = files.size();
+            LOG.info("Reader parallelism is greater than file count, so parallelism is set to equal with file count.");
+        }
+
+        if (parallelism == 1) {
+            readerConfig.put(HiveReaderProperties.TABLE_FILES, files);
+            list.add(readerConfig);
+        } else {
+            double step = (double) files.size() / parallelism;
+            for (int i = 0; i < parallelism; i++) {
+                List<String> splitedFiles = new ArrayList<String>();
+                for (int start = (int) Math.ceil(step * i), end = (int) Math.ceil(step * (i + 1)); start < end; start++) {
+                    splitedFiles.add(files.get(start));
+                }
+                PluginConfig pluginConfig = (PluginConfig) readerConfig.clone();
+                pluginConfig.put(HiveReaderProperties.TABLE_FILES, splitedFiles);
+                list.add(pluginConfig);
+            }
+        }
+
+        Hive.closeCurrent();
+        return list;
+    }
+
+    private List<String> getTableFiles(String tableLocation) {
+        try {
+            Configuration conf = new Configuration();
+            Path path = new Path(tableLocation);
+            FileSystem hdfs = path.getFileSystem(conf);
+            FileStatus[] fileStatus = hdfs.listStatus(path);
+            List<String> files = new ArrayList<String>();
+            for (FileStatus fs : fileStatus) {
+                if (!fs.isDir() && !fs.getPath().getName().startsWith("_")) {
+                    files.add(fs.getPath().toString());
+                }
+            }
+            return files;
+        } catch (IOException e) {
+            LoggerUtils.error(LOG, e);
+            return null;
+        }
+    }
+
+}

+ 15 - 0
src/main/java/opensource/hdata/plugin/reader/jdbc/JBDCReaderProperties.java

@@ -0,0 +1,15 @@
+package opensource.hdata.plugin.reader.jdbc;
+
+public class JBDCReaderProperties {
+
+    public static final String DRIVER = "driver";
+    public static final String URL = "url";
+    public static final String USERNAME = "username";
+    public static final String PASSWORD = "password";
+    public static final String TABLE = "table";
+    public static final String COLUMNS = "columns";
+    public static final String EXCLUDE_COLUMNS = "excludeColumns";
+    public static final String WHERE = "where";
+    public static final String SQL = "sql";
+    public static final String SPLIT_BY = "splitBy";
+}

+ 87 - 0
src/main/java/opensource/hdata/plugin/reader/jdbc/JDBCReader.java

@@ -0,0 +1,87 @@
+package opensource.hdata.plugin.reader.jdbc;
+
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Statement;
+
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.DefaultRecord;
+import opensource.hdata.core.Fields;
+import opensource.hdata.core.JobContext;
+import opensource.hdata.core.OutputFieldsDeclarer;
+import opensource.hdata.core.plugin.Reader;
+import opensource.hdata.core.plugin.Record;
+import opensource.hdata.core.plugin.RecordCollector;
+import opensource.hdata.exception.HDataException;
+import opensource.hdata.util.JDBCUtils;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+public class JDBCReader extends Reader {
+
+    private Connection connection;
+    private String sql;
+    private Fields fields;
+    private static final Logger LOG = LogManager.getLogger(JDBCReader.class);
+
+    @Override
+    public void prepare(JobContext context, PluginConfig readerConfig) {
+        String driver = readerConfig.getString(JBDCReaderProperties.DRIVER);
+        String url = readerConfig.getString(JBDCReaderProperties.URL);
+        String username = readerConfig.getString(JBDCReaderProperties.USERNAME);
+        String password = readerConfig.getString(JBDCReaderProperties.PASSWORD);
+        sql = readerConfig.getString(JBDCReaderProperties.SQL);
+        LOG.debug(sql);
+
+        try {
+            connection = JDBCUtils.getConnection(driver, url, username, password);
+        } catch (Exception e) {
+            throw new HDataException(e);
+        }
+    }
+
+    @Override
+    public void execute(RecordCollector recordCollector) {
+        try {
+            Statement statement = connection.createStatement();
+
+            ResultSet rs = statement.executeQuery(sql);
+            ResultSetMetaData metaData = rs.getMetaData();
+            int columnCount = metaData.getColumnCount();
+
+            if (fields == null) {
+                fields = new Fields();
+                for (int i = 1; i <= columnCount; i++) {
+                    fields.add(metaData.getColumnName(i));
+                }
+            }
+
+            while (rs.next()) {
+                Record r = new DefaultRecord(columnCount);
+                for (int i = 1; i <= columnCount; i++) {
+                    r.addField(i - 1, rs.getObject(i));
+                }
+                }
+                recordCollector.send(r);
+            }
+            rs.close();
+            statement.close();
+        } catch (SQLException e) {
+            e.printStackTrace();
+            JDBCUtils.closeConnection(connection);
+            throw new HDataException(e);
+        }
+    }
+
+    @Override
+    public void close() {
+        JDBCUtils.closeConnection(connection);
+    }
+
+    @Override
+    public void declareOutputFields(OutputFieldsDeclarer declarer) {
+        declarer.declare(fields);
+    }
+}

+ 164 - 0
src/main/java/opensource/hdata/plugin/reader/jdbc/JDBCSplitter.java

@@ -0,0 +1,164 @@
+package opensource.hdata.plugin.reader.jdbc;
+
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+
+import opensource.hdata.common.Constants;
+import opensource.hdata.config.JobConfig;
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.plugin.Splitter;
+import opensource.hdata.exception.HDataException;
+import opensource.hdata.util.JDBCUtils;
+import opensource.hdata.util.Utils;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import com.google.common.base.Joiner;
+
+public class JDBCSplitter extends Splitter {
+
+    private static final String CONDITIONS_REGEX = "\\$CONDITIONS";
+    private static final Logger LOG = LogManager.getLogger(JDBCSplitter.class);
+
+    private void checkIfContainsConditionKey(String sql, String errorMessage) {
+        if (!sql.contains("$CONDITIONS")) {
+            throw new HDataException(errorMessage);
+        }
+    }
+
+    private List<PluginConfig> buildPluginConfigs(Connection conn, String sql, String splitColumn, PluginConfig readerConfig) {
+        List<PluginConfig> list = new ArrayList<PluginConfig>();
+        try {
+            int parallelism = readerConfig.getParallelism();
+            double[] minAndMax = JDBCUtils.querySplitColumnRange(conn, sql.replaceAll(CONDITIONS_REGEX, "(1 = 1)"), splitColumn);
+            double min = minAndMax[0];
+            double max = minAndMax[1] + 1;
+            double step = (max - min) / parallelism;
+            for (int i = 0, len = parallelism; i < len; i++) {
+                PluginConfig otherReaderConfig = (PluginConfig) readerConfig.clone();
+                StringBuilder sb = new StringBuilder();
+                sb.append(splitColumn);
+                sb.append(" >= ");
+                sb.append((long) Math.ceil(min + step * i));
+                sb.append(" AND ");
+                sb.append(splitColumn);
+
+                if (i == (len - 1)) {
+                    sb.append(" <= ");
+                } else {
+                    sb.append(" < ");
+                }
+                sb.append((long) Math.ceil(min + step * (i + 1)));
+
+                otherReaderConfig.setProperty(JBDCReaderProperties.SQL, sql.toString().replaceAll(CONDITIONS_REGEX, sb.toString()));
+                list.add(otherReaderConfig);
+            }
+            return list;
+        } catch (SQLException e) {
+            throw new HDataException(e);
+        } finally {
+            JDBCUtils.closeConnection(conn);
+        }
+    }
+
+    @Override
+    public List<PluginConfig> split(JobConfig jobConfig) {
+        PluginConfig readerConfig = jobConfig.getReaderConfig();
+        String driver = readerConfig.getString(JBDCReaderProperties.DRIVER);
+        String url = readerConfig.getString(JBDCReaderProperties.URL);
+        String username = readerConfig.getString(JBDCReaderProperties.USERNAME);
+        String password = readerConfig.getString(JBDCReaderProperties.PASSWORD);
+        int parallelism = readerConfig.getParallelism();
+
+        StringBuilder sql = new StringBuilder();
+        if (readerConfig.containsKey(JBDCReaderProperties.SQL)) {
+            if (parallelism > 1) {
+                checkIfContainsConditionKey(readerConfig.getString(JBDCReaderProperties.SQL),
+                        "Reader must contains key word \"$CONDITIONS\" in sql property when parallelism > 1.");
+            }
+            sql.append(readerConfig.get(JBDCReaderProperties.SQL));
+        } else {
+            String table = readerConfig.getString(JBDCReaderProperties.TABLE);
+            sql.append("SELECT ");
+            if (!readerConfig.containsKey(JBDCReaderProperties.COLUMNS) && !readerConfig.containsKey(JBDCReaderProperties.EXCLUDE_COLUMNS)) {
+                sql.append("*");
+            } else if (readerConfig.containsKey(JBDCReaderProperties.COLUMNS)) {
+                String columns = readerConfig.getString(JBDCReaderProperties.COLUMNS);
+                sql.append(columns);
+            } else if (readerConfig.containsKey(JBDCReaderProperties.EXCLUDE_COLUMNS)) {
+                String[] excludeColumns = readerConfig.getString(JBDCReaderProperties.EXCLUDE_COLUMNS).trim().split(Constants.COLUMNS_SPLIT_REGEX);
+                Connection conn = null;
+                try {
+                    conn = JDBCUtils.getConnection(driver, url, username, password);
+                    String selectColumns = Joiner.on(", ").join(Utils.getColumns(JDBCUtils.getColumnNames(conn, table), excludeColumns));
+                    sql.append(selectColumns);
+                } catch (Exception e) {
+                    e.printStackTrace();
+                    JDBCUtils.closeConnection(conn);
+                    throw new HDataException(e);
+                }
+
+            }
+            sql.append(" FROM ");
+            sql.append(table);
+
+            if (readerConfig.containsKey(JBDCReaderProperties.WHERE)) {
+                String where = readerConfig.getString(JBDCReaderProperties.WHERE);
+                sql.append(" WHERE ");
+                sql.append(where);
+                sql.append(" AND $CONDITIONS");
+            } else {
+                sql.append(" WHERE $CONDITIONS");
+            }
+        }
+
+        if (readerConfig.containsKey(JBDCReaderProperties.SPLIT_BY)) {
+            String splitColumn = readerConfig.getString(JBDCReaderProperties.SPLIT_BY);
+            LOG.debug("Get split-by column: {}", splitColumn);
+
+            Connection conn = null;
+            try {
+                conn = JDBCUtils.getConnection(driver, url, username, password);
+                return buildPluginConfigs(conn, sql.toString(), splitColumn, readerConfig);
+            } catch (Exception e) {
+                throw new HDataException(e);
+            } finally {
+                JDBCUtils.closeConnection(conn);
+            }
+        } else {
+            if (readerConfig.containsKey(JBDCReaderProperties.TABLE)) {
+                Connection conn = null;
+                try {
+                    String table = readerConfig.getString(JBDCReaderProperties.TABLE);
+                    LOG.info("Attemp to query digital primary key for table: {}", table);
+                    conn = JDBCUtils.getConnection(driver, url, username, password);
+                    String splitColumn = JDBCUtils.getDigitalPrimaryKey(conn, conn.getCatalog(), null, table);
+                    if (splitColumn != null) {
+                        LOG.info("Table {} find digital primary key: {}", table, splitColumn);
+                        return buildPluginConfigs(conn, sql.toString(), splitColumn, readerConfig);
+                    } else {
+                        LOG.info("Table {} can not find digital primary key.", table);
+                    }
+                } catch (Exception e) {
+                    throw new HDataException(e);
+                } finally {
+                    JDBCUtils.closeConnection(conn);
+                }
+            }
+
+            if (parallelism > 1) {
+                LOG.warn(
+                        "Reader parallelism is set to {}, but the \"split-by\" config is not given, so reader parallelism is set to default value: 1.",
+                        parallelism);
+            }
+
+            List<PluginConfig> list = new ArrayList<PluginConfig>();
+            readerConfig.setProperty(JBDCReaderProperties.SQL, sql.toString().replaceAll(CONDITIONS_REGEX, "(1 = 1)"));
+            list.add(readerConfig);
+            return list;
+        }
+    }
+}
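
The splitter above rewrites the $CONDITIONS placeholder into a per-split range predicate on the split column, mirroring buildPluginConfigs. A standalone sketch of that rewriting with made-up values:

    public class ConditionsRewriteDemo {

        public static void main(String[] args) {
            String sql = "SELECT id, name FROM users WHERE status = 1 AND $CONDITIONS";
            String splitColumn = "id";
            double min = 1;
            double max = 1000 + 1; // max + 1, as in buildPluginConfigs
            int parallelism = 4;
            double step = (max - min) / parallelism;

            for (int i = 0; i < parallelism; i++) {
                StringBuilder range = new StringBuilder();
                range.append(splitColumn).append(" >= ").append((long) Math.ceil(min + step * i));
                range.append(" AND ").append(splitColumn);
                range.append(i == parallelism - 1 ? " <= " : " < ");
                range.append((long) Math.ceil(min + step * (i + 1)));
                // e.g. "SELECT id, name FROM users WHERE status = 1 AND id >= 1 AND id < 251"
                System.out.println(sql.replaceAll("\\$CONDITIONS", range.toString()));
            }
        }
    }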

+ 78 - 0
src/main/java/opensource/hdata/plugin/reader/mongodb/MongoDBReader.java

@@ -0,0 +1,78 @@
+package opensource.hdata.plugin.reader.mongodb;
+
+import java.net.UnknownHostException;
+import java.util.Set;
+
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.DefaultRecord;
+import opensource.hdata.core.Fields;
+import opensource.hdata.core.JobContext;
+import opensource.hdata.core.OutputFieldsDeclarer;
+import opensource.hdata.core.plugin.Reader;
+import opensource.hdata.core.plugin.Record;
+import opensource.hdata.core.plugin.RecordCollector;
+import opensource.hdata.exception.HDataException;
+
+import com.mongodb.BasicDBObject;
+import com.mongodb.DB;
+import com.mongodb.DBCollection;
+import com.mongodb.DBCursor;
+import com.mongodb.DBObject;
+import com.mongodb.MongoClient;
+import com.mongodb.MongoClientURI;
+
+public class MongoDBReader extends Reader {
+
+    private Fields fields;
+    private String uri;
+    private BasicDBObject condition;
+    private static final String OBJECT_ID_KEY = "_id";
+
+    @Override
+    public void prepare(JobContext context, PluginConfig readerConfig) {
+        uri = readerConfig.getString(MongoDBReaderProperties.URI);
+        condition = (BasicDBObject) readerConfig.get(MongoDBReaderProperties.QUERY);
+    }
+
+    @Override
+    public void execute(RecordCollector recordCollector) {
+        MongoClientURI clientURI = new MongoClientURI(uri);
+        MongoClient mongoClient = null;
+        try {
+            mongoClient = new MongoClient(clientURI);
+            DB db = mongoClient.getDB(clientURI.getDatabase());
+            DBCollection coll = db.getCollection(clientURI.getCollection());
+            DBCursor cur = coll.find(condition);
+            while (cur.hasNext()) {
+                DBObject doc = cur.next();
+                Set<String> keys = doc.keySet();
+                Record record = new DefaultRecord(keys.size() - 1);
+                if (fields == null) {
+                    fields = new Fields();
+                    for (String key : keys) {
+                        fields.add(key);
+                    }
+                }
+
+                for (String key : keys) {
+                    if (!OBJECT_ID_KEY.equals(key)) {
+                        record.addField(doc.get(key));
+                    }
+                }
+
+                recordCollector.send(record);
+            }
+        } catch (UnknownHostException e) {
+            throw new HDataException(e);
+        } finally {
+            if (mongoClient != null) {
+                mongoClient.close();
+            }
+        }
+    }
+
+    @Override
+    public void declareOutputFields(OutputFieldsDeclarer declarer) {
+        declarer.declare(fields);
+    }
+}

+ 7 - 0
src/main/java/opensource/hdata/plugin/reader/mongodb/MongoDBReaderProperties.java

@@ -0,0 +1,7 @@
+package opensource.hdata.plugin.reader.mongodb;
+
+public class MongoDBReaderProperties {
+
+    public static final String URI = "uri";
+    public static final String QUERY = "query";
+}

+ 95 - 0
src/main/java/opensource/hdata/plugin/reader/mongodb/MongoDBSplitter.java

@@ -0,0 +1,95 @@
+package opensource.hdata.plugin.reader.mongodb;
+
+import java.math.BigInteger;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.List;
+
+import opensource.hdata.config.JobConfig;
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.plugin.Splitter;
+import opensource.hdata.exception.HDataException;
+
+import org.bson.types.ObjectId;
+
+import com.mongodb.BasicDBObject;
+import com.mongodb.DB;
+import com.mongodb.DBCollection;
+import com.mongodb.DBCursor;
+import com.mongodb.DBObject;
+import com.mongodb.MongoClient;
+import com.mongodb.MongoClientURI;
+import com.mongodb.util.JSON;
+
+public class MongoDBSplitter extends Splitter {
+
+    private static final String OBJECT_ID_KEY = "_id";
+    private static final int HEXADECIMAL = 16;
+
+    @Override
+    public List<PluginConfig> split(JobConfig jobConfig) {
+        List<PluginConfig> list = new ArrayList<PluginConfig>();
+        PluginConfig readerConfig = jobConfig.getReaderConfig();
+        String uri = readerConfig.getString(MongoDBReaderProperties.URI);
+        int parallelism = readerConfig.getParallelism();
+
+        MongoClientURI clientURI = new MongoClientURI(uri);
+        MongoClient mongoClient = null;
+        try {
+            mongoClient = new MongoClient(clientURI);
+            DB db = mongoClient.getDB(clientURI.getDatabase());
+            DBCollection coll = db.getCollection(clientURI.getCollection());
+
+            String maxID = "";
+            String minID = "";
+            DBObject sort = new BasicDBObject();
+            sort.put(OBJECT_ID_KEY, -1);
+            DBCursor cursor = coll.find().sort(sort).limit(1);
+            while (cursor.hasNext()) {
+                maxID = cursor.next().get(OBJECT_ID_KEY).toString();
+            }
+
+            sort.put(OBJECT_ID_KEY, 1);
+            cursor = coll.find().sort(sort).limit(1);
+            while (cursor.hasNext()) {
+                minID = cursor.next().get(OBJECT_ID_KEY).toString();
+            }
+
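+            // Split on _id: treat the ObjectId hex strings as integers and divide the
+            // [min, max] range into `parallelism` contiguous sub-ranges, one reader each.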
+            if (!maxID.isEmpty() && !minID.isEmpty()) {
+                BigInteger maxBigInteger = new BigInteger(maxID, HEXADECIMAL);
+                BigInteger minBigInteger = new BigInteger(minID, HEXADECIMAL);
+                BigInteger step = (maxBigInteger.subtract(minBigInteger).divide(BigInteger.valueOf(parallelism)));
+                for (int i = 0, len = parallelism; i < len; i++) {
+                    BasicDBObject condition = null;
+                    if (readerConfig.containsKey(MongoDBReaderProperties.QUERY)) {
+                        condition = (BasicDBObject) JSON.parse(readerConfig.getString(MongoDBReaderProperties.QUERY));
+                    } else {
+                        condition = new BasicDBObject();
+                    }
+
+                    // ObjectId requires a 24-character hex string, so left-pad with zeros.
+                    String lowerBound = String.format("%024x", minBigInteger.add(step.multiply(BigInteger.valueOf(i))));
+                    BasicDBObject idRange = new BasicDBObject("$gte", new ObjectId(lowerBound));
+                    if (i == len - 1) {
+                        idRange.append("$lte", new ObjectId(String.format("%024x", maxBigInteger)));
+                    } else {
+                        idRange.append("$lt", new ObjectId(String.format("%024x", minBigInteger.add(step.multiply(BigInteger.valueOf(i + 1))))));
+                    }
+
+                    condition.put(OBJECT_ID_KEY, idRange);
+
+                    PluginConfig pluginConfig = (PluginConfig) readerConfig.clone();
+                    pluginConfig.put(MongoDBReaderProperties.QUERY, condition);
+                    list.add(pluginConfig);
+                }
+            }
+        } catch (UnknownHostException e) {
+            throw new HDataException(e);
+        } finally {
+            if (mongoClient != null) {
+                mongoClient.close();
+            }
+        }
+
+        return list;
+    }
+}

+ 12 - 0
src/main/java/opensource/hdata/plugin/writer/console/ConsoleWriter.java

@@ -0,0 +1,12 @@
+package opensource.hdata.plugin.writer.console;
+
+import opensource.hdata.core.plugin.Record;
+import opensource.hdata.core.plugin.Writer;
+
+public class ConsoleWriter extends Writer {
+
+    @Override
+    public void execute(Record record) {
+        System.out.println(record);
+    }
+}

+ 116 - 0
src/main/java/opensource/hdata/plugin/writer/ftp/FTPWriter.java

@@ -0,0 +1,116 @@
+package opensource.hdata.plugin.writer.ftp;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.zip.GZIPOutputStream;
+
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.JobContext;
+import opensource.hdata.core.plugin.Record;
+import opensource.hdata.core.plugin.Writer;
+import opensource.hdata.exception.HDataException;
+import opensource.hdata.util.EscaperUtils;
+import opensource.hdata.util.FTPUtils;
+
+import org.apache.commons.net.ftp.FTPClient;
+
+import com.google.common.base.Joiner;
+
+public class FTPWriter extends Writer {
+
+    private String host;
+    private int port;
+    private String username;
+    private String password;
+    private String fieldsSeparator;
+    private String lineSeparator;
+    private String encoding;
+    private String path;
+    private boolean gzipCompress;
+    private FTPClient ftpClient;
+    private BufferedWriter bw;
+    private String[] strArray;
+    private static AtomicInteger sequence = new AtomicInteger(0);
+    private static final Pattern REG_FILE_PATH_WITHOUT_EXTENSION = Pattern.compile(".*?(?=\\.\\w+$)");
+    private static final Pattern REG_FILE_EXTENSION = Pattern.compile("(\\.\\w+)$");
+
+    @Override
+    public void prepare(JobContext context, PluginConfig writerConfig) {
+        host = writerConfig.getString(FTPWriterProperties.HOST);
+        port = writerConfig.getInt(FTPWriterProperties.PORT, 21);
+        username = writerConfig.getString(FTPWriterProperties.USERNAME, "anonymous");
+        password = writerConfig.getString(FTPWriterProperties.PASSWORD, "");
+        fieldsSeparator = EscaperUtils.parse(writerConfig.getString(FTPWriterProperties.FIELDS_SEPARATOR, "\t"));
+        lineSeparator = EscaperUtils.parse(writerConfig.getString(FTPWriterProperties.LINE_SEPARATOR, "\n"));
+        encoding = writerConfig.getString(FTPWriterProperties.ENCODING, "UTF-8");
+        path = writerConfig.getString(FTPWriterProperties.PATH);
+        gzipCompress = writerConfig.getBoolean(FTPWriterProperties.GZIP_COMPRESS, false);
+
+        int parallelism = writerConfig.getParallelism();
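+        // When several writer instances run in parallel, give each one its own output file
+        // by appending a zero-padded sequence number before the file extension.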
+        if (parallelism > 1) {
+            String filePathWithoutExtension = "";
+            String fileExtension = "";
+            Matcher m1 = REG_FILE_PATH_WITHOUT_EXTENSION.matcher(path.trim());
+            if (m1.find()) {
+                filePathWithoutExtension = m1.group();
+            }
+
+            Matcher m2 = REG_FILE_EXTENSION.matcher(path.trim());
+            if (m2.find()) {
+                fileExtension = m2.group();
+            }
+            path = String.format("%s_%04d%s", filePathWithoutExtension, sequence.getAndIncrement(), fileExtension);
+        }
+
+        try {
+            ftpClient = FTPUtils.getFtpClient(host, port, username, password);
+            OutputStream outputStream = ftpClient.storeFileStream(path);
+            if (gzipCompress) {
+                bw = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(outputStream), encoding));
+            } else {
+                bw = new BufferedWriter(new OutputStreamWriter(outputStream, encoding));
+            }
+        } catch (Exception e) {
+            throw new HDataException(e);
+        }
+    }
+
+    @Override
+    public void execute(Record record) {
+        if (strArray == null) {
+            strArray = new String[record.getFieldsCount()];
+        }
+
+        for (int i = 0, len = record.getFieldsCount(); i < len; i++) {
+            Object o = record.getField(i);
+            if (o == null) {
+                strArray[i] = "NULL";
+            } else {
+                strArray[i] = o.toString();
+            }
+        }
+        try {
+            bw.write(Joiner.on(fieldsSeparator).join(strArray));
+            bw.write(lineSeparator);
+        } catch (IOException e) {
+            throw new HDataException(e);
+        }
+    }
+
+    @Override
+    public void close() {
+        if (bw != null) {
+            try {
+                bw.close();
+                // Finalize the STOR transfer opened by storeFileStream().
+                ftpClient.completePendingCommand();
+            } catch (IOException e) {
+                throw new HDataException(e);
+            }
+        }
+        FTPUtils.closeFtpClient(ftpClient);
+    }
+}

+ 13 - 0
src/main/java/opensource/hdata/plugin/writer/ftp/FTPWriterProperties.java

@@ -0,0 +1,13 @@
+package opensource.hdata.plugin.writer.ftp;
+
+public class FTPWriterProperties {
+    public static final String HOST = "host";
+    public static final String PORT = "port";
+    public static final String USERNAME = "username";
+    public static final String PASSWORD = "password";
+    public static final String PATH = "path";
+    public static final String ENCODING = "encoding";
+    public static final String FIELDS_SEPARATOR = "fieldsSeparator";
+    public static final String LINE_SEPARATOR = "lineSeparator";
+    public static final String GZIP_COMPRESS = "gzipCompress";
+}

+ 91 - 0
src/main/java/opensource/hdata/plugin/writer/hbase/HBaseWriter.java

@@ -0,0 +1,91 @@
+package opensource.hdata.plugin.writer.hbase;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.JobContext;
+import opensource.hdata.core.plugin.Record;
+import opensource.hdata.core.plugin.Writer;
+import opensource.hdata.exception.HDataException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.util.Bytes;
+
+public class HBaseWriter extends Writer {
+
+    private HTable table;
+    private int batchSize;
+    private int rowkeyIndex = -1;
+    private List<Put> putList = new ArrayList<Put>();
+    private String[] columns;
+    private static final String ROWKEY = ":rowkey";
+
+    @Override
+    public void prepare(JobContext context, PluginConfig writerConfig) {
+        Configuration conf = HBaseConfiguration.create();
+        conf.set("hbase.zookeeper.quorum", writerConfig.getString(HBaseWriterProperties.ZOOKEEPER_QUORUM));
+        conf.set("hbase.zookeeper.property.clientPort", writerConfig.getString(HBaseWriterProperties.ZOOKEEPER_PROPERTY_CLIENTPORT, "2181"));
+        batchSize = writerConfig.getInt(HBaseWriterProperties.BATCH_INSERT_SIZE, 10000);
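+        // The columns mapping is a comma-separated list of "family:qualifier" entries;
+        // exactly one entry must be ":rowkey", marking the field used as the HBase row key.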
+        columns = writerConfig.getString(HBaseWriterProperties.COLUMNS).split(",");
+        for (int i = 0, len = columns.length; i < len; i++) {
+            if (ROWKEY.equalsIgnoreCase(columns[i])) {
+                rowkeyIndex = i;
+                break;
+            }
+        }
+
+        if (rowkeyIndex == -1) {
+            throw new IllegalArgumentException("Cannot find :rowkey in the columns mapping of HBase writer.");
+        }
+
+        try {
+            table = new HTable(conf, writerConfig.getString(HBaseWriterProperties.TABLE));
+        } catch (IOException e) {
+            throw new HDataException(e);
+        }
+    }
+
+    @Override
+    public void execute(Record record) {
+        Object rowkeyValue = record.getField(rowkeyIndex);
+        Put put = new Put(Bytes.toBytes(rowkeyValue == null ? "NULL" : rowkeyValue.toString()));
+        for (int i = 0, len = record.getFieldsCount(); i < len; i++) {
+            if (i != rowkeyIndex) {
+                String[] tokens = columns[i].split(":");
+                put.add(Bytes.toBytes(tokens[0]), Bytes.toBytes(tokens[1]),
+                        record.getField(i) == null ? null : Bytes.toBytes(record.getField(i).toString()));
+            }
+        }
+
+        putList.add(put);
+        if (putList.size() == batchSize) {
+            try {
+                table.put(putList);
+            } catch (IOException e) {
+                throw new HDataException(e);
+            }
+            putList.clear();
+        }
+    }
+
+    @Override
+    public void close() {
+        if (table != null) {
+            try {
+                if (putList.size() > 0) {
+                    table.put(putList);
+                }
+
+                table.close();
+            } catch (IOException e) {
+                throw new HDataException(e);
+            }
+            putList.clear();
+        }
+    }
+}

+ 9 - 0
src/main/java/opensource/hdata/plugin/writer/hbase/HBaseWriterProperties.java

@@ -0,0 +1,9 @@
+package opensource.hdata.plugin.writer.hbase;
+
+public class HBaseWriterProperties {
+    public static final String ZOOKEEPER_QUORUM = "zookeeperQuorum";
+    public static final String ZOOKEEPER_PROPERTY_CLIENTPORT = "zookeeperClientPort";
+    public static final String TABLE = "table";
+    public static final String COLUMNS = "columns";
+    public static final String BATCH_INSERT_SIZE = "batchInsertSize";
+}

+ 117 - 0
src/main/java/opensource/hdata/plugin/writer/hdfs/HDFSWriter.java

@@ -0,0 +1,117 @@
+package opensource.hdata.plugin.writer.hdfs;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.JobContext;
+import opensource.hdata.core.plugin.Record;
+import opensource.hdata.core.plugin.Writer;
+import opensource.hdata.exception.HDataException;
+import opensource.hdata.util.EscaperUtils;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+
+import com.google.common.base.Joiner;
+
+public class HDFSWriter extends Writer {
+
+    private String path;
+    private String fieldsSeparator;
+    private String lineSeparator;
+    private String encoding;
+    private String compressCodec;
+    private String hadoopUser;
+    private BufferedWriter bw;
+    private String[] strArray;
+    private static AtomicInteger sequence = new AtomicInteger(0);
+    private static final Pattern REG_FILE_PATH_WITHOUT_EXTENSION = Pattern.compile(".*?(?=\\.\\w+$)");
+    private static final Pattern REG_FILE_EXTENSION = Pattern.compile("(\\.\\w+)$");
+
+    @Override
+    public void prepare(JobContext context, PluginConfig writerConfig) {
+        path = writerConfig.getString(HDFSWriterProperties.PATH);
+        fieldsSeparator = EscaperUtils.parse(writerConfig.getString(HDFSWriterProperties.FIELDS_SEPARATOR, "\t"));
+        lineSeparator = EscaperUtils.parse(writerConfig.getString(HDFSWriterProperties.LINE_SEPARATOR, "\n"));
+        encoding = writerConfig.getString(HDFSWriterProperties.ENCODING, "UTF-8");
+        compressCodec = writerConfig.getProperty(HDFSWriterProperties.COMPRESS_CODEC);
+        hadoopUser = writerConfig.getString(HDFSWriterProperties.HADOOP_USER);
+        System.setProperty("HADOOP_USER_NAME", hadoopUser);
+
+        int parallelism = writerConfig.getParallelism();
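+        // When several writer instances run in parallel, give each one its own output file
+        // by appending a zero-padded sequence number before the file extension.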
+        if (parallelism > 1) {
+            String filePathWithoutExtension = "";
+            String fileExtension = "";
+            Matcher m1 = REG_FILE_PATH_WITHOUT_EXTENSION.matcher(path.trim());
+            if (m1.find()) {
+                filePathWithoutExtension = m1.group();
+            }
+
+            Matcher m2 = REG_FILE_EXTENSION.matcher(path.trim());
+            if (m2.find()) {
+                fileExtension = m2.group();
+            }
+            path = String.format("%s_%04d%s", filePathWithoutExtension, sequence.getAndIncrement(), fileExtension);
+        }
+
+        Path hdfsPath = new Path(path);
+        Configuration conf = new Configuration();
+        try {
+            FileSystem fs = hdfsPath.getFileSystem(conf);
+            FSDataOutputStream output = fs.create(hdfsPath);
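+            // Wrap the stream with the configured compression codec (looked up by class
+            // name); write plain text when no codec is configured.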
+            if (compressCodec == null) {
+                bw = new BufferedWriter(new OutputStreamWriter(output, encoding));
+            } else {
+                CompressionCodecFactory factory = new CompressionCodecFactory(conf);
+                CompressionCodec codec = factory.getCodecByClassName(compressCodec);
+                bw = new BufferedWriter(new OutputStreamWriter(codec.createOutputStream(output), encoding));
+            }
+        } catch (IOException e) {
+            throw new HDataException(e);
+        }
+
+    }
+
+    @Override
+    public void execute(Record record) {
+        if (strArray == null) {
+            strArray = new String[record.getFieldsCount()];
+        }
+
+        for (int i = 0, len = record.getFieldsCount(); i < len; i++) {
+            Object o = record.getField(i);
+            if (o == null) {
+                strArray[i] = "NULL";
+            } else {
+                strArray[i] = o.toString();
+            }
+        }
+        try {
+            bw.write(Joiner.on(fieldsSeparator).join(strArray));
+            bw.write(lineSeparator);
+        } catch (IOException e) {
+            throw new HDataException(e);
+        }
+    }
+
+    @Override
+    public void close() {
+        if (bw != null) {
+            try {
+                bw.flush();
+                bw.close();
+            } catch (IOException e) {
+                throw new HDataException(e);
+            }
+        }
+    }
+}

+ 10 - 0
src/main/java/opensource/hdata/plugin/writer/hdfs/HDFSWriterProperties.java

@@ -0,0 +1,10 @@
+package opensource.hdata.plugin.writer.hdfs;
+
+public class HDFSWriterProperties {
+    public static final String PATH = "path";
+    public static final String FIELDS_SEPARATOR = "fieldsSeparator";
+    public static final String LINE_SEPARATOR = "lineSeparator";
+    public static final String ENCODING = "encoding";
+    public static final String COMPRESS_CODEC = "compressCodec";
+    public static final String HADOOP_USER = "hadoopUser";
+}

+ 19 - 0
src/main/java/opensource/hdata/plugin/writer/hive/HiveRecordWritable.java

@@ -0,0 +1,19 @@
+package opensource.hdata.plugin.writer.hive;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Writable;
+
+public class HiveRecordWritable implements Writable {
+
+    public void write(DataOutput dataOutput) throws IOException {
+        throw new UnsupportedOperationException("no write");
+    }
+
+    public void readFields(DataInput dataInput) throws IOException {
+        throw new UnsupportedOperationException("no read");
+    }
+
+}

+ 211 - 0
src/main/java/opensource/hdata/plugin/writer/hive/HiveWriter.java

@@ -0,0 +1,211 @@
+package opensource.hdata.plugin.writer.hive;
+
+import java.io.IOException;
+import java.lang.reflect.Field;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javassist.ClassPool;
+import javassist.CtClass;
+import javassist.CtField;
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.JobContext;
+import opensource.hdata.core.plugin.Record;
+import opensource.hdata.core.plugin.Writer;
+import opensource.hdata.exception.HDataException;
+import opensource.hdata.plugin.reader.hive.HiveReaderProperties;
+import opensource.hdata.util.HiveTypeUtils;
+import opensource.hdata.util.LoggerUtils;
+import opensource.hdata.util.TypeConvertUtils;
+import opensource.hdata.util.Utils;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.serde2.Serializer;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+@SuppressWarnings("deprecation")
+public class HiveWriter extends Writer {
+
+    private Serializer serializer;
+    private HiveOutputFormat<?, ?> outputFormat;
+    private StructObjectInspector inspector;
+    private FileSinkOperator.RecordWriter writer;
+    private Path path = null;
+    private Map<String, String> partitionSpecify = new HashMap<String, String>();
+    private int partitionKeySize;
+    private PluginConfig writerConfig;
+    private Object hiveRecord;
+    private String hdfsTmpDir;
+
+    private static Class<?> hiveRecordWritale;
+    private static List<Field> classFields = new ArrayList<Field>();
+    private static List<Path> files = new ArrayList<Path>();
+    private static final Pattern HDFS_MASTER = Pattern.compile("hdfs://[\\w\\.]+:\\d+");
+    private static final Logger LOG = LogManager.getLogger(HiveWriter.class);
+
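+    // Uses Javassist to add one field per Hive column to HiveRecordWritable at runtime, so
+    // the table's SerDe can serialize records through reflection-based object inspectors.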
+    private synchronized static void createHiveRecordClass(List<FieldSchema> columns) {
+        if (hiveRecordWritale == null) {
+            ClassPool pool = ClassPool.getDefault();
+            try {
+                CtClass ctClass = pool.get("opensource.hdata.plugin.writer.hive.HiveRecordWritable");
+                for (FieldSchema fieldSchema : columns) {
+                    PrimitiveCategory primitiveCategory = HiveTypeUtils.getPrimitiveCategory(fieldSchema.getType().replaceAll("\\(.*\\)", "")
+                            .toUpperCase());
+                    Class<?> fieldTypeClazz = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(primitiveCategory)
+                            .getJavaPrimitiveClass();
+                    CtField ctField = new CtField(pool.get(fieldTypeClazz.getName()), fieldSchema.getName(), ctClass);
+                    ctClass.addField(ctField);
+                }
+                hiveRecordWritale = ctClass.toClass();
+                for (Field field : hiveRecordWritale.getDeclaredFields()) {
+                    field.setAccessible(true);
+                    classFields.add(field);
+                }
+            } catch (Exception e) {
+                throw new HDataException(e);
+            }
+        }
+    }
+
+    @Override
+    public void prepare(JobContext context, PluginConfig writerConfig) {
+        hdfsTmpDir = context.getEngineConfig().getString("hdata.hive.writer.tmp.dir", "/tmp");
+        this.writerConfig = writerConfig;
+        String metastoreUris = writerConfig.getString(HiveWriterProperties.METASTORE_URIS);
+        String dbName = writerConfig.getString(HiveWriterProperties.DATABASE, "default");
+        String tableName = writerConfig.getString(HiveWriterProperties.TABLE);
+        boolean isCompress = writerConfig.getBoolean(HiveWriterProperties.COMPRESS, true);
+
+        System.setProperty("HADOOP_USER_NAME", writerConfig.getString(HiveWriterProperties.HADOOP_USER));
+
+        HiveConf conf = new HiveConf();
+        conf.set(ConfVars.METASTOREURIS.varname, metastoreUris);
+
+        Hive hive;
+        Table table;
+        try {
+            hive = Hive.get(conf, true);
+            table = hive.getTable(dbName, tableName, false);
+
+            partitionKeySize = table.getPartitionKeys().size();
+            serializer = (Serializer) table.getDeserializer();
+            outputFormat = (HiveOutputFormat<?, ?>) table.getOutputFormatClass().newInstance();
+            if (writerConfig.containsKey(HiveReaderProperties.PARTITIONS)) {
+                String partitions = writerConfig.getString(HiveReaderProperties.PARTITIONS);
+                String[] partKVs = partitions.split("\\s*,\\s*");
+                for (String kv : partKVs) {
+                    String[] tokens = kv.split("=");
+                    if (tokens.length == 2) {
+                        partitionSpecify.put(tokens[0], tokens[1]);
+                    }
+                }
+            } else if (partitionKeySize > 0) {
+                throw new HDataException(String.format("Table %s.%s is a partitioned table, but no partition config is given.", dbName, tableName));
+            }
+
+            createHiveRecordClass(table.getCols());
+            hiveRecord = hiveRecordWritale.newInstance();
+
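+            // Records are first written to a temporary ".tmp" file under the configured HDFS
+            // tmp directory on the table's cluster; close() renames it and loads it into the table.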
+            String tableLocation = Utils.fixLocaltion(table.getDataLocation().toString(), metastoreUris);
+            Matcher m = HDFS_MASTER.matcher(tableLocation);
+            if (m.find()) {
+                path = new Path(String.format("%s/%s/%s-%s.tmp", m.group(), hdfsTmpDir, tableName, UUID.randomUUID().toString().replaceAll("-", "")));
+                files.add(path);
+            }
+
+            inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(HiveRecordWritable.class,
+                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+            JobConf jobConf = new JobConf();
+            writer = outputFormat.getHiveRecordWriter(jobConf, path, HiveRecordWritable.class, isCompress, table.getMetadata(), Reporter.NULL);
+        } catch (Exception e) {
+            throw new HDataException(e);
+        } finally {
+            Hive.closeCurrent();
+        }
+    }
+
+    @Override
+    public void execute(Record record) {
+        try {
+            for (int i = 0, len = record.getFieldsCount() - partitionKeySize; i < len; i++) {
+                classFields.get(i).set(hiveRecord, TypeConvertUtils.convert(record.getField(i), classFields.get(i).getType()));
+            }
+            writer.write(serializer.serialize(hiveRecord, inspector));
+        } catch (Exception e) {
+            throw new HDataException(e);
+        }
+    }
+
+    private synchronized static Partition createPartition(Hive hive, Table table, Map<String, String> partSpec) {
+        Partition p = null;
+        try {
+            p = hive.getPartition(table, partSpec, false);
+            if (p == null) {
+                p = hive.getPartition(table, partSpec, true);
+            }
+        } catch (HiveException e) {
+            throw new HDataException(e);
+        }
+        return p;
+    }
+
+    @Override
+    public void close() {
+        if (writer != null) {
+            try {
+                // Close normally (abort = false) so the record writer finalizes its output.
+                writer.close(false);
+
+                String metastoreUris = writerConfig.getString(HiveWriterProperties.METASTORE_URIS);
+                String dbName = writerConfig.getString(HiveWriterProperties.DATABASE, "default");
+                String tableName = writerConfig.getString(HiveWriterProperties.TABLE);
+                HiveConf conf = new HiveConf();
+                conf.set(ConfVars.METASTOREURIS.varname, metastoreUris);
+                Path renamedPath = new Path(path.toString().replaceFirst("\\.tmp$", ""));
+                FileSystem fs = renamedPath.getFileSystem(conf);
+                fs.rename(path, renamedPath);
+
+                Hive hive;
+                try {
+                    hive = Hive.get(conf, true);
+                    if (partitionKeySize == 0) {
+                        LOG.info("Loading data {} into table {}.{}", renamedPath.toString(), dbName, tableName);
+                        hive.loadTable(renamedPath, dbName + "." + tableName, false, false);
+                    } else {
+                        Table table = hive.getTable(dbName, tableName, false);
+                        Partition p = createPartition(hive, table, partitionSpecify);
+                        LOG.info("Loading data {} into table {}.{} partition({})", renamedPath.toString(), dbName, tableName, p.getName());
+                        hive.loadPartition(renamedPath, dbName + "." + tableName, partitionSpecify, false, false, true, false);
+                    }
+                } catch (Exception e) {
+                    throw new HDataException(e);
+                } finally {
+                    Hive.closeCurrent();
+                }
+            } catch (IOException e) {
+                LoggerUtils.error(LOG, e);
+            }
+        }
+    }
+}

+ 11 - 0
src/main/java/opensource/hdata/plugin/writer/hive/HiveWriterProperties.java

@@ -0,0 +1,11 @@
+package opensource.hdata.plugin.writer.hive;
+
+public class HiveWriterProperties {
+
+    public static final String METASTORE_URIS = "metastoreUris";
+    public static final String DATABASE = "database";
+    public static final String TABLE = "table";
+    public static final String PARTITIONS = "partitions";
+    public static final String COMPRESS = "compress";
+    public static final String HADOOP_USER = "hadoopUser";
+}

+ 13 - 0
src/main/java/opensource/hdata/plugin/writer/jdbc/JBDCWriterProperties.java

@@ -0,0 +1,13 @@
+package opensource.hdata.plugin.writer.jdbc;
+
+public class JBDCWriterProperties {
+
+    public static final String DRIVER = "driver";
+    public static final String URL = "url";
+    public static final String USERNAME = "username";
+    public static final String PASSWORD = "password";
+    public static final String TABLE = "table";
+    public static final String BATCH_INSERT_SIZE = "batchInsertSize";
+    public static final String PARALLELISM = "parallelism";
+
+}

+ 124 - 0
src/main/java/opensource/hdata/plugin/writer/jdbc/JDBCWriter.java

@@ -0,0 +1,124 @@
+package opensource.hdata.plugin.writer.jdbc;
+
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.sql.Types;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Map;
+
+import opensource.hdata.common.Constants;
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.Fields;
+import opensource.hdata.core.JobContext;
+import opensource.hdata.core.plugin.Record;
+import opensource.hdata.core.plugin.Writer;
+import opensource.hdata.exception.HDataException;
+import opensource.hdata.util.JDBCUtils;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import com.google.common.base.Joiner;
+
+public class JDBCWriter extends Writer {
+
+    private Connection connection = null;
+    private PreparedStatement statement = null;
+    private int count;
+    private int batchInsertSize;
+    private Fields columns;
+    private String table;
+    private Map<String, Integer> columnTypes;
+    private final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat(Constants.DATE_FORMAT_STRING);
+    private final int DEFAULT_BATCH_INSERT_SIZE = 10000;
+    private static final Logger LOG = LogManager.getLogger(JDBCWriter.class);
+
+    @Override
+    public void prepare(JobContext context, PluginConfig writerConfig) {
+        columns = context.getFields();
+        String driver = writerConfig.getString(JBDCWriterProperties.DRIVER);
+        String url = writerConfig.getString(JBDCWriterProperties.URL);
+        String username = writerConfig.getString(JBDCWriterProperties.USERNAME);
+        String password = writerConfig.getString(JBDCWriterProperties.PASSWORD);
+        String table = writerConfig.getString(JBDCWriterProperties.TABLE);
+        this.table = table;
+        batchInsertSize = writerConfig.getInt(JBDCWriterProperties.BATCH_INSERT_SIZE, DEFAULT_BATCH_INSERT_SIZE);
+        if (batchInsertSize < 1) {
+            batchInsertSize = DEFAULT_BATCH_INSERT_SIZE;
+        }
+
+        try {
+            connection = JDBCUtils.getConnection(driver, url, username, password);
+            connection.setAutoCommit(false);
+            columnTypes = JDBCUtils.getColumnTypes(connection, table);
+
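+            // Build a parameterized INSERT from the known column names; when no field names
+            // are available, the statement is created lazily in execute() from the record width.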
+            String sql = null;
+            if (columns != null) {
+                String[] placeholder = new String[columns.size()];
+                Arrays.fill(placeholder, "?");
+                sql = String.format("INSERT INTO %s(%s) VALUES(%s)", table, Joiner.on(", ").join(columns), Joiner.on(", ").join(placeholder));
+                LOG.debug(sql);
+                statement = connection.prepareStatement(sql);
+            }
+        } catch (Exception e) {
+            JDBCUtils.closeConnection(connection);
+            throw new HDataException("Writer prepare failed.", e);
+        }
+    }
+
+    @Override
+    public void execute(Record record) {
+        try {
+            if (statement == null) {
+                String[] placeholder = new String[record.getFieldsCount()];
+                Arrays.fill(placeholder, "?");
+                String sql = String.format("INSERT INTO %s VALUES(%s)", table, Joiner.on(", ").join(placeholder));
+                LOG.debug(sql);
+                statement = connection.prepareStatement(sql);
+            }
+
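+            // Timestamp values bound to non-TIMESTAMP columns are formatted as strings;
+            // all other values are bound directly via setObject().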
+            for (int i = 0, len = record.getFieldsCount(); i < len; i++) {
+                if (record.getField(i) instanceof Timestamp
+                        && !Integer.valueOf(Types.TIMESTAMP).equals(columnTypes.get(columns.get(i).toLowerCase()))) {
+                    statement.setObject(i + 1, DATE_FORMAT.format(record.getField(i)));
+                } else {
+                    statement.setObject(i + 1, record.getField(i));
+                }
+            }
+
+            count++;
+            statement.addBatch();
+
+            if (count % batchInsertSize == 0) {
+                count = 0;
+                statement.executeBatch();
+                connection.commit();
+            }
+        } catch (SQLException e) {
+            close();
+            throw new HDataException("Writer execute failed.", e);
+        }
+    }
+
+    @Override
+    public void close() {
+        try {
+            if (count > 0) {
+                statement.executeBatch();
+                connection.commit();
+            }
+
+            if (statement != null) {
+                statement.close();
+            }
+
+        } catch (SQLException e) {
+            throw new HDataException(e);
+        } finally {
+            JDBCUtils.closeConnection(connection);
+        }
+    }
+}

+ 67 - 0
src/main/java/opensource/hdata/plugin/writer/mongodb/MongoDBWriter.java

@@ -0,0 +1,67 @@
+package opensource.hdata.plugin.writer.mongodb;
+
+import java.net.UnknownHostException;
+
+import opensource.hdata.config.PluginConfig;
+import opensource.hdata.core.Fields;
+import opensource.hdata.core.JobContext;
+import opensource.hdata.core.plugin.Record;
+import opensource.hdata.core.plugin.Writer;
+import opensource.hdata.exception.HDataException;
+
+import org.apache.commons.lang3.ArrayUtils;
+
+import com.mongodb.BasicDBObject;
+import com.mongodb.DB;
+import com.mongodb.DBCollection;
+import com.mongodb.MongoClient;
+import com.mongodb.MongoClientURI;
+
+public class MongoDBWriter extends Writer {
+
+    private Fields fields;
+    private MongoClient mongoClient = null;
+    private DBCollection coll;
+    private BasicDBObject[] insertDocs;
+    private int batchsize;
+    private int count;
+
+    @Override
+    public void prepare(JobContext context, PluginConfig writerConfig) {
+        fields = context.getFields();
+        batchsize = writerConfig.getInt(MongoDBWriterProperties.BATCH_INSERT_SIZE, 1000);
+        insertDocs = new BasicDBObject[batchsize];
+        MongoClientURI clientURI = new MongoClientURI(writerConfig.getString(MongoDBWriterProperties.URI));
+        try {
+            mongoClient = new MongoClient(clientURI);
+            DB db = mongoClient.getDB(clientURI.getDatabase());
+            coll = db.getCollection(clientURI.getCollection());
+        } catch (UnknownHostException e) {
+            throw new HDataException(e);
+        }
+    }
+
+    @Override
+    public void execute(Record record) {
+        BasicDBObject doc = new BasicDBObject();
+        for (int i = 0, len = fields.size(); i < len; i++) {
+            doc.put(fields.get(i), record.getField(i));
+        }
+
+        insertDocs[count++] = doc;
+        if (count == batchsize) {
+            coll.insert(insertDocs);
+            count = 0;
+        }
+    }
+
+    @Override
+    public void close() {
+        if (mongoClient != null) {
+            if (count > 0) {
+                coll.insert(ArrayUtils.subarray(insertDocs, 0, count));
+            }
+            mongoClient.close();
+        }
+    }
+}

+ 6 - 0
src/main/java/opensource/hdata/plugin/writer/mongodb/MongoDBWriterProperties.java

@@ -0,0 +1,6 @@
+package opensource.hdata.plugin.writer.mongodb;
+
+public class MongoDBWriterProperties {
+    public static final String URI = "uri";
+    public static final String BATCH_INSERT_SIZE = "batchInsertSize";
+}

+ 76 - 0
src/main/java/opensource/hdata/tool/SQLExecuteTool.java

@@ -0,0 +1,76 @@
+package opensource.hdata.tool;
+
+import java.sql.Connection;
+import java.sql.Statement;
+
+import opensource.hdata.exception.HDataException;
+import opensource.hdata.util.JDBCUtils;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+public class SQLExecuteTool {
+
+    private static final String JDBC_DRIVER = "jdbc-driver";
+    private static final String JDBC_URL = "jdbc-url";
+    private static final String JDBC_USERNAME = "jdbc-username";
+    private static final String JDBC_PASSWORD = "jdbc-password";
+    private static final String SQL = "sql";
+    private static final Logger LOG = LogManager.getLogger(SQLExecuteTool.class);
+
+    public Options createOptions() {
+        Options options = new Options();
+        options.addOption(null, JDBC_DRIVER, true, "jdbc driver class name");
+        options.addOption(null, JDBC_URL, true, "jdbc url, e.g., jdbc:mysql://localhost:3306/database");
+        options.addOption(null, JDBC_USERNAME, true, "jdbc username");
+        options.addOption(null, JDBC_PASSWORD, true, "jdbc password");
+        options.addOption(null, SQL, true, "sql");
+        return options;
+    }
+
+    public void printHelp(Options options) {
+        HelpFormatter formatter = new HelpFormatter();
+        formatter.printHelp(" ", options);
+    }
+
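+    // Example invocation (illustrative values only, not taken from the project docs):
+    //   --jdbc-driver com.mysql.jdbc.Driver --jdbc-url jdbc:mysql://localhost:3306/test \
+    //   --jdbc-username root --jdbc-password secret --sql "TRUNCATE TABLE target_table"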
+    public static void main(String[] args) {
+        SQLExecuteTool tool = new SQLExecuteTool();
+        Options options = tool.createOptions();
+        if (args.length < 1) {
+            tool.printHelp(options);
+            System.exit(-1);
+        }
+
+        CommandLineParser parser = new PosixParser();
+        CommandLine cmd = null;
+        Connection conn = null;
+        try {
+            cmd = parser.parse(options, args);
+            String driver = cmd.getOptionValue(JDBC_DRIVER);
+            String url = cmd.getOptionValue(JDBC_URL);
+            String username = cmd.getOptionValue(JDBC_USERNAME);
+            String password = cmd.getOptionValue(JDBC_PASSWORD);
+            String sql = cmd.getOptionValue(SQL);
+            conn = JDBCUtils.getConnection(driver, url, username, password);
+            Statement statement = conn.createStatement();
+
+            LOG.info("Executing SQL: {}", sql);
+            statement.execute(sql);
+            LOG.info("Executed successfully.");
+        } catch (ParseException e) {
+            tool.printHelp(options);
+            System.exit(-1);
+        } catch (Exception e) {
+            throw new HDataException(e);
+        } finally {
+            JDBCUtils.closeConnection(conn);
+        }
+    }
+
+}

+ 55 - 0
src/main/java/opensource/hdata/util/EscaperUtils.java

@@ -0,0 +1,55 @@
+package opensource.hdata.util;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class EscaperUtils {
+    private static Map<Character, Character> map = null;
+    private static final char CHAR_SLASH = '\\';
+
+    /**
+     * Resolve escaped special characters (e.g. \t, \n) to their literal values.
+     * 
+     * @param input
+     * @return
+     */
+    public static String parse(String input) {
+        int cursor = 0;
+        int index = input.indexOf(CHAR_SLASH, cursor);
+
+        if (index < 0) {
+            return input;
+        }
+
+        StringBuilder sb = new StringBuilder();
+        int len = input.length();
+        while ((index = input.indexOf('\\', cursor)) != -1) {
+            if (index < len - 1) {
+                if (map.containsKey(input.charAt(index + 1))) {
+                    sb.append(input.substring(cursor, index));
+                    sb.append(map.get(input.charAt(index + 1)));
+                } else {
+                    sb.append(input.substring(cursor, index + 2));
+                }
+                cursor = index + 2;
+            } else {
+                break;
+            }
+        }
+        sb.append(input.substring(cursor));
+
+        return sb.toString();
+    }
+
+    static {
+        map = new HashMap<Character, Character>();
+        map.put('b', '\b');
+        map.put('t', '\t');
+        map.put('n', '\n');
+        map.put('f', '\f');
+        map.put('r', '\r');
+        map.put('"', '\"');
+        map.put('\'', '\'');
+        map.put('\\', '\\');
+    }
+}

+ 71 - 0
src/main/java/opensource/hdata/util/FTPUtils.java

@@ -0,0 +1,71 @@
+package opensource.hdata.util;
+
+import java.io.IOException;
+import java.net.SocketException;
+import java.util.List;
+import java.util.regex.Pattern;
+
+import org.apache.commons.net.ftp.FTP;
+import org.apache.commons.net.ftp.FTPClient;
+import org.apache.commons.net.ftp.FTPFile;
+import org.apache.commons.net.ftp.FTPReply;
+
+public class FTPUtils {
+
+    public static FTPClient getFtpClient(String host, int port, String username, String password) throws SocketException, IOException {
+        String LOCAL_CHARSET = "GB18030";
+        FTPClient ftpClient = new FTPClient();
+        ftpClient.connect(host, port);
+        // Use UTF-8 if the server supports it, otherwise fall back to the local GB18030 encoding.
+        if (FTPReply.isPositiveCompletion(ftpClient.sendCommand("OPTS UTF8", "ON"))) {
+            LOCAL_CHARSET = "UTF-8";
+        }
+        ftpClient.setControlEncoding(LOCAL_CHARSET);
+        ftpClient.login(username, password);
+        ftpClient.setBufferSize(1024 * 1024 * 16);
+        ftpClient.enterLocalPassiveMode();
+        ftpClient.setFileType(FTP.BINARY_FILE_TYPE);
+        ftpClient.setControlKeepAliveTimeout(60);
+        return ftpClient;
+    }
+
+    /**
+     * List the files under an FTP directory.
+     * 
+     * @param files
+     * @param ftpClient
+     * @param path
+     *            the FTP directory
+     * @param filenameRegexp
+     *            regular expression that file names must match
+     * @param recursive
+     *            whether to search subdirectories recursively
+     * @throws IOException
+     */
+    public static void listFile(List<String> files, FTPClient ftpClient, String path, String filenameRegexp, boolean recursive) throws IOException {
+        for (FTPFile ftpFile : ftpClient.listFiles(path)) {
+            if (ftpFile.isFile()) {
+                if (Pattern.matches(filenameRegexp, ftpFile.getName())) {
+                    files.add(path + "/" + ftpFile.getName());
+                }
+            } else if (recursive && ftpFile.isDirectory()) {
+                listFile(files, ftpClient, path + "/" + ftpFile.getName(), filenameRegexp, recursive);
+            }
+        }
+    }
+
+    /**
+     * Close the FTP client connection.
+     * 
+     * @param ftpClient
+     */
+    public static void closeFtpClient(FTPClient ftpClient) {
+        if (ftpClient != null) {
+            try {
+                ftpClient.disconnect();
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
+        }
+    }
+}

+ 60 - 0
src/main/java/opensource/hdata/util/HiveMetaStoreUtils.java

@@ -0,0 +1,60 @@
+package opensource.hdata.util;
+
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.Table;
+
+public class HiveMetaStoreUtils {
+
+    /**
+     * Get a Hive table.
+     * 
+     * @param client
+     * @param database
+     * @param table
+     * @return
+     */
+    public static Table getTable(HiveMetaStoreClient client, String database, String table) {
+        try {
+            return client.getTable(database, table);
+        } catch (Exception e) {
+            return null;
+        }
+    }
+
+    /**
+     * Check whether the table is a managed table.
+     * 
+     * @param table
+     * @return
+     */
+    public static boolean isManagedTable(Table table) {
+        return "MANAGED_TABLE".equals(table.getTableType());
+    }
+
+    /**
+     * Check whether the table is a partitioned table.
+     * 
+     * @param table
+     * @return
+     */
+    public static boolean isPartitionTable(Table table) {
+        return table.getPartitionKeys().size() > 0;
+    }
+
+    /**
+     * Get a partition of a Hive table.
+     * 
+     * @param client
+     * @param table
+     * @param partitionValues
+     * @return
+     */
+    public static Partition getPartition(HiveMetaStoreClient client, Table table, String partitionValues) {
+        try {
+            return client.getPartition(table.getDbName(), table.getTableName(), partitionValues.replaceAll("\"", "").replaceAll("\\s+,\\s+", ""));
+        } catch (Exception e) {
+            return null;
+        }
+    }
+}

+ 45 - 0
src/main/java/opensource/hdata/util/HiveTypeUtils.java

@@ -0,0 +1,45 @@
+package opensource.hdata.util;
+
+import org.apache.hadoop.hive.common.type.HiveBaseChar;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+
+public class HiveTypeUtils {
+
+    /**
+     * Convert a Hive Writable value to a standard Java object.
+     * 
+     * @param o
+     * @return
+     */
+    public static Object toJavaObject(Object o) {
+        if (o instanceof HiveBaseChar) {
+            return ((HiveBaseChar) o).getValue();
+        } else if (o instanceof HiveDecimal) {
+            return ((HiveDecimal) o).bigDecimalValue();
+        }
+
+        return o;
+    }
+
+    /**
+     * Get the PrimitiveCategory for a Hive type name.
+     * 
+     * @param type
+     * @return
+     */
+    public static PrimitiveCategory getPrimitiveCategory(String type) {
+        if ("TINYINT".equals(type)) {
+            return PrimitiveObjectInspector.PrimitiveCategory.BYTE;
+        } else if ("SMALLINT".equals(type)) {
+            return PrimitiveObjectInspector.PrimitiveCategory.SHORT;
+        } else if ("BIGINT".equals(type)) {
+            return PrimitiveObjectInspector.PrimitiveCategory.LONG;
+        } else {
+            return PrimitiveObjectInspector.PrimitiveCategory.valueOf(type);
+        }
+    }
+
+}

+ 199 - 0
src/main/java/opensource/hdata/util/JDBCUtils.java

@@ -0,0 +1,199 @@
+package opensource.hdata.util;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.sql.Types;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+public class JDBCUtils {
+
+    private static final Logger LOG = LogManager.getLogger(JDBCUtils.class);
+
+    /**
+     * Get a JDBC connection.
+     * 
+     * @param driver
+     * @param url
+     * @param username
+     * @param password
+     * @return
+     * @throws ClassNotFoundException
+     * @throws SQLException
+     */
+    public static Connection getConnection(String driver, String url, String username, String password) throws ClassNotFoundException, SQLException {
+        Class.forName(driver);
+        Connection conn = DriverManager.getConnection(url, username, password);
+        return conn;
+    }
+
+    /**
+     * Close a JDBC connection.
+     * 
+     * @param conn
+     */
+    public static void closeConnection(Connection conn) {
+        if (conn != null) {
+            try {
+                conn.close();
+            } catch (SQLException e) {
+                LoggerUtils.error(LOG, e);
+            }
+        }
+    }
+
+    /**
+     * Get the column types of a table.
+     * 
+     * @param connection
+     * @param table
+     * @return
+     * @throws SQLException
+     */
+    public static Map<String, Integer> getColumnTypes(Connection connection, String table) throws SQLException {
+        Map<String, Integer> map = new HashMap<String, Integer>();
+        StringBuilder sql = new StringBuilder();
+        sql.append("SELECT * FROM ");
+        sql.append(table);
+        sql.append(" WHERE 1=2");
+
+        PreparedStatement ps = connection.prepareStatement(sql.toString());
+        ResultSetMetaData rsd = ps.executeQuery().getMetaData();
+        for (int i = 0; i < rsd.getColumnCount(); i++) {
+            map.put(rsd.getColumnName(i + 1).toLowerCase(), rsd.getColumnType(i + 1));
+        }
+        ps.close();
+        return map;
+    }
+
+    /**
+     * Get the column names of a table.
+     * 
+     * @param conn
+     * @param table
+     * @return
+     * @throws SQLException
+     */
+    public static List<String> getColumnNames(Connection conn, String table) throws SQLException {
+        List<String> columnNames = new ArrayList<String>();
+        StringBuilder sql = new StringBuilder();
+        sql.append("SELECT * FROM ");
+        sql.append(table);
+        sql.append(" WHERE 1=2");
+
+        PreparedStatement ps = conn.prepareStatement(sql.toString());
+        ResultSet rs = ps.executeQuery();
+        ResultSetMetaData rsd = rs.getMetaData();
+
+        for (int i = 0, len = rsd.getColumnCount(); i < len; i++) {
+            columnNames.add(rsd.getColumnName(i + 1));
+        }
+        rs.close();
+        ps.close();
+
+        return columnNames;
+    }
+
+    /**
+     * Query the value range (minimum and maximum) of the split column for the given query.
+     * 
+     * @param conn
+     * @param sql
+     * @param splitColumn
+     * @return
+     * @throws SQLException
+     */
+    public static double[] querySplitColumnRange(Connection conn, String sql, String splitColumn) throws SQLException {
+        double[] minAndMax = new double[2];
+        Pattern p = Pattern.compile("\\s+FROM\\s+.*", Pattern.CASE_INSENSITIVE);
+        Matcher m = p.matcher(sql);
+
+        if (m.find() && splitColumn != null && !splitColumn.trim().isEmpty()) {
+            StringBuilder sb = new StringBuilder();
+            sb.append("SELECT MIN(");
+            sb.append(splitColumn);
+            sb.append("), MAX(");
+            sb.append(splitColumn);
+            sb.append(")");
+            sb.append(m.group(0));
+
+            Statement statement = conn.createStatement();
+            ResultSet rs = statement.executeQuery(sb.toString());
+            while (rs.next()) {
+                minAndMax[0] = rs.getDouble(1);
+                minAndMax[1] = rs.getDouble(2);
+            }
+
+            rs.close();
+            statement.close();
+        }
+
+        return minAndMax;
+    }
+
+    /**
+     * Find a numeric primary key column of a table.
+     * 
+     * @param conn
+     * @param catalog
+     * @param schema
+     * @param table
+     * @return
+     * @throws SQLException
+     */
+    public static String getDigitalPrimaryKey(Connection conn, String catalog, String schema, String table) throws SQLException {
+        List<String> primaryKeys = new ArrayList<String>();
+        ResultSet rs = conn.getMetaData().getPrimaryKeys(catalog, schema, table);
+        while (rs.next()) {
+            primaryKeys.add(rs.getString("COLUMN_NAME"));
+        }
+        rs.close();
+
+        if (primaryKeys.size() > 0) {
+            Map<String, Integer> map = getColumnTypes(conn, table);
+            for (String pk : primaryKeys) {
+                if (isDigitalType(map.get(pk))) {
+                    return pk;
+                }
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Check whether a SQL type is numeric.
+     * 
+     * @param sqlType
+     * @return
+     */
+    public static boolean isDigitalType(int sqlType) {
+        switch (sqlType) {
+            case Types.NUMERIC:
+            case Types.DECIMAL:
+            case Types.SMALLINT:
+            case Types.INTEGER:
+            case Types.BIGINT:
+            case Types.REAL:
+            case Types.FLOAT:
+            case Types.DOUBLE:
+                return true;
+
+            default:
+                return false;
+        }
+    }
+
+}

+ 11 - 0
src/main/java/opensource/hdata/util/LoggerUtils.java

@@ -0,0 +1,11 @@
+package opensource.hdata.util;
+
+import org.apache.logging.log4j.Logger;
+
+public class LoggerUtils {
+
+    public static void error(Logger logger, Exception e) {
+        // Log the message together with the full stack trace.
+        logger.error(e.getMessage(), e);
+    }
+}

+ 43 - 0
src/main/java/opensource/hdata/util/TypeConvertUtils.java

@@ -0,0 +1,43 @@
+package opensource.hdata.util;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+public class TypeConvertUtils {
+
+    /**
+     * Convert a value to the given target type.
+     * 
+     * @param src
+     * @param clazz
+     * @return
+     */
+    public static Object convert(Object src, Class<?> clazz) {
+        if (src == null) {
+            return null;
+        } else if (src instanceof String) {
+            if (clazz == Integer.class) {
+                return Integer.valueOf(src.toString());
+            } else if (clazz == Long.class) {
+                return Long.valueOf(src.toString());
+            } else if (clazz == Double.class) {
+                return Double.valueOf(src.toString());
+            } else if (clazz == Float.class) {
+                return Float.valueOf(src.toString());
+            } else if (clazz == Boolean.class) {
+                return Boolean.valueOf(src.toString());
+            } else if (clazz == Short.class) {
+                return Short.valueOf(src.toString());
+            } else if (clazz == Byte.class) {
+                return Byte.valueOf(src.toString());
+            } else if (clazz == BigInteger.class) {
+                return new BigInteger(src.toString());
+            } else if (clazz == BigDecimal.class) {
+                return new BigDecimal(src.toString());
+            }
+        } else if (clazz == String.class) {
+            return src.toString();
+        }
+        return src;
+    }
+}

+ 89 - 0
src/main/java/opensource/hdata/util/Utils.java

@@ -0,0 +1,89 @@
+package opensource.hdata.util;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang3.ArrayUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+public class Utils {
+
+    private static final Logger LOG = LogManager.getLogger(Utils.class);
+
+    /**
+     * Sleeps the current thread
+     * 
+     * @param millis the sleep duration in milliseconds
+     */
+    public static void sleep(long millis) {
+        try {
+            Thread.sleep(millis);
+        } catch (InterruptedException e) {
+            // Restore the interrupt flag so callers can still detect the interruption
+            Thread.currentThread().interrupt();
+            LoggerUtils.error(LOG, e);
+        }
+    }
+
+    public static List<String> getColumns(String[] columns, String[] excludeColumns) {
+        if (excludeColumns == null || excludeColumns.length < 1) {
+            return columns == null ? null : Arrays.asList(columns);
+        }
+
+        List<String> list = new ArrayList<String>();
+        for (String column : columns) {
+            if (!ArrayUtils.contains(excludeColumns, column)) {
+                list.add(column);
+            }
+        }
+        return list;
+    }
+
+    public static List<String> getColumns(List<String> columns, String[] excludeColumns) {
+        return getColumns(columns.toArray(new String[columns.size()]), excludeColumns);
+    }
+
+    /**
+     * Fixes an HDFS location by replacing the hostname with the metastore host's IP address
+     * 
+     * @param srcLocaltion the original HDFS location
+     * @param metastoreUris the Hive metastore URIs (only the first one is used)
+     * @return the fixed location, or the original location if no IP could be extracted
+     */
+    public static String fixLocaltion(String srcLocaltion, String metastoreUris) {
+        Matcher ipMatcher = Pattern.compile("(\\d+\\.){3}\\d+").matcher(metastoreUris.split(",")[0].trim());
+        if (ipMatcher.find()) {
+            String masterIP = ipMatcher.group();
+            return srcLocaltion.replaceFirst("^hdfs://\\w+:", "hdfs://" + masterIP + ":");
+        }
+        return srcLocaltion;
+    }
+
+    /**
+     * Parses partition values from a comma-separated list of key=value pairs
+     * 
+     * @param partitions e.g. "dt=20140101,country=cn"
+     * @return the partition values in declaration order
+     */
+    public static List<String> parsePartitionValue(String partitions) {
+        List<String> partitionValues = new ArrayList<String>();
+        String[] partitionKeyValue = partitions.split("\\s*,\\s*");
+        for (String kv : partitionKeyValue) {
+            String[] tokens = StringUtils.splitPreserveAllTokens(kv, "=");
+            partitionValues.add(tokens[1]);
+        }
+        return partitionValues;
+    }
+
+    /**
+     * Gets the configuration directory (from the hdata.conf.dir system property)
+     * 
+     * @return the configuration directory path, ending with the file separator
+     */
+    public static String getConfigDir() {
+        return System.getProperty("hdata.conf.dir") + System.getProperty("file.separator");
+    }
+}
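A minimal sketch of the Utils helpers above; the column names, HDFS location and metastore URI are illustrative values, not taken from this commit.

    import java.util.List;

    public class UtilsExample {
        public static void main(String[] args) {
            // Drop excluded columns from a projection list
            List<String> cols = Utils.getColumns(new String[] { "id", "name", "pwd" }, new String[] { "pwd" });
            System.out.println(cols); // [id, name]

            // Rewrite the hostname in an HDFS location with the metastore host's IP
            String fixed = Utils.fixLocaltion("hdfs://namenode:8020/user/hive/warehouse/t1",
                    "thrift://192.168.1.10:9083");
            System.out.println(fixed); // hdfs://192.168.1.10:8020/user/hive/warehouse/t1

            // Keep only the values from "key=value" partition specs
            List<String> values = Utils.parsePartitionValue("dt=20140101, country=cn");
            System.out.println(values); // [20140101, cn]
        }
    }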

+ 39 - 0
src/main/java/opensource/hdata/util/XMLUtils.java

@@ -0,0 +1,39 @@
+package opensource.hdata.util;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.jdom2.Document;
+import org.jdom2.Element;
+import org.jdom2.input.DOMBuilder;
+import org.xml.sax.SAXException;
+
+public class XMLUtils {
+
+    /**
+     * Loads an XML document and returns its root element
+     * 
+     * @param input the XML input stream
+     * @return the root element
+     * @throws ParserConfigurationException
+     * @throws SAXException
+     * @throws IOException
+     */
+    public static Element load(InputStream input) throws ParserConfigurationException, SAXException, IOException {
+        DOMBuilder domBuilder = new DOMBuilder();
+        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+        Document doc = domBuilder.build(builder.parse(input));
+        return doc.getRootElement();
+    }
+
+    public static Element load(String xmlpath) throws ParserConfigurationException, SAXException, IOException {
+        FileInputStream fis = new FileInputStream(xmlpath);
+        try {
+            return load(fis); // ensure the stream is closed to avoid a file handle leak
+        } finally {
+            fis.close();
+        }
+    }
+}
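A brief usage sketch of XMLUtils; the path below points at the conf/hdata.xml file added in this commit, but any readable XML file works.

    import org.jdom2.Element;

    public class XMLUtilsExample {
        public static void main(String[] args) throws Exception {
            // Parse the file and walk its root element's children
            Element root = XMLUtils.load("conf/hdata.xml");
            System.out.println(root.getName());
            for (Element child : root.getChildren()) {
                System.out.println(child.getName());
            }
        }
    }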