|
@@ -1,13 +1,15 @@
|
|
package com.lts.job.tracker.support;
|
|
package com.lts.job.tracker.support;
|
|
|
|
|
|
import com.lts.job.core.cluster.Node;
|
|
import com.lts.job.core.cluster.Node;
|
|
-import com.lts.job.core.cluster.NodeManager;
|
|
|
|
import com.lts.job.core.cluster.NodeType;
|
|
import com.lts.job.core.cluster.NodeType;
|
|
|
|
+import com.lts.job.core.constant.Constants;
|
|
import com.lts.job.core.domain.LogType;
|
|
import com.lts.job.core.domain.LogType;
|
|
import com.lts.job.core.protocol.JobProtos;
|
|
import com.lts.job.core.protocol.JobProtos;
|
|
|
|
+import com.lts.job.core.protocol.command.CommandWrapper;
|
|
import com.lts.job.core.protocol.command.JobAskRequest;
|
|
import com.lts.job.core.protocol.command.JobAskRequest;
|
|
import com.lts.job.core.protocol.command.JobAskResponse;
|
|
import com.lts.job.core.protocol.command.JobAskResponse;
|
|
import com.lts.job.core.remoting.RemotingServerDelegate;
|
|
import com.lts.job.core.remoting.RemotingServerDelegate;
|
|
|
|
+import com.lts.job.core.support.Application;
|
|
import com.lts.job.core.support.SingletonBeanContext;
|
|
import com.lts.job.core.support.SingletonBeanContext;
|
|
import com.lts.job.core.repository.JobMongoRepository;
|
|
import com.lts.job.core.repository.JobMongoRepository;
|
|
import com.lts.job.core.repository.po.JobPo;
|
|
import com.lts.job.core.repository.po.JobPo;
|
|
@@ -37,7 +39,7 @@ public class DeadJobChecker {
|
|
|
|
|
|
private static final Logger LOGGER = LoggerFactory.getLogger(DeadJobChecker.class);
|
|
private static final Logger LOGGER = LoggerFactory.getLogger(DeadJobChecker.class);
|
|
|
|
|
|
- private static JobMongoRepository jobRepository;
|
|
|
|
|
|
+ private JobMongoRepository jobRepository;
|
|
// 5 分钟没有收到反馈信息 (并且该节点不存在了),表示这个任务已经死掉了
|
|
// 5 分钟没有收到反馈信息 (并且该节点不存在了),表示这个任务已经死掉了
|
|
private static final long MAX_DEAD_CHECK_TIME = 5 * 60 * 1000;
|
|
private static final long MAX_DEAD_CHECK_TIME = 5 * 60 * 1000;
|
|
// 5 分钟没有收到反馈信息 并且该节点存在, 那么主动去询问taskTracker 这个任务是否在执行, 如果没有,则表示这个任务已经死掉了
|
|
// 5 分钟没有收到反馈信息 并且该节点存在, 那么主动去询问taskTracker 这个任务是否在执行, 如果没有,则表示这个任务已经死掉了
|
|
@@ -45,8 +47,14 @@ public class DeadJobChecker {
|
|
|
|
|
|
private final ScheduledExecutorService FIXED_EXECUTOR_SERVICE = Executors.newScheduledThreadPool(1);
|
|
private final ScheduledExecutorService FIXED_EXECUTOR_SERVICE = Executors.newScheduledThreadPool(1);
|
|
|
|
|
|
- public DeadJobChecker() {
|
|
|
|
- jobRepository = SingletonBeanContext.getBean(JobMongoRepository.class);
|
|
|
|
|
|
+ private Application application;
|
|
|
|
+ private ChannelManager channelManager;
|
|
|
|
+ private CommandWrapper commandWrapper;
|
|
|
|
+
|
|
|
|
+ public DeadJobChecker(Application application) {
|
|
|
|
+ this.application = application;
|
|
|
|
+ this.channelManager = application.getAttribute(Constants.CHANNEL_MANAGER);
|
|
|
|
+ this.commandWrapper = application.getCommandWrapper();
|
|
}
|
|
}
|
|
|
|
|
|
public void start() {
|
|
public void start() {
|
|
@@ -56,9 +64,9 @@ public class DeadJobChecker {
|
|
try {
|
|
try {
|
|
// 查询出所有死掉的任务 (其实可以直接在数据库中fix的, 查询出来主要是为了日志打印)
|
|
// 查询出所有死掉的任务 (其实可以直接在数据库中fix的, 查询出来主要是为了日志打印)
|
|
// 一般来说这个是没有多大的,我就不分页去查询了
|
|
// 一般来说这个是没有多大的,我就不分页去查询了
|
|
- List<JobPo> jobPos = jobRepository.getDeadJob(MAX_DEAD_CHECK_TIME);
|
|
|
|
|
|
+ List<JobPo> jobPos = getJobRepository().getDeadJob(MAX_DEAD_CHECK_TIME);
|
|
if (jobPos != null && jobPos.size() > 0) {
|
|
if (jobPos != null && jobPos.size() > 0) {
|
|
- List<Node> nodes = NodeManager.getNodeList(NodeType.TASK_TRACKER);
|
|
|
|
|
|
+ List<Node> nodes = application.getNodeManager().getNodeList(NodeType.TASK_TRACKER);
|
|
HashSet<String/*identity*/> identities = new HashSet<String>();
|
|
HashSet<String/*identity*/> identities = new HashSet<String>();
|
|
if (CollectionUtils.isNotEmpty(nodes)) {
|
|
if (CollectionUtils.isNotEmpty(nodes)) {
|
|
for (Node node : nodes) {
|
|
for (Node node : nodes) {
|
|
@@ -85,12 +93,12 @@ public class DeadJobChecker {
|
|
}
|
|
}
|
|
|
|
|
|
if (CollectionUtils.isNotEmpty(timeoutMap)) {
|
|
if (CollectionUtils.isNotEmpty(timeoutMap)) {
|
|
- RemotingServerDelegate remotingServer = RemotingServerManager.getRemotingServer();
|
|
|
|
|
|
+ RemotingServerDelegate remotingServer = application.getAttribute(Constants.REMOTING_SERVER);
|
|
for (Map.Entry<TaskTrackerNode, List<String>> entry : timeoutMap.entrySet()) {
|
|
for (Map.Entry<TaskTrackerNode, List<String>> entry : timeoutMap.entrySet()) {
|
|
TaskTrackerNode taskTrackerNode = entry.getKey();
|
|
TaskTrackerNode taskTrackerNode = entry.getKey();
|
|
- ChannelWrapper channelWrapper = ChannelManager.getChannel(taskTrackerNode.getNodeGroup(), NodeType.TASK_TRACKER, taskTrackerNode.getIdentity());
|
|
|
|
|
|
+ ChannelWrapper channelWrapper = channelManager.getChannel(taskTrackerNode.getNodeGroup(), NodeType.TASK_TRACKER, taskTrackerNode.getIdentity());
|
|
if (channelWrapper != null && channelWrapper.getChannel() != null && channelWrapper.isOpen()) {
|
|
if (channelWrapper != null && channelWrapper.getChannel() != null && channelWrapper.isOpen()) {
|
|
- JobAskRequest requestBody = new JobAskRequest();
|
|
|
|
|
|
+ JobAskRequest requestBody = commandWrapper.wrapper(new JobAskRequest());
|
|
requestBody.setJobIds(entry.getValue());
|
|
requestBody.setJobIds(entry.getValue());
|
|
RemotingCommand request = RemotingCommand.createRequestCommand(JobProtos.RequestCode.JOB_ASK.code(), requestBody);
|
|
RemotingCommand request = RemotingCommand.createRequestCommand(JobProtos.RequestCode.JOB_ASK.code(), requestBody);
|
|
RemotingCommand response = remotingServer.invokeSync(channelWrapper.getChannel(), request);
|
|
RemotingCommand response = remotingServer.invokeSync(channelWrapper.getChannel(), request);
|
|
@@ -123,9 +131,9 @@ public class DeadJobChecker {
|
|
*
|
|
*
|
|
* @param node
|
|
* @param node
|
|
*/
|
|
*/
|
|
- public static void fixedDeadLock(Node node) {
|
|
|
|
|
|
+ public void fixedDeadLock(Node node) {
|
|
try {
|
|
try {
|
|
- List<JobPo> jobPos = jobRepository.getJobByTaskTracker(node.getIdentity());
|
|
|
|
|
|
+ List<JobPo> jobPos = getJobRepository().getJobByTaskTracker(node.getIdentity());
|
|
if (CollectionUtils.isNotEmpty(jobPos)) {
|
|
if (CollectionUtils.isNotEmpty(jobPos)) {
|
|
for (JobPo jobPo : jobPos) {
|
|
for (JobPo jobPo : jobPos) {
|
|
fixedDeadJob(jobPo);
|
|
fixedDeadJob(jobPo);
|
|
@@ -136,12 +144,19 @@ public class DeadJobChecker {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- private static void fixedDeadJob(JobPo jobPo) {
|
|
|
|
- jobRepository.setJobRunnable(jobPo);
|
|
|
|
|
|
+ private void fixedDeadJob(JobPo jobPo) {
|
|
|
|
+ getJobRepository().setJobRunnable(jobPo);
|
|
JobLogger.log(jobPo, LogType.FIXED_DEAD);
|
|
JobLogger.log(jobPo, LogType.FIXED_DEAD);
|
|
LOGGER.info("修复死掉的任务成功! {}", jobPo);
|
|
LOGGER.info("修复死掉的任务成功! {}", jobPo);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ private JobMongoRepository getJobRepository() {
|
|
|
|
+ if (jobRepository == null) {
|
|
|
|
+ jobRepository = SingletonBeanContext.getBean(JobMongoRepository.class);
|
|
|
|
+ }
|
|
|
|
+ return jobRepository;
|
|
|
|
+ }
|
|
|
|
+
|
|
public void stop() {
|
|
public void stop() {
|
|
FIXED_EXECUTOR_SERVICE.shutdown();
|
|
FIXED_EXECUTOR_SERVICE.shutdown();
|
|
}
|
|
}
|