package com.xebialabs.xlrelease.scheduler

import com.xebialabs.xlplatform.cluster.ClusterMode.Standalone
import com.xebialabs.xlrelease.actors.{ActorSystemHolder, ReleaseActorService}
import com.xebialabs.xlrelease.config.XlrConfig
import com.xebialabs.xlrelease.repository.JobRunnerRepository
import com.xebialabs.xlrelease.spring.configuration.ExecutorNames.TASK_EXECUTION_EXECUTOR_NAME
import com.xebialabs.xlrelease.scheduler.domain.LocalJobRunner
import com.xebialabs.xlrelease.scheduler.service.JobService
import com.xebialabs.xlrelease.scheduler.workers.CompositeWorker
import com.xebialabs.xlrelease.service.{XlrServiceLifecycle, XlrServiceLifecycleOrder}
import com.xebialabs.xlrelease.user.User
import org.springframework.beans.factory.annotation.Qualifier
import org.springframework.stereotype.Service

import java.util.concurrent.atomic.AtomicBoolean
import scala.collection.mutable

/**
 * Lifecycle-managed service that owns the local job queue and the worker threads consuming it.
 *
 * On start it resets any previous state, optionally recovers persisted jobs and then schedules
 * `maxThreadsCount` [[WorkerThread]] instances on the task execution thread pool. It also exposes
 * thin entry points to submit, replace and abort jobs.
 */
@Service
class WorkManager(val xlrConfig: XlrConfig,
                  val jobQueue: JobQueue,
                  val compositeWorker: CompositeWorker,
                  val jobService: JobService,
                  val jobRunnerRepository: JobRunnerRepository,
                  val releaseActorService: ReleaseActorService,
                  val actorSystemHolder: ActorSystemHolder,
                  @Qualifier(TASK_EXECUTION_EXECUTOR_NAME)
                  val threadPool: RestartableExecutorService
                 ) extends JobRecoveryLogic with XlrServiceLifecycle {

  // number of worker threads to schedule; read from configuration on first use
  private lazy val maxThreadsCount: Int = xlrConfig.executors.scheduler.maxThreadsCount

  // guards doStop() so that only one shutdown sequence runs at a time
  private val shutdownInProgress: AtomicBoolean = new AtomicBoolean(false)
  // workers created by the last startThreads() call, kept so that they can be stopped later
  private val workerThreads: mutable.Buffer[WorkerThread] = mutable.Buffer()

  override def getOrder(): Int = XlrServiceLifecycleOrder.WORK_MANAGER

  /**
   * Starts the work manager: stops whatever was running, empties and starts the local job queue,
   * recovers persisted jobs (unless maintenance mode is enabled) and finally starts the worker threads.
   */
  override def doStart(): Unit = {
    // When the work manager is started it must make sure the local job queue is not polluted,
    // so the first thing it does is call stop.
    // Previously submitted, persisted local jobs are then re-submitted (recovered) so that queued jobs,
    // abort scripts and scheduled jobs resume; the worker threads are started after recovery.
    doStop()
    logger.info(s"Starting work manager with $maxThreadsCount threads")
    // make sure that local job queue is cleaned up before we start
    // (doStop() may have been skipped if another shutdown was in progress)
    jobQueue.cancelIf(_ => true)
    jobQueue.start()
    if (xlrConfig.maintenanceModeEnabled) {
      logger.debug("[MAINTENANCE MODE] Job recovery - DISABLED")
    } else {
      recoverJobs()
    }
    startThreads()
    logger.info("Started work manager")
  }

  /**
   * Recovers persisted jobs according to the configured cluster mode.
   *
   * In standalone mode all jobs are recovered; otherwise only the jobs of this node and the jobs
   * without a node id. Any failure is logged and not propagated, so the caller continues.
   */
  def recoverJobs(): Unit = {
    // this is 1st entry point executed only once when node is started
    try {
      logger.debug("Job recovery started")
      xlrConfig.cluster.mode match {
        case Standalone =>
          // special case if server was previously started in clustered mode we have to recover all jobs, including those from other nodes
          recoverAllJobs()
        case _ =>
          recoverJobs(nodeId)
          // jobs with nodeId == null are created by upgraders
          // reason why nodeId is null: at the creation time we don't know nodeId yet
          recoverJobs(null)
      }
      logger.debug("Job recovery completed, proceeding with startup")
    } catch {
      // NOTE(review): Throwable also covers fatal JVM errors; kept as-is because recovery failure
      // is evidently meant to be best-effort - confirm whether NonFatal would be sufficient
      case t: Throwable =>
        logger.error("Job recovery failed", t)
    }
  }

  /**
   * Enables the thread pool and schedules `maxThreadsCount` fresh worker threads on it.
   * Also clears the shutdown guard and forgets any previously tracked workers.
   */
  private def startThreads(): Unit = {
    shutdownInProgress.set(false)
    threadPool.enable()
    workerThreads.clear()
    for (_ <- 0 until maxThreadsCount) {
      val workerThread = new WorkerThread(jobQueue, compositeWorker)
      workerThreads += workerThread
      threadPool.execute(workerThread)
    }
  }

  /**
   * Asks every tracked worker to stop, forgets them and stops the job queue.
   */
  private def stopWorkers(): Unit = {
    workerThreads.foreach(_.stop())
    workerThreads.clear()
    jobQueue.stop()
  }


  /**
   * Stops the workers, shuts down the thread pool and cancels everything left in the local job queue.
   *
   * If another shutdown is already running the call is skipped and only logged. The shutdown guard is
   * always released when the sequence ends, even if one of the steps throws, so that a later stop
   * (or the stop performed at the beginning of `doStart`) is not skipped forever.
   */
  def doStop(): Unit = {
    if (shutdownInProgress.compareAndSet(false, true)) {
      try {
        stopWorkers()
        threadPool.shutdown()
        // cleanup local job queue only once we gave enough time to worker threads to consume stop msgs
        jobQueue.cancelIf(_ => true)
      } finally {
        // release the guard on failure as well; the exception itself still propagates to the caller
        shutdownInProgress.set(false)
      }
    } else {
      logger.info(s"skipping shutdown ${serviceName()}, because shutdown already happened or in progress")
    }
  }

  /** Submits the job to the local job queue. */
  def submit(job: Job): Unit = {
    jobQueue.submit(job)
  }

  /** Replaces the job in the local job queue. */
  def replace(job: Job): Unit = {
    jobQueue.replace(job)
  }

  /**
   * Aborts the job(s) belonging to the given task.
   *
   * @param taskId id of the task whose jobs should be aborted
   */
  def abortJobByTaskId(taskId: String): Unit = {
    logger.debug(s"Aborting scheduled job $taskId")
    // the runner request looks the job rows up by task id, so it has to happen before they are deleted
    requestRunnerToAbortTask(taskId)
    jobService.deleteByTaskId(taskId)
    // finally drop matching task jobs that are still waiting in the local queue
    jobQueue.cancelIf {
      case job: TaskJob[_] => job.taskId.equals(taskId)
      case _ => false
    }
  }

  /**
   * For every persisted job row of the task, asks the responsible runner to abort it.
   *
   * Container task jobs:
   *  - RESERVED or RUNNING with a runner id: the runner is looked up and asked to abort the job
   *    (an error is logged if the runner cannot be found); a RESERVED job additionally gets its
   *    task failed asynchronously.
   *  - QUEUED: the task is failed asynchronously.
   *  - anything else: nothing is done.
   *
   * All other job types are aborted through [[LocalJobRunner]].
   */
  private def requestRunnerToAbortTask(taskId: String): Unit = {
    jobService.findByTaskId(taskId).foreach { jobRow =>
      if (jobRow.jobType == JobType.CONTAINER_TASK) {
        if (Seq(JobStatus.RESERVED, JobStatus.RUNNING).contains(jobRow.status) && null != jobRow.runnerId) {
          jobRunnerRepository.findRunner(jobRow.runnerId) match {
            case Some(runner) => runner.abortJob(jobRow.id, jobRow.executionId)
            case None => logger.error(s"Can't find runner [${jobRow.runnerId}]. Can't request runner to abort the job [${jobRow.id}]")
          }
          if (jobRow.status == JobStatus.RESERVED) {
            releaseActorService.failTaskAsync(taskId, "Task was aborted", User.SYSTEM, None)
          }
        } else if (jobRow.status == JobStatus.QUEUED) {
          releaseActorService.failTaskAsync(taskId, "Task was aborted", User.SYSTEM, None)
        }
      } else {
        LocalJobRunner.abortJob(jobRow.id, jobRow.executionId)
      }
    }
  }

}
