diff --git a/clients/job1/autograde-Makefile b/clients/job1/autograde-Makefile index d2865ec3..59f40e4b 100644 --- a/clients/job1/autograde-Makefile +++ b/clients/job1/autograde-Makefile @@ -1,4 +1,4 @@ autograde: - ./hello.sh + bash hello.sh diff --git a/clients/job2/autograde-Makefile b/clients/job2/autograde-Makefile index d2865ec3..59f40e4b 100644 --- a/clients/job2/autograde-Makefile +++ b/clients/job2/autograde-Makefile @@ -1,4 +1,4 @@ autograde: - ./hello.sh + bash hello.sh diff --git a/clients/job4/autograde-Makefile b/clients/job4/autograde-Makefile index b7c98a20..ec749919 100644 --- a/clients/job4/autograde-Makefile +++ b/clients/job4/autograde-Makefile @@ -1,5 +1,5 @@ autograde: - (./hello.sh; exit 2) + (bash hello.sh; exit 2) diff --git a/clients/job5/autograde-Makefile b/clients/job5/autograde-Makefile index d2865ec3..c6d06dd8 100644 --- a/clients/job5/autograde-Makefile +++ b/clients/job5/autograde-Makefile @@ -1,4 +1,2 @@ autograde: - ./hello.sh - - + bash hello.sh \ No newline at end of file diff --git a/clients/job6/autograde-Makefile b/clients/job6/autograde-Makefile index d2865ec3..59f40e4b 100644 --- a/clients/job6/autograde-Makefile +++ b/clients/job6/autograde-Makefile @@ -1,4 +1,4 @@ autograde: - ./hello.sh + bash hello.sh diff --git a/clients/job7/autograde-Makefile b/clients/job7/autograde-Makefile index 0361c6e2..3a263b4b 100644 --- a/clients/job7/autograde-Makefile +++ b/clients/job7/autograde-Makefile @@ -1,5 +1,5 @@ autograde: - ./bug + bash bug diff --git a/config.template.py b/config.template.py index d12f5919..9a187b6c 100644 --- a/config.template.py +++ b/config.template.py @@ -29,7 +29,7 @@ class Config: LOGLEVEL = logging.DEBUG # Courselabs directory. Must be created before starting Tango - COURSELABS = "" + COURSELABS = "courselabs" # VMMS to use. Must be set to a VMMS implemented in vmms/ before # starting Tango. 
Options are: "localSSH", "tashiSSH", "ec2SSH" @@ -62,6 +62,16 @@ class Config: RUNJOB_TIMEOUT = 60 COPYOUT_TIMEOUT = 30 + # Docker constants + BOOT2DOCKER_INIT_TIMEOUT = 5 + BOOT2DOCKER_START_TIMEOUT = 30 + BOOT2DOCKER_ENV_TIMEOUT = 5 + DOCKER_IMAGE_BUILD_TIMEOUT = 300 + DOCKER_RM_TIMEOUT = 5 + # Must be absolute path with trailing slash + # Default value of '*'' points this path to /path/to/Tango/volumes/ + DOCKER_VOLUME_PATH = '*' + # Maximum size for output file in bytes MAX_OUTPUT_FILE_SIZE = 1000 * 1024 @@ -88,10 +98,12 @@ class Config: POOL_SIZE = 2 # Path for tashi images - TASHI_IMAGE_PATH = "/raid/tashi/images/" + TASHI_IMAGE_PATH = '' + # Optionally log finer-grained timing information - LOG_TIMING = True + LOG_TIMING = False + # Largest job ID MAX_JOBID = 500 diff --git a/jobManager.py b/jobManager.py index c56777dc..ae7d0bbd 100644 --- a/jobManager.py +++ b/jobManager.py @@ -89,6 +89,9 @@ def __manage(self): elif Config.VMMS_NAME == "ec2SSH": from vmms.ec2SSH import Ec2SSH vmms = Ec2SSH() + elif Config.VMMS_NAME == "localDocker": + from vmms.localDocker import LocalDocker + vmms = LocalDocker() vmms = {Config.VMMS_NAME: vmms} preallocator = Preallocator(vmms) diff --git a/restful-tango/tangoREST.py b/restful-tango/tangoREST.py index cbc2f8a1..12935836 100644 --- a/restful-tango/tangoREST.py +++ b/restful-tango/tangoREST.py @@ -67,6 +67,12 @@ class TangoREST: def __init__(self): + logging.basicConfig( + filename = self.LOGFILE, + format = "%(levelname)s|%(asctime)s|%(name)s|%(message)s", + level = Config.LOGLEVEL + ) + vmms = None if Config.VMMS_NAME == "localSSH": @@ -78,6 +84,10 @@ def __init__(self): elif Config.VMMS_NAME == "ec2SSH": from vmms.ec2SSH import Ec2SSH vmms = Ec2SSH() + elif Config.VMMS_NAME == "localDocker": + from vmms.localDocker import LocalDocker + vmms = LocalDocker() + self.vmms = {Config.VMMS_NAME: vmms} self.preallocator = Preallocator(self.vmms) @@ -90,11 +100,13 @@ def __init__(self): JobManager(self.queue, self.vmms, 
self.preallocator) self.tango = TangoServer(self.queue, self.preallocator, self.vmms) + logging.basicConfig( filename=self.LOGFILE, format="%(levelname)s|%(asctime)s|%(name)s|%(message)s", level=Config.LOGLEVEL ) + logging.getLogger('boto').setLevel(logging.INFO) self.log = logging.getLogger("TangoREST") self.log.info("Starting RESTful Tango server") diff --git a/tangod.py b/tangod.py index 81839904..4db3799a 100755 --- a/tangod.py +++ b/tangod.py @@ -97,7 +97,7 @@ def getJobs(self, item): return self.jobQueue.deadJobs.values() elif item == 0: # return the list of live jobs - return self.jobQueue.jobQueue.values() + return self.jobQueue.liveJobs.values() else: # invalid parameter return [] @@ -207,7 +207,7 @@ def resetTango(self, vmms): log.warning("Killed these %s VMs on restart: %s" % (vmms_name, namelist)) - for job in self.jobQueue.jobQueue.values(): + for job in self.jobQueue.liveJobs.values(): self.log.debug("job: %s, assigned: %s" % (str(job.name), str(job.assigned))) diff --git a/vmms/Dockerfile b/vmms/Dockerfile new file mode 100644 index 00000000..e9053445 --- /dev/null +++ b/vmms/Dockerfile @@ -0,0 +1,34 @@ +# Autolab - autograding docker image + +FROM ubuntu:14.04 +MAINTAINER Mihir Pandya + +RUN apt-get update +RUN apt-get install -y gcc +RUN apt-get install -y make +RUN apt-get install -y build-essential + +# Install autodriver +WORKDIR /home +RUN useradd autolab +RUN useradd autograde +RUN mkdir autolab autograde output +RUN chown autolab:autolab autolab +RUN chown autolab:autolab output +RUN chown autograde:autograde autograde +RUN apt-get install -y git +RUN git clone https://github.com/autolab/Tango.git +WORKDIR Tango/autodriver +RUN make clean && make +RUN cp autodriver /usr/bin/autodriver +RUN chmod +s /usr/bin/autodriver + +# Clean up +WORKDIR /home +RUN apt-get remove -y git +RUN apt-get -y autoremove +RUN rm -rf Tango/ + +# Check installation +RUN ls -l /home +RUN which autodriver \ No newline at end of file diff --git a/vmms/localDocker.py 
b/vmms/localDocker.py
new file mode 100644
index 00000000..f48e7f3c
--- /dev/null
+++ b/vmms/localDocker.py
@@ -0,0 +1,275 @@
+#
+# localDocker.py - Implements the Tango VMMS interface to run Tango jobs in
+#                docker containers. In this context, VMs are docker containers.
+#
+import random, subprocess, re, time, logging, threading, os, sys, shutil
+import config
+from tangoObjects import TangoMachine
+
+
+def timeout(command, time_out=1):
+    """ timeout - Run a unix command with a timeout.
+
+    command is an argv list handed to subprocess.Popen; its output is
+    discarded. Returns -1 if the command was still running after
+    time_out seconds and had to be killed, otherwise the command's
+    exit status (typically 0 for success, 1-255 for failure).
+    """
+    # Discard output; the with-block closes the handle on every path.
+    with open(os.devnull, 'w') as devnull:
+        p = subprocess.Popen(command,
+                             stdout=devnull,
+                             stderr=subprocess.STDOUT)
+
+        # Wait for the command to complete
+        t = 0.0
+        while t < time_out and p.poll() is None:
+            time.sleep(config.Config.TIMER_POLL_INTERVAL)
+            t += config.Config.TIMER_POLL_INTERVAL
+
+        # Determine why the while loop terminated
+        returncode = p.poll()
+        if returncode is None:
+            # Still running: SIGKILL it, then reap it so that no
+            # zombie process is left behind.
+            p.kill()
+            p.wait()
+            returncode = -1
+    return returncode
+
+
+def timeoutWithReturnStatus(command, time_out, returnValue=0):
+    """ timeoutWithReturnStatus - Run a Unix command again and again
+    until it exits with returnValue or time_out seconds have been
+    spent waiting for it.
+
+    Returns returnValue on success. On timeout, returns the last exit
+    status obtained from the command, or -1 if no run ever finished.
+    """
+    lastStatus = -1
+    with open(os.devnull, 'w') as devnull:
+        p = subprocess.Popen(command,
+                             stdout=devnull,
+                             stderr=subprocess.STDOUT)
+        t = 0.0
+        while t < time_out:
+            ret = p.poll()
+            if ret is None:
+                time.sleep(config.Config.TIMER_POLL_INTERVAL)
+                t += config.Config.TIMER_POLL_INTERVAL
+            elif ret == returnValue:
+                return ret
+            else:
+                # Wrong status: remember it and run the command again.
+                lastStatus = ret
+                p = subprocess.Popen(command,
+                                     stdout=devnull,
+                                     stderr=subprocess.STDOUT)
+
+        # Timed out. Record the final attempt's status if it finished;
+        # otherwise kill and reap it instead of leaving it running.
+        ret = p.poll()
+        if ret is None:
+            p.kill()
+            p.wait()
+        else:
+            lastStatus = ret
+    return lastStatus
+
+
+class LocalDocker:
+
+    def __init__(self):
+        """ Checks that the docker settings this VMMS relies on are
+        present in config. Logs an error and exits with status 1 if
+        DOCKER_VOLUME_PATH is missing or empty.
+        """
+        self.log = logging.getLogger("LocalDocker")
+
+        # Check that important docker constants are defined in config
+        if not getattr(config.Config, 'DOCKER_VOLUME_PATH', ''):
+            self.log.error('DOCKER_VOLUME_PATH not defined in config.')
+            sys.exit(1)
+
+    def instanceName(self, id, name):
+        """ instanceName - Constructs a VM instance name. Always use
+        this function when you need a VM instance name. Never generate
+        instance names manually.
+        """
+        return "%s-%s-%s" % (config.Config.PREFIX, id, name)
+
+    def getVolumePath(self, instanceName):
+        """ getVolumePath - Returns the host directory, with a trailing
+        slash, that backs the volume of the given instance. With an
+        empty instanceName this is the directory holding all volumes.
+        A '*' in DOCKER_VOLUME_PATH selects volumes/ under the current
+        working directory.
+        """
+        volumePath = config.Config.DOCKER_VOLUME_PATH
+        if '*' in volumePath:
+            volumePath = os.path.join(os.getcwd(), 'volumes')
+        # Joining with a final '' yields exactly one trailing slash,
+        # whether or not the configured path already ends with one.
+        return os.path.join(volumePath, instanceName, '')
+
+    def domainName(self, vm):
+        """ Returns the domain name that is stored in the vm
+        instance.
+        """
+        return vm.domain_name
+
+    #
+    # VMMS API functions
+    #
+    def initializeVM(self, vm):
+        """ initializeVM - Nothing to do for initializeVM
+        """
+        return vm
+
+    def waitVM(self, vm, max_secs):
+        """ waitVM - Nothing to do for waitVM
+        """
+        return
+
+    def copyIn(self, vm, inputFiles):
+        """ copyIn - Create a directory to be mounted as a volume
+        for the docker container and copy the input files into it.
+        Each input file is copied from its localFile to its destFile
+        inside the volume. Returns 0.
+        """
+        instanceName = self.instanceName(vm.id, vm.image)
+        volumePath = self.getVolumePath(instanceName)
+
+        # Create a fresh volume. A leftover directory with the same
+        # name would make os.makedirs raise and could leak stale files
+        # into this run, so remove it first.
+        if os.path.isdir(volumePath):
+            shutil.rmtree(volumePath)
+        os.makedirs(volumePath)
+        for inputFile in inputFiles:
+            dest = volumePath + inputFile.destFile
+            shutil.copy(inputFile.localFile, dest)
+            self.log.debug('Copied in file %s to %s' %
+                           (inputFile.localFile, dest))
+        return 0
+
+    def runJob(self, vm, runTimeout, maxOutputFileSize):
+        """ runJob - Run a docker container by doing the following:
+        - mount the directory corresponding to this job at /home/mount
+          in the container and copy its contents to autolab/
+        - run autodriver with the configured ulimits and timeout as
+          the autolab user, capturing its output in output/feedback
+        - copy output/feedback back into the mounted directory
+        Returns the exit status of `docker run`, or -1 if it did not
+        finish within runTimeout seconds.
+        """
+        # NOTE(review): maxOutputFileSize is not used; the limit passed
+        # to autodriver comes from config.Config.MAX_OUTPUT_FILE_SIZE.
+        instanceName = self.instanceName(vm.id, vm.image)
+        volumePath = self.getVolumePath(instanceName)
+        args = ['docker', 'run', '--name', instanceName, '-v',
+                '%s:%s' % (volumePath, '/home/mount'),
+                vm.image, 'sh', '-c']
+
+        # Use POSIX redirection. '&>' is a bash extension: a POSIX sh
+        # reads 'cmd &> file' as 'cmd &' followed by '> file', which
+        # backgrounds autodriver and leaves the feedback file empty.
+        autodriverCmd = ('autodriver -u %d -f %d -t %d -o %d autolab '
+                         '> output/feedback 2>&1' %
+                         (config.Config.VM_ULIMIT_USER_PROC,
+                          config.Config.VM_ULIMIT_FILE_SIZE,
+                          runTimeout,
+                          config.Config.MAX_OUTPUT_FILE_SIZE))
+
+        shellCmd = ('cp -r mount/* autolab/; su autolab -c "%s"; '
+                    'cp output/feedback mount/feedback' % autodriverCmd)
+
+        args = args + [shellCmd]
+        self.log.debug('Running job: %s' % str(args))
+        ret = timeout(args, runTimeout)
+        self.log.debug('runJob returning %d' % ret)
+
+        return ret
+
+    def copyOut(self, vm, destFile):
+        """ copyOut - Move the autograder feedback from the job's
+        volume to destFile on the Tango host. Then, destroy the
+        container. Containers are never reused. Returns 0.
+        """
+        instanceName = self.instanceName(vm.id, vm.image)
+        volumePath = self.getVolumePath(instanceName)
+        try:
+            shutil.move(volumePath + 'feedback', destFile)
+            self.log.debug('Copied feedback file to %s' % destFile)
+        finally:
+            # Clean up even when no feedback file could be moved, so a
+            # failed job does not leave its container and volume behind.
+            self.destroyVM(vm)
+
+        return 0
+
+    def destroyVM(self, vm):
+        """ destroyVM - Delete the docker container and its volume.
+        """
+        instanceName = self.instanceName(vm.id, vm.image)
+        # Do a hard kill on corresponding docker container.
+        # Return status does not matter.
+        timeout(['docker', 'rm', '-f', instanceName],
+                config.Config.DOCKER_RM_TIMEOUT)
+        # Destroy corresponding volume if it exists. Testing the path
+        # itself also works when no volumes directory exists yet.
+        volumePath = self.getVolumePath(instanceName)
+        if os.path.isdir(volumePath):
+            shutil.rmtree(volumePath)
+            self.log.debug('Deleted volume %s' % instanceName)
+        return
+
+    def safeDestroyVM(self, vm):
+        """ safeDestroyVM - Delete the docker container and make
+        sure it is removed. Gives up and logs an error once
+        DESTROY_SECS seconds have passed.
+        """
+        start_time = time.time()
+        while self.existsVM(vm):
+            if (time.time() - start_time > config.Config.DESTROY_SECS):
+                self.log.error("Failed to safely destroy container %s"
+                               % vm.name)
+                return
+            self.destroyVM(vm)
+        return
+
+    def getVMs(self):
+        """ getVMs - Lists the volume directories and returns one
+        TangoMachine for each directory named PREFIX-id-image.
+        """
+        machines = []
+        volumePath = self.getVolumePath('')
+        if not os.path.isdir(volumePath):
+            # No volume has been created yet, so there are no machines.
+            return machines
+        prefix = "%s-" % config.Config.PREFIX
+        for volume in os.listdir(volumePath):
+            # Compare literally; PREFIX is not a regular expression.
+            if not volume.startswith(prefix):
+                continue
+            # Split off only the id, so that a '-' in PREFIX or in the
+            # image name cannot shift the fields.
+            fields = volume[len(prefix):].split('-', 1)
+            if len(fields) != 2:
+                continue
+            machine = TangoMachine()
+            machine.vmms = 'localDocker'
+            machine.name = volume
+            machine.id, machine.image = fields
+            machines.append(machine)
+        return machines
+
+    def existsVM(self, vm):
+        """ existsVM - Executes `docker inspect CONTAINER`, which returns
+        a non-zero status upon not finding a container.
+        """
+        # Look the container up under the image-based name that runJob
+        # and destroyVM use; a name built from vm.name may not match.
+        instanceName = self.instanceName(vm.id, vm.image)
+        ret = timeout(['docker', 'inspect', instanceName])
+        return ret == 0
diff --git a/vmms/localSSH.py b/vmms/localSSH.py index a376dd8b..22ff69a4 100644 --- a/vmms/localSSH.py +++ b/vmms/localSSH.py @@ -75,6 +75,7 @@ def __init__(self): Checks if the machine is ready to run Tango jobs.
""" self.log = logging.getLogger("LocalSSH") + try: checkBinary = subprocess.check_call(["which", "autodriver"]) checkAutogradeUser = subprocess.check_call( @@ -84,6 +85,7 @@ def __init__(self): self.log.error(e) exit(1) + def instanceName(self, id, name): """ instanceName - Constructs a VM instance name. Always use this function when you need a VM instance name. Never generate diff --git a/worker.py b/worker.py index 6c5114a5..13a92de8 100644 --- a/worker.py +++ b/worker.py @@ -82,6 +82,7 @@ def rescheduleJob(self, hdrfile, ret, err): # Here is where we give up else: self.jobQueue.makeDead(self.job.id, err) + self.appendMsg( hdrfile, "Internal error: Unable to complete job after %d tries. Pleae resubmit" % @@ -93,6 +94,7 @@ def rescheduleJob(self, hdrfile, ret, err): ret["copyin"], ret["runjob"], ret["copyout"])) + self.catFiles(hdrfile, self.job.outputFile) self.detachVM(return_vm=False, replace_vm=True) self.notifyServer(self.job) @@ -301,6 +303,7 @@ def run(self): else: # This should never happen msg = "Error: Unknown autodriver error (status=%d)" % ( ret["runjob"]) + elif ret["copyout"] != 0: msg += "Error: Copy out from VM failed (status=%d)" % ( ret["copyout"])