From 8c3c0116130c830c1cc3de53c176ba8add387cb7 Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Sat, 14 Mar 2015 18:30:00 -0400 Subject: [PATCH 01/25] Implemented boot2docker initialization of OS X. --- vmms/dockerSSH.py | 284 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 284 insertions(+) create mode 100644 vmms/dockerSSH.py diff --git a/vmms/dockerSSH.py b/vmms/dockerSSH.py new file mode 100644 index 00000000..296f14a3 --- /dev/null +++ b/vmms/dockerSSH.py @@ -0,0 +1,284 @@ +# +# dockerSSH.py - Implements the Tango VMMS interface to run Tango jobs in +# docker containers. +# +import random, subprocess, re, time, logging, threading, os, sys + +import config + +def timeout(command, time_out=1): + """ timeout - Run a unix command with a timeout. Return -1 on + timeout, otherwise return the return value from the command, which + is typically 0 for success, 1-255 for failure. + """ + + # Launch the command + p = subprocess.Popen(command, + stdout=open("/dev/null", 'w'), + stderr=subprocess.STDOUT) + + # Wait for the command to complete + t = 0.0 + while t < time_out and p.poll() is None: + time.sleep(config.Config.TIMER_POLL_INTERVAL) + t += config.Config.TIMER_POLL_INTERVAL + + # Determine why the while loop terminated + if p.poll() is None: + subprocess.call(["/bin/kill", "-9", str(p.pid)]) + returncode = -1 + else: + returncode = p.poll() + return returncode + +def timeoutWithReturnStatus(command, time_out, returnValue = 0): + """ timeoutWithReturnStatus - Run a Unix command with a timeout, + until the expected value is returned by the command; On timeout, + return last error code obtained from the command. 
+ """ + p = subprocess.Popen(command, stdout=open("/dev/null", 'w'), stderr=subprocess.STDOUT) + t = 0.0 + while (t < time_out): + ret = p.poll() + if ret is None: + time.sleep(config.Config.TIMER_POLL_INTERVAL) + t += config.Config.TIMER_POLL_INTERVAL + elif ret == returnValue: + return ret + else: + p = subprocess.Popen(command, + stdout=open("/dev/null", 'w'), + stderr=subprocess.STDOUT) + return ret + +# +# User defined exceptions +# + +class DockerSSH: + _SSH_FLAGS = ["-o", "StrictHostKeyChecking no", "-o", "GSSAPIAuthentication no"] + _OS_X = 'darwin' + LOCALHOST = '127.0.0.1' + + def __init__(self): + """ + Checks if the machine is ready to run docker containers. + Initialize boot2docker if running on OS X. + """ + self.log = logging.getLogger("DockerSSH") + try: + # If running on OS X, create a boot2docker VM + if sys.platform is self._OS_X: + self.boot2dockerVM() + self.docker_host_ip = subprocess.check_output(['boot2docker', 'ip']).strip('\n') + else: + self.docker_host_ip = self.LOCALHOST + + self.log.info("Docker host IP is %s" & self.docker_host_ip) + + except Exception as e: + self.log.error(e) + exit(1) + # try: + # checkBinary = subprocess.check_call(["which", "autodriver"]) + # checkAutogradeUser = subprocess.check_call("getent passwd | grep 'autograde'", shell=True) + # except subprocess.CalledProcessError as e: + # print "Local machine has not been bootstrapped for autograding. Please run localBootstrap.sh" + # self.log.error(e) + # exit(1) + + def boot2dockerVM(self): + """ + Initializes and starts a boot2docker VM and sets its environment + variables. If boot2docker VM has already been set up on the machine, + these steps will simply exit gracefully. 
+ """ + init_ret = -1 + start_ret = -1 + env_ret = -1 + + # Initialize boot2docker VM + init_ret = timeout(['boot2docker', 'init'], + config.Config.BOOT2DOCKER_INIT_TIMEOUT) + # Start boot2docker VM + if init_ret == 0: + start_ret = timeout(['boot2docker', 'init'], + config.Config.BOOT2DOCKER_START_TIMEOUT) + # Set environment variable sof boot2docker VM + if start_ret == 0: + env_ret = timeout(['$(boot2docker shellinit)'], + config.Config.BOOT2DOCKER_ENV_TIMEOUT) + + if init_ret != 0: + raise Exception('Could not initialize boot2docker.') + if start_ret != 0: + raise Exception('Could not start boot2docker VM.') + if env_ret != 0: + raise Exception('Could not set environment variables of boot2docker VM.') + + + def instanceName(self, id, name): + """ instanceName - Constructs a VM instance name. Always use + this function when you need a VM instance name. Never generate + instance names manually. + """ + return "%s-%d-%s" % (config.Config.PREFIX, id, name) + + def domainName(self, vm): + """ Returns the domain name that is stored in the vm + instance. + """ + return vm.domain_name + + # + # VMMS API functions + # + def initializeVM(self, vm): + """ initializeVM - Set domain name to localhost + """ + # Create the instance and obtain the reservation + vm.domain_name = "127.0.0.1" + return vm + + def waitVM(self, vm, max_secs): + """ waitVM - Wait at most max_secs for a VM to become + ready. Return error if it takes too long. This should + be immediate since the VM is localhost. 
+ """ + + # First, wait for ping to the vm instance to work + instance_down = 1 + instanceName = self.instanceName(vm.id, vm.name) + start_time = time.time() + domain_name = self.domainName(vm) + while instance_down: + instance_down = subprocess.call("ping -c 1 %s" % (domain_name), + shell=True, + stdout=open('/dev/null', 'w'), + stderr=subprocess.STDOUT) + + # Wait a bit and then try again if we haven't exceeded + # timeout + if instance_down: + time.sleep(config.Config.TIMER_POLL_INTERVAL) + elapsed_secs = time.time() - start_time + if (elapsed_secs > max_secs): + return -1 + + # The ping worked, so now wait for SSH to work before + # declaring that the VM is ready + self.log.debug("VM %s: ping completed" % (vm.name)) + while(True): + + elapsed_secs = time.time() - start_time + + # Give up if the elapsed time exceeds the allowable time + if elapsed_secs > max_secs: + self.log.info("VM %s: SSH timeout after %d secs" % (instanceName, elapsed_secs)) + return -1 + + # If the call to ssh returns timeout (-1) or ssh error + # (255), then success. Otherwise, keep trying until we run + # out of time. 
+ ret = timeout(["ssh"] + LocalSSH._SSH_FLAGS + + ["%s" % (domain_name), + "(:)"], max_secs - elapsed_secs) + + self.log.debug("VM %s: ssh returned with %d" % (instanceName, ret)) + + if (ret != -1) and (ret != 255): + return 0 + + # Sleep a bit before trying again + time.sleep(config.Config.TIMER_POLL_INTERVAL) + + def copyIn(self, vm, inputFiles): + """ copyIn - Copy input files to VM + """ + domain_name = self.domainName(vm) + + # Create a fresh input directory + ret = subprocess.call(["ssh"] + LocalSSH._SSH_FLAGS + + ["%s" % (domain_name), + "(rm -rf autolab; mkdir autolab)"]) + + # Copy the input files to the input directory + for file in inputFiles: + ret = timeout(["scp"] + LocalSSH._SSH_FLAGS + + [file.localFile, "%s:autolab/%s" % + (domain_name, file.destFile)], config.Config.COPYIN_TIMEOUT) + if ret != 0: + return ret + return 0 + + def runJob(self, vm, runTimeout, maxOutputFileSize): + """ runJob - Run the make command on a VM using SSH and + redirect output to file "output". + """ + print "IN RUN JOB!!!" + domain_name = self.domainName(vm) + self.log.debug("runJob: Running job on VM %s" % self.instanceName(vm.id, vm.name)) + # Setting ulimits for VM and running job + runcmd = "/usr/bin/time --output=time.out autodriver -u %d -f %d -t \ + %d -o %d autolab &> output" % ( + config.Config.VM_ULIMIT_USER_PROC, config.Config.VM_ULIMIT_FILE_SIZE, + runTimeout, maxOutputFileSize) + return timeout(["ssh"] + LocalSSH._SSH_FLAGS + + ["%s" % (domain_name), runcmd], runTimeout * 2) + # runTimeout * 2 is a temporary hack. The driver will handle the timout + + def copyOut(self, vm, destFile): + """ copyOut - Copy the file output on the VM to the file + outputFile on the Tango host. + """ + domain_name = self.domainName(vm) + + # Optionally log finer grained runtime info. Adds about 1 sec + # to the job latency, so we typically skip this. 
+ if config.Config.LOG_TIMING: + try: + # regular expression matcher for error message from cat + no_file = re.compile('No such file or directory') + + time_info = subprocess.check_output(['ssh'] + LocalSSH._SSH_FLAGS + + ['%s' % (domain_name), + 'cat time.out']).rstrip('\n') + + # If the output is empty, then ignore it (timing info wasn't + # collected), otherwise let's log it! + if no_file.match(time_info): + # runJob didn't produce an output file + pass + + else: + # remove newline character printed in timing info + # replaces first '\n' character with a space + time_info = re.sub('\n', ' ', time_info, count = 1) + self.log.info('Timing (%s): %s' % (domain_name, time_info)) + + except subprocess.CalledProcessError, re.error: + # Error copying out the timing data (probably runJob failed) + pass + + return timeout(["scp"] + LocalSSH._SSH_FLAGS + + ["%s:output" % (domain_name), destFile], + config.Config.COPYOUT_TIMEOUT) + + def destroyVM(self, vm): + """ destroyVM - Nothing to destroy for local. + """ + return + + def safeDestroyVM(self, vm): + return self.destroyVM(vm) + + def getVMs(self): + """ getVMs - Nothing to return for local. + """ + return [] + + def existsVM(self, vm): + """ existsVM - VM is simply localhost which exists. + """ + return True + From 6cfb53e850963446728211e3f4c0d17e10387071 Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Sun, 15 Mar 2015 02:14:56 -0400 Subject: [PATCH 02/25] Implemented initializeVM and waitVM. 
--- jobManager.py | 3 ++ restful-tango/tangoREST.py | 4 ++ vmms/dockerSSH.py | 104 +++++++++++++++++-------------------- 3 files changed, 55 insertions(+), 56 deletions(-) diff --git a/jobManager.py b/jobManager.py index e187407e..6e15902c 100644 --- a/jobManager.py +++ b/jobManager.py @@ -82,6 +82,9 @@ def __manage(self): elif Config.VMMS_NAME == "ec2SSH": from vmms.ec2SSH import Ec2SSH vmms = Ec2SSH() + elif Config.VMMS_NAME == "dockerSSH": + from vmms.dockerSSH import DockerSSH + vmms = DockerSSH() vmms = {Config.VMMS_NAME: vmms} preallocator = Preallocator(vmms) diff --git a/restful-tango/tangoREST.py b/restful-tango/tangoREST.py index 67653ff3..66332dc8 100644 --- a/restful-tango/tangoREST.py +++ b/restful-tango/tangoREST.py @@ -69,6 +69,10 @@ def __init__(self): elif Config.VMMS_NAME == "ec2SSH": from vmms.ec2SSH import Ec2SSH vmms = Ec2SSH() + elif Config.VMMS_NAME == "dockerSSH": + from vmms.dockerSSH import DockerSSH + vmms = DockerSSH() + self.vmms = {Config.VMMS_NAME: vmms} self.preallocator = Preallocator(self.vmms) diff --git a/vmms/dockerSSH.py b/vmms/dockerSSH.py index 296f14a3..a6fe7c06 100644 --- a/vmms/dockerSSH.py +++ b/vmms/dockerSSH.py @@ -59,6 +59,7 @@ class DockerSSH: _SSH_FLAGS = ["-o", "StrictHostKeyChecking no", "-o", "GSSAPIAuthentication no"] _OS_X = 'darwin' LOCALHOST = '127.0.0.1' + DOCKER_IMAGE = 'mihirpandya/autolab' def __init__(self): """ @@ -79,13 +80,6 @@ def __init__(self): except Exception as e: self.log.error(e) exit(1) - # try: - # checkBinary = subprocess.check_call(["which", "autodriver"]) - # checkAutogradeUser = subprocess.check_call("getent passwd | grep 'autograde'", shell=True) - # except subprocess.CalledProcessError as e: - # print "Local machine has not been bootstrapped for autograding. 
Please run localBootstrap.sh" - # self.log.error(e) - # exit(1) def boot2dockerVM(self): """ @@ -96,18 +90,26 @@ def boot2dockerVM(self): init_ret = -1 start_ret = -1 env_ret = -1 + image_ret = -1 - # Initialize boot2docker VM + self.log.debug("Initializing boot2docker VM.") init_ret = timeout(['boot2docker', 'init'], config.Config.BOOT2DOCKER_INIT_TIMEOUT) - # Start boot2docker VM + + self.log.debug("Starting boot2docker VM.") if init_ret == 0: - start_ret = timeout(['boot2docker', 'init'], + start_ret = timeout(['boot2docker', 'start'], config.Config.BOOT2DOCKER_START_TIMEOUT) - # Set environment variable sof boot2docker VM + + self.log.debug("Setting environment variables for boot2docker VM.") if start_ret == 0: env_ret = timeout(['$(boot2docker shellinit)'], config.Config.BOOT2DOCKER_ENV_TIMEOUT) + + self.log.debug("Pulling the autolab docker image from docker hub.") + if env_ret == 0: + image_ret = timeout(['docker', 'pull', 'mihirpandya/autolab'], + config.Config.DOCKER_IMAGE_TIMEOUT) if init_ret != 0: raise Exception('Could not initialize boot2docker.') @@ -115,6 +117,8 @@ def boot2dockerVM(self): raise Exception('Could not start boot2docker VM.') if env_ret != 0: raise Exception('Could not set environment variables of boot2docker VM.') + if image_ret != 0: + raise Exception('Could not pull autolab docker image from docker hub.') def instanceName(self, id, name): @@ -134,10 +138,16 @@ def domainName(self, vm): # VMMS API functions # def initializeVM(self, vm): - """ initializeVM - Set domain name to localhost + """ initializeVM - Start running a dockerized autograding container. 
""" - # Create the instance and obtain the reservation - vm.domain_name = "127.0.0.1" + args = ['docker', 'run', '-d'] + args.append('--name') + args.append(self.instanceName(vm.id, vm.name)) + args.append(self.DOCKER_IMAGE) + args.append('/bin/bash') + args.append('-c') + args.append('while true; do sleep 1; done') + subprocess.Popen(args) return vm def waitVM(self, vm, max_secs): @@ -146,51 +156,33 @@ def waitVM(self, vm, max_secs): be immediate since the VM is localhost. """ - # First, wait for ping to the vm instance to work - instance_down = 1 instanceName = self.instanceName(vm.id, vm.name) start_time = time.time() domain_name = self.domainName(vm) - while instance_down: - instance_down = subprocess.call("ping -c 1 %s" % (domain_name), - shell=True, - stdout=open('/dev/null', 'w'), - stderr=subprocess.STDOUT) - - # Wait a bit and then try again if we haven't exceeded - # timeout - if instance_down: - time.sleep(config.Config.TIMER_POLL_INTERVAL) - elapsed_secs = time.time() - start_time - if (elapsed_secs > max_secs): - return -1 - - # The ping worked, so now wait for SSH to work before - # declaring that the VM is ready - self.log.debug("VM %s: ping completed" % (vm.name)) - while(True): - - elapsed_secs = time.time() - start_time - - # Give up if the elapsed time exceeds the allowable time - if elapsed_secs > max_secs: - self.log.info("VM %s: SSH timeout after %d secs" % (instanceName, elapsed_secs)) - return -1 - - # If the call to ssh returns timeout (-1) or ssh error - # (255), then success. Otherwise, keep trying until we run - # out of time. 
- ret = timeout(["ssh"] + LocalSSH._SSH_FLAGS + - ["%s" % (domain_name), - "(:)"], max_secs - elapsed_secs) - - self.log.debug("VM %s: ssh returned with %d" % (instanceName, ret)) - - if (ret != -1) and (ret != 255): - return 0 - - # Sleep a bit before trying again - time.sleep(config.Config.TIMER_POLL_INTERVAL) + echo_string = 'docker ready' + + while(True): + + elapsed_secs = time.time() - start_time + + # Give up if the elapsed time exceeds the allowable time + if elapsed_secs > max_secs: + self.log.info("Docker %s: Could not reach container after %d seconds." % (instanceName, elapsed_secs)) + return -1 + + # Give the docker container a string to echo back to us. + echo_args = ['docker', 'exec', instanceName] + echo_args.append('/bin/echo') + echo_args.append(echo_string) + echo = subprocess.check_output(echo_args).strip('\n') + + self.log.debug("Docker %s: echo returned with %d" % (instanceName, echo)) + + if echo is echo_string: + return 0 + + # Sleep a bit before trying again + time.sleep(config.Config.TIMER_POLL_INTERVAL) def copyIn(self, vm, inputFiles): """ copyIn - Copy input files to VM From a06c0acf18b3353769dc6dc138845cb1d57ce1f2 Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Sun, 15 Mar 2015 14:16:10 -0400 Subject: [PATCH 03/25] Added Dockerfile to build an autograding docker image. --- Dockerfile | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..806abded --- /dev/null +++ b/Dockerfile @@ -0,0 +1,23 @@ +# Autolab - autograding docker image + +FROM ubuntu:14.04 +MAINTAINER Mihir Pandya + +RUN apt-get update +RUN apt-get install -y gcc +RUN apt-get install -y make +RUN apt-get install -y build-essential + +# Install autodriver +RUN apt-get install -y git +WORKDIR /home +RUN git clone https://github.com/autolab/Tango.git +WORKDIR Tango/autodriver +RUN ls . 
+RUN make clean && make +RUN cp autodriver /usr/bin/autodriver +RUN which autodriver + +# Clean up +RUN apt-get remove -y git +RUN apt-get -y autoremove \ No newline at end of file From b19d374a9946e8e8fd84da9854cd62b321bd4e3a Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Sun, 15 Mar 2015 15:00:22 -0400 Subject: [PATCH 04/25] Implemented copyIn and runJob. Dockerfile creates autograde and autolab user during autodriver installation. --- Dockerfile | 18 ++++++-- vmms/dockerSSH.py | 112 +++++++++++++++++++++++++++++----------------- vmms/localSSH.py | 15 ++++--- 3 files changed, 94 insertions(+), 51 deletions(-) diff --git a/Dockerfile b/Dockerfile index 806abded..a224343c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,15 +9,25 @@ RUN apt-get install -y make RUN apt-get install -y build-essential # Install autodriver -RUN apt-get install -y git WORKDIR /home +RUN useradd autograde +RUN useradd autolab +RUN mkdir autograde +RUN chown autograde autograde +RUN chown :autograde autograde +RUN apt-get install -y git RUN git clone https://github.com/autolab/Tango.git WORKDIR Tango/autodriver RUN ls . RUN make clean && make -RUN cp autodriver /usr/bin/autodriver -RUN which autodriver +COPY autodriver /usr/bin/autodriver # Clean up +WORK /home RUN apt-get remove -y git -RUN apt-get -y autoremove \ No newline at end of file +RUN apt-get -y autoremove +RUN rm -rf Tango/ + +# Check installation +RUN ls -l /home +RUN autodriver \ No newline at end of file diff --git a/vmms/dockerSSH.py b/vmms/dockerSSH.py index a6fe7c06..7c986b4b 100644 --- a/vmms/dockerSSH.py +++ b/vmms/dockerSSH.py @@ -1,6 +1,6 @@ # # dockerSSH.py - Implements the Tango VMMS interface to run Tango jobs in -# docker containers. +# docker containers. In this context, VMs are docker containers. 
# import random, subprocess, re, time, logging, threading, os, sys @@ -36,7 +36,9 @@ def timeoutWithReturnStatus(command, time_out, returnValue = 0): until the expected value is returned by the command; On timeout, return last error code obtained from the command. """ - p = subprocess.Popen(command, stdout=open("/dev/null", 'w'), stderr=subprocess.STDOUT) + p = subprocess.Popen(command, + stdout=open("/dev/null", 'w'), + stderr=subprocess.STDOUT) t = 0.0 while (t < time_out): ret = p.poll() @@ -51,15 +53,23 @@ def timeoutWithReturnStatus(command, time_out, returnValue = 0): stderr=subprocess.STDOUT) return ret +def dockerExec(container, cmd, time_out=1): + """ docerExec - Executes `docker exec container cmd` and + returns output. Container is the name of the docker + container and cmd is a list of commands to run. + """ + command = ['docker', 'exec', container, 'sh', '-c'] + cmd + return timeout(command, time_out) + # # User defined exceptions # class DockerSSH: - _SSH_FLAGS = ["-o", "StrictHostKeyChecking no", "-o", "GSSAPIAuthentication no"] + _SSH_FLAGS = ["-o", "StrictHostKeyChecking no", "-o", + "GSSAPIAuthentication no"] _OS_X = 'darwin' LOCALHOST = '127.0.0.1' - DOCKER_IMAGE = 'mihirpandya/autolab' def __init__(self): """ @@ -82,10 +92,10 @@ def __init__(self): exit(1) def boot2dockerVM(self): - """ - Initializes and starts a boot2docker VM and sets its environment - variables. If boot2docker VM has already been set up on the machine, - these steps will simply exit gracefully. + """ boot2dockerVM - Initializes and starts a boot2docker + VM and sets its environment variables. If boot2docker + VM has already been set up on the machine, these steps + will simply exit gracefully. 
""" init_ret = -1 start_ret = -1 @@ -116,10 +126,11 @@ def boot2dockerVM(self): if start_ret != 0: raise Exception('Could not start boot2docker VM.') if env_ret != 0: - raise Exception('Could not set environment variables of boot2docker VM.') + raise Exception('Could not set environment variables \ + of boot2docker VM.') if image_ret != 0: - raise Exception('Could not pull autolab docker image from docker hub.') - + raise Exception('Could not pull autolab docker image \ + from docker hub.') def instanceName(self, id, name): """ instanceName - Constructs a VM instance name. Always use @@ -138,22 +149,29 @@ def domainName(self, vm): # VMMS API functions # def initializeVM(self, vm): - """ initializeVM - Start running a dockerized autograding container. + """ initializeVM - Start dockerized autograding container by + running a trivially long-running process so that the container + continues to run. Otherwise, the container will stop running + once the program has come to completion. """ + instanceName = self.instanceName(vm.id, vm.name) args = ['docker', 'run', '-d'] args.append('--name') - args.append(self.instanceName(vm.id, vm.name)) - args.append(self.DOCKER_IMAGE) + args.append(instanceName) + args.append(config.Config.DOCKER_IMAGE) args.append('/bin/bash') args.append('-c') args.append('while true; do sleep 1; done') - subprocess.Popen(args) + ret = timeout(args, config.Config.INITIALIZEVM_TIMEOUT) + if ret != 0: + self.log.error("Failed to create container %s", instanceName) + return None return vm def waitVM(self, vm, max_secs): - """ waitVM - Wait at most max_secs for a VM to become - ready. Return error if it takes too long. This should - be immediate since the VM is localhost. + """ waitVM - Wait at most max_secs for a docker container to become + ready. Return error if it takes too long. This should be immediate + since the container is already initialized in initializeVM. 
""" instanceName = self.instanceName(vm.id, vm.name) @@ -167,39 +185,53 @@ def waitVM(self, vm, max_secs): # Give up if the elapsed time exceeds the allowable time if elapsed_secs > max_secs: - self.log.info("Docker %s: Could not reach container after %d seconds." % (instanceName, elapsed_secs)) + self.log.info("Docker %s: Could not reach container \ + after %d seconds." % (instanceName, elapsed_secs)) return -1 # Give the docker container a string to echo back to us. - echo_args = ['docker', 'exec', instanceName] - echo_args.append('/bin/echo') - echo_args.append(echo_string) - echo = subprocess.check_output(echo_args).strip('\n') + ret = dockerExec(instanceName, ['/bin/echo', echo_string]) - self.log.debug("Docker %s: echo returned with %d" % (instanceName, echo)) + self.log.debug("Docker %s: echo returned with \ + %d" % (instanceName, echo)) - if echo is echo_string: + if ret == 0: return 0 # Sleep a bit before trying again time.sleep(config.Config.TIMER_POLL_INTERVAL) def copyIn(self, vm, inputFiles): - """ copyIn - Copy input files to VM + """ copyIn - Copy input files to the docker container. This is + a little hacky because it actually does: + + `cat FILE | docker exec -i CONTAINER 'sh -c cat > FILE' + + This is because there is no direct way to copy files to a container + unless the container is mounted to a specific directory on the host. + The other option is to set up an ssh server on the container. This + option should be pursued in future. 
""" - domain_name = self.domainName(vm) + instanceName = self.instanceName(vm.id, vm.name) # Create a fresh input directory - ret = subprocess.call(["ssh"] + LocalSSH._SSH_FLAGS + - ["%s" % (domain_name), - "(rm -rf autolab; mkdir autolab)"]) + mkdir = dockerExec(instanceName, + ['(rm -rf /home/autolab; mkdir /home/autolab)']) + if mkdir is None: + self.log.error("Failed to create directory in container %s" + % instanceName) + return -1 + # Copy the input files to the input directory for file in inputFiles: - ret = timeout(["scp"] + LocalSSH._SSH_FLAGS + - [file.localFile, "%s:autolab/%s" % - (domain_name, file.destFile)], config.Config.COPYIN_TIMEOUT) + ret = timeout(['cat', file.localFile, '|', + 'docker', 'exec', '-i', instanceName, + 'sh', '-c', 'cat > /home/autolab/' + file.destFile], + config.Config.COPYIN_TIMEOUT) if ret != 0: + self.log.error("Failed to copy file %s to container %s" + % (file.localFile, instanceName)) return ret return 0 @@ -207,16 +239,16 @@ def runJob(self, vm, runTimeout, maxOutputFileSize): """ runJob - Run the make command on a VM using SSH and redirect output to file "output". """ - print "IN RUN JOB!!!" 
domain_name = self.domainName(vm) - self.log.debug("runJob: Running job on VM %s" % self.instanceName(vm.id, vm.name)) + instanceName = self.instanceName(vm.id, vm.name) + self.log.debug("runJob: Running job on VM %s" % instanceName) # Setting ulimits for VM and running job - runcmd = "/usr/bin/time --output=time.out autodriver -u %d -f %d -t \ - %d -o %d autolab &> output" % ( + runcmd = '"/usr/bin/time --output=time.out autodriver -u %d -f %d -t \ + %d -o %d autolab &> output"' % ( config.Config.VM_ULIMIT_USER_PROC, config.Config.VM_ULIMIT_FILE_SIZE, - runTimeout, maxOutputFileSize) - return timeout(["ssh"] + LocalSSH._SSH_FLAGS + - ["%s" % (domain_name), runcmd], runTimeout * 2) + runTimeout, 1000 * 1024) + args = ['su autolab -c ' + runcmd] + return dockerExec(instanceName, args, runTimeout * 2) # runTimeout * 2 is a temporary hack. The driver will handle the timout def copyOut(self, vm, destFile): diff --git a/vmms/localSSH.py b/vmms/localSSH.py index 118458b3..d827a765 100644 --- a/vmms/localSSH.py +++ b/vmms/localSSH.py @@ -65,13 +65,14 @@ def __init__(self): Checks if the machine is ready to run Tango jobs. """ self.log = logging.getLogger("LocalSSH") - try: - checkBinary = subprocess.check_call(["which", "autodriver"]) - checkAutogradeUser = subprocess.check_call("getent passwd | grep 'autograde'", shell=True) - except subprocess.CalledProcessError as e: - print "Local machine has not been bootstrapped for autograding. Please run localBootstrap.sh" - self.log.error(e) - exit(1) + self.log.info("LocalSSH ready.") + # try: + # checkBinary = subprocess.check_call(["which", "autodriver"]) + # checkAutogradeUser = subprocess.check_call("getent passwd | grep 'autograde'", shell=True) + # except subprocess.CalledProcessError as e: + # print "Local machine has not been bootstrapped for autograding. 
Please run localBootstrap.sh" + # self.log.error(e) + # exit(1) def instanceName(self, id, name): From fd1913dbf25dc42fd6498760e0d67fd4eb736514 Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Sun, 15 Mar 2015 15:08:51 -0400 Subject: [PATCH 05/25] Fixed autodriver permission bit. Fixed runJob. --- Dockerfile | 1 + vmms/dockerSSH.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index a224343c..ccf49462 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,6 +21,7 @@ WORKDIR Tango/autodriver RUN ls . RUN make clean && make COPY autodriver /usr/bin/autodriver +RUN chmod +s /usr/bin/autodriver # Clean up WORK /home diff --git a/vmms/dockerSSH.py b/vmms/dockerSSH.py index 7c986b4b..38250298 100644 --- a/vmms/dockerSSH.py +++ b/vmms/dockerSSH.py @@ -243,8 +243,8 @@ def runJob(self, vm, runTimeout, maxOutputFileSize): instanceName = self.instanceName(vm.id, vm.name) self.log.debug("runJob: Running job on VM %s" % instanceName) # Setting ulimits for VM and running job - runcmd = '"/usr/bin/time --output=time.out autodriver -u %d -f %d -t \ - %d -o %d autolab &> output"' % ( + runcmd = '"cd /home/autolab; /usr/bin/time --output=time.out autodriver \ + -u %d -f %d -t %d -o %d autolab &> output"' % ( config.Config.VM_ULIMIT_USER_PROC, config.Config.VM_ULIMIT_FILE_SIZE, runTimeout, 1000 * 1024) args = ['su autolab -c ' + runcmd] From 0a21e247962b7c1bd72bec21135514d48f22d38d Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Sun, 15 Mar 2015 21:56:27 -0400 Subject: [PATCH 06/25] Implemented copyOut, destroyVM, safeDestroyVM, getVMs, existsVM --- vmms/dockerSSH.py | 110 +++++++++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 49 deletions(-) diff --git a/vmms/dockerSSH.py b/vmms/dockerSSH.py index 38250298..ecca9d36 100644 --- a/vmms/dockerSSH.py +++ b/vmms/dockerSSH.py @@ -176,8 +176,6 @@ def waitVM(self, vm, max_secs): instanceName = self.instanceName(vm.id, vm.name) start_time = time.time() - domain_name = 
self.domainName(vm) - echo_string = 'docker ready' while(True): @@ -205,7 +203,7 @@ def copyIn(self, vm, inputFiles): """ copyIn - Copy input files to the docker container. This is a little hacky because it actually does: - `cat FILE | docker exec -i CONTAINER 'sh -c cat > FILE' + `cat FILE | docker exec -i CONTAINER 'sh -c cat > FILE'` This is because there is no direct way to copy files to a container unless the container is mounted to a specific directory on the host. @@ -215,10 +213,13 @@ def copyIn(self, vm, inputFiles): instanceName = self.instanceName(vm.id, vm.name) # Create a fresh input directory - mkdir = dockerExec(instanceName, - ['(rm -rf /home/autolab; mkdir /home/autolab)']) + mkdir = dockerExec(instanceName, ['(cd /home; \ + rm -rf autolab; mkdir autolab \ + chown autolab autolab; chown :autolab autolab \ + rm -rf output; mkdir output \ + chown autolab output; chown :autolab output)']) - if mkdir is None: + if mkdir != 0: self.log.error("Failed to create directory in container %s" % instanceName) return -1 @@ -243,66 +244,77 @@ def runJob(self, vm, runTimeout, maxOutputFileSize): instanceName = self.instanceName(vm.id, vm.name) self.log.debug("runJob: Running job on VM %s" % instanceName) # Setting ulimits for VM and running job - runcmd = '"cd /home/autolab; /usr/bin/time --output=time.out autodriver \ - -u %d -f %d -t %d -o %d autolab &> output"' % ( - config.Config.VM_ULIMIT_USER_PROC, config.Config.VM_ULIMIT_FILE_SIZE, - runTimeout, 1000 * 1024) + runcmd = '"cd /home/; autodriver -u %d -f %d -t %d -o %d \ + autolab &> output/feedback.out"' % (config.Config.VM_ULIMIT_USER_PROC, + config.Config.VM_ULIMIT_FILE_SIZE, runTimeout, 1000 * 1024) args = ['su autolab -c ' + runcmd] return dockerExec(instanceName, args, runTimeout * 2) # runTimeout * 2 is a temporary hack. The driver will handle the timout def copyOut(self, vm, destFile): - """ copyOut - Copy the file output on the VM to the file - outputFile on the Tango host. 
+ """ copyOut - Copy the autograder feedback from container to + destFile on the Tango host. """ - domain_name = self.domainName(vm) + instanceName = self.instanceName(vm.id, vm.name) - # Optionally log finer grained runtime info. Adds about 1 sec - # to the job latency, so we typically skip this. - if config.Config.LOG_TIMING: - try: - # regular expression matcher for error message from cat - no_file = re.compile('No such file or directory') - - time_info = subprocess.check_output(['ssh'] + LocalSSH._SSH_FLAGS + - ['%s' % (domain_name), - 'cat time.out']).rstrip('\n') - - # If the output is empty, then ignore it (timing info wasn't - # collected), otherwise let's log it! - if no_file.match(time_info): - # runJob didn't produce an output file - pass - - else: - # remove newline character printed in timing info - # replaces first '\n' character with a space - time_info = re.sub('\n', ' ', time_info, count = 1) - self.log.info('Timing (%s): %s' % (domain_name, time_info)) - - except subprocess.CalledProcessError, re.error: - # Error copying out the timing data (probably runJob failed) - pass - - return timeout(["scp"] + LocalSSH._SSH_FLAGS + - ["%s:output" % (domain_name), destFile], - config.Config.COPYOUT_TIMEOUT) + cmd = ['docker', 'cp'] + cmd.append('%s:/home/output/feedback.out' % instanceName) + cmd.append(destFile) + ret = timeout(cmd, config.Config.COPYOUT_TIMEOUT) + + return ret def destroyVM(self, vm): - """ destroyVM - Nothing to destroy for local. + """ destroyVM - Stop and delete the docker. 
""" + instanceName = self.instanceName(vm.id, vm.name) + ret = timeout(['docker', 'stop', instanceName], + config.Config.DOCKER_STOP_TIMEOUT) + if ret != 0: + self.log.error("Failed to stop container %s" % instanceName) + ret = timeout(['docker', 'run', instanceName], + config.Config.DOCKER_RM_TIMEOUT) + if ret != 0: + self.log.error("Failed to destroy container %s" % instanceName) return def safeDestroyVM(self, vm): - return self.destroyVM(vm) + start_time = time.time() + instanceName = self.instanceName(vm.id, vm.name) + while self.existsVM(vm): + if (time.time()-start_time > config.Config.DESTROY_SECS): + self.log.error("Failed to safely destroy container %s" + % instanceName) + return + self.destroyVM(vm) + return def getVMs(self): - """ getVMs - Nothing to return for local. + """ getVMs - Executes and parses `docker ps` """ - return [] + # Get all docker containers + machines = [] + containers_str = subprocess.check_output(['docker', 'ps']) + containers_l = containers_str.split('\n') + for container in containers_l: + machine = TangoMachine() + machine.vmms = 'dockerSSH' + c = container.split(' ') + machine.id = c[0] + c.reverse() + for el in c: + if len(el) > 0: + machine.name = el + machines.append(machine) + return machines def existsVM(self, vm): - """ existsVM - VM is simply localhost which exists. + """ existsVM - Executes `docker inspect CONTAINER`, which returns + a non-zero status upon not finding a container. """ - return True + instanceName = self.instanceName(vm.id, vm.name) + p = subprocess.Popen(['docker', 'inspect', instanceName], + stdout=open('/dev/null'), + stderr=open('/dev/null')) + return (p.poll() is 0) From 5a9e4e3224236665d37bd65df60dc7a646a72211 Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Mon, 16 Mar 2015 01:13:05 -0400 Subject: [PATCH 07/25] Fixed instruction in Dockerfile. 
--- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index ccf49462..32ecd559 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,11 +24,11 @@ COPY autodriver /usr/bin/autodriver RUN chmod +s /usr/bin/autodriver # Clean up -WORK /home +WORKDIR /home RUN apt-get remove -y git RUN apt-get -y autoremove RUN rm -rf Tango/ # Check installation RUN ls -l /home -RUN autodriver \ No newline at end of file +RUN which autodriver \ No newline at end of file From 2fbc989cb231af9972525852f3ed2fe8cb5e54de Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Mon, 16 Mar 2015 01:21:54 -0400 Subject: [PATCH 08/25] Removing boot2dockerVM initialization. It should be part of user's Tango set up process. --- restful-tango/tangoREST.py | 11 ++++++----- vmms/dockerSSH.py | 25 ++++++++++++++----------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/restful-tango/tangoREST.py b/restful-tango/tangoREST.py index 66332dc8..c022a73b 100644 --- a/restful-tango/tangoREST.py +++ b/restful-tango/tangoREST.py @@ -58,6 +58,12 @@ class TangoREST: def __init__(self): + logging.basicConfig( + filename = self.LOGFILE, + format = "%(levelname)s|%(asctime)s|%(name)s|%(message)s", + level = Config.LOGLEVEL + ) + vmms = None if Config.VMMS_NAME == "localSSH": @@ -85,11 +91,6 @@ def __init__(self): JobManager(self.queue, self.vmms, self.preallocator) self.tango = TangoServer(self.queue, self.preallocator, self.vmms) - logging.basicConfig( - filename = self.LOGFILE, - format = "%(levelname)s|%(asctime)s|%(name)s|%(message)s", - level = Config.LOGLEVEL - ) logging.getLogger('boto').setLevel(logging.INFO) self.log = logging.getLogger("TangoREST") self.log.info("Starting RESTful Tango server") diff --git a/vmms/dockerSSH.py b/vmms/dockerSSH.py index ecca9d36..702a5f47 100644 --- a/vmms/dockerSSH.py +++ b/vmms/dockerSSH.py @@ -3,8 +3,8 @@ # docker containers. In this context, VMs are docker containers. 
# import random, subprocess, re, time, logging, threading, os, sys - import config +from tangoObjects import TangoMachine def timeout(command, time_out=1): """ timeout - Run a unix command with a timeout. Return -1 on @@ -66,8 +66,6 @@ def dockerExec(container, cmd, time_out=1): # class DockerSSH: - _SSH_FLAGS = ["-o", "StrictHostKeyChecking no", "-o", - "GSSAPIAuthentication no"] _OS_X = 'darwin' LOCALHOST = '127.0.0.1' @@ -76,19 +74,21 @@ def __init__(self): Checks if the machine is ready to run docker containers. Initialize boot2docker if running on OS X. """ - self.log = logging.getLogger("DockerSSH") try: + self.log = logging.getLogger("DockerSSH") # If running on OS X, create a boot2docker VM - if sys.platform is self._OS_X: - self.boot2dockerVM() + if sys.platform == self._OS_X: + # self.boot2dockerVM() + # boot2docker initialization will be part of initial + # set up with Tango. self.docker_host_ip = subprocess.check_output(['boot2docker', 'ip']).strip('\n') else: self.docker_host_ip = self.LOCALHOST - self.log.info("Docker host IP is %s" & self.docker_host_ip) + self.log.info("Docker host IP is %s" % self.docker_host_ip) except Exception as e: - self.log.error(e) + self.log.error(str(e)) exit(1) def boot2dockerVM(self): @@ -113,13 +113,14 @@ def boot2dockerVM(self): self.log.debug("Setting environment variables for boot2docker VM.") if start_ret == 0: - env_ret = timeout(['$(boot2docker shellinit)'], + env_ret = timeout(['boot2docker', 'shellinit'], config.Config.BOOT2DOCKER_ENV_TIMEOUT) self.log.debug("Pulling the autolab docker image from docker hub.") if env_ret == 0: - image_ret = timeout(['docker', 'pull', 'mihirpandya/autolab'], - config.Config.DOCKER_IMAGE_TIMEOUT) + image_ret = timeout(['docker', 'build', '-t', + self.config.Config.DOCKER_IMAGE, '.'], + config.Config.DOCKER_IMAGE_BUILD_TIMEOUT) if init_ret != 0: raise Exception('Could not initialize boot2docker.') @@ -296,6 +297,8 @@ def getVMs(self): machines = [] containers_str = 
subprocess.check_output(['docker', 'ps']) containers_l = containers_str.split('\n') + containers_l.reverse() + containers_l.pop() for container in containers_l: machine = TangoMachine() machine.vmms = 'dockerSSH' From 13a4a523400993cdd86e131205b2c2799d319e06 Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Wed, 18 Mar 2015 15:52:25 -0400 Subject: [PATCH 09/25] Fixed safeDestroyVM --- vmms/dockerSSH.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/vmms/dockerSSH.py b/vmms/dockerSSH.py index 702a5f47..50fbe3d0 100644 --- a/vmms/dockerSSH.py +++ b/vmms/dockerSSH.py @@ -138,7 +138,8 @@ def instanceName(self, id, name): this function when you need a VM instance name. Never generate instance names manually. """ - return "%s-%d-%s" % (config.Config.PREFIX, id, name) + # return str(id) + return "%s-%s-%s" % (config.Config.PREFIX, id, name) def domainName(self, vm): """ Returns the domain name that is stored in the vm @@ -165,7 +166,8 @@ def initializeVM(self, vm): args.append('while true; do sleep 1; done') ret = timeout(args, config.Config.INITIALIZEVM_TIMEOUT) if ret != 0: - self.log.error("Failed to create container %s", instanceName) + self.log.error("Failed to create container %s (%d)" % + (instanceName, ret)) return None return vm @@ -189,10 +191,10 @@ def waitVM(self, vm, max_secs): return -1 # Give the docker container a string to echo back to us. 
- ret = dockerExec(instanceName, ['/bin/echo', echo_string]) + ret = dockerExec(instanceName, ['/bin/echo', 'echo_string']) - self.log.debug("Docker %s: echo returned with \ - %d" % (instanceName, echo)) + self.log.debug("Docker %s: echo terminated with status \ + %d" % (instanceName, ret)) if ret == 0: return 0 @@ -272,11 +274,13 @@ def destroyVM(self, vm): ret = timeout(['docker', 'stop', instanceName], config.Config.DOCKER_STOP_TIMEOUT) if ret != 0: - self.log.error("Failed to stop container %s" % instanceName) + self.log.error("Failed to stop container %s (%d)" % + (instanceName, ret)) ret = timeout(['docker', 'run', instanceName], config.Config.DOCKER_RM_TIMEOUT) if ret != 0: - self.log.error("Failed to destroy container %s" % instanceName) + self.log.error("Failed to destroy container %s" % + (instanceName, ret)) return def safeDestroyVM(self, vm): @@ -303,11 +307,13 @@ def getVMs(self): machine = TangoMachine() machine.vmms = 'dockerSSH' c = container.split(' ') - machine.id = c[0] + # machine.id = c[0] c.reverse() for el in c: if len(el) > 0: machine.name = el + machine.id = el + break machines.append(machine) return machines From d3ec21d391729ad02932b91ce1913af628194376 Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Tue, 24 Mar 2015 03:58:43 -0400 Subject: [PATCH 10/25] Modified Dockerfile to correctly install autodriver. Implemented VMMS with volumes but permission bits of autolab directory not maintained. 
--- Dockerfile | 8 ++- vmms/dockerSSH.py | 174 +++++++++------------------------------------- 2 files changed, 37 insertions(+), 145 deletions(-) diff --git a/Dockerfile b/Dockerfile index 32ecd559..e92d37c2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,17 +10,19 @@ RUN apt-get install -y build-essential # Install autodriver WORKDIR /home -RUN useradd autograde RUN useradd autolab +RUN mkdir autolab +RUN chown autolab autolab +RUN chown :autolab autolab +RUN useradd autograde RUN mkdir autograde RUN chown autograde autograde RUN chown :autograde autograde RUN apt-get install -y git RUN git clone https://github.com/autolab/Tango.git WORKDIR Tango/autodriver -RUN ls . RUN make clean && make -COPY autodriver /usr/bin/autodriver +RUN cp autodriver /usr/bin/autodriver RUN chmod +s /usr/bin/autodriver # Clean up diff --git a/vmms/dockerSSH.py b/vmms/dockerSSH.py index 50fbe3d0..7842abbb 100644 --- a/vmms/dockerSSH.py +++ b/vmms/dockerSSH.py @@ -2,7 +2,7 @@ # dockerSSH.py - Implements the Tango VMMS interface to run Tango jobs in # docker containers. In this context, VMs are docker containers. # -import random, subprocess, re, time, logging, threading, os, sys +import random, subprocess, re, time, logging, threading, os, sys, shutil import config from tangoObjects import TangoMachine @@ -78,7 +78,6 @@ def __init__(self): self.log = logging.getLogger("DockerSSH") # If running on OS X, create a boot2docker VM if sys.platform == self._OS_X: - # self.boot2dockerVM() # boot2docker initialization will be part of initial # set up with Tango. self.docker_host_ip = subprocess.check_output(['boot2docker', 'ip']).strip('\n') @@ -91,48 +90,6 @@ def __init__(self): self.log.error(str(e)) exit(1) - def boot2dockerVM(self): - """ boot2dockerVM - Initializes and starts a boot2docker - VM and sets its environment variables. If boot2docker - VM has already been set up on the machine, these steps - will simply exit gracefully. 
- """ - init_ret = -1 - start_ret = -1 - env_ret = -1 - image_ret = -1 - - self.log.debug("Initializing boot2docker VM.") - init_ret = timeout(['boot2docker', 'init'], - config.Config.BOOT2DOCKER_INIT_TIMEOUT) - - self.log.debug("Starting boot2docker VM.") - if init_ret == 0: - start_ret = timeout(['boot2docker', 'start'], - config.Config.BOOT2DOCKER_START_TIMEOUT) - - self.log.debug("Setting environment variables for boot2docker VM.") - if start_ret == 0: - env_ret = timeout(['boot2docker', 'shellinit'], - config.Config.BOOT2DOCKER_ENV_TIMEOUT) - - self.log.debug("Pulling the autolab docker image from docker hub.") - if env_ret == 0: - image_ret = timeout(['docker', 'build', '-t', - self.config.Config.DOCKER_IMAGE, '.'], - config.Config.DOCKER_IMAGE_BUILD_TIMEOUT) - - if init_ret != 0: - raise Exception('Could not initialize boot2docker.') - if start_ret != 0: - raise Exception('Could not start boot2docker VM.') - if env_ret != 0: - raise Exception('Could not set environment variables \ - of boot2docker VM.') - if image_ret != 0: - raise Exception('Could not pull autolab docker image \ - from docker hub.') - def instanceName(self, id, name): """ instanceName - Constructs a VM instance name. Always use this function when you need a VM instance name. Never generate @@ -151,108 +108,46 @@ def domainName(self, vm): # VMMS API functions # def initializeVM(self, vm): - """ initializeVM - Start dockerized autograding container by - running a trivially long-running process so that the container - continues to run. Otherwise, the container will stop running - once the program has come to completion. 
+ """ initializeVM - Nothing to do for initializeVM """ - instanceName = self.instanceName(vm.id, vm.name) - args = ['docker', 'run', '-d'] - args.append('--name') - args.append(instanceName) - args.append(config.Config.DOCKER_IMAGE) - args.append('/bin/bash') - args.append('-c') - args.append('while true; do sleep 1; done') - ret = timeout(args, config.Config.INITIALIZEVM_TIMEOUT) - if ret != 0: - self.log.error("Failed to create container %s (%d)" % - (instanceName, ret)) - return None return vm def waitVM(self, vm, max_secs): - """ waitVM - Wait at most max_secs for a docker container to become - ready. Return error if it takes too long. This should be immediate - since the container is already initialized in initializeVM. + """ waitVM - Nothing to do for waitVM """ - - instanceName = self.instanceName(vm.id, vm.name) - start_time = time.time() - - while(True): - - elapsed_secs = time.time() - start_time - - # Give up if the elapsed time exceeds the allowable time - if elapsed_secs > max_secs: - self.log.info("Docker %s: Could not reach container \ - after %d seconds." % (instanceName, elapsed_secs)) - return -1 - - # Give the docker container a string to echo back to us. - ret = dockerExec(instanceName, ['/bin/echo', 'echo_string']) - - self.log.debug("Docker %s: echo terminated with status \ - %d" % (instanceName, ret)) - - if ret == 0: - return 0 - - # Sleep a bit before trying again - time.sleep(config.Config.TIMER_POLL_INTERVAL) + return def copyIn(self, vm, inputFiles): - """ copyIn - Copy input files to the docker container. This is - a little hacky because it actually does: - - `cat FILE | docker exec -i CONTAINER 'sh -c cat > FILE'` - - This is because there is no direct way to copy files to a container - unless the container is mounted to a specific directory on the host. - The other option is to set up an ssh server on the container. This - option should be pursued in future. 
+ """ copyIn - Create a directory to be mounted as a volume + for the docker containers. Copy input files to this directory. """ instanceName = self.instanceName(vm.id, vm.name) - # Create a fresh input directory - mkdir = dockerExec(instanceName, ['(cd /home; \ - rm -rf autolab; mkdir autolab \ - chown autolab autolab; chown :autolab autolab \ - rm -rf output; mkdir output \ - chown autolab output; chown :autolab output)']) - - if mkdir != 0: - self.log.error("Failed to create directory in container %s" - % instanceName) - return -1 - - # Copy the input files to the input directory + # Create a fresh volume + volume_path = config.Config.DOCKER_VOLUME_PATH + instanceName + os.makedirs(volume_path) for file in inputFiles: - ret = timeout(['cat', file.localFile, '|', - 'docker', 'exec', '-i', instanceName, - 'sh', '-c', 'cat > /home/autolab/' + file.destFile], - config.Config.COPYIN_TIMEOUT) - if ret != 0: - self.log.error("Failed to copy file %s to container %s" - % (file.localFile, instanceName)) - return ret + shutil.copy(file.localFile, volume_path + file.destFile) + return 0 def runJob(self, vm, runTimeout, maxOutputFileSize): - """ runJob - Run the make command on a VM using SSH and - redirect output to file "output". + """ runJob - Run a docker container by doing the follows: + - mount directory corresponding to this job to /home/autolab + in the container + - run autodriver with corresponding ulimits and timeout as + autolab user """ - domain_name = self.domainName(vm) instanceName = self.instanceName(vm.id, vm.name) - self.log.debug("runJob: Running job on VM %s" % instanceName) - # Setting ulimits for VM and running job - runcmd = '"cd /home/; autodriver -u %d -f %d -t %d -o %d \ - autolab &> output/feedback.out"' % (config.Config.VM_ULIMIT_USER_PROC, - config.Config.VM_ULIMIT_FILE_SIZE, runTimeout, 1000 * 1024) - args = ['su autolab -c ' + runcmd] - return dockerExec(instanceName, args, runTimeout * 2) - # runTimeout * 2 is a temporary hack. 
The driver will handle the timout + args = ['docker', 'run', '--name', instanceName, -v] + args.append('%s:%s' % + (config.Config.DOCKER_VOLUME_PATH + instanceName, '/home/autolab')) + args.append(config.Config.DOCKER_IMAGE) + args.append('autodriver -u %d -f %d -t %d -o %d autolab &> output' % + (config.Config.VM_ULIMIT_USER_PROC, config.Config.VM_ULIMIT_FILE_SIZE, + runTimeout, config.Config.MAX_OUTPUT_FILE_SIZE)) + return timeout(args, runTimeout) + def copyOut(self, vm, destFile): """ copyOut - Copy the autograder feedback from container to @@ -260,23 +155,15 @@ def copyOut(self, vm, destFile): """ instanceName = self.instanceName(vm.id, vm.name) - cmd = ['docker', 'cp'] - cmd.append('%s:/home/output/feedback.out' % instanceName) - cmd.append(destFile) - ret = timeout(cmd, config.Config.COPYOUT_TIMEOUT) + shutil.copy(self.config.Config.DOCKER_VOLUME_PATH + instancName, destFile) - return ret + return 0 def destroyVM(self, vm): - """ destroyVM - Stop and delete the docker. + """ destroyVM - Delete the docker container. """ instanceName = self.instanceName(vm.id, vm.name) - ret = timeout(['docker', 'stop', instanceName], - config.Config.DOCKER_STOP_TIMEOUT) - if ret != 0: - self.log.error("Failed to stop container %s (%d)" % - (instanceName, ret)) - ret = timeout(['docker', 'run', instanceName], + ret = timeout(['docker', 'run', '-f', instanceName], config.Config.DOCKER_RM_TIMEOUT) if ret != 0: self.log.error("Failed to destroy container %s" % @@ -284,6 +171,9 @@ def destroyVM(self, vm): return def safeDestroyVM(self, vm): + """ safeDestroyVM - Delete the docker container and make + sure it is removed. + """ start_time = time.time() instanceName = self.instanceName(vm.id, vm.name) while self.existsVM(vm): From 199ae353bb441259b66eee5d0f6adc47c9eaa0b3 Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Tue, 24 Mar 2015 20:38:09 -0400 Subject: [PATCH 11/25] Mounting volumes works. One job1 is able to run. Multiple jobs are failing. 
--- Dockerfile | 3 +++ clients/job1/autograde-Makefile | 2 +- vmms/dockerSSH.py | 36 ++++++++++++++++++++++----------- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/Dockerfile b/Dockerfile index e92d37c2..c6c8b89d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,6 +14,9 @@ RUN useradd autolab RUN mkdir autolab RUN chown autolab autolab RUN chown :autolab autolab +RUN mkdir output +RUN chown autolab output +RUN chown :autolab output RUN useradd autograde RUN mkdir autograde RUN chown autograde autograde diff --git a/clients/job1/autograde-Makefile b/clients/job1/autograde-Makefile index d2865ec3..59f40e4b 100644 --- a/clients/job1/autograde-Makefile +++ b/clients/job1/autograde-Makefile @@ -1,4 +1,4 @@ autograde: - ./hello.sh + bash hello.sh diff --git a/vmms/dockerSSH.py b/vmms/dockerSSH.py index 7842abbb..d28e4392 100644 --- a/vmms/dockerSSH.py +++ b/vmms/dockerSSH.py @@ -124,11 +124,11 @@ def copyIn(self, vm, inputFiles): instanceName = self.instanceName(vm.id, vm.name) # Create a fresh volume - volume_path = config.Config.DOCKER_VOLUME_PATH + instanceName + volume_path = config.Config.DOCKER_VOLUME_PATH + instanceName +'/' os.makedirs(volume_path) for file in inputFiles: shutil.copy(file.localFile, volume_path + file.destFile) - + self.log.info('Copied in file %s to %s' % (file.localFile, volume_path + file.destFile)) return 0 def runJob(self, vm, runTimeout, maxOutputFileSize): @@ -139,13 +139,22 @@ def runJob(self, vm, runTimeout, maxOutputFileSize): autolab user """ instanceName = self.instanceName(vm.id, vm.name) - args = ['docker', 'run', '--name', instanceName, -v] - args.append('%s:%s' % - (config.Config.DOCKER_VOLUME_PATH + instanceName, '/home/autolab')) - args.append(config.Config.DOCKER_IMAGE) - args.append('autodriver -u %d -f %d -t %d -o %d autolab &> output' % - (config.Config.VM_ULIMIT_USER_PROC, config.Config.VM_ULIMIT_FILE_SIZE, - runTimeout, config.Config.MAX_OUTPUT_FILE_SIZE)) + args = ['docker', 'run', '--name', instanceName, 
'-v'] + args = args + ['%s:%s' % + (config.Config.DOCKER_VOLUME_PATH + instanceName, '/home/mount')] + args = args + [config.Config.DOCKER_IMAGE] + args = args + ['sh', '-c'] + + autodriverCmd = 'autodriver -u %d -f %d -t %d -o %d autolab &> output/feedback' % \ + (config.Config.VM_ULIMIT_USER_PROC, + config.Config.VM_ULIMIT_FILE_SIZE, + runTimeout, config.Config.MAX_OUTPUT_FILE_SIZE) + + args = args + ['cp -r mount/* autolab/; su autolab -c "%s"; \ + cp output/feedback mount/feedback' % autodriverCmd] + + self.log.info('Running job: %s' % str(args)) + return timeout(args, runTimeout) @@ -154,8 +163,11 @@ def copyOut(self, vm, destFile): destFile on the Tango host. """ instanceName = self.instanceName(vm.id, vm.name) - - shutil.copy(self.config.Config.DOCKER_VOLUME_PATH + instancName, destFile) + volume_path = config.Config.DOCKER_VOLUME_PATH + instanceName + shutil.copy(volume_path + '/feedback', destFile) + self.log.info('Copied feedback file to %s' % destFile) + shutil.rmtree(volume_path) + self.log.info('Deleted directory %s' % volume_path) return 0 @@ -163,7 +175,7 @@ def destroyVM(self, vm): """ destroyVM - Delete the docker container. """ instanceName = self.instanceName(vm.id, vm.name) - ret = timeout(['docker', 'run', '-f', instanceName], + ret = timeout(['docker', 'rm', '-f', instanceName], config.Config.DOCKER_RM_TIMEOUT) if ret != 0: self.log.error("Failed to destroy container %s" % From 360cf1d847019c5fd351ed11b25f4f4e5f66e276 Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Wed, 25 Mar 2015 00:41:27 -0400 Subject: [PATCH 12/25] Dockerfile simplified. Docker constants added to config.template.py. 
--- Dockerfile | 13 ++++--------- config.template.py | 9 +++++++++ vmms/dockerSSH.py | 21 +++++++++------------ 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/Dockerfile b/Dockerfile index c6c8b89d..4324e5e3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,16 +11,11 @@ RUN apt-get install -y build-essential # Install autodriver WORKDIR /home RUN useradd autolab -RUN mkdir autolab -RUN chown autolab autolab -RUN chown :autolab autolab -RUN mkdir output -RUN chown autolab output -RUN chown :autolab output RUN useradd autograde -RUN mkdir autograde -RUN chown autograde autograde -RUN chown :autograde autograde +RUN mkdir autolab output autograde +RUN chown autolab:autolab autolab +RUN chown autolab:autolab output +RUN chown autograde:autograde autograde RUN apt-get install -y git RUN git clone https://github.com/autolab/Tango.git WORKDIR Tango/autodriver diff --git a/config.template.py b/config.template.py index 4ae56583..6a3a0ba7 100644 --- a/config.template.py +++ b/config.template.py @@ -60,6 +60,15 @@ class Config: RUNJOB_TIMEOUT = 60 COPYOUT_TIMEOUT = 30 + # Docker constants + BOOT2DOCKER_INIT_TIMEOUT = 5 + BOOT2DOCKER_START_TIMEOUT = 30 + BOOT2DOCKER_ENV_TIMEOUT = 5 + DOCKER_IMAGE_BUILD_TIMEOUT = 300 + DOCKER_RM_TIMEOUT = 5 + DOCKER_IMAGE = '' + DOCKER_VOLUME_PATH = '' + # Maximum size for output file in bytes MAX_OUTPUT_FILE_SIZE = 1000 * 1024 diff --git a/vmms/dockerSSH.py b/vmms/dockerSSH.py index d28e4392..584d3ef0 100644 --- a/vmms/dockerSSH.py +++ b/vmms/dockerSSH.py @@ -53,14 +53,6 @@ def timeoutWithReturnStatus(command, time_out, returnValue = 0): stderr=subprocess.STDOUT) return ret -def dockerExec(container, cmd, time_out=1): - """ docerExec - Executes `docker exec container cmd` and - returns output. Container is the name of the docker - container and cmd is a list of commands to run. 
- """ - command = ['docker', 'exec', container, 'sh', '-c'] + cmd - return timeout(command, time_out) - # # User defined exceptions # @@ -70,9 +62,8 @@ class DockerSSH: LOCALHOST = '127.0.0.1' def __init__(self): - """ - Checks if the machine is ready to run docker containers. - Initialize boot2docker if running on OS X. + """ Checks if the machine is ready to run docker containers. + Initialize boot2docker if running on OS X. """ try: self.log = logging.getLogger("DockerSSH") @@ -84,6 +75,13 @@ def __init__(self): else: self.docker_host_ip = self.LOCALHOST + # Check import docker constants are defined in config + if len(config.Config.DOCKER_VOLUME_PATH) == 0: + raise Exception('DOCKER_VOLUME_PATH not defined in config.') + + if len(config.Config.DOCKER_IMAGE) == 0: + raise Exception('DOCKER_IMAGE not defined in config.') + self.log.info("Docker host IP is %s" % self.docker_host_ip) except Exception as e: @@ -95,7 +93,6 @@ def instanceName(self, id, name): this function when you need a VM instance name. Never generate instance names manually. """ - # return str(id) return "%s-%s-%s" % (config.Config.PREFIX, id, name) def domainName(self, vm): From 13f1c0cc3ba822e6b8d19ece673634f414150ad5 Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Wed, 25 Mar 2015 01:57:54 -0400 Subject: [PATCH 13/25] Works smoother than ice ;) --- vmms/dockerSSH.py | 59 ++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/vmms/dockerSSH.py b/vmms/dockerSSH.py index 584d3ef0..92615cea 100644 --- a/vmms/dockerSSH.py +++ b/vmms/dockerSSH.py @@ -118,8 +118,7 @@ def copyIn(self, vm, inputFiles): """ copyIn - Create a directory to be mounted as a volume for the docker containers. Copy input files to this directory. 
""" - instanceName = self.instanceName(vm.id, vm.name) - + instanceName = self.instanceName(vm.id, vm.image) # Create a fresh volume volume_path = config.Config.DOCKER_VOLUME_PATH + instanceName +'/' os.makedirs(volume_path) @@ -135,7 +134,7 @@ def runJob(self, vm, runTimeout, maxOutputFileSize): - run autodriver with corresponding ulimits and timeout as autolab user """ - instanceName = self.instanceName(vm.id, vm.name) + instanceName = self.instanceName(vm.id, vm.image) args = ['docker', 'run', '--name', instanceName, '-v'] args = args + ['%s:%s' % (config.Config.DOCKER_VOLUME_PATH + instanceName, '/home/mount')] @@ -159,24 +158,31 @@ def copyOut(self, vm, destFile): """ copyOut - Copy the autograder feedback from container to destFile on the Tango host. """ - instanceName = self.instanceName(vm.id, vm.name) + instanceName = self.instanceName(vm.id, vm.image) volume_path = config.Config.DOCKER_VOLUME_PATH + instanceName - shutil.copy(volume_path + '/feedback', destFile) + print os.listdir(volume_path) + print volume_path + '/feedback' + shutil.move(volume_path + '/feedback', destFile) self.log.info('Copied feedback file to %s' % destFile) - shutil.rmtree(volume_path) - self.log.info('Deleted directory %s' % volume_path) + + # Must always clean up containers in order to maintain statelessness. + # A solution with `docker attach` could be explored. + self.destroyVM(vm) return 0 def destroyVM(self, vm): """ destroyVM - Delete the docker container. """ - instanceName = self.instanceName(vm.id, vm.name) - ret = timeout(['docker', 'rm', '-f', instanceName], + instanceName = self.instanceName(vm.id, vm.image) + # Do a hard kill on corresponding docker container. + # Return status does not matter. + timeout(['docker', 'rm', '-f', instanceName], config.Config.DOCKER_RM_TIMEOUT) - if ret != 0: - self.log.error("Failed to destroy container %s" % - (instanceName, ret)) + # Destroy corresponding volume if it exists. 
+ if instanceName in os.listdir(config.Config.DOCKER_VOLUME_PATH): + shutil.rmtree(config.Config.DOCKER_VOLUME_PATH + instanceName) + self.log.debug('Deleted volume %s' % instanceName) return def safeDestroyVM(self, vm): @@ -184,11 +190,10 @@ def safeDestroyVM(self, vm): sure it is removed. """ start_time = time.time() - instanceName = self.instanceName(vm.id, vm.name) while self.existsVM(vm): if (time.time()-start_time > config.Config.DESTROY_SECS): self.log.error("Failed to safely destroy container %s" - % instanceName) + % vm.name) return self.destroyVM(vm) return @@ -196,23 +201,15 @@ def safeDestroyVM(self, vm): def getVMs(self): """ getVMs - Executes and parses `docker ps` """ - # Get all docker containers + # Get all volumes of docker containers machines = [] - containers_str = subprocess.check_output(['docker', 'ps']) - containers_l = containers_str.split('\n') - containers_l.reverse() - containers_l.pop() - for container in containers_l: + for volume in os.listdir(config.Config.DOCKER_VOLUME_PATH): machine = TangoMachine() machine.vmms = 'dockerSSH' - c = container.split(' ') - # machine.id = c[0] - c.reverse() - for el in c: - if len(el) > 0: - machine.name = el - machine.id = el - break + machine.name = volume + volume_l = volume.split('-') + machine.id = volume_l[1] + machine.image = volume_l[2] machines.append(machine) return machines @@ -221,8 +218,6 @@ def existsVM(self, vm): a non-zero status upon not finding a container. """ instanceName = self.instanceName(vm.id, vm.name) - p = subprocess.Popen(['docker', 'inspect', instanceName], - stdout=open('/dev/null'), - stderr=open('/dev/null')) - return (p.poll() is 0) + ret = timeout(['docker', 'inspect', instanceName]) + return (ret is 0) From 6d9242934aac523fadacc29764d2fdf505e7a96f Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Wed, 25 Mar 2015 12:03:46 -0400 Subject: [PATCH 14/25] No need for output directory on the container. 
--- Dockerfile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4324e5e3..fa631b91 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,9 +12,8 @@ RUN apt-get install -y build-essential WORKDIR /home RUN useradd autolab RUN useradd autograde -RUN mkdir autolab output autograde +RUN mkdir autolab autograde RUN chown autolab:autolab autolab -RUN chown autolab:autolab output RUN chown autograde:autograde autograde RUN apt-get install -y git RUN git clone https://github.com/autolab/Tango.git @@ -31,4 +30,7 @@ RUN rm -rf Tango/ # Check installation RUN ls -l /home -RUN which autodriver \ No newline at end of file +RUN which autodriver + +# Job initialization steps +CMD cp -r mount/* autolab/; su autolab -c \ No newline at end of file From 3ab6fc5dd42cdcc8a7adeb698221e690c48c9cce Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Wed, 25 Mar 2015 14:16:48 -0400 Subject: [PATCH 15/25] Autodriver fails with an unknown error with job2. Instead, autodriver should fail with a timeout error with job2. 
--- Dockerfile | 5 +---- clients/job2/autograde-Makefile | 2 +- vmms/dockerSSH.py | 38 ++++++++++++++++----------------- 3 files changed, 20 insertions(+), 25 deletions(-) diff --git a/Dockerfile b/Dockerfile index fa631b91..ec64b311 100644 --- a/Dockerfile +++ b/Dockerfile @@ -30,7 +30,4 @@ RUN rm -rf Tango/ # Check installation RUN ls -l /home -RUN which autodriver - -# Job initialization steps -CMD cp -r mount/* autolab/; su autolab -c \ No newline at end of file +RUN which autodriver \ No newline at end of file diff --git a/clients/job2/autograde-Makefile b/clients/job2/autograde-Makefile index d2865ec3..59f40e4b 100644 --- a/clients/job2/autograde-Makefile +++ b/clients/job2/autograde-Makefile @@ -1,4 +1,4 @@ autograde: - ./hello.sh + bash hello.sh diff --git a/vmms/dockerSSH.py b/vmms/dockerSSH.py index 92615cea..e3197f53 100644 --- a/vmms/dockerSSH.py +++ b/vmms/dockerSSH.py @@ -119,12 +119,13 @@ def copyIn(self, vm, inputFiles): for the docker containers. Copy input files to this directory. 
""" instanceName = self.instanceName(vm.id, vm.image) - # Create a fresh volume volume_path = config.Config.DOCKER_VOLUME_PATH + instanceName +'/' + + # Create a fresh volume os.makedirs(volume_path) for file in inputFiles: shutil.copy(file.localFile, volume_path + file.destFile) - self.log.info('Copied in file %s to %s' % (file.localFile, volume_path + file.destFile)) + self.log.debug('Copied in file %s to %s' % (file.localFile, volume_path + file.destFile)) return 0 def runJob(self, vm, runTimeout, maxOutputFileSize): @@ -141,32 +142,28 @@ def runJob(self, vm, runTimeout, maxOutputFileSize): args = args + [config.Config.DOCKER_IMAGE] args = args + ['sh', '-c'] - autodriverCmd = 'autodriver -u %d -f %d -t %d -o %d autolab &> output/feedback' % \ + autodriverCmd = 'autodriver -u %d -f %d -t %d -o %d autolab &> mount/feedback' % \ (config.Config.VM_ULIMIT_USER_PROC, config.Config.VM_ULIMIT_FILE_SIZE, runTimeout, config.Config.MAX_OUTPUT_FILE_SIZE) - args = args + ['cp -r mount/* autolab/; su autolab -c "%s"; \ - cp output/feedback mount/feedback' % autodriverCmd] + args = args + ['cp -r mount/* autolab/; su autolab -c "%s";' % + autodriverCmd] - self.log.info('Running job: %s' % str(args)) + self.log.debug('Running job: %s' % str(args)) return timeout(args, runTimeout) def copyOut(self, vm, destFile): """ copyOut - Copy the autograder feedback from container to - destFile on the Tango host. + destFile on the Tango host. Then, destroy that container. + Containers are never reused. """ instanceName = self.instanceName(vm.id, vm.image) volume_path = config.Config.DOCKER_VOLUME_PATH + instanceName - print os.listdir(volume_path) - print volume_path + '/feedback' shutil.move(volume_path + '/feedback', destFile) - self.log.info('Copied feedback file to %s' % destFile) - - # Must always clean up containers in order to maintain statelessness. - # A solution with `docker attach` could be explored. 
+ self.log.debug('Copied feedback file to %s' % destFile) self.destroyVM(vm) return 0 @@ -204,13 +201,14 @@ def getVMs(self): # Get all volumes of docker containers machines = [] for volume in os.listdir(config.Config.DOCKER_VOLUME_PATH): - machine = TangoMachine() - machine.vmms = 'dockerSSH' - machine.name = volume - volume_l = volume.split('-') - machine.id = volume_l[1] - machine.image = volume_l[2] - machines.append(machine) + if re.match("%s-" % config.Config.PREFIX, volume): + machine = TangoMachine() + machine.vmms = 'dockerSSH' + machine.name = volume + volume_l = volume.split('-') + machine.id = volume_l[1] + machine.image = volume_l[2] + machines.append(machine) return machines def existsVM(self, vm): From 505bdb608407573245b1a57509cef9a97081e588 Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Thu, 26 Mar 2015 12:35:00 -0400 Subject: [PATCH 16/25] Spin up docker container with user-specified image instead of default image defined in config --- config.template.py | 9 ++++----- vmms/dockerSSH.py | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/config.template.py b/config.template.py index 6a3a0ba7..06922d4c 100644 --- a/config.template.py +++ b/config.template.py @@ -27,7 +27,7 @@ class Config: LOGLEVEL = logging.DEBUG # Courselabs directory. Must be created before starting Tango - COURSELABS = "" + COURSELABS = "courselabs" # VMMS to use. Must be set to a VMMS implemented in vmms/ before # starting Tango. 
Options are: "localSSH", "tashiSSH", "ec2SSH" @@ -66,8 +66,7 @@ class Config: BOOT2DOCKER_ENV_TIMEOUT = 5 DOCKER_IMAGE_BUILD_TIMEOUT = 300 DOCKER_RM_TIMEOUT = 5 - DOCKER_IMAGE = '' - DOCKER_VOLUME_PATH = '' + DOCKER_VOLUME_PATH = 'volumes/' # Maximum size for output file in bytes MAX_OUTPUT_FILE_SIZE = 1000 * 1024 @@ -95,10 +94,10 @@ class Config: POOL_SIZE = 2 # Path for tashi images - TASHI_IMAGE_PATH = "/raid/tashi/images/" + TASHI_IMAGE_PATH = '' # Optionally log finer-grained timing information - LOG_TIMING = True + LOG_TIMING = False # Largest job ID MAX_JOBID = 500 diff --git a/vmms/dockerSSH.py b/vmms/dockerSSH.py index e3197f53..3800f965 100644 --- a/vmms/dockerSSH.py +++ b/vmms/dockerSSH.py @@ -139,7 +139,7 @@ def runJob(self, vm, runTimeout, maxOutputFileSize): args = ['docker', 'run', '--name', instanceName, '-v'] args = args + ['%s:%s' % (config.Config.DOCKER_VOLUME_PATH + instanceName, '/home/mount')] - args = args + [config.Config.DOCKER_IMAGE] + args = args + [vm.image] args = args + ['sh', '-c'] autodriverCmd = 'autodriver -u %d -f %d -t %d -o %d autolab &> mount/feedback' % \ From 8385642e791556a4070995ac4de4cf2984905236 Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Fri, 27 Mar 2015 12:33:29 -0400 Subject: [PATCH 17/25] Docker needs a complete path to the volume it is mounting. Changed the default value of DOCKER_VOLUME_PATH to '*', in light of this. 
--- config.template.py | 4 +++- vmms/dockerSSH.py | 37 +++++++++++++++++++++++-------------- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/config.template.py b/config.template.py index 06922d4c..adc660d4 100644 --- a/config.template.py +++ b/config.template.py @@ -66,7 +66,9 @@ class Config: BOOT2DOCKER_ENV_TIMEOUT = 5 DOCKER_IMAGE_BUILD_TIMEOUT = 300 DOCKER_RM_TIMEOUT = 5 - DOCKER_VOLUME_PATH = 'volumes/' + # Must be absolute path with trailing slash + # Default value of '*'' points this path to /path/to/Tango/volumes/ + DOCKER_VOLUME_PATH = '*' # Maximum size for output file in bytes MAX_OUTPUT_FILE_SIZE = 1000 * 1024 diff --git a/vmms/dockerSSH.py b/vmms/dockerSSH.py index 3800f965..6fd456a7 100644 --- a/vmms/dockerSSH.py +++ b/vmms/dockerSSH.py @@ -79,8 +79,8 @@ def __init__(self): if len(config.Config.DOCKER_VOLUME_PATH) == 0: raise Exception('DOCKER_VOLUME_PATH not defined in config.') - if len(config.Config.DOCKER_IMAGE) == 0: - raise Exception('DOCKER_IMAGE not defined in config.') + # if len(config.Config.DOCKER_IMAGE) == 0: + # raise Exception('DOCKER_IMAGE not defined in config.') self.log.info("Docker host IP is %s" % self.docker_host_ip) @@ -95,6 +95,13 @@ def instanceName(self, id, name): """ return "%s-%s-%s" % (config.Config.PREFIX, id, name) + def getVolumePath(self, instanceName): + volumePath = config.Config.DOCKER_VOLUME_PATH + if '*' in volumePath: + volumePath = os.getcwd() + '/' + 'volumes/' + volumePath = volumePath + instanceName + '/' + return volumePath + def domainName(self, vm): """ Returns the domain name that is stored in the vm instance. @@ -119,13 +126,13 @@ def copyIn(self, vm, inputFiles): for the docker containers. Copy input files to this directory. 
""" instanceName = self.instanceName(vm.id, vm.image) - volume_path = config.Config.DOCKER_VOLUME_PATH + instanceName +'/' + volumePath = self.getVolumePath(instanceName) # Create a fresh volume - os.makedirs(volume_path) + os.makedirs(volumePath) for file in inputFiles: - shutil.copy(file.localFile, volume_path + file.destFile) - self.log.debug('Copied in file %s to %s' % (file.localFile, volume_path + file.destFile)) + shutil.copy(file.localFile, volumePath + file.destFile) + self.log.debug('Copied in file %s to %s' % (file.localFile, volumePath + file.destFile)) return 0 def runJob(self, vm, runTimeout, maxOutputFileSize): @@ -136,9 +143,9 @@ def runJob(self, vm, runTimeout, maxOutputFileSize): autolab user """ instanceName = self.instanceName(vm.id, vm.image) + volumePath = self.getVolumePath(instanceName) args = ['docker', 'run', '--name', instanceName, '-v'] - args = args + ['%s:%s' % - (config.Config.DOCKER_VOLUME_PATH + instanceName, '/home/mount')] + args = args + ['%s:%s' % (volumePath, '/home/mount')] args = args + [vm.image] args = args + ['sh', '-c'] @@ -147,7 +154,7 @@ def runJob(self, vm, runTimeout, maxOutputFileSize): config.Config.VM_ULIMIT_FILE_SIZE, runTimeout, config.Config.MAX_OUTPUT_FILE_SIZE) - args = args + ['cp -r mount/* autolab/; su autolab -c "%s";' % + args = args + ['cp -r mount/* autolab/; su autolab -c "%s"; cat mount/feedback; ls mount/' % autodriverCmd] self.log.debug('Running job: %s' % str(args)) @@ -161,8 +168,8 @@ def copyOut(self, vm, destFile): Containers are never reused. """ instanceName = self.instanceName(vm.id, vm.image) - volume_path = config.Config.DOCKER_VOLUME_PATH + instanceName - shutil.move(volume_path + '/feedback', destFile) + volumePath = self.getVolumePath(instanceName) + shutil.move(volumePath + '/feedback', destFile) self.log.debug('Copied feedback file to %s' % destFile) self.destroyVM(vm) @@ -172,13 +179,14 @@ def destroyVM(self, vm): """ destroyVM - Delete the docker container. 
""" instanceName = self.instanceName(vm.id, vm.image) + volumePath = self.getVolumePath('') # Do a hard kill on corresponding docker container. # Return status does not matter. timeout(['docker', 'rm', '-f', instanceName], config.Config.DOCKER_RM_TIMEOUT) # Destroy corresponding volume if it exists. - if instanceName in os.listdir(config.Config.DOCKER_VOLUME_PATH): - shutil.rmtree(config.Config.DOCKER_VOLUME_PATH + instanceName) + if instanceName in os.listdir(volumePath): + shutil.rmtree(volumePath + instanceName) self.log.debug('Deleted volume %s' % instanceName) return @@ -200,7 +208,8 @@ def getVMs(self): """ # Get all volumes of docker containers machines = [] - for volume in os.listdir(config.Config.DOCKER_VOLUME_PATH): + volumePath = self.getVolumePath('') + for volume in os.listdir(volumePath): if re.match("%s-" % config.Config.PREFIX, volume): machine = TangoMachine() machine.vmms = 'dockerSSH' From 594f0798bcc986b9ef3910a0456712678e9a1d29 Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Fri, 27 Mar 2015 12:42:40 -0400 Subject: [PATCH 18/25] Moved Dockerfile to vmms directory. Renamed dockerSSH to localDocker. 
--- jobManager.py | 6 +++--- restful-tango/tangoREST.py | 6 +++--- Dockerfile => vmms/Dockerfile | 0 vmms/{dockerSSH.py => localDocker.py} | 22 ++++------------------ 4 files changed, 10 insertions(+), 24 deletions(-) rename Dockerfile => vmms/Dockerfile (100%) rename vmms/{dockerSSH.py => localDocker.py} (90%) diff --git a/jobManager.py b/jobManager.py index 6e15902c..b9508f5b 100644 --- a/jobManager.py +++ b/jobManager.py @@ -82,9 +82,9 @@ def __manage(self): elif Config.VMMS_NAME == "ec2SSH": from vmms.ec2SSH import Ec2SSH vmms = Ec2SSH() - elif Config.VMMS_NAME == "dockerSSH": - from vmms.dockerSSH import DockerSSH - vmms = DockerSSH() + elif Config.VMMS_NAME == "localDocker": + from vmms.localDocker import LocalDocker + vmms = LocalDocker() vmms = {Config.VMMS_NAME: vmms} preallocator = Preallocator(vmms) diff --git a/restful-tango/tangoREST.py b/restful-tango/tangoREST.py index f1991515..227bc0bf 100644 --- a/restful-tango/tangoREST.py +++ b/restful-tango/tangoREST.py @@ -76,9 +76,9 @@ def __init__(self): elif Config.VMMS_NAME == "ec2SSH": from vmms.ec2SSH import Ec2SSH vmms = Ec2SSH() - elif Config.VMMS_NAME == "dockerSSH": - from vmms.dockerSSH import DockerSSH - vmms = DockerSSH() + elif Config.VMMS_NAME == "localDocker": + from vmms.localDocker import LocalDocker + vmms = LocalDocker() self.vmms = {Config.VMMS_NAME: vmms} diff --git a/Dockerfile b/vmms/Dockerfile similarity index 100% rename from Dockerfile rename to vmms/Dockerfile diff --git a/vmms/dockerSSH.py b/vmms/localDocker.py similarity index 90% rename from vmms/dockerSSH.py rename to vmms/localDocker.py index 6fd456a7..6a532fcd 100644 --- a/vmms/dockerSSH.py +++ b/vmms/localDocker.py @@ -1,5 +1,5 @@ # -# dockerSSH.py - Implements the Tango VMMS interface to run Tango jobs in +# localDocker.py - Implements the Tango VMMS interface to run Tango jobs in # docker containers. In this context, VMs are docker containers. 
# import random, subprocess, re, time, logging, threading, os, sys, shutil @@ -57,33 +57,19 @@ def timeoutWithReturnStatus(command, time_out, returnValue = 0): # User defined exceptions # -class DockerSSH: - _OS_X = 'darwin' - LOCALHOST = '127.0.0.1' +class LocalDocker: def __init__(self): """ Checks if the machine is ready to run docker containers. Initialize boot2docker if running on OS X. """ try: - self.log = logging.getLogger("DockerSSH") - # If running on OS X, create a boot2docker VM - if sys.platform == self._OS_X: - # boot2docker initialization will be part of initial - # set up with Tango. - self.docker_host_ip = subprocess.check_output(['boot2docker', 'ip']).strip('\n') - else: - self.docker_host_ip = self.LOCALHOST + self.log = logging.getLogger("LocalDocker") # Check import docker constants are defined in config if len(config.Config.DOCKER_VOLUME_PATH) == 0: raise Exception('DOCKER_VOLUME_PATH not defined in config.') - # if len(config.Config.DOCKER_IMAGE) == 0: - # raise Exception('DOCKER_IMAGE not defined in config.') - - self.log.info("Docker host IP is %s" % self.docker_host_ip) - except Exception as e: self.log.error(str(e)) exit(1) @@ -212,7 +198,7 @@ def getVMs(self): for volume in os.listdir(volumePath): if re.match("%s-" % config.Config.PREFIX, volume): machine = TangoMachine() - machine.vmms = 'dockerSSH' + machine.vmms = 'localDocker' machine.name = volume volume_l = volume.split('-') machine.id = volume_l[1] From 4388e1f4ce24b1a7f1416eaa584317f20e23f2b2 Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Fri, 27 Mar 2015 17:11:20 -0400 Subject: [PATCH 19/25] Fixed annoying typo. 
--- worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/worker.py b/worker.py index 2e0afab2..2de4a4ea 100644 --- a/worker.py +++ b/worker.py @@ -69,7 +69,7 @@ def rescheduleJob(self, hdrfile, ret, err): # Here is where we give up else: self.jobQueue.makeDead(self.job.id, err) - self.appendMsg(hdrfile, "Internal error: Unable to complete job after %d tries. Pleae resubmit" % (Config.JOB_RETRIES)) + self.appendMsg(hdrfile, "Internal error: Unable to complete job after %d tries. Please resubmit" % (Config.JOB_RETRIES)) self.appendMsg(hdrfile, "Job status: waitVM=%s copyIn=%s runJob=%s copyOut=%s" % (ret["waitvm"], ret["copyin"], ret["runjob"], ret["copyout"])) self.catFiles(hdrfile, self.job.outputFile) self.detachVM(return_vm=False, replace_vm=True) From bdebd2ae1bf756365ab7f851f6dd241c1daa5865 Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Fri, 27 Mar 2015 17:31:40 -0400 Subject: [PATCH 20/25] Return status of -1 was ignored by the worker thread. --- worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/worker.py b/worker.py index 2de4a4ea..61cbfe54 100644 --- a/worker.py +++ b/worker.py @@ -261,7 +261,7 @@ def run (self): elif ret["runjob"] != 0: if ret["runjob"] == 1: # This should never happen msg = "Error: Autodriver usage error (status=%d)" % (ret["runjob"]) - elif ret["runjob"] == 2: + elif ret["runjob"] == 2 or ret["runjob"] == -1: msg = "Error: Job timed out after %d seconds" % (self.job.timeout) else: # This should never happen msg = "Error: Unknown autodriver error (status=%d)" % (ret["runjob"]) From 8076ce4db6c41fd8ab13e10b787caa50a28108b8 Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Fri, 27 Mar 2015 17:58:52 -0400 Subject: [PATCH 21/25] Updated tests. job2 works. 
--- clients/job4/autograde-Makefile | 2 +- clients/job5/autograde-Makefile | 2 +- clients/job6/autograde-Makefile | 2 +- clients/job7/autograde-Makefile | 2 +- vmms/localDocker.py | 4 +++- 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/clients/job4/autograde-Makefile b/clients/job4/autograde-Makefile index b7c98a20..ec749919 100644 --- a/clients/job4/autograde-Makefile +++ b/clients/job4/autograde-Makefile @@ -1,5 +1,5 @@ autograde: - (./hello.sh; exit 2) + (bash hello.sh; exit 2) diff --git a/clients/job5/autograde-Makefile b/clients/job5/autograde-Makefile index d2865ec3..59f40e4b 100644 --- a/clients/job5/autograde-Makefile +++ b/clients/job5/autograde-Makefile @@ -1,4 +1,4 @@ autograde: - ./hello.sh + bash hello.sh diff --git a/clients/job6/autograde-Makefile b/clients/job6/autograde-Makefile index d2865ec3..59f40e4b 100644 --- a/clients/job6/autograde-Makefile +++ b/clients/job6/autograde-Makefile @@ -1,4 +1,4 @@ autograde: - ./hello.sh + bash hello.sh diff --git a/clients/job7/autograde-Makefile b/clients/job7/autograde-Makefile index 0361c6e2..3a263b4b 100644 --- a/clients/job7/autograde-Makefile +++ b/clients/job7/autograde-Makefile @@ -1,5 +1,5 @@ autograde: - ./bug + bash bug diff --git a/vmms/localDocker.py b/vmms/localDocker.py index 6a532fcd..cbde2ab7 100644 --- a/vmms/localDocker.py +++ b/vmms/localDocker.py @@ -144,8 +144,10 @@ def runJob(self, vm, runTimeout, maxOutputFileSize): autodriverCmd] self.log.debug('Running job: %s' % str(args)) + ret = timeout(args, runTimeout) + self.log.debug('runJob returning %d' % ret) - return timeout(args, runTimeout) + return ret def copyOut(self, vm, destFile): From 20a524c02ed32de392d4f3e3f99987f65fcfcdea Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Wed, 1 Apr 2015 22:31:17 -0400 Subject: [PATCH 22/25] Removed unnecessary slash --- vmms/localDocker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vmms/localDocker.py b/vmms/localDocker.py index cbde2ab7..0097bcd9 100644 --- 
a/vmms/localDocker.py +++ b/vmms/localDocker.py @@ -157,7 +157,7 @@ def copyOut(self, vm, destFile): """ instanceName = self.instanceName(vm.id, vm.image) volumePath = self.getVolumePath(instanceName) - shutil.move(volumePath + '/feedback', destFile) + shutil.move(volumePath + 'feedback', destFile) self.log.debug('Copied feedback file to %s' % destFile) self.destroyVM(vm) From 6c17cfe5ac7b557293414792b61f6d72e9cbd503 Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Wed, 1 Apr 2015 22:32:04 -0400 Subject: [PATCH 23/25] bash hello.sh for job 5 --- clients/job5/autograde-Makefile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/clients/job5/autograde-Makefile b/clients/job5/autograde-Makefile index 59f40e4b..c6d06dd8 100644 --- a/clients/job5/autograde-Makefile +++ b/clients/job5/autograde-Makefile @@ -1,4 +1,2 @@ autograde: - bash hello.sh - - + bash hello.sh \ No newline at end of file From 4bd265215c151cf77ab17dfbc4863655ac6c65c3 Mon Sep 17 00:00:00 2001 From: Mihir Pandya Date: Mon, 6 Apr 2015 15:39:18 -0400 Subject: [PATCH 24/25] Added output file with write permissions on docker image.
--- vmms/Dockerfile | 3 ++- vmms/localDocker.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/vmms/Dockerfile b/vmms/Dockerfile index ec64b311..e9053445 100644 --- a/vmms/Dockerfile +++ b/vmms/Dockerfile @@ -12,8 +12,9 @@ RUN apt-get install -y build-essential WORKDIR /home RUN useradd autolab RUN useradd autograde -RUN mkdir autolab autograde +RUN mkdir autolab autograde output RUN chown autolab:autolab autolab +RUN chown autolab:autolab output RUN chown autograde:autograde autograde RUN apt-get install -y git RUN git clone https://github.com/autolab/Tango.git diff --git a/vmms/localDocker.py b/vmms/localDocker.py index 0097bcd9..f48e7f3c 100644 --- a/vmms/localDocker.py +++ b/vmms/localDocker.py @@ -135,12 +135,13 @@ def runJob(self, vm, runTimeout, maxOutputFileSize): args = args + [vm.image] args = args + ['sh', '-c'] - autodriverCmd = 'autodriver -u %d -f %d -t %d -o %d autolab &> mount/feedback' % \ + autodriverCmd = 'autodriver -u %d -f %d -t %d -o %d autolab &> output/feedback' % \ (config.Config.VM_ULIMIT_USER_PROC, config.Config.VM_ULIMIT_FILE_SIZE, runTimeout, config.Config.MAX_OUTPUT_FILE_SIZE) - args = args + ['cp -r mount/* autolab/; su autolab -c "%s"; cat mount/feedback; ls mount/' % + args = args + ['cp -r mount/* autolab/; su autolab -c "%s"; \ + cp output/feedback mount/feedback' % autodriverCmd] self.log.debug('Running job: %s' % str(args)) From ca035fee543f7f9dd1fda6125ab82925cfc3699a Mon Sep 17 00:00:00 2001 From: Ilter Canberk Date: Tue, 7 Apr 2015 18:09:18 +0300 Subject: [PATCH 25/25] Merge fixes --- tangod.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tangod.py b/tangod.py index 81839904..4db3799a 100755 --- a/tangod.py +++ b/tangod.py @@ -97,7 +97,7 @@ def getJobs(self, item): return self.jobQueue.deadJobs.values() elif item == 0: # return the list of live jobs - return self.jobQueue.jobQueue.values() + return self.jobQueue.liveJobs.values() else: # invalid parameter return [] @@ -207,7 
+207,7 @@ def resetTango(self, vmms): log.warning("Killed these %s VMs on restart: %s" % (vmms_name, namelist)) - for job in self.jobQueue.jobQueue.values(): + for job in self.jobQueue.liveJobs.values(): self.log.debug("job: %s, assigned: %s" % (str(job.name), str(job.assigned)))