#!/usr/bin/env python2.7
"""
Unpack a ZIP file of student submissions downloaded from D2L.

The archive is extracted into a new directory, the newest copy of every
submitted file is placed in a sub-directory named after the submission id,
and leading header comments are stripped from Python sources in the hope of
removing student names.

It relies on Python 2.7 and the "atool" package
(http://www.nongnu.org/atool/).
"""

import sys
import os
import glob
import string
import time
from pprint import *
import optparse
import subprocess
import shutil
import errno
import re

# Parsed command-line options; assigned by main() and read by Debug().
options = None

# File name endings that are unpacked instead of being moved as-is.
ARCHIVE_SUFFIXES = ('.zip', '.tar', '.gz', '.tgz')

# Directory names that are deleted wherever they appear in the output tree.
UNWANTED_DIRS = ('__MACOSX',)

description = """\
und2l unpacks a zip file created by D2L into a sane directory hierarchy.
"zipfile" is the zip file downloaded from D2L and "dir" is the directory
into which you want the files unzipped.  "dir" must not exist.
"""

# optparse expands %prog to the program name and adds the "Usage: " prefix.
usage = "%prog [options] zipfile dir"


def _CheckAtool():
    """Exit with a message unless the "atool" command can be executed."""
    try:
        subprocess.check_output(["atool", "--version"],
                                stderr=subprocess.STDOUT)
    except OSError as e:
        if e.errno == errno.ENOENT:
            print('%s requires the "atool" package.' % sys.argv[0])
            sys.exit(1)
        raise
    except subprocess.CalledProcessError as e:
        # CalledProcessError carries returncode/cmd/output but no "msg"
        # attribute, so report the exception itself.
        print('Unable to run the "atool" command: %s' % e)
        sys.exit(1)


def Debug(msg):
    """Write msg to stderr when the --debug option was given."""
    if options is not None and options.debug:
        sys.stderr.write("%s\n" % (msg,))


class File(object):
    """
    One submitted file, described by the name D2L gave it.

    Attributes:
        orig      -- the unmodified file name
        id        -- the number before the first '-' of the first field
        student   -- the second field of the name
        date      -- the third field of the name, as text
        file      -- the name the file was submitted under
        timestamp -- "date" parsed into a time.struct_time
    """

    # Splits "... 105 PM" into prefix, hour digits, minute digits, AM/PM.
    _TIME_RE = re.compile(r'^(.*\s)(\d{1,2})(\d{2})\s*([AaPp][Mm])$')

    def __init__(self, name):
        """
        The file name format from D2L is horrific.  Fields are separated by
        '-', except that a student name or filename could contain a hyphen.
        So split on '- ' instead.  Except, if the filename starts with '.'
        then D2L appears to leave out the space between the '-' and '.'.

        Raises whatever the parsing raised (after reporting the offending
        name on stderr) when the name does not have the expected shape.
        """
        self.orig = name
        try:
            # If the last '-' in the name is actually '-.' then insert the
            # missing space.
            index = name.rfind('-.')
            if index > 0 and index == name.rfind('-'):
                name = name[:index + 1] + ' ' + name[index + 1:]

            # Split into at most four fields so that a '- ' inside the
            # submitted filename stays part of the filename.
            fields = [x.strip() for x in name.split('- ', 3)]
            keys = ['id', 'student', 'date', 'file']
            for i, key in enumerate(keys):
                setattr(self, key, fields[i])

            # For the id field we only want the number before the '-'.
            self.id = self.id.split('-')[0]
            self.timestamp = self._ParseTimestamp(self.date)
        except Exception:
            sys.stderr.write("Error parsing: %s\n" % self.orig)
            raise

    @staticmethod
    def _ParseTimestamp(date):
        """
        Parse a date such as "Sep 12, 2013 105 PM" into a struct_time.

        The time has no separator between hour and minute, and "%I%M" reads
        "105" as 10:05.  Assuming the minute is always two digits (TODO
        confirm against real D2L names), re-insert the colon before parsing.
        """
        match = File._TIME_RE.match(date)
        if match is None:
            return time.strptime(date, "%b %d, %Y %I%M %p")
        prefix, hour, minute, ampm = match.groups()
        return time.strptime("%s%s:%s %s" % (prefix, hour, minute, ampm),
                             "%b %d, %Y %I:%M %p")


def RemoveHeaderComment(path):
    """
    Remove the comment at the start of a Python file with the hope of
    removing the student's name.

    Only paths ending in ".py" are touched.  Leading '#' comment lines and a
    leading triple-double-quoted string are dropped; a '#!' line is kept
    when it is the first line of the file, and blank lines are kept.  The
    first other non-blank line ends the header and everything from there on
    is copied unchanged.  The original file is kept as path + ".bak".

    NOTE(review): the ".bak" copy still contains the original header.
    """
    if not path.endswith(".py"):
        return

    quote = b'"""'
    first = True
    processing = True   # still inside the header region
    inside = False      # True if inside triple-quote
    # Binary mode keeps every retained byte exactly as it was.
    with open(path, "rb") as src, open(path + ".new", "wb") as dst:
        for line in src:
            ignore = False
            if processing:
                if inside:
                    ignore = True
                    # The closing quotes may sit anywhere on the line.
                    if quote in line:
                        inside = False
                elif first and line.startswith(b'#!'):
                    pass    # keep #! in first line of the file
                elif line.startswith(b'#'):
                    ignore = True
                elif line.startswith(quote):
                    ignore = True
                    # A one-line docstring opens and closes on this line.
                    inside = quote not in line[len(quote):]
                elif line.strip():
                    # non-blank line signifies end of header comments
                    processing = False
            if not ignore:
                dst.write(line)
            first = False
    os.rename(path, path + ".bak")
    os.rename(path + ".new", path)


def _BuildParser():
    """Return the optparse parser describing the command line."""
    parser = optparse.OptionParser(version="%prog 1.3", usage=usage,
                                   description=description)
    parser.add_option("-d", "--debug", action="store_true", dest="debug",
                      default=False, help="print debugging output")
    parser.add_option("-m", "--moss", action="store_true", dest="moss",
                      default=False,
                      help="configure for running Moss "
                           "(e.g. no spaces in filenames)")
    parser.add_option("-o", "--old", action="store_true", dest="old",
                      default=False,
                      help='put older versions of each file in "old" '
                           'subdirectory')
    parser.add_option("-b", "--blind", action="store_true", dest="blind",
                      default=False, help='configure for blind reviewing')
    parser.add_option("-V", action="store_true", dest="version",
                      default=False,
                      help="show program's version number and exit")
    return parser


def _FlattenTopLevelDirectory():
    """If the current directory holds a single directory, hoist its contents."""
    entries = os.listdir('.')
    if len(entries) == 1 and os.path.isdir(entries[0]):
        Debug("removing top-level directory")
        top = entries[0]
        for x in os.listdir(top):
            os.rename(os.path.join(top, x), x)
        Debug("deleting %s" % top)
        os.rmdir(top)


def _SplitNewest(files):
    """Return (newest, older): newest File per (id, file) key, and the rest."""
    newest = {}
    older = []
    for x in files:
        key = (x.id, x.file)
        if key in newest and x.timestamp <= newest[key].timestamp:
            # Current file is older, ignore it.
            older.append(x)
            continue
        if key in newest:
            # Current file is newer, use it.
            older.append(newest[key])
        newest[key] = x
    return newest, older


def _InstallSubmission(x):
    """Move or unpack submission x into the directory named by its id."""
    if not os.path.exists(x.id):
        os.mkdir(x.id)
    if not options.blind:
        with open(os.path.join(x.id, 'STUDENT.txt'), "w") as f:
            f.write(x.student + "\n")

    if x.file.endswith(ARCHIVE_SUFFIXES):
        if x.file.endswith('.zip'):
            # atool doesn't unpack zip files correctly, do it ourselves
            cmd = ["unzip", "-o", "-d", x.id, x.orig]
        else:
            cmd = ["atool", "--force", "-X", x.id, x.orig]
        Debug(str(cmd))
        subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        os.remove(x.orig)
    else:
        target = os.path.join(x.id, x.file)
        Debug("moving %s to %s" % (x.orig, target))
        os.rename(x.orig, target)


def main():
    """Parse the command line and unpack the D2L archive."""
    global options

    # verify that atool is installed
    _CheckAtool()

    parser = _BuildParser()
    (options, args) = parser.parse_args(sys.argv[1:])

    if options.version:
        parser.print_version()
        sys.exit(0)

    if len(args) != 2:
        parser.print_help()
        sys.exit(1)

    zipfile, dest = args

    if not os.path.isfile(zipfile):
        parser.print_help()
        sys.exit(1)

    if os.path.exists(dest):
        print('Target directory "%s" already exists.' % dest)
        parser.print_help()
        sys.exit(1)

    os.mkdir(dest)

    # Unpack the zipfile.  atool doesn't seem to pass the 'force' option to
    # unzip properly so invoke unzip directly.
    cmd = ["unzip", "-o", "-d", dest, zipfile]
    Debug(str(cmd))
    subprocess.check_output(cmd)

    os.chdir(dest)

    # If there is a top-level directory get rid of it.
    _FlattenTopLevelDirectory()

    # Remove the index.html file; an archive without one is not an error.
    try:
        os.remove("index.html")
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise

    # Sorted so that the choice between equal timestamps is repeatable.
    files = [File(x) for x in sorted(os.listdir('.'))
             if x[0].isdigit() and '-' in x]

    # Keep only the newest copy of each file.
    newest, older = _SplitNewest(files)

    # Put the newest files in directories named by the id.
    for x in newest.values():
        _InstallSubmission(x)

    if options.old:
        # Put old files in 'old' directory named by time and file name
        for x in older:
            old = os.path.join(x.id, 'old')
            if not os.path.exists(old):
                os.mkdir(old)
            os.rename(x.orig, os.path.join(old, x.date + '-' + x.file))
    else:
        # Delete old files
        for x in older:
            os.remove(x.orig)

    # Remove unwanted directories.  Done before header stripping so that no
    # time is spent on files that are about to be deleted.
    for root, dirs, files in os.walk('.'):
        for d in list(dirs):
            if d in UNWANTED_DIRS:
                path = os.path.join(root, d)
                Debug("deleting " + path)
                shutil.rmtree(path)
                # Keep os.walk from descending into the deleted directory.
                dirs.remove(d)

    # Remove student names
    for root, dirs, files in os.walk('.'):
        for name in files:
            RemoveHeaderComment(os.path.join(root, name))

    if options.moss:
        # Replace all spaces in filenames with underscores.  Walk bottom-up
        # so a directory is renamed only after its contents.
        for root, dirs, files in os.walk('.', topdown=False):
            for name in dirs + files:
                if ' ' in name:
                    os.rename(os.path.join(root, name),
                              os.path.join(root, name.replace(' ', '_')))


if __name__ == "__main__":
    main()