#!/usr/bin/env python2.7

"""
This is a program for unpacking a ZIP file of student submissions downloaded from D2L.
It relies on Python 2.7, the "unzip" command, and the "atool" package
(http://www.nongnu.org/atool/).
"""

import sys, os, glob, string, time
from pprint import *
import optparse
import subprocess
import shutil
import errno


# Verify that atool is installed and runnable before doing any work.
try:
    subprocess.check_output(["atool", "--version"], stderr=subprocess.STDOUT)
except OSError as e:
    # ENOENT means the executable was not found; anything else is unexpected,
    # so re-raise it with its original traceback.
    if e.errno != errno.ENOENT:
        raise
    print('%s requires the "atool" package.' % sys.argv[0])
    sys.exit(1)
except subprocess.CalledProcessError as e:
    # CalledProcessError has no "msg" attribute; its string form already
    # names the command and the exit status.
    print('Unable to run the "atool" command: %s' % e)
    sys.exit(1)

def Debug(msg):
    """Print msg on standard error, but only when options.debug is set."""
    if not options.debug:
        return
    print >> sys.stderr, msg

class File(object):
    """
    One submission file, described by the fields encoded in its D2L file name.

    Attributes set by the constructor:
      orig      -- the file name exactly as passed in
      id        -- the part of the first field before its first '-'
      student   -- the second field
      date      -- the third field, kept as the string found in the name
      file      -- the fourth field (the name the student gave the file)
      timestamp -- time.struct_time parsed from date
    """

    def __init__(self, name):
        """
        The file name format from D2L is horrific. Fields are separated by '-', except
        that a student name or filename could contain a hyphen. So split on '- ' instead.
        Only the first three separators are honoured, so a '- ' inside the student's own
        file name is kept (a '- ' inside the student name would still break parsing).
        If the filename starts with '.' then D2L appears to leave out the space between
        the '-' and '.'.

        Any parsing failure is reported on stderr (naming the original file name)
        and then re-raised unchanged.
        """
        self.orig = name
        try:
            # If the last '-' in the name is actually '-.' then insert the missing space.
            index = name.rfind('-.')
            if index > 0 and index == name.rfind('-'):
                name = name[:index+1] + ' ' + name[index+1:]
            # Split the name based on '- ', at most into the four expected fields.
            fields = [x.strip() for x in name.split('- ', 3)]
            keys = ['id', 'student', 'date', 'file']
            for i, key in enumerate(keys):
                setattr(self, key, fields[i])
            # for the id field we only want the number before the '-'
            self.id = self.id.split('-')[0]
            self.timestamp = self._parse_date(self.date)
        except Exception:
            sys.stderr.write("Error parsing: %s\n" % self.orig)
            # Bare raise keeps the original traceback.
            raise

    @staticmethod
    def _parse_date(date):
        """
        Parse a date such as "Sep 5, 2014 105 PM" into a time.struct_time.

        The time of day has no separator between hour and minute. Parsing it
        with "%I%M" is greedy on the hour, so "105 PM" would be read as 10:05
        rather than 1:05. To avoid that, the last two digits are taken as the
        minutes and an explicit ':' is inserted before parsing.
        NOTE(review): this assumes the minutes are always written with two
        digits -- confirm against real D2L exports.
        """
        parts = date.rsplit(' ', 2)
        if len(parts) == 3 and len(parts[1]) > 2 and parts[1].isdigit():
            day, clock, meridiem = parts
            explicit = "%s %s:%s %s" % (day, clock[:-2], clock[-2:], meridiem)
            try:
                return time.strptime(explicit, "%b %d, %Y %I:%M %p")
            except ValueError:
                # Fall through to the historical format below.
                pass
        return time.strptime(date, "%b %d, %Y %I%M %p")

def RemoveHeaderComment(path):
    """
    Strip the leading comment block from a Python source file, with the hope
    of removing the student's name.

    Only paths ending in ".py" are touched. Starting at the top of the file,
    '#' comment lines and triple-quoted strings (either quote style) are
    dropped until the first other non-blank line; from there on the file is
    copied unchanged. A '#!' line is kept only when it is the very first line.
    Blank lines are kept.

    The stripped text is written to path + ".new"; the original is then
    renamed to path + ".bak" and the new file takes its place.
    """
    if not path.endswith(".py"):
        return

    first = True
    processing = True   # still inside the header region
    closer = None       # delimiter of the triple-quoted string we are inside, if any
    with open(path) as source, open(path + ".new", "w") as output:
        for line in source:
            ignore = False
            if processing:
                if closer is not None:
                    # inside triple-quote: drop the line, and leave the string
                    # wherever on the line the closing delimiter appears
                    ignore = True
                    if closer in line:
                        closer = None
                elif first and line.startswith('#!'):
                    # keep #! in first line of the file
                    pass
                elif line.startswith('#'):
                    ignore = True
                elif line.startswith(('"""', "'''")):
                    # start of triple-quote; it may also end on this same line
                    ignore = True
                    quote = line[:3]
                    if quote not in line[3:]:
                        closer = quote
                elif len(line) > 1:
                    processing = False # non-blank line signifies end of header comments

            if not ignore:
                output.write(line)
            first = False
    os.rename(path, path + ".bak")
    os.rename(path + ".new", path)

# ---- Command line handling -------------------------------------------------
# Defines the option parser, parses sys.argv and validates the two positional
# arguments. On success the names "options", "zipfile" and "dest" are set and
# the (previously non-existent) destination directory has been created.

description = \
"""\
und2l unpacks a zip file created by D2L into a sane directory hierarchy.
"zipfile" is the zip file downloaded from D2L and "dir" is the directory into
which you want the files unzipped. "dir" must not exist.
"""

# optparse expands "%prog" (not "%s") to the program name and adds the
# "Usage: " prefix itself.
usage = "%prog [options] zipfile dir"
parser = optparse.OptionParser(version="%prog 1.3", usage=usage, description=description)

parser.add_option("-d", "--debug",
                  action="store_true", dest="debug",
                  default=False,
                  help="print debugging output")

parser.add_option("-m", "--moss",
                  action="store_true", dest="moss",
                  default=False,
                  help="configure for running Moss (e.g. no spaces in filenames)")

parser.add_option("-o", "--old",
                  action="store_true", dest="old",
                  default=False,
                  help='put older versions of each file in "old" subdirectory')

parser.add_option("-b", "--blind",
                  action="store_true", dest="blind",
                  default=False,
                  help='configure for blind reviewing')

parser.add_option("-V",
                  action="store_true", dest="version",
                  default=False,
                  help="show program's version number and exit")

(options, args) = parser.parse_args(sys.argv[1:])

if options.version:
    parser.print_version()
    sys.exit(0)

# Exactly two positional arguments are required: the zip file and the target.
if len(args) != 2:
    parser.print_help()
    sys.exit(1)

zipfile, dest = args[0:2]

if not os.path.isfile(zipfile):
    print('Zip file "%s" does not exist.' % zipfile)
    parser.print_help()
    sys.exit(1)

if os.path.exists(dest):
    print('Target directory "%s" already exists.' % dest)
    parser.print_help()
    sys.exit(1)

# dest is known not to exist at this point.
os.mkdir(dest)

# Unpack the zipfile. atool doesn't seem to pass the 'force' option to unzip properly
# so invoke unzip directly.
cmd = ["unzip", "-o", "-d", dest, zipfile]
Debug(str(cmd))
subprocess.check_output(cmd)

# Everything below works with paths relative to the destination directory.
os.chdir(dest)

# If there is a top-level directory get rid of it: move its entries up one
# level and delete the then-empty directory.

files = os.listdir('.')
if len(files) == 1 and os.path.isdir(files[0]):
    Debug("removing top-level directory")
    top = files[0]
    for x in os.listdir(top):
        os.rename(os.path.join(top, x), x)
    Debug("deleting %s" % top)
    os.rmdir(top)

# Remove the index.html file, if the archive contained one. Without the check
# an archive lacking index.html would abort the whole run with OSError.
if os.path.exists("index.html"):
    os.remove("index.html")

# Parse every directory entry that looks like a D2L submission: the name
# starts with a digit and contains a '-'.
files = []
for entry in os.listdir('.'):
    if entry[0].isdigit() and '-' in entry:
        files.append(File(entry))

# Keep only the newest copy of each file. Submissions are grouped by
# (id, file); "newest" maps each group to its latest version so far and
# "older" collects every version that lost the comparison.
newest = {}
older = []
for candidate in files:
    key = (candidate.id, candidate.file)
    current = newest.get(key)
    if current is None:
        # First version seen for this key.
        newest[key] = candidate
    elif candidate.timestamp > current.timestamp:
        # Candidate is strictly newer: it replaces the current holder.
        older.append(current)
        newest[key] = candidate
    else:
        # Candidate is older (or has the same timestamp): set it aside.
        older.append(candidate)

# Put the newest files in directories named by the id. Archives are unpacked
# into that directory and then deleted; any other file is moved there under
# the name the student gave it.
for sub in newest.values():
    if not os.path.exists(sub.id):
        os.mkdir(sub.id)
        # Record who the directory belongs to, unless reviewing blind.
        if not options.blind:
            with open(os.path.join(sub.id, 'STUDENT.txt'), "w") as f:
                print >> f, sub.student

    if not sub.file.endswith(('.zip', '.tar', 'gz')):
        target = os.path.join(sub.id, sub.file)
        Debug("moving %s to %s" % (sub.orig, target))
        os.rename(sub.orig, target)
        continue

    if sub.file.endswith('.zip'):
        # atool doesn't unpack zip files correctly, do it ourselves
        cmd = ["unzip", "-o", "-d", sub.id, sub.orig]
    else:
        cmd = ["atool", "--force", "-X", sub.id, sub.orig]
    Debug(str(cmd))
    subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    os.remove(sub.orig)

# Superseded versions: with --old they are moved into an 'old' subdirectory
# of the student's directory, named by date and file name; otherwise they
# are deleted.
for stale in older:
    if not options.old:
        os.remove(stale.orig)
        continue
    old = os.path.join(stale.id, 'old')
    if not os.path.exists(old):
        os.mkdir(old)
    os.rename(stale.orig, os.path.join(old, stale.date + '-' + stale.file))


# Remove student names: hand every file below the current directory to
# RemoveHeaderComment.

for root, dirs, files in os.walk('.'):
    for path in [os.path.join(root, name) for name in files]:
        RemoveHeaderComment(path)

# Remove unwanted directories

for root, dirs, files in os.walk('.'):
    # Iterate over a copy because dirs is modified inside the loop.
    for d in list(dirs):
        if d in ['__MACOSX']:
            path = os.path.join(root, d)
            Debug("deleting " + path)
            shutil.rmtree(path)
            # Prune the entry so the top-down walk does not try to descend
            # into the directory that was just deleted.
            dirs.remove(d)

if options.moss:
    # Replace all spaces in filenames with underscores. The walk is bottom-up,
    # so the contents of a directory are renamed before the directory itself.
    for root, dirs, files in os.walk('.', topdown=False):
        for name in dirs + files:
            if ' ' not in name:
                continue
            source = os.path.join(root, name)
            renamed = os.path.join(root, name.replace(' ', '_'))
            os.rename(source, renamed)