#!/usr/bin/python

from hashlib import sha384
from os import makedirs, unlink
from os.path import basename, getmtime, getsize, isdir, isfile, join
from urllib2 import Request, urlopen
from sys import argv, stdout

# These are the URLs of the media to fetch, one whitespace-separated entry
# per file. An entry may carry the file's content-length after a comma; this
# is needed for media that requires peeking at the end of the file. Use a
# command like this to retrieve that information:
#
#    curl --referer http://www.bigbuckbunny.org/index.php/download/ --head \
#    http://mirror.bigbuckbunny.de/peach/bigbuckbunny_movies/big_buck_bunny_1080p_h264.mov
#
# That size argument might have to be changed into a range list, like this:
#
#    http://somehost/somefile,0-5000,32000000-33000000,66000000-
#
# to deal with non-streamable media.
#
mediaurls = """
    http://mrstoast.fs.uni-bayreuth.de/mango/ToS/tears_of_steel_720p.mkv
    http://mrstoast.fs.uni-bayreuth.de/mango/ToS/tears_of_steel_720p.mov,372178639
    http://mrstoast.fs.uni-bayreuth.de/mango/ToS/tears_of_steel_1080p.mkv
    http://mrstoast.fs.uni-bayreuth.de/mango/ToS/tears_of_steel_1080p.mov,583774083

    http://mirror.bigbuckbunny.de/peach/bigbuckbunny_movies/big_buck_bunny_1080p_surround.avi
    http://mirror.bigbuckbunny.de/peach/bigbuckbunny_movies/big_buck_bunny_1080p_stereo.ogg
    http://mirror.bigbuckbunny.de/peach/bigbuckbunny_movies/big_buck_bunny_1080p_stereo.avi

    http://mirror.bigbuckbunny.de/peach/bigbuckbunny_movies/big_buck_bunny_720p_surround.avi
    http://mirror.bigbuckbunny.de/peach/bigbuckbunny_movies/big_buck_bunny_720p_stereo.ogg
    http://mirror.bigbuckbunny.de/peach/bigbuckbunny_movies/big_buck_bunny_720p_stereo.avi

    http://mirror.bigbuckbunny.de/peach/bigbuckbunny_movies/big_buck_bunny_480p_surround.avi
    http://mirror.bigbuckbunny.de/peach/bigbuckbunny_movies/big_buck_bunny_480p_stereo.ogg
    http://mirror.bigbuckbunny.de/peach/bigbuckbunny_movies/big_buck_bunny_480p_stereo.avi

    http://ftp.halifax.rwth-aachen.de/blender/demo/movies/Sintel.2010.1080p.mkv
    http://ftp.halifax.rwth-aachen.de/blender/demo/movies/Sintel.2010.720p.mkv
"""

# It seems peeking at the end is not good enough for the files listed in the
# (unused) string below. We have to look in detail, using
# "strace -f -e open,lseek gst-discoverer-0.10", at which ranges must be
# fetched to discover those files.
"""
    http://mirror.bigbuckbunny.de/peach/bigbuckbunny_movies/big_buck_bunny_1080p_h264.mov,725106140
    http://mirror.bigbuckbunny.de/peach/bigbuckbunny_movies/big_buck_bunny_720p_h264.mov,416751190
    http://mirror.bigbuckbunny.de/peach/bigbuckbunny_movies/big_buck_bunny_480p_h264.mov,249229883
    http://download.blender.org/peach/bigbuckbunny_movies/BigBuckBunny_640x360.m4v,121283919
    http://download.blender.org/peach/bigbuckbunny_movies/BigBuckBunny_320x180.mp4,64657027
"""

# Parse command line.
# First argument is the directory where to store files (default: 'media').
# Second argument is the sha384 sum file; each non-blank line is expected to
# hold "<digest> <filename>".
mediadir = 'media'
if len(argv) > 1 and argv[1]:
    mediadir = argv[1]

# Maps file name -> expected hex digest. Stays empty without a sum file.
checksums = {}
if len(argv) > 2:
    # Use a context manager so the sum file is closed deterministically.
    with open(argv[2]) as sumfile:
        for line in sumfile:
            fields = line.split()

            # Skip blank and whitespace-only lines instead of choking on them.
            if not fields:
                continue

            # Exactly two fields are required; anything else raises
            # ValueError, as a malformed sum file should not pass silently.
            digest, name = fields
            checksums[name] = digest

# Set to True as soon as any media file cannot be set up or verified.
failed = False

# Create mediadir if needed.
if not isdir(mediadir):
    makedirs(mediadir)

# Download the media and verify it against the known checksums.
for url in mediaurls.split():
    # Split the optional ",<content-length>" suffix off the URL. Entries
    # without a suffix get a size of 0, which means "fetch the head only".
    url, fullsize = (url + ',0').split(',')[:2]
    fullsize = int(fullsize)

    # Always fetch the first 128 KiB; additionally fetch the last 1 MiB when
    # the full content length is known.
    headsize = 128 * 1024
    tailsize = 1024 * 1024 if fullsize else 0
    fullsize = fullsize or headsize

    title = basename(url)
    filename = join(mediadir, title)

    # Check if a download is needed.
    if not isfile(filename) or getsize(filename) < fullsize:
        stdout.write('Downloading "%s" - ' % title)
        stdout.flush()

        # Media files are binary data, so never open them in text mode.
        output = open(filename, 'wb')
        datasize = headsize + tailsize
        remaining = datasize

        def fetch_range(offset, size, remaining):
            """Fetch `size` bytes starting at `offset` of the current media.

            The bytes are appended at the current position of `output` and
            progress is printed to stdout. `remaining` is the number of bytes
            still missing over all ranges of this file; the updated count is
            returned, so a non-zero final value signals a short download.
            """
            request = Request(url)

            # HTTP byte ranges are inclusive on both ends.
            request.add_header('Range',
                               'bytes=%d-%d' % (offset, offset + size - 1))

            # Some download sites check the referer header. Patch it in here.
            if 'bigbuckbunny' in url:
                request.add_header(
                    'Referer',
                    'http://www.bigbuckbunny.org/index.php/download/')

            response = urlopen(request)

            try:
                # Fetch the bytes and print progress.
                while size > 0:
                    progress = '%.1f%%' % (
                        100 * float(datasize - remaining) / datasize)

                    # Move the cursor back so the next update overwrites it.
                    stdout.write('%s\033[%dD' % (progress, len(progress)))
                    stdout.flush()

                    # Never read past the end of the requested range.
                    data = response.read(min(4096, size))

                    if not data:
                        break

                    output.write(data)

                    remaining -= len(data)
                    size -= len(data)
            finally:
                response.close()

            return remaining

        # Fetch the required ranges; always close the output file, even when
        # the network request raises.
        try:
            if headsize:
                remaining = fetch_range(0, headsize, remaining)
            if tailsize:
                offset = fullsize - tailsize
                output.seek(offset)

                remaining = fetch_range(offset, tailsize, remaining)
        finally:
            output.close()

        # Complain if not all expected data could be fetched. The partial
        # file is removed, so there is nothing left to verify for this URL.
        if remaining:
            stdout.write('failed\n')
            unlink(filename)
            failed = True
            continue

        stdout.write('succeeded\n')

    # Verify checksums
    filehash = checksums.get(title)

    if not filehash:
        print('No checksum found, not checking "%s"' % title)
        failed = True
        continue

    # NOTE: the '.sha348' suffix (sic) is kept as is, so that digests cached
    # by earlier runs are still found.
    hashfile = join(mediadir, title + '.sha348')

    # Skip hashing when a cached digest is at least as new as the media file
    # and matches the expected checksum.
    if isfile(hashfile) and getmtime(hashfile) >= getmtime(filename):
        with open(hashfile) as cached:
            if cached.read().rstrip() == filehash:
                continue

    hasher = sha384()

    with open(filename, 'rb') as f:
        # Hash in small chunks to avoid loading the whole file into memory.
        for data in iter(lambda: f.read(8192), b''):
            hasher.update(data)

    digest = hasher.hexdigest()

    # Cache the computed digest next to the media file.
    with open(hashfile, 'w') as cached:
        cached.write('%s\n' % digest)

    if filehash != digest:
        print('Checksum does not match for "%s"' % title)
        failed = True

# Exit with a non-zero status when an earlier step flagged a failure, so the
# caller can tell that the media folder is incomplete.
if failed:
    print('Failed to setup media folder.')
    raise SystemExit(1)
