#!/usr/bin/python

# Copyright (c) 2012 Jared Stafford (jspenguin@jspenguin.org)
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import sys
import re
import traceback
import time

import urllib2
import traceback
import sqlite3

import os
import signal

from os.path import *

from optparse import OptionParser, SUPPRESS_HELP
from hashlib import md5
from cStringIO import StringIO

# Program version shown in the interactive banner.
VERSION = '2.00'

# readline is optional; when it can be imported, raw_input() gains line
# editing and history. Its absence is not an error.
try:
    import readline
except ImportError:
    pass

# Name used in the prompts for a file that holds a saved command line.
shellfile = 'batch file' if os.name == 'nt' else 'shell script'
    
def open_db():
    """Locate and open the image metadata database.

    As a side effect this sets two module globals:
      dbfile -- path of the database file that was looked for
      mycmd  -- command line that re-runs this program

    Returns an sqlite3 connection, or None (after printing a message) when
    the database file cannot be found or opened.
    """
    global dbfile, mycmd
    dbname = 'ponibooru_image_meta.sqlite'
    try:
        dbfile = join(dirname(__file__), dbname)
        mycmd = '%s %s' % (sys.executable, __file__)
    except NameError:
        # __file__ is not defined (presumably a frozen/embedded build --
        # TODO confirm). Look next to the standard library instead, walking
        # up to three directory levels. Only NameError is caught so that
        # Ctrl-C and real bugs are not silently swallowed.
        cdir = dirname(os.__file__)
        for _ in xrange(3):
            dbfile = join(cdir, dbname)
            if exists(dbfile):
                break
            cdir = dirname(cdir)
        mycmd = sys.executable

    if not dbfile or not exists(dbfile):
        print 'cannot find %s' % dbname
        return None

    try:
        return sqlite3.connect(dbfile)
    except sqlite3.Error, e:
        print 'cannot open %s: %s' % (dbname, e)
        return None
    

def try_unlink(p):
    """Delete the file at path p, ignoring any OSError (e.g. file missing)."""
    try:
        # os.remove is the same operation as os.unlink.
        os.remove(p)
    except OSError:
        return

def check_dir(f):
    """Create the directory that would contain file path f, best effort.

    Any OSError from os.makedirs (directory already exists, cannot be
    created, ...) is ignored.
    """
    parent = dirname(f)
    try:
        os.makedirs(parent)
    except OSError:
        return

def multiprompt(ptxt):
    """Read lines from the user until a blank line is entered.

    The first prompt is '<ptxt> > '; every later one is 'more <ptxt> > '.
    Returns the entered lines concatenated, each followed by one space
    (so the result is '' when the first line is blank).
    """
    pieces = []
    line = raw_input('%s > ' % ptxt)
    while line:
        pieces.append(line + ' ')
        line = raw_input('more %s > ' % ptxt)
    return ''.join(pieces)

def ask_params():
    home = None
    for d in ('HOME', 'USERPROFILE'):
        home = os.getenv(d)
        if home:
            break

    default_dirs = [join(home, 'Downloads', 'Ponibooru'), abspath(dirname(dbfile))]

    print 'Ponibooru image downloader version %s - jspenguin@jspenguin.org' % VERSION
    print 'Run `%s --help\' for command line usage' % mycmd
    print

    ndd = len(default_dirs)
    while True:
        print 'Where do you want to save your images? (folders will be'
        print 'created if they don\'t exist, and existing images will)'
        print 'not be downloaded again.'
        print

        for i, d in enumerate(default_dirs):
            print '%d) %s' % (i + 1, d)
        print '%d) Other' % (ndd + 1)
        
        sel = raw_input('1-%d, default: 1 > ' % (ndd + 1))
        if sel == '':
            n = 0
            break

        try:
            n = int(sel) - 1
            if n < 0 or n > ndd:
                raise ValueError
        except ValueError:
            print 'invalid selection: %s' % sel
            continue

        break
    
    if n == ndd:
        outdir = raw_input('Output folder: ')
    else:
        outdir = default_dirs[n]

    while True:
        txt = raw_input('Download only highly-rated images (y/N) ').lower()
        if txt in ('', 'y', 'n'):
            high = txt == 'y'
            break

    while True:
        print 'Which image ratings do you want to download?'
        print ' s = Safe'
        print ' u = Unrated'
        print ' q = Questionable'
        print ' e = Explicit'
        print
        
        rtxt = raw_input('ratings, default: su > ') or 'su'
        try:
            ratings = ratings_set(rtxt)
        except ValueError, e:
            print e
            continue

        break

    print 'If you only want to download images with certain tags, '
    print 'enter them here, separated by spaces. If there are tags'
    print "you don't want to download, add a '-'."
    print ' For example, entering:'
    print
    print 'rarity spike -meta -youtube_caption'
    print
    print "will download only images tagged 'rarity' AND 'spike'"
    print "and will not download any image tagged 'meta' or 'youtube_caption'"
    print
    print 'Enter a blank line when done.'
    print

    filter = multiprompt('filter tags')
    
    return outdir, high, ratings, filter
        
# Maps each one-letter rating code to the rating name used in the database.
ratings_dict = dict(s='safe', u='unrated', q='questionable', e='explicit')
def ratings_set(txt):
    """Convert a string of rating codes into a set of rating names.

    txt is case-insensitive; whitespace and commas are ignored, so 'su',
    'S, U' and 's u' all give set(['safe', 'unrated']).

    Raises ValueError naming the offending character when txt contains
    anything that is not a known rating code.
    """
    ret = set()
    for code in re.sub(r'[\s,]', '', txt.lower()):
        name = ratings_dict.get(code)
        if not name:
            # Report the character itself, not the failed lookup result.
            raise ValueError('invalid char in ratings: %s' % code)
        ret.add(name)
    return ret

def logp(f, txt, stdout=True):
    if stdout:
        print txt
        
    if f:
        ctime = time.time()
        localtime = time.localtime(ctime)
        stime = time.strftime("%Y-%m-%d %H:%M:%S ", localtime)

        f.write('%s %s\n' % (stime, txt))
        f.flush()
        

def main():
    db = open_db()
    if not db:
        return

    options = OptionParser()
    options.add_option('-d', "--outdir", metavar='PATH', 
                       help="output directory")
    
    options.add_option('-f', "--filter", metavar='TAG', action='append', default=[],
                       help="Tag to filter out. Specify more than once to filter multiple tags")

    options.add_option("--high", action='store_true',
                       help="only download images with high ratings")

    options.add_option('-r', "--ratings", metavar='[suqe]', default='su',
                       help="images to download: safe, unrated, questionable, explicit default: su")

    ## don't show this option
    options.add_option("--dummy", action='store_true', help=SUPPRESS_HELP)

    opts, args = options.parse_args()
    if len(sys.argv) == 1 or (opts.dummy and len(sys.argv) == 2):
        outdir, high, ratings, filter = ask_params()
        wait = True
    else:
        outdir = opts.outdir
        high = opts.high
        filter = ' '.join(opts.filter)
        try:
            ratings = ratings_set(opts.ratings)
        except ValueError, e:
            print e
            return
        wait = False

    filterpos = set()
    filterneg = set()
    for f in filter.lower().split(' '):
        if f.startswith('-'):
            filterneg.add(f[1:])
        elif f:
            filterpos.add(f)

    
    check_dir(join(outdir, 'x'))
    os.chdir(outdir)

    filters = sorted(filterpos) + sorted('-%s' % f for f in filterneg)

    logfn = 'download_log_%s.txt' % (time.strftime('%Y-%m-%d_%H_%M_%S', time.localtime(time.time())))

    log = open(logfn, 'a')

    cmd = '%s -d "%s"' % (mycmd, outdir)
    if high:
        cmd += ' --high'
    cmd += ' -r %s' % ''.join(sorted(r[0] for r in ratings))
    if filters:
        cmd += ' -f "%s"' % ' '.join(filters)

    logp(log, 'Download starting: %s' % cmd, False)
    print 'Downloading images rated %s.' % ', '.join(sorted(ratings))
    if filters:
        print 'Download filters: %s' % ', '.join(filters)
    print 'Logging output to %s' % join(outdir, logfn)
    if wait:
        print 'To run this download operation from the command line or in'
        print 'a %s, run the following command:' % shellfile
        print
        print cmd
        print
        print 'Press enter to begin downloading, or Ctrl-C to cancel.'
        raw_input('')
        
    errorimgs = []
    
    for id, md5sum, tags, filename, rating, sel in db.execute('SELECT id, md5sum, tags, filename, rating, sel FROM images'):
        if not rating in ratings:
            continue

        tagset = set(t.lower() for t in tags.split(' '))
        if filterpos and not filterpos.issubset(tagset):
            continue
        
        if filterneg & tagset:
            continue
        
        ## common enough to separate it out
        filename = filename.replace('artist:', 'artist=')
        filename = re.sub(r'[":\|\*<>\\/\?]', '_', filename)
        
        ## fucking windows
        ## http://stackoverflow.com/questions/265769/maximum-filename-length-in-ntfs-windows-xp-and-windows-vista
        b, e = splitext(filename)
        filename = b[:180] + e

        if high and not sel:
            continue
        
        grp = (id // 1000) * 1000
        dir = '%06d-%06d' % (grp, grp + 999)
        outf = join(dir, filename)

        desc = '%d: %s' % (id, tags.encode('utf8'))
        if exists(outf):
            logp(log, 'already downloaded %s' % desc)
            continue
        
        logp(log, 'downloading %s' % desc)
        
        partf = outf + '.part'
        outp = None
        try:
            check_dir(outf)
            if opts.dummy:
                inp = StringIO('')
            else:
                inp = urllib2.urlopen('http://img.ponibooru.org/_images/%s' % (md5sum))
            outp = open(partf, 'wb')
            check = md5()
            while True:
                data = inp.read(65536)
                if not data:
                    break
                check.update(data)
                outp.write(data)
            outp.close()
            outp = None
            gotsum = check.hexdigest()
            if gotsum == md5sum or opts.dummy:
                os.rename(partf, outf)
                partf = None
            else:
                raise ValueError('MD5 sum does not match')
            
        except Exception, e:
            logp(log, 'error dowloading image %d: %s' % (id, e))
            errorimgs.append(id)
        finally:
            if outp:
                try:
                    outp.close()
                except Exception:
                    pass
                
            if partf:
                try_unlink(partf)
    logp(log, 'Finished.')
    print 'Download log saved as %s' % join(outdir, logfn)
    if errorimgs:
        logp(log, 'Errors were encountered with the following images: %s' % ', '.join(str(id) for id in errorimgs))
        print 'See the log file for more details.'
    if wait:
        raw_input('Press enter to continue. ')

# Run the downloader only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
