#!/usr/bin/env python

# Copyright (C) 2003 Andrea Arcangeli <andrea@suse.de> SuSE
# $Id: bkweb.py,v 1.16 2003/02/06 00:20:51 andrea Exp $

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import getopt, httplib, os, sys, re, time, cPickle, errno, html2text, popen2

class Changeset:
	"""Plain record describing one changeset.

	Attributes mirror the constructor arguments (rev, date, author,
	commentary, patch, files) plus a constant `version` of 1.

	NOTE: instances are written to and read from the on-disk cache by
	Web.pickle()/Web.unpickle().  The class is deliberately left as an
	old-style class with plain instance attributes so that existing cache
	files keep loading.
	"""

	def __init__(self, rev, date, author, commentary, patch, files):
		# Presumably a layout version for the pickled form -- nothing in
		# this file reads it back; confirm before relying on it.
		self.version = 1

		# Identification of the changeset.
		self.rev, self.date, self.author = rev, date, author

		# Payload: description text, diff text and touched file names.
		self.commentary, self.patch, self.files = commentary, patch, files

class Web:
	"""Scraper for a changeset index published over HTTP.

	The constructor downloads the main index page and remembers the
	'ChangeSet...' sub-pages it links to.  prepare_iter_rev() then locates a
	revision on one of those sub-pages, after which iterating over the
	object yields one Changeset per pending revision (fetched from the
	network, or from the pickle cache when one is configured).

	The page layout is recognised purely with regular expressions; the
	patterns below encode assumptions about the remote HTML.
	"""

	def __init__(self, url, cache = '', verbose = 0):
		"""Parse `url` and download the main index page.

		url     -- 'http://host/path' of the index page.
		cache   -- directory holding '<rev>.pickle' files; '' disables
		           the cache.
		verbose -- when true, progress is reported on stderr.

		Raises ValueError when `url` does not look like http://host/path.
		Errors from the initial download are propagated without retrying.
		"""
		self.__RETRY_RST_MAX = 100
		self.__RETRY_RST_DELAY = 1
		self.__RETRY_RST_LINEAR_BACKOFF = 2

		self.verbose = verbose

		match = re.search('http://([^/]+)(/.*)', url)
		if not match:
			raise ValueError('unsupported URL (expected http://host/path): %r' % (url, ))
		hostname, main_path = match.groups()

		self.__cache = cache

		# Revisions still to be handed out by next(); filled in by
		# prepare_iter_rev().  Starting with an empty list makes iterating
		# an unprepared object simply yield nothing.
		self.__rev_changesets = []

		self.__hostname = hostname
		self.__main_path = main_path
		self.__changesets = self.extract_changesets(self.download(first = 1))

	def download(self, path = '', first = 0):
		"""Return the body of `path`, resolved against the main path.

		With `first` set the request is attempted once and any failure is
		propagated.  Otherwise a failed request is retried up to
		__RETRY_RST_MAX times, sleeping between attempts with a delay that
		grows by __RETRY_RST_LINEAR_BACKOFF seconds each time; the last
		error is propagated once the retries are exhausted.

		A response other than '200 OK' counts as a failure (IOError).
		"""
		retry_rst = 0
		retry_rst_delay = self.__RETRY_RST_DELAY

		path = os.path.join(self.__main_path, path)

		if first and self.verbose:
			print >> sys.stderr, 'Downloading (%s,%s) ...' % (self.__hostname, path, ),

		while 1:
			try:
				h = httplib.HTTPConnection(self.__hostname)
				try:
					# putrequest() already emits the Host header, so it
					# is not added by hand (a duplicate Host header makes
					# strict servers answer 400).
					h.putrequest('GET', path)
					h.putheader('Accept', 'text/html')
					h.putheader('Accept', 'text/plain')
					h.endheaders()

					response = h.getresponse()

					if response.status != 200 or response.reason != 'OK':
						raise IOError('unexpected HTTP response: %s %s' % (response.status, response.reason))

					# The body must be read *before* the connection is
					# closed: closing first can discard a keep-alive
					# response and yield an empty page.
					ret = response.read()
				finally:
					h.close()
				break
			except (KeyboardInterrupt, SystemExit):
				# Never turn a user interrupt into a retry.
				raise
			except Exception:
				if first or retry_rst >= self.__RETRY_RST_MAX:
					raise
				print >>sys.stderr, 'Retry due download error'
				retry_rst += 1
				time.sleep(retry_rst_delay)
				retry_rst_delay += self.__RETRY_RST_LINEAR_BACKOFF
				continue

		if first and self.verbose:
			print >> sys.stderr, 'done.'

		return ret

	def extract_changesets(self, main_html):
		"""Return the 'ChangeSet...?nav=index.html' links found in `main_html`.

		Newlines are stripped first so that a link broken across lines is
		still recognised.  The links are returned in page order.
		"""
		main_html = re.sub('\n+', '', main_html)

		changesets = re.findall(r'<a href=(ChangeSet[^\?]+\?nav=index\.html)>', main_html)

		if self.verbose:
			print >> sys.stderr, 'Found %d Changeset fallback pages.' % len(changesets)

		return changesets

	def prepare_iter_rev(self, rev):
		"""Locate `rev` and queue the revisions listed before it.

		Each known ChangeSet page is downloaded in turn until one lists
		`rev`.  The revisions appearing before `rev` on that page become
		the pending list consumed by next(), which takes them from the end
		of the list (i.e. the entry closest to `rev` first).

		Returns that list (it may be empty when `rev` is the first entry),
		or None when no page mentions `rev`.  The returned list is the
		same object the iterator pops from.
		"""
		if self.verbose:
			print >> sys.stderr, 'Searching rev %s ...' % rev,

		for changeset in self.__changesets:
			html = self.download(changeset)
			rev_changesets = re.findall(r'<a href=cset@([^\?]+)\?nav=index\.html', html)
			if rev in rev_changesets:
				rev_changesets = rev_changesets[:rev_changesets.index(rev)]
				self.__rev_changesets = rev_changesets

				if self.verbose:
					print >> sys.stderr, 'found at CHS %s' % self.__changesets.index(changeset)

				return rev_changesets
		return None

	def __iter__(self):
		"""The object is its own iterator (see next())."""
		return self

	def next(self):
		"""Return the Changeset for the next pending revision.

		The changeset comes from the pickle cache when available and is
		downloaded otherwise.  Raises StopIteration once the pending list
		is exhausted, and keeps doing so on further calls.
		"""
		try:
			rev = self.__rev_changesets.pop()
		except IndexError:
			# The (now empty) list is kept so that repeated calls keep
			# raising StopIteration, as the iterator protocol expects.
			raise StopIteration

		print >> sys.stderr, 'Pending %d revisions' % (len(self.__rev_changesets) + 1)
		changeset = self.unpickle(rev)
		if not changeset:
			changeset = self.generate_changeset(rev)

		if self.verbose:
			print >> sys.stderr, '------ Changeset:\t%s' % changeset.rev
			print >> sys.stderr, '------ Author:\t\t%s' % changeset.author
			print >> sys.stderr, '------ Date:\t\t%s' % changeset.date
			print >> sys.stderr, '------ Commentary:\t%s' % changeset.commentary
			print >> sys.stderr, '------ Files:\t\t%s'% changeset.files

		return changeset

	def changeset_patch(self, rev):
		"""Download the patch page of `rev` and return it as plain text.

		Only the lines from the first line containing <pre> or </pre> up
		to and including the next such line are kept; <font> tags are
		dropped and the rest is converted with html2text.  The final
		character of the converted text is stripped.
		"""
		patch_html = self.download('patch@' + rev + '?nav=index.html|cset@' + rev)
		pre_regexp = re.compile('</?pre>')

		# Collected lines; non-empty means "inside the <pre> section".
		patch_lines = []
		for line in patch_html.split('\n'):
			if patch_lines:
				patch_lines.append(line + '\n')
			if pre_regexp.search(line):
				if not patch_lines:
					# Opening marker: start collecting with this line.
					patch_lines.append(line + '\n')
				else:
					# Closing marker (already collected above): done.
					break

		patch = ''.join(patch_lines)
		patch = re.sub('</?font[^>]*>', '', patch)
		patch = html2text.html2text(patch, '', strict_newline = 1)[:-1]

		return patch

	def generate_changeset(self, rev):
		"""Download and parse changeset `rev`, returning a Changeset.

		The changeset page supplies the touched files, the date, the
		author e-mail and the commentary; the diff comes from
		changeset_patch().  The result is also written to the cache when
		one is configured.

		Raises ValueError when the page does not contain the expected
		'ChangeSet@<rev>' metadata line.
		"""
		if self.verbose:
			print >> sys.stderr, 'Downloading changeset %s ...' % rev,

		html = self.download('cset@' + rev + '?nav=index.html')

		files = re.findall(r'<a href=diffs/([^@]+)@[^\?]+\?nav=index\.html', html)

		html = html.split('\n')

		# `rev` is escaped so that its dots only match literal dots.
		date_email_regexp = re.compile('ChangeSet@' + re.escape(rev) + '&nbsp;&nbsp;([^&]+)&nbsp;&nbsp;([^<]+)<')
		metadata = None
		idx = 0
		for line in html:
			metadata = date_email_regexp.search(line)
			if metadata:
				break
			idx += 1

		# The commentary is taken from the line 5 below the metadata line
		# (an assumption about the page layout).
		if not metadata or idx + 5 >= len(html):
			raise ValueError('cannot parse changeset page for rev %s' % rev)
		date, email = metadata.groups()

		commentary = html[idx + 5]
		commentary = html2text.html2text(commentary, '', strict_newline = 1)[:-1]

		patch = self.changeset_patch(rev)

		if self.verbose:
			print >> sys.stderr, 'done.'

		changeset = Changeset(rev, date, email, commentary, patch, files)
		if self.__cache:
			self.pickle(changeset)

		return changeset

	def unpickle(self, rev):
		"""Return the cached Changeset for `rev`, or None.

		None is returned when no cache directory is configured or when the
		cache has no entry for `rev`.  Any other I/O error is propagated.

		NOTE(security): unpickling executes whatever the file describes;
		the cache directory must only contain files written by pickle().
		"""
		cache = self.__cache
		if not cache:
			return None

		try:
			f = open(os.path.join(cache, rev) + '.pickle', 'rb')
		except IOError:
			err = sys.exc_info()[1]
			if err.errno == errno.ENOENT:
				return None
			# Permission problems etc. are real errors, not cache misses.
			raise

		try:
			return cPickle.Unpickler(f).load()
		finally:
			f.close()

	def pickle(self, changeset):
		"""Store `changeset` in the cache as '<rev>.pickle'.

		The data is written to a temporary file that is renamed into place
		afterwards, so an interrupted run cannot leave a truncated cache
		entry behind for unpickle() to choke on.
		"""
		path = os.path.join(self.__cache, changeset.rev) + '.pickle'
		tmp_path = path + '.tmp'

		f = open(tmp_path, 'wb')
		try:
			cPickle.Pickler(f).dump(changeset)
		finally:
			f.close()
		os.rename(tmp_path, path)


def checkout(url, rev, tree, cache, verbose):
	"""Apply every changeset pending after `rev` to the source tree `tree`.

	url     -- index URL handed to Web.
	rev     -- revision to start from; if Web.prepare_iter_rev() finds
	           nothing to do (or does not find `rev`) the function returns
	           without touching the tree.
	tree    -- directory the patches are applied in (patch -p1 -F1).
	cache   -- changeset cache directory handed to Web ('' for none).
	verbose -- when true, the output of patch and progress messages go to
	           stderr.

	After each successfully applied patch 'make distclean' is run in the
	tree.  Raises RuntimeError ('FailedPatch: <rev>' or
	'FailedDistclean: <rev>') on the first failure; a patch that fails to
	apply is dumped to stdout first.  The original working directory is
	always restored.
	"""
	web = Web(url, cache, verbose)
	if not web.prepare_iter_rev(rev):
		return
	cwd = os.getcwd()
	os.chdir(tree)

	try:
		for changeset in web:
			# NOTE: the whole patch is written before any output is read;
			# this relies on patch's output fitting in the pipe buffer.
			patch_task = popen2.Popen4('/usr/bin/env patch -p1 -F1')
			patch_task.tochild.write(changeset.patch)
			patch_task.tochild.close()
			output = patch_task.fromchild.read()
			patch_task.fromchild.close()
			if verbose:
				print >> sys.stderr, output,

			if patch_task.wait():
				sys.stdout.write(changeset.patch)
				# Real exception objects: string exceptions are rejected
				# with a TypeError by Python 2.6 and later.
				raise RuntimeError('FailedPatch: %s' % changeset.rev)

			if verbose:
				print >> sys.stderr, 'make distclean ...',
			# os.system() runs /bin/sh.  The bash-only '&>' would be read
			# by a POSIX shell as "run in background", hiding failures and
			# racing with the next patch, hence the portable redirection.
			if os.system('make distclean >/dev/null 2>&1'):
				raise RuntimeError('FailedDistclean: %s' % changeset.rev)
			if verbose:
				print >> sys.stderr, 'done.'

	finally:
		os.chdir(cwd)

def usage():
	"""Write the one-line option synopsis to stderr."""
	synopsis = '-u <URL> -r <rev> -t <tree> -c <cache>'
	print >> sys.stderr, synopsis

def bkweb(argv):
	"""Command-line entry point: parse `argv` and run checkout().

	Recognised options (argv[0] is skipped):
	  -h, --help          print the synopsis and exit with status 0
	  -u, --url <URL>     index URL (required)
	  -t, --tree <dir>    source tree to patch, '~' is expanded (required)
	  -r, --rev <rev>     starting revision (required)
	  -c, --cache <dir>   changeset cache directory, '~' is expanded
	  -v, --verbose       see the note below

	An unknown option or a missing required option prints the synopsis and
	exits with status 1.
	"""
	try:
		opts, args = getopt.getopt(argv[1:], 'hu:t:r:c:v',
					   [ 'help', 'url=', 'tree=', 'rev=', 'cache=', 'verbose'])
	except getopt.GetoptError:
		usage()
		sys.exit(1)

	cache = url = tree = rev = ''
	# NOTE(review): verbosity is ON by default and -v/--verbose switches it
	# OFF.  That looks inverted, but it is the established command-line
	# behaviour and is kept as is here.
	verbose = 1
	for o, a in opts:
		if o in ('-h', '--help'):
			usage()
			sys.exit()
		if o in ('-u', '--url'):
			url = a
		if o in ('-t', '--tree'):
			tree = os.path.expanduser(a)
		if o in ('-r', '--rev'):
			rev = a
		if o in ('-c', '--cache'):
			cache = os.path.expanduser(a)
		if o in ('-v', '--verbose'):
			verbose = 0

	if not tree or not url or not rev:
		# A missing required option is an error, so report failure to the
		# caller instead of exiting with status 0.
		usage()
		sys.exit(1)

	checkout(url, rev, tree, cache, verbose)

# Script entry point: only runs when the file is executed directly.
if __name__ == '__main__':
	# Add 20 to the process niceness before starting, so the downloads,
	# patching and 'make distclean' runs give way to other work.
	os.nice(20)
	bkweb(sys.argv)
