#!/usr/bin/env python2.5
#
# Copyright (c) 2003 The University of Wroclaw.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#    1. Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#    2. Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#    3. The name of the University may not be used to endorse or promote
#       products derived from this software without specific prior
#       written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
# NO EVENT SHALL THE UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# History:
# 
# 2006-08-03  Przedsiebiorstwo Informatyczne CORE  <biuro at core.com.pl>:
#         * Following options were added:
#           -s, --strip-comments strip /* ... */ comments in log
#           -O, --only-date      generate only dates (without time)
#           -L, --no-files       generate log without filenames
#           -A, --no-author      generate log without author names
#           -H, --no-host        generate author name without hostname
#


import sys
import os
import time
import re
import getopt
import string
import codecs

from xml.utils import qp_xml

# Module-wide settings.  Each one holds its default here and is overwritten
# by process_opts() according to the command line.

# Compiled regexp built from -p/--prefix; convert_path() strips its match from
# paths and drops paths it does not match.  None disables prefix handling.
kill_prefix_rx = None
# Domain used by convert_user() to build "login <login@domain>:" for logins
# missing from the users map (-d/--domain).
default_domain = "localhost"
# Path prefixes (-x/--exclude); convert_path() drops any converted path that
# starts with one of them.
exclude = []
# Login -> replacement author text, filled from the -u/--users file.
users = { }
# Source substring -> replacement (-r/--relocate), applied by convert_path()
# before any other path processing.
reloc = { }
# Entries by the same author whose timestamps differ by less than this many
# seconds are merged (-D/--delta).
max_join_delta = 3 * 60
# -F/--list-format: wrap_text() splits messages into "- " prefixed items.
list_format = False
# -s/--strip-comments: process_entry() removes /* ... */ from log messages.
strip = False
# -O/--only-date: Entry.dump() writes the date without the time.
date_only = False
# -L/--no-files: process_entry() leaves file names out of the entry text.
no_files = False
# -H/--no-host: convert_user() emits "login:" instead of "login <login@domain>:".
no_host = False
# -A/--no-author: convert_user() returns an empty string.
no_author = False

# Captures the leading "YYYY-MM-DDTHH:MM:SS"-shaped part of a <date> text;
# anything after the seconds is ignored.
date_rx = re.compile(r"^(\d+-\d+-\d+T\d+:\d+:\d+)")

def die(msg):
  """Report a fatal error and terminate the script.

  Writes msg followed by a newline to standard error and then raises
  SystemExit with status 1, so this function never returns normally.
  """
  sys.stderr.write(msg + "\n")
  raise SystemExit(1)

def attr(e, n):
  """Return the value stored in e.attrs under the key ("", n).

  The empty first tuple member is presumably the namespace slot of the
  XML attribute name -- confirm against the parser in use.  A missing
  attribute raises KeyError.
  """
  key = ("", n)
  return e.attrs[key]

def has_child(e, n):
  """Tell whether element e has a direct child named n.

  Returns 1 if any member of e.children has a name equal to n, otherwise 0.
  """
  if any(kid.name == n for kid in e.children):
    return 1
  return 0

def child(e, n):
  """Return the first direct child of element e whose name is n.

  When e has no such child, die() is called with a message naming both
  the parent and the missing child.
  """
  matching = [kid for kid in e.children if kid.name == n]
  if matching:
    return matching[0]
  die("<%s> doesn't have <%s> child" % (e.name, n))
  
def convert_path(n):
  """Map a repository path to the name shown in the ChangeLog.

  Steps, in order:
    1. every key of reloc found in n is replaced by its value;
    2. if kill_prefix_rx is set, its match is removed from n, and a path
       it does not match is rejected;
    3. one leading "/" is dropped, and an empty result becomes "/";
    4. a result starting with any entry of exclude is rejected.

  Returns the converted path, or None when the path is rejected.
  """
  # str.replace() instead of the deprecated string.replace() function.
  # NOTE: with several relocations the order follows dict iteration order,
  # exactly as before.
  for src, target in reloc.items():
    n = n.replace(src, target)
  if kill_prefix_rx is not None:
    if not kill_prefix_rx.search(n):
      # Outside the project root: leave it out of the ChangeLog.
      return None
    n = kill_prefix_rx.sub("", n)
  if n.startswith("/"):
    n = n[1:]
  if n == "":
    # The path was the project root itself.
    n = "/"
  for pref in exclude:
    if n.startswith(pref):
      return None
  return n

def convert_user(u):
  """Return the author text for svn login u as it appears in an entry header.

  - With no_author set, the result is the empty string.
  - A login present in the users map yields the mapped text unchanged.
  - Otherwise the result is "login:" when no_host is set, or
    "login <login@default_domain>:".
  """
  if no_author:
    return ''
  # "in" replaces dict.has_key(), which no longer exists in Python 3.
  if u in users:
    # NOTE(review): mapped names get no trailing ":" while generated ones
    # do; kept as-is because it determines the emitted ChangeLog text.
    return users[u]
  if no_host:
    return u + ":"
  return "%s <%s@%s>:" % (u, u, default_domain)

def wrap_text_line(str, pref, width):
  """Word-wrap str and return the result as newline-terminated text.

  The input is split on whitespace and words are packed greedily: a word
  starts a new line when appending it (plus one space) would make the
  current line longer than width.  The first output line is emitted as is;
  every following line is prefixed with pref (the prefix is not counted
  against width).  Input without any word yields a single "\n".
  """
  rows = []
  current = u""
  for token in str.split():
    if current == u"":
      current = token
      continue
    candidate = current + u" " + token
    if len(candidate) > width:
      # Current line is full: close it and start the next one.
      rows.append(current)
      current = token
    else:
      current = candidate
  rows.append(current)

  head = rows[0] + u"\n"
  tail = [pref + row + u"\n" for row in rows[1:]]
  return head + u"".join(tail)

def wrap_text(str, pref, width):
  """Wrap a log message, optionally as an itemised list.

  Without list_format this is just wrap_text_line(str, pref, width).
  With list_format the text is split at every "-" followed by whitespace:
  the part before the first separator is wrapped normally, and each later
  part becomes its own item written as pref + "- ", with continuation
  lines indented two columns further.
  """
  if not list_format:
    return wrap_text_line(str, pref, width)

  pieces = re.split(r"\-\s+", str)
  item_pref = pref + "  "
  out = [wrap_text_line(pieces[0], pref, width)]
  for piece in pieces[1:]:
    out.append(pref + u"- " + wrap_text_line(piece, item_pref, width))
  return u"".join(out)

class Entry:
  """One ChangeLog entry: a single commit or several commits merged by join().

  tm / rev          time and revision of the most recently joined commit
  beg_tm / beg_rev  time and revision of the commit the entry started from
  author            svn login of the committer
  msg               already formatted entry text
  """

  def __init__(self, tm, rev, author, msg):
    self.tm = self.beg_tm = tm
    self.rev = self.beg_rev = rev
    self.author = author
    self.msg = msg

  def join(self, other):
    """Absorb other: append its text and take over its time and revision."""
    self.msg += other.msg
    self.tm, self.rev = other.tm, other.rev

  def dump(self, out):
    """Write the header line and the text to out; do nothing if msg is empty.

    The header shows beg_tm formatted through time.localtime(), the
    revision (or "rev-beg_rev" when they differ) and the author text from
    convert_user().
    """
    if len(self.msg) == 0:
      return

    if date_only:
      tformat = "%Y-%m-%d"
    else:
      tformat = "%Y-%m-%d %H:%M +0000"
    stamp = time.strftime(tformat, time.localtime(self.beg_tm))
    who = convert_user(self.author)

    if self.rev == self.beg_rev:
      header = "%s [r%s]  %s\n\n" % (stamp, self.rev, who)
    else:
      header = "%s [r%s-%s]  %s\n\n" % (stamp, self.rev, self.beg_rev, who)
    out.write(header)
    out.write(self.msg)

  def can_join(self, other):
    """True when other has the same author and lies within max_join_delta."""
    if self.author != other.author:
      return False
    return abs(self.tm - other.tm) < max_join_delta

# Non-greedy so that each /* ... */ pair is matched separately; DOTALL lets a
# comment span several lines of the log message.
_comment_rx = re.compile(r"/\*.*?\*/", re.DOTALL)

def _strip_comments(text):
  """Return text with every complete /* ... */ span removed.

  An opening "/*" without a closing "*/" is left untouched.
  """
  return _comment_rx.sub("", text)

def process_entry(e):
  """Turn one <logentry> element into an Entry, or return None to skip it.

  Reads the "revision" attribute and the <author> (default "anonymous"),
  <date>, <msg> and <paths> children of e.  The entry is skipped when the
  log message is empty or when convert_path() rejects every changed path.
  A <date> that does not match date_rx, or a non-<path> child of <paths>,
  terminates the program through die().

  The entry text is "\\t* <paths>: <message>" wrapped to 65 columns, or
  only the message when no_files is set.  Deleted and added paths are
  marked "(removed)" / "(added)".
  """
  rev = attr(e, "revision")
  if has_child(e, "author"):
    author = child(e, "author").textof()
  else:
    author = "anonymous"
  date_text = child(e, "date").textof()
  m = date_rx.search(date_text)
  # The message always carries one leading space, so an empty log message
  # has length 1 (see the length check below).
  msg = ' ' + child(e, "msg").textof()
  if strip:
    # The former find()-based code duplicated text when "*/" was missing or
    # preceded "/*", and handled only the first comment.
    msg = _strip_comments(msg)

  if not m:
    die("evil date: %s" % date_text)
  # NOTE(review): mktime() interprets the fields as local time; the script
  # entry point sets TZ=UTC -- confirm before reusing this elsewhere.
  tm = time.mktime(time.strptime(m.group(1), "%Y-%m-%dT%H:%M:%S"))

  if len(msg) <= 1:
    # Nothing besides the leading space: commits without a message are
    # not listed.
    return None

  paths = []
  for path in child(e, "paths").children:
    if path.name != "path": die("<paths> has non-<path> child")
    nam = convert_path(path.textof())
    if nam is None:
      continue
    action = attr(path, "action")
    if action == "D":
      paths.append(nam + " (removed)")
    elif action == "A":
      paths.append(nam + " (added)")
    else:
      paths.append(nam)

  if not paths:
    return None
  if no_files:
    text = msg
  else:
    text = ", ".join(paths) + ": " + msg
  return Entry(tm, rev, author, "\t* %s\n" % wrap_text(text, "\t  ", 65))

def process(fin, fout):
  """Parse the svn XML log from fin and write the ChangeLog to fout.

  The root element must be <log> and each of its children <logentry>;
  anything else ends the program through die().  Entries produced by
  process_entry() are merged with the pending one while can_join() allows
  it; otherwise the pending entry is dumped and the new one takes its place.
  """
  root = qp_xml.Parser().parse(fin)
  if root.name != "log":
    die("root is not <log>")

  pending = None
  for node in root.children:
    if node.name != "logentry":
      die("non <logentry> <log> child")
    entry = process_entry(node)
    if entry is None:
      # Skipped commit.
      continue
    if pending is None:
      pending = entry
    elif pending.can_join(entry):
      pending.join(entry)
    else:
      pending.dump(fout)
      pending = entry

  # Flush whatever is still waiting to be merged.
  if pending is not None:
    pending.dump(fout)

def usage():
  """Write the command line help to standard error.

  The program name shown in the text is taken from sys.argv[0].
  """
  # Help text corrections: --output takes an argument (shown as =FILE),
  # "striped" -> "stripped", "less then" -> "less than".
  sys.stderr.write(\
"""Usage: %(prog)s [OPTIONS] [FILE]
Convert specified subversion xml logfile to GNU-style ChangeLog.

Options:
  -p, --prefix=REGEXP  set root directory of project (it will be stripped off
                       from ChangeLog entries, paths outside it will be
                       ignored)
  -x, --exclude=DIR    exclude DIR from ChangeLog (relative to prefix)
  -o, --output=FILE    set output file (defaults to 'ChangeLog')
  -d, --domain=DOMAIN  set default domain for logins not listed in users file
  -u, --users=FILE     read logins from specified file
  -F, --list-format    format commit logs with enumerated change list (items
                       prefixed by '- ')
  -r, --relocate=X=Y   before doing any other operations on paths, replace
                       X with Y (useful for directory moves)
  -D, --delta=SECS     when log entries differ by less than SECS seconds and
                       have the same author -- they are merged, it defaults
                       to 180 seconds
  -h, --help           print this information
  -s, --strip-comments strip /* ... */ comments in log
  -O, --only-date      generate only dates (without time)
  -L, --no-files       generate log without filenames
  -A, --no-author      generate log without author names
  -H, --no-host        generate author name without hostname

Users file is used to map svn logins to real names to appear in ChangeLog.
If login is not found in users file "login <login@domain>" is used.

Example users file:
john    John X. Foo <jfoo@example.org>
mark    Marcus Blah <mb@example.org>

Typical usage of this script is something like this:

  svn log -v --xml | %(prog)s -p '/foo/(branches/[^/]+|trunk)' -u aux/users

Please send bug reports and comments to author:
  Michal Moskal <malekith@pld-linux.org>

Regarding -s, -O, -L, -A, -H options see
  http://www.core.com.pl/svn2log


""" % {"prog": sys.argv[0]})

def utf_open(name, mode):
  """Open the file name in the given mode as UTF-8 text through codecs.open().

  The "replace" error handler is used, so data that cannot be decoded or
  encoded is substituted instead of raising.
  """
  encoding = "utf-8"
  on_error = "replace"
  return codecs.open(name, mode, encoding, on_error)

def _usage_error(msg):
  """Print the usage text followed by msg to stderr and exit with status 2."""
  usage()
  sys.stderr.write(msg + "\n")
  sys.exit(2)

def _load_users(path):
  """Add the login -> name mappings found in the users file path to users.

  Each line is "login  replacement text...".  Lines starting with "#" and
  lines with fewer than two whitespace separated fields are ignored.
  """
  f = utf_open(path, "r")
  try:
    # Plain iteration replaces the long deprecated xreadlines().
    for line in f:
      w = line.split()
      if len(line) < 1 or line[0] == '#' or len(w) < 2:
        continue
      users[w[0]] = " ".join(w[1:])
  finally:
    f.close()

def process_opts():
  """Parse sys.argv, update the module settings and run the conversion.

  Input is the single optional FILE argument (default: standard input);
  output goes to the -o/--output file (default: "ChangeLog").  Unknown
  options, malformed option values and more than one FILE argument print
  the usage text and exit with status 2; -h/--help prints it and exits
  with status 0.
  """
  global kill_prefix_rx, exclude, users, default_domain, reloc, max_join_delta, list_format, strip, date_only, no_files, no_host, no_author
  try:
    opts, args = getopt.gnu_getopt(sys.argv[1:], "o:u:p:x:d:r:D:FhsOLHA",
                                   ["users=", "prefix=", "domain=", "delta=",
                                    "exclude=", "help", "output=", "relocate=",
                                    "list-format", "strip-comments", "only-date",
                                    "no-files", "no-host", "no-author"])
  except getopt.GetoptError:
    usage()
    sys.exit(2)

  # Only remember the output name here.  Opening it while still parsing
  # would truncate the file even if a later option turns out to be --help
  # or invalid.
  out_name = "ChangeLog"
  for o, a in opts:
    if o in ("--prefix", "-p"):
      try:
        kill_prefix_rx = re.compile("^" + a)
      except re.error:
        _usage_error("invalid --prefix regexp: %s" % a)
    elif o in ("--exclude", "-x"):
      exclude.append(a)
    elif o in ("--help", "-h"):
      usage()
      sys.exit(0)
    elif o in ("--output", "-o"):
      out_name = a
    elif o in ("--domain", "-d"):
      default_domain = a
    elif o in ("--strip-comments", "-s"):
      strip = True
    elif o in ("--only-date", "-O"):
      date_only = True
    elif o in ("--no-files", "-L"):
      no_files = True
    elif o in ("--no-host", "-H"):
      no_host = True
    elif o in ("--no-author", "-A"):
      no_author = True
    elif o in ("--users", "-u"):
      _load_users(a)
    elif o in ("--relocate", "-r"):
      if "=" not in a:
        _usage_error("--relocate expects X=Y, got: %s" % a)
      # Split once so that the replacement may itself contain "=".
      src, target = a.split("=", 1)
      reloc[src] = target
    elif o in ("--delta", "-D"):
      try:
        max_join_delta = int(a)
      except ValueError:
        _usage_error("--delta expects a number of seconds, got: %s" % a)
    elif o in ("--list-format", "-F"):
      list_format = True
    else:
      usage()
      sys.exit(2)
  if len(args) > 1:
    usage()
    sys.exit(2)

  fin = sys.stdin
  if len(args) == 1:
    fin = open(args[0], "r")
  try:
    fout = utf_open(out_name, "w")
    try:
      process(fin, fout)
    finally:
      # Close explicitly so the ChangeLog is flushed even when die()
      # aborts the run.
      fout.close()
  finally:
    if fin is not sys.stdin:
      fin.close()

if __name__ == "__main__":
  # Run with TZ=UTC.  time.tzset() does not exist on every platform, so it
  # is applied only where the time module provides it.
  os.environ['TZ'] = 'UTC'
  apply_tz = getattr(time, "tzset", None)
  if apply_tz is not None:
    apply_tz()
  process_opts()