From 4fb1f1614939f066bc1030894c7e6ddaa5ca62b9 Mon Sep 17 00:00:00 2001 From: Matthias Neeracher Date: Wed, 9 May 2007 06:15:03 +0000 Subject: [PATCH] Add svn2log tool --- scripts/svn2log.py | 355 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 355 insertions(+) create mode 100755 scripts/svn2log.py diff --git a/scripts/svn2log.py b/scripts/svn2log.py new file mode 100755 index 0000000..8280053 --- /dev/null +++ b/scripts/svn2log.py @@ -0,0 +1,355 @@ +#!/usr/bin/python +# +# Copyright (c) 2003 The University of Wroclaw. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the University may not be used to endorse or promote +# products derived from this software without specific prior +# written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN +# NO EVENT SHALL THE UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#
# History:
#
# 2006-08-03 Przedsiebiorstwo Informatyczne CORE :
#   * Following options were added:
#     -s, --strip-comments  strip /* ... */ comments in log
#     -O, --only-date       generate only dates (without time)
#     -L, --no-files        generate log without filenames
#     -A, --no-author       generate log without author names
#     -H, --no-host         generate author name without hostname
#


import sys
import os
import time
import re
import getopt
import string
import codecs
import calendar

# PyXML is an optional, external package.  Guard the import so that --help
# and the pure helpers keep working without it; process() reports a clear
# error when the parser is actually needed.
try:
    from xml.utils import qp_xml
except ImportError:
    qp_xml = None

# Run-time configuration, filled in by process_opts().
kill_prefix_rx = None
default_domain = "localhost"
exclude = []
users = { }
reloc = { }
max_join_delta = 3 * 60
list_format = False
strip = False
date_only = False
no_files = False
no_host = False
no_author = False

date_rx = re.compile(r"^(\d+-\d+-\d+T\d+:\d+:\d+)")
# Non-greedy so that each /* ... */ pair is matched separately; DOTALL lets
# a comment span several lines of the commit message.
comment_rx = re.compile(r"/\*.*?\*/", re.DOTALL)


def die(msg):
    """Write msg to stderr and terminate the program with exit status 1."""
    sys.stderr.write(msg + "\n")
    sys.exit(1)


def attr(e, n):
    """Return the value of the un-namespaced attribute n of element e."""
    return e.attrs[("", n)]


def has_child(e, n):
    """Return True when element e has a direct child named n."""
    for c in e.children:
        if c.name == n:
            return True
    return False


def child(e, n):
    """Return the first direct child of e named n; die() if there is none."""
    for c in e.children:
        if c.name == n:
            return c
    die("<%s> doesn't have <%s> child" % (e.name, n))


def convert_path(n):
    """Map a repository path to the name shown in the ChangeLog.

    Relocations from `reloc` are applied first, then the project prefix
    (`kill_prefix_rx`) is removed and a single leading slash is dropped.
    Returns None when the path lies outside the prefix or starts with one
    of the `exclude` entries; an empty result is reported as "/".
    """
    for src, dst in reloc.items():
        n = n.replace(src, dst)
    if kill_prefix_rx is not None:
        if kill_prefix_rx.search(n):
            n = kill_prefix_rx.sub("", n)
        else:
            return None
    if n.startswith("/"):
        n = n[1:]
    if n == "":
        n = "/"
    for pref in exclude:
        if n.startswith(pref):
            return None
    return n


def convert_user(u):
    """Return the author text for login u, honouring the author options.

    Returns '' when authors are suppressed, the users-file entry when one
    exists, "login:" with --no-host, and "login <login@domain>:" otherwise.
    """
    if no_author:
        return ''
    if u in users:
        return users[u]
    if no_host:
        return u + ":"
    return "%s <%s@%s>:" % (u, u, default_domain)


def wrap_text_line(str, pref, width):
    """Re-flow the words of str into lines of at most width characters.

    The first line is emitted as is, every following line is prefixed with
    pref, and every line (including the last) ends with a newline.  Input
    without any words yields a single "\\n".  The parameter keeps its
    original name `str` for interface compatibility.
    """
    lines = []
    line = u""
    for word in str.split():
        if line == u"":
            line = word
        elif len(line) + 1 + len(word) > width:
            lines.append(line)
            line = word
        else:
            line += u" " + word
    lines.append(line)
    return (u"\n" + pref).join(lines) + u"\n"


def wrap_text(str, pref, width):
    """Wrap a commit message, optionally as a "- " bullet list.

    Without --list-format this is wrap_text_line().  With it, the text is
    split on "- " markers; the part before the first marker is wrapped
    normally and each item is emitted as its own prefixed bullet.
    """
    if not list_format:
        return wrap_text_line(str, pref, width)
    items = re.split(r"\-\s+", str)
    ret = wrap_text_line(items[0], pref, width)
    for item in items[1:]:
        ret += pref + u"- " + wrap_text_line(item, pref + " ", width)
    return ret


class Entry:
    """One ChangeLog entry; may cover several joined revisions."""

    def __init__(self, tm, rev, author, msg):
        # tm/rev follow the most recently joined revision, beg_tm/beg_rev
        # stay at the revision the entry was created from.
        self.tm = tm
        self.rev = rev
        self.author = author
        self.msg = msg
        self.beg_tm = tm
        self.beg_rev = rev

    def join(self, other):
        """Absorb other: take over its time and revision, append its text."""
        self.tm = other.tm
        self.rev = other.rev
        self.msg += other.msg

    def dump(self, out):
        """Write the header line and message to out; empty entries are skipped."""
        if len(self.msg) == 0:
            return
        if date_only:
            tformat = "%Y-%m-%d"
        else:
            tformat = "%Y-%m-%d %H:%M +0000"
        # gmtime() keeps the printed "+0000" correct whatever TZ is set to.
        stamp = time.strftime(tformat, time.gmtime(self.beg_tm))
        if self.rev != self.beg_rev:
            out.write("%s [r%s-%s] %s\n\n" %
                      (stamp, self.rev, self.beg_rev, convert_user(self.author)))
        else:
            out.write("%s [r%s] %s\n\n" %
                      (stamp, self.rev, convert_user(self.author)))
        out.write(self.msg)

    def can_join(self, other):
        """Return True when other has the same author and is close in time."""
        return self.author == other.author and abs(self.tm - other.tm) < max_join_delta


def strip_comments(msg):
    """Remove every closed /* ... */ comment from msg.

    An unterminated "/*" is left untouched rather than corrupting the text.
    """
    return comment_rx.sub("", msg)


def process_entry(e):
    """Convert one <logentry> element into an Entry.

    Returns None when the message is empty or when no changed path survives
    convert_path().  Calls die() on a malformed date or unexpected children.
    """
    rev = attr(e, "revision")
    if has_child(e, "author"):
        author = child(e, "author").textof()
    else:
        author = "anonymous"
    m = date_rx.search(child(e, "date").textof())
    msg = ' ' + child(e, "msg").textof()
    if strip:
        msg = strip_comments(msg)

    if not m:
        die("evil date: %s" % child(e, "date").textof())
    # svn reports UTC; timegm() interprets the parsed fields as UTC without
    # relying on the process time zone.
    tm = calendar.timegm(time.strptime(m.group(1), "%Y-%m-%dT%H:%M:%S"))

    paths = []
    # msg always carries the leading blank, so length 1 means "no message".
    if len(msg) > 1:
        for path in child(e, "paths").children:
            if path.name != "path":
                die("<paths> has non-<path> child")
            nam = convert_path(path.textof())
            if nam is None:
                continue
            action = attr(path, "action")
            if action == "D":
                paths.append(nam + " (removed)")
            elif action == "A":
                paths.append(nam + " (added)")
            else:
                paths.append(nam)

    if not paths:
        return None
    if no_files:
        text = msg
    else:
        text = ", ".join(paths) + ": " + msg
    return Entry(tm, rev, author, "\t* %s\n" % wrap_text(text, "\t ", 65))


def process(fin, fout):
    """Parse the svn XML log from fin and write the ChangeLog to fout.

    Consecutive entries accepted by Entry.can_join() are merged into one.
    """
    if qp_xml is None:
        die("the PyXML package (xml.utils.qp_xml) is required to parse the log")
    parser = qp_xml.Parser()
    root = parser.parse(fin)

    if root.name != "log":
        die("root is not <log>")

    cur = None
    for logentry in root.children:
        if logentry.name != "logentry":
            die("non <logentry> child")
        e = process_entry(logentry)
        if e is None:
            continue
        if cur is not None and cur.can_join(e):
            cur.join(e)
        else:
            if cur is not None:
                cur.dump(fout)
            cur = e

    if cur is not None:
        cur.dump(fout)


def usage():
    """Print the command line help to stderr."""
    sys.stderr.write(
"""Usage: %s [OPTIONS] [FILE]
Convert specified subversion xml logfile to GNU-style ChangeLog.

Options:
  -p, --prefix=REGEXP   set root directory of project (it will be striped off
                        from ChangeLog entries, paths outside it will be
                        ignored)
  -x, --exclude=DIR     exclude DIR from ChangeLog (relative to prefix)
  -o, --output          set output file (defaults to 'ChangeLog')
  -d, --domain=DOMAIN   set default domain for logins not listed in users file
  -u, --users=FILE      read logins from specified file
  -F, --list-format     format commit logs with enumerated change list (items
                        prefixed by '- ')
  -r, --relocate=X=Y    before doing any other operations on paths, replace
                        X with Y (useful for directory moves)
  -D, --delta=SECS      when log entries differ by less then SECS seconds and
                        have the same author -- they are merged, it defaults
                        to 180 seconds
  -h, --help            print this information
  -s, --strip-comments  strip /* ... */ comments in log
  -O, --only-date       generate only dates (without time)
  -L, --no-files        generate log without filenames
  -A, --no-author       generate log without author names
  -H, --no-host         generate author name without hostname

Users file is used to map svn logins to real names to appear in ChangeLog.
If login is not found in users file "login <login@domain>" is used.

Example users file:
john    John X. Foo
mark    Marcus Blah

Typical usage of this script is something like this:

  svn log -v --xml | %s -p '/foo/(branches/[^/]+|trunk)' -u aux/users

Please send bug reports and comments to author:
  Michal Moskal

Regarding -s, -O, -L, -A, -H options see
  http://www.core.com.pl/svn2log


""" % (sys.argv[0], sys.argv[0]))


def utf_open(name, mode):
    """Open name as UTF-8 text; undecodable input is replaced, not fatal."""
    return codecs.open(name, mode, encoding="utf-8", errors="replace")


def load_users(name):
    """Read "login Real Name ..." lines from file name into `users`.

    Lines starting with '#' and lines with fewer than two fields are ignored.
    """
    f = utf_open(name, "r")
    try:
        for line in f:
            w = line.split()
            if line.startswith('#') or len(w) < 2:
                continue
            users[w[0]] = " ".join(w[1:])
    finally:
        f.close()


def process_opts():
    """Parse sys.argv, configure the module globals and run the conversion.

    Exits with status 2 (after printing usage) on invalid options and with
    status 1 on invalid option values.
    """
    global kill_prefix_rx, exclude, users, default_domain, reloc, max_join_delta, list_format, strip, date_only, no_files, no_host, no_author
    try:
        opts, args = getopt.gnu_getopt(sys.argv[1:], "o:u:p:x:d:r:D:FhsOLHA",
                                       ["users=", "prefix=", "domain=", "delta=",
                                        "exclude=", "help", "output=", "relocate=",
                                        "list-format", "strip-comments", "only-date", "no-files",
                                        "no-host", "no-author"])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    # Only remember the output name here: opening it right away would
    # truncate the file even when a later option turns out to be invalid.
    out_name = "ChangeLog"
    for o, a in opts:
        if o in ("--prefix", "-p"):
            kill_prefix_rx = re.compile("^" + a)
        elif o in ("--exclude", "-x"):
            exclude.append(a)
        elif o in ("--help", "-h"):
            usage()
            sys.exit(0)
        elif o in ("--output", "-o"):
            out_name = a
        elif o in ("--domain", "-d"):
            default_domain = a
        elif o in ("--strip-comments", "-s"):
            strip = True
        elif o in ("--only-date", "-O"):
            date_only = True
        elif o in ("--no-files", "-L"):
            no_files = True
        elif o in ("--no-host", "-H"):
            no_host = True
        elif o in ("--no-author", "-A"):
            no_author = True
        elif o in ("--users", "-u"):
            load_users(a)
        elif o in ("--relocate", "-r"):
            if "=" not in a:
                die("--relocate expects X=Y, got: %s" % a)
            # Split on the first '=' only so that Y may itself contain '='.
            (src, target) = a.split("=", 1)
            reloc[src] = target
        elif o in ("--delta", "-D"):
            try:
                max_join_delta = int(a)
            except ValueError:
                die("--delta expects a number of seconds, got: %s" % a)
        elif o in ("--list-format", "-F"):
            list_format = True
        else:
            usage()
            sys.exit(2)
    if len(args) > 1:
        usage()
        sys.exit(2)
    fin = sys.stdin
    if len(args) == 1:
        fin = open(args[0], "r")
    try:
        fout = utf_open(out_name, "w")
        try:
            process(fin, fout)
        finally:
            # Also reached via die() (SystemExit), so output is flushed.
            fout.close()
    finally:
        if fin is not sys.stdin:
            fin.close()


if __name__ == "__main__":
    # Timestamps are handled in UTC explicitly; the TZ override is kept so
    # that the process environment matches earlier versions of the script.
    os.environ['TZ'] = 'UTC'
    try:
        time.tzset()
    except AttributeError:
        pass
    process_opts()