#!/usr/bin/env python
"""Launchpad to github bug migration script.

There's a ton of code from Hydrazine copied here:
https://launchpad.net/hydrazine

Usage
-----

This code is meant to port a bug database for a project from Launchpad to
GitHub. It was used to port the IPython bug history.

The code is meant to be used interactively. I ran it multiple times in one
long IPython session, until the data structures I was getting from Launchpad
looked right. Then I turned off (see 'if 0' markers below) the Launchpad
part, and ran it again with the github part executing and using the 'bugs'
variable from my interactive namespace (via "%run -i" in IPython).

This code is NOT fire and forget, it's meant to be used with some intelligent
supervision at the wheel. Start by making a test repository (I made one
called ipython/BugsTest) and upload only a few issues into that. Once you are
sure that everything is OK, run it against your real repo with all your
issues.

You should read all the code below and roughly understand what's going on
before using this. Since I didn't intend to use this more than once, it's not
particularly robust or documented. It got the job done and I've never used it
again.

Configuration
-------------

To pull things off LP, you need to log in first (see the Hydrazine docs).
Your Hydrazine credentials will be cached locally and this script can reuse
them.

To push to GH, you need to set below the GH repository owner, API token and
repository name you want to push issues into. See the GH section for the
necessary variables.
"""
from __future__ import print_function

import collections
import os.path
import subprocess
import sys
import time

from pprint import pformat

import launchpadlib
from launchpadlib.credentials import Credentials
from launchpadlib.launchpad import (
    Launchpad, STAGING_SERVICE_ROOT, EDGE_SERVICE_ROOT
)

#-----------------------------------------------------------------------------
# Launchpad configuration
#-----------------------------------------------------------------------------

# The official LP project name
PROJECT_NAME = 'statsmodels'

# How LP marks your bugs, I don't know where this is stored, but they use it to
# generate bug descriptions and we need to split on this string to create
# shorter Github bug titles
PROJECT_ID = 'statsmodels'

# Default Launchpad server, see their docs for details
service_root = EDGE_SERVICE_ROOT

#-----------------------------------------------------------------------------
# Code copied/modified from Hydrazine (https://launchpad.net/hydrazine)
#-----------------------------------------------------------------------------

# Names Launchpad uses for bug importance and bug status values
lp_importances = ['Critical', 'High', 'Medium', 'Low', 'Wishlist', 'Undecided']
lp_status = ['Confirmed', 'Triaged', 'Fix Committed', 'Fix Released',
             'In Progress', "Won't Fix", "Incomplete", "Invalid", "New"]


def squish(a):
    """Return `a` lowercased, with spaces turned into '_' and "'" removed.

    For example "Won't Fix" becomes 'wont_fix'.
    """
    return a.lower().replace(' ', '_').replace("'", '')


# Canonical ("squished") forms of the names above, for fast membership tests
lp_importances_c = set(map(squish, lp_importances))
lp_status_c = set(map(squish, lp_status))


def trace(s):
    """Write the message `s`, followed by a newline, to stderr."""
    sys.stderr.write(s + '\n')


def create_session():
    """Return a logged-in Launchpad object, reusing cached credentials.

    Creates the launchpadlib/hydrazine cache directories under ~/.cache if
    they are missing.  If a hydrazine credentials file already exists it is
    loaded; otherwise Launchpad.get_token_and_login() is called and the
    resulting credentials are saved to that file for next time.
    """
    lplib_cachedir = os.path.expanduser("~/.cache/launchpadlib/")
    hydrazine_cachedir = os.path.expanduser("~/.cache/hydrazine/")
    rrd_dir = os.path.expanduser("~/.cache/hydrazine/rrd")
    for d in [lplib_cachedir, hydrazine_cachedir, rrd_dir]:
        if not os.path.isdir(d):
            # Owner-only permissions: these directories hold credentials
            os.makedirs(d, mode=0o700)

    hydrazine_credentials_filename = os.path.join(hydrazine_cachedir,
                                                  'credentials')
    if os.path.exists(hydrazine_credentials_filename):
        credentials = Credentials()
        # Context manager so the file handle is closed after loading
        with open(hydrazine_credentials_filename, "r") as cred_file:
            credentials.load(cred_file)
        trace('loaded existing credentials')
        # TODO: handle the case of having credentials that have expired etc
        return Launchpad(credentials, service_root, lplib_cachedir)

    launchpad = Launchpad.get_token_and_login(
        'Hydrazine', service_root, lplib_cachedir)
    trace('saving credentials...')
    # Context manager so the credentials are flushed and the file is closed
    with open(hydrazine_credentials_filename, "w") as cred_file:
        launchpad.credentials.save(cred_file)
    return launchpad


def canonical_enum(entered, options):
    """Return the squished form of `entered` if it is in `options`, else None.
    """
    entered = squish(entered)
    return entered if entered in options else None


def canonical_importance(from_importance):
    """Return the canonical importance name, or None if it is not known."""
    return canonical_enum(from_importance, lp_importances_c)


def canonical_status(entered):
    """Return the canonical status name, or None if it is not known."""
    return canonical_enum(entered, lp_status_c)

#-----------------------------------------------------------------------------
# Functions and classes
#-----------------------------------------------------------------------------


class Base(object):
    """Base class whose str/repr pretty-prints the public attributes."""

    def __str__(self):
        # Attributes starting with '_' are treated as private and hidden
        public = dict((k, v) for k, v in self.__dict__.items()
                      if not k.startswith('_'))
        return pformat(public)

    __repr__ = __str__


class Message(Base):
    """Local copy of the fields we keep from one Launchpad bug message."""

    def __init__(self, m):
        """Copy content, owner and creation date out of LP message `m`."""
        self.content = m.content
        o = m.owner
        self.owner = o.name
        self.owner_name = o.display_name
        self.date = m.date_created


class Bug(Base):
    """Local copy of the fields we keep from one Launchpad bug task."""

    def __init__(self, bt):
        """Copy the data we want out of bug task `bt` into plain attributes.

        `bt` is one of the entries yielded by project.searchTasks().
        """
        # Cache a few things for which launchpad will make a web request each
        # time.
        bug = bt.bug
        o = bt.owner
        a = bt.assignee
        dupe = bug.duplicate_of

        # Store from the launchpadlib bug objects only what we want, and as
        # local data
        self.id = bug.id
        self.lp_url = 'https://bugs.launchpad.net/%s/+bug/%i' % \
                      (PROJECT_NAME, self.id)
        self.title = bt.title
        self.description = bug.description

        # Every bug has an owner (who created it)
        self.owner = o.name
        self.owner_name = o.display_name

        # Not all bugs have been assigned to someone yet
        try:
            self.assignee = a.name
            self.assignee_name = a.display_name
        except AttributeError:
            self.assignee = self.assignee_name = None

        # Store status/importance in canonical format
        self.status = canonical_status(bt.status)
        self.importance = canonical_importance(bt.importance)
        self.tags = bug.tags

        # Store the bug discussion messages, but skip m[0], which is the same
        # as the bug description we already stored
        self.messages = [Message(m) for m in list(bug.messages)[1:]]
        self.milestone = getattr(bt.milestone, 'name', None)

        # Duplicate handling disabled, since the default query already filters
        # out the duplicates.  Keep the code here in case we ever want to look
        # into this...
        # NOTE(review): the original indentation was lost; the extent of this
        # disabled block is a best guess -- confirm against the upstream
        # script.
        if 0:
            # Track duplicates conveniently
            try:
                self.duplicate_of = dupe.id
                self.is_duplicate = True
            except AttributeError:
                self.duplicate_of = None
                self.is_duplicate = False

            # dbg dupe info
            if bug.number_of_duplicates > 0:
                self.duplicates = [d.id for d in bug.duplicates]
            else:
                self.duplicates = []

        # tmp - debug (leading underscore keeps these out of __str__)
        self._bt = bt
        self._bug = bug

#-----------------------------------------------------------------------------
# Main script
#-----------------------------------------------------------------------------

#-----------------------------------------------------------------------------
# Launchpad part
#-----------------------------------------------------------------------------

# launchpad = create_session()
launchpad = Launchpad.login_with('statsmodels', 'production')

project = launchpad.projects[PROJECT_NAME]

# Note: by default, searchTasks will give us all bugs except duplicates and
# those with status "won't fix" or 'invalid'.  We pass the full status list
# explicitly, so bugs in those two statuses are requested as well.
bug_tasks = project.searchTasks(status=lp_status)

# Maps Launchpad bug id -> Bug; this is the 'bugs' variable the github part
# relies on when run with "%run -i".
bugs = {}
for bt in list(bug_tasks):
    b = Bug(bt)
    bugs[b.id] = b
    print(b.title)
    sys.stdout.flush()

#-----------------------------------------------------------------------------
# Github part
#-----------------------------------------------------------------------------

#http://pypi.python.org/pypi/github2
#http://github.com/ask/python-github2

# Github libraries
from github2 import core, issues, client

# Pick up edits to the github2 modules between interactive runs.
# (`reload` is the Python 2 builtin.)
for mod in (core, issues, client):
    reload(mod)


def format_title(bug):
    """Return a short GitHub issue title for `bug`.

    Launchpad titles carry a prefix that ends in '<PROJECT_ID>: '; everything
    after the first such marker is kept and surrounding double quotes are
    stripped.  If the marker is missing, the whole title is used instead of
    raising IndexError.
    """
    marker = '{0}: '.format(PROJECT_ID)
    # With maxsplit=1 the last piece is the text after the marker, or the
    # entire title when the marker does not occur.
    return bug.title.split(marker, 1)[-1].strip('"')


def format_body(bug):
    """Return the GitHub issue body for `bug`.

    The body links back to the Launchpad bug, names the reporter and then
    includes the original description.
    """
    # NOTE(review): the line breaks inside this template were reconstructed
    # from a whitespace-mangled copy -- confirm against the upstream script.
    body = \
"""Original Launchpad bug {bug.id}: {bug.lp_url}
Reported by: {bug.owner} ({owner_name}).

{description}""".format(bug=bug, owner_name=bug.owner_name.encode('utf-8'),
                        description=bug.description.encode('utf-8'))
    return body


def format_message(num, m):
    """Return the GitHub comment text for Launchpad message `m`.

    `num` is the comment number shown in the header line.
    """
    # NOTE(review): the line breaks inside this template were reconstructed
    # from a whitespace-mangled copy -- confirm against the upstream script.
    body = \
"""[ LP comment {num} by: {owner_name}, on {m.date!s} ]

{content}""".format(num=num, m=m, owner_name=m.owner_name.encode('utf-8'),
                    content=m.content.encode('utf-8'))
    return body


# Config
user = 'wesm'
# NOTE(review): an API token is committed in source here.  Prefer exporting
# GITHUB_API_TOKEN; the literal is kept only as the fallback so existing
# behaviour is unchanged, and it should be revoked and removed.
token = os.environ.get('GITHUB_API_TOKEN', '12efaff85b8e17f63ee835c5632b8cf0')
repo = 'statsmodels/statsmodels'
#repo = 'ipython/ipython'

# Skip bugs with this status:
# to_skip = set([u'fix_committed', u'incomplete'])
to_skip = set()

# Only label these importance levels:
gh_importances = set([u'critical', u'high', u'low', u'medium', u'wishlist'])

# Pause settings: too many fast requests and gh will block us.
# I just eyeballed these values by trial and error.
batch_size = 10   # take a long pause after every `batch_size` filed issues
tsleep = 60       # length of that long pause, in seconds

# Start script
gh = client.Github(username=user, api_token=token)

# Filter out the full LP bug dict to process only the ones we want
bugs_todo = dict((lp_id, lp_bug) for lp_id, lp_bug in bugs.items()
                 if lp_bug.status not in to_skip)

# Select which bug ids to run
#bids = bugs_todo.keys()[50:100]
# bids = bugs_todo.keys()[12:]
bids = list(bugs_todo.keys())
#bids = bids[:5]+[502787]

# Start loop over bug ids and file them on Github
nbugs = len(bids)
gh_issues = []  # for reporting at the end
for n, bug_id in enumerate(bids):
    bug = bugs[bug_id]
    title = format_title(bug)
    body = format_body(bug)

    print()
    if len(title) < 65:
        print(bug.id, '[{0}/{1}]'.format(n+1, nbugs), title)
    else:
        print(bug.id, title[:65]+'...')

    # still check bug.status, in case we manually added other bugs to the list
    # above (mostly during testing)
    if bug.status in to_skip:
        print('--- Skipping - status:', bug.status)
        continue

    # No newline: the 'created GitHub #' message completes this line
    print('+++ Filing...', end=' ')
    sys.stdout.flush()

    # Create github issue for this bug
    issue = gh.issues.open(repo, title=title, body=body)
    print('created GitHub #', issue.number)
    gh_issues.append(issue.number)
    sys.stdout.flush()

    # Mark status as a label
    #status = 'status-{0}'.format(bug.status)
    #gh.issues.add_label(repo, issue.number, status)

    # Mark any extra tags we might have as labels.  This must read the tags
    # of the bug being filed (`bug`), not `b`, which is the stale loop
    # variable left over from the Launchpad section above.
    for tag in bug.tags:
        label = 'tag-{0}'.format(tag)
        gh.issues.add_label(repo, issue.number, label)

    # If bug has assignee, add it as label
    if bug.assignee:
        # Github bug, gets confused with dots in labels.
        gh.issues.add_label(repo, issue.number,
                            bug.assignee.replace('.', '_'))

    if bug.importance in gh_importances:
        if bug.importance == 'wishlist':
            label = bug.importance
        else:
            label = 'prio-{0}'.format(bug.importance)
        gh.issues.add_label(repo, issue.number, label)

    if bug.milestone:
        label = 'milestone-{0}'.format(bug.milestone).replace('.', '_')
        gh.issues.add_label(repo, issue.number, label)

    # Add original message thread; messages on LP are numbered from 1
    for num, message in enumerate(bug.messages, 1):
        comment = format_message(num, message)
        gh.issues.comment(repo, issue.number, comment)
        time.sleep(0.5)  # soft sleep after each message to prevent gh block

    # NOTE(review): 'wont_fix' bugs are left open here -- confirm intended.
    if bug.status in ['fix_committed', 'fix_released', 'invalid']:
        gh.issues.close(repo, issue.number)

    time.sleep(1)  # soft sleep after each request
    # And longer one after every batch
    if (len(gh_issues) % batch_size) == 0:
        print()
        print('*** SLEEPING for {0} seconds to avoid github blocking... ***'
              .format(tsleep))
        sys.stdout.flush()
        time.sleep(tsleep)

# Summary report
print()
print('*'*80)
print('Summary of GitHub issues filed:')
print(gh_issues)
print('Total:', len(gh_issues))