You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
372 lines
13 KiB
Python
372 lines
13 KiB
Python
#!/usr/bin/env python
|
|
"""Launchpad to github bug migration script.
|
|
|
|
There's a ton of code from Hydrazine copied here:
|
|
https://launchpad.net/hydrazine
|
|
|
|
|
|
Usage
|
|
-----
|
|
|
|
This code is meant to port a bug database for a project from Launchpad to
|
|
GitHub. It was used to port the IPython bug history.
|
|
|
|
The code is meant to be used interactively. I ran it multiple times in one long
|
|
IPython session, until the data structures I was getting from Launchpad looked
|
|
right. Then I turned off (see 'if 0' markers below) the Launchpad part, and ran
|
|
it again with the github part executing and using the 'bugs' variable from my
|
|
interactive namespace (via "%run -i" in IPython).
|
|
|
|
This code is NOT fire and forget, it's meant to be used with some intelligent
|
|
supervision at the wheel. Start by making a test repository (I made one called
|
|
ipython/BugsTest) and upload only a few issues into that. Once you are sure
|
|
that everything is OK, run it against your real repo with all your issues.
|
|
|
|
You should read all the code below and roughly understand what's going on
|
|
before using this. Since I didn't intend to use this more than once, it's not
|
|
particularly robust or documented. It got the job done and I've never used it
|
|
again.
|
|
|
|
Configuration
|
|
-------------
|
|
|
|
To pull things off LP, you need to log in first (see the Hydrazine docs). Your
|
|
Hydrazine credentials will be cached locally and this script can reuse them.
|
|
|
|
To push to GH, you need to set below the GH repository owner, API token and
|
|
repository name you want to push issues into. See the GH section for the
|
|
necessary variables.
|
|
"""
|
|
|
|
import collections
|
|
import os.path
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
|
|
from pprint import pformat
|
|
|
|
import launchpadlib
|
|
from launchpadlib.credentials import Credentials
|
|
from launchpadlib.launchpad import (
|
|
Launchpad, STAGING_SERVICE_ROOT, EDGE_SERVICE_ROOT )
|
|
|
|
#-----------------------------------------------------------------------------
|
|
# Launchpad configuration
|
|
#-----------------------------------------------------------------------------
|
|
# Name under which the project is registered on Launchpad.  It is used to
# look the project up and to build the URL of each bug.
PROJECT_NAME = 'statsmodels'

# Label Launchpad puts in front of the quoted bug title.  It is not clear
# where Launchpad stores it; titles are split on this string to obtain the
# shorter titles used for the GitHub issues.
PROJECT_ID = 'statsmodels'
|
|
|
|
# Launchpad service root handed to the Launchpad constructor calls in
# create_session(); see the launchpadlib docs for the available servers.
service_root = EDGE_SERVICE_ROOT
|
|
|
|
#-----------------------------------------------------------------------------
|
|
# Code copied/modified from Hydrazine (https://launchpad.net/hydrazine)
|
|
#-----------------------------------------------------------------------------
|
|
|
|
# Display names Launchpad uses for bug importance and bug status.
lp_importances = [
    'Critical',
    'High',
    'Medium',
    'Low',
    'Wishlist',
    'Undecided',
]

lp_status = [
    'Confirmed',
    'Triaged',
    'Fix Committed',
    'Fix Released',
    'In Progress',
    "Won't Fix",
    "Incomplete",
    "Invalid",
    "New",
]
|
|
|
|
def squish(a):
    """Return `a` in canonical form.

    The string is lower-cased, spaces become underscores and apostrophes are
    dropped, e.g. "Won't Fix" -> "wont_fix".
    """
    lowered = a.lower()
    underscored = lowered.replace(' ', '_')
    return underscored.replace("'", '')
|
|
|
|
# Canonical (squished) spellings of the Launchpad names, kept as sets for
# membership tests.
lp_importances_c = set(squish(name) for name in lp_importances)
lp_status_c = set(squish(name) for name in lp_status)
|
|
|
|
def trace(s):
    """Write the string `s`, followed by a newline, to standard error."""
    line = s + '\n'
    sys.stderr.write(line)
|
|
|
|
|
|
def create_session():
    """Return a Launchpad session, reusing cached Hydrazine credentials.

    The launchpadlib and Hydrazine cache directories under ``~/.cache`` are
    created (mode 0700) when missing.  If a Hydrazine credentials file
    exists it is loaded and used to build the session; otherwise
    ``Launchpad.get_token_and_login`` is called and the credentials it
    obtained are saved to that file for later runs.

    Returns
    -------
    The ``Launchpad`` object for ``service_root``.
    """
    lplib_cachedir = os.path.expanduser("~/.cache/launchpadlib/")
    hydrazine_cachedir = os.path.expanduser("~/.cache/hydrazine/")
    rrd_dir = os.path.expanduser("~/.cache/hydrazine/rrd")
    for d in [lplib_cachedir, hydrazine_cachedir, rrd_dir]:
        if not os.path.isdir(d):
            # 0o700 is the same value as the old 0700 literal, spelled in the
            # form that both Python 2.6+ and Python 3 accept.
            os.makedirs(d, mode=0o700)

    hydrazine_credentials_filename = os.path.join(hydrazine_cachedir,
                                                  'credentials')
    if os.path.exists(hydrazine_credentials_filename):
        # TODO: handle the case of having credentials that have expired etc
        credentials = Credentials()
        # The context manager closes the file even if load() raises.
        with open(hydrazine_credentials_filename, "r") as cred_file:
            credentials.load(cred_file)
        trace('loaded existing credentials')
        return Launchpad(credentials, service_root, lplib_cachedir)

    launchpad = Launchpad.get_token_and_login(
        'Hydrazine',
        service_root,
        lplib_cachedir)
    trace('saving credentials...')
    with open(hydrazine_credentials_filename, "w") as cred_file:
        launchpad.credentials.save(cred_file)
    return launchpad
|
|
|
|
def canonical_enum(entered, options):
    """Return the squished form of `entered` if it is one of `options`.

    `options` is a collection of already-squished names; None is returned
    when the squished input is not among them.
    """
    key = squish(entered)
    if key in options:
        return key
    return None
|
|
|
|
def canonical_importance(from_importance):
    """Return the canonical form of a Launchpad importance name, or None if
    it is not one of the known importances."""
    known = lp_importances_c
    return canonical_enum(from_importance, known)
|
|
|
|
def canonical_status(entered):
    """Return the canonical form of a Launchpad status name, or None if it
    is not one of the known statuses."""
    known = lp_status_c
    return canonical_enum(entered, known)
|
|
|
|
#-----------------------------------------------------------------------------
|
|
# Functions and classes
|
|
#-----------------------------------------------------------------------------
|
|
|
|
class Base(object):
    """Base class whose str/repr is a pretty-printed dict of the instance's
    public attributes (those whose name does not start with an underscore).
    """

    def __str__(self):
        """Return ``pformat`` of the public instance attributes."""
        # .items() instead of the Python-2-only .iteritems(): same result,
        # and it works on both Python 2 and 3.
        public = dict((name, value)
                      for (name, value) in self.__dict__.items()
                      if not name.startswith('_'))
        return pformat(public)

    __repr__ = __str__
|
|
|
|
|
|
class Message(Base):
    """Local copy of the parts of a Launchpad bug message that get migrated:
    its text, its author (account name and display name) and its date."""

    def __init__(self, m):
        """Copy content, owner and creation date from the message `m`."""
        author = m.owner
        self.content = m.content
        self.owner = author.name
        self.owner_name = author.display_name
        self.date = m.date_created
|
|
|
|
class Bug(Base):
    """Local, plain-data snapshot of a Launchpad bug task and its bug.

    Attributes set by the constructor: id, lp_url, title, description,
    owner, owner_name, assignee, assignee_name (both None when unassigned),
    status and importance (canonical form, see canonical_status and
    canonical_importance), tags, messages (list of Message, without the
    first one) and milestone (name or None).  The private _bt and _bug keep
    the original launchpadlib objects for debugging.
    """

    def __init__(self, bt):
        """Build the snapshot from the launchpadlib bug task `bt`."""
        # Cache a few things for which launchpad will make a web request each
        # time.
        bug = bt.bug
        o = bt.owner
        a = bt.assignee

        # Store from the launchpadlib bug objects only what we want, and as
        # local data
        self.id = bug.id
        self.lp_url = 'https://bugs.launchpad.net/%s/+bug/%i' % \
                      (PROJECT_NAME, self.id)
        self.title = bt.title
        self.description = bug.description
        # Every bug has an owner (who created it)
        self.owner = o.name
        self.owner_name = o.display_name
        # Not all bugs have been assigned to someone yet
        try:
            self.assignee = a.name
            self.assignee_name = a.display_name
        except AttributeError:
            self.assignee = self.assignee_name = None
        # Store status/importance in canonical format
        self.status = canonical_status(bt.status)
        self.importance = canonical_importance(bt.importance)
        self.tags = bug.tags
        # Store the bug discussion messages, but skip m[0], which is the same
        # as the bug description we already stored.  A list (rather than
        # map()) so the result can be iterated more than once on any Python.
        self.messages = [Message(m) for m in list(bug.messages)[1:]]
        self.milestone = getattr(bt.milestone, 'name', None)

        # Duplicate handling disabled, since the default query already filters
        # out the duplicates.  Keep the code here in case we ever want to look
        # into this...
        if 0:
            # The duplicate_of lookup lives inside the disabled branch so it
            # costs nothing while the branch is off.
            dupe = bug.duplicate_of
            # Track duplicates conveniently
            try:
                self.duplicate_of = dupe.id
                self.is_duplicate = True
            except AttributeError:
                self.duplicate_of = None
                self.is_duplicate = False

            # dbg dupe info
            if bug.number_of_duplicates > 0:
                self.duplicates = [b.id for b in bug.duplicates]
            else:
                self.duplicates = []

        # tmp - debug
        self._bt = bt
        self._bug = bug
|
|
|
|
#-----------------------------------------------------------------------------
|
|
# Main script
|
|
#-----------------------------------------------------------------------------
|
|
|
|
#-----------------------------------------------------------------------------
|
|
# Launchpad part
|
|
#-----------------------------------------------------------------------------
|
|
# launchpad = create_session()
launchpad = Launchpad.login_with('statsmodels', 'production')
project = launchpad.projects[PROJECT_NAME]

# Note: every name in lp_status is requested explicitly, and that list
# includes "Won't Fix" and "Invalid", so those bugs are fetched as well.
# Duplicates are presumably still left out by Launchpad's default query
# (see the remark in Bug.__init__) -- TODO confirm.
bug_tasks = project.searchTasks(status=lp_status)

# Snapshot every bug task locally, keyed by Launchpad bug id, printing each
# title as a progress indicator.
bugs = {}
for bt in list(bug_tasks):
    b = Bug(bt)
    bugs[b.id] = b
    print(b.title)
    sys.stdout.flush()
|
|
|
|
#-----------------------------------------------------------------------------
|
|
# Github part
|
|
#-----------------------------------------------------------------------------
|
|
#http://pypi.python.org/pypi/github2
|
|
#http://github.com/ask/python-github2
|
|
# Github libraries
|
|
from github2 import core, issues, client

# Reload the github2 modules on every run; presumably so that repeated
# "%run -i" executions in one IPython session start from fresh module state.
for mod in [core, issues, client]:
    reload(mod)
|
|
|
|
|
|
def format_title(bug):
    """Return a short GitHub issue title for `bug`.

    The text after the first occurrence of "<PROJECT_ID>: " in ``bug.title``
    is returned with surrounding double quotes stripped.  If the marker does
    not occur, the whole title (quotes stripped) is returned instead of
    raising IndexError.
    """
    marker = '{0}: '.format(PROJECT_ID)
    head, found, tail = bug.title.partition(marker)
    # partition() leaves `found` empty when the marker is absent; in that
    # case `head` holds the complete title.
    short = tail if found else head
    return short.strip('"')
|
|
|
|
|
|
def format_body(bug):
    """Return the text used as the body of the GitHub issue for `bug`.

    The body names the original Launchpad bug (id and URL) and its reporter,
    followed by a blank line and the bug description.  The reporter's
    display name and the description are UTF-8 encoded before formatting.
    """
    template = (
        "Original Launchpad bug {bug.id}: {bug.lp_url}\n"
        "Reported by: {bug.owner} ({owner_name}).\n"
        "\n"
        "{description}"
    )
    fields = {
        'bug': bug,
        'owner_name': bug.owner_name.encode('utf-8'),
        'description': bug.description.encode('utf-8'),
    }
    return template.format(**fields)
|
|
|
|
|
|
def format_message(num, m):
    """Return the text of the GitHub comment for Launchpad message `m`.

    The comment starts with a header line giving the comment number `num`,
    the author's display name and the message date, followed by a blank line
    and the message content.  Display name and content are UTF-8 encoded
    before formatting.
    """
    template = (
        "[ LP comment {num} by: {owner_name}, on {m.date!s} ]\n"
        "\n"
        "{content}"
    )
    fields = {
        'num': num,
        'm': m,
        'owner_name': m.owner_name.encode('utf-8'),
        'content': m.content.encode('utf-8'),
    }
    return template.format(**fields)
|
|
|
|
|
|
# Config
user = 'wesm'
# NOTE(review): an API token is hard-coded in this file.  Anything committed
# to source control should be treated as leaked: revoke it and supply the
# real token through the GITHUB_API_TOKEN environment variable instead.  The
# literal is kept only as the fallback so existing usage keeps working.
token = os.environ.get('GITHUB_API_TOKEN', '12efaff85b8e17f63ee835c5632b8cf0')

repo = 'statsmodels/statsmodels'
#repo = 'ipython/ipython'

# Skip bugs with this status:
# to_skip = set([u'fix_committed', u'incomplete'])
to_skip = set()

# Only label these importance levels:
gh_importances = set([u'critical', u'high', u'low', u'medium', u'wishlist'])
|
|
|
|
# Start script
gh = client.Github(username=user, api_token=token)

# Filter out the full LP bug dict to process only the ones we want
bugs_todo = dict((lp_id, lp_bug) for (lp_id, lp_bug) in bugs.items()
                 if lp_bug.status not in to_skip)

# Select which bug ids to run.  The ids are sorted so that bugs are filed in
# ascending Launchpad order and so that the slices below pick the same bugs
# on every run (plain dict key order is arbitrary).
bids = sorted(bugs_todo)
#bids = bids[50:100]
#bids = bids[12:]
#bids = bids[:5]+[502787]
|
|
#bids = bids[:5]+[502787]
|
|
|
|
# Start loop over bug ids and file them on Github
nbugs = len(bids)
gh_issues = []  # for reporting at the end

# Throttling: too many fast requests and gh will block us, so sleep for a
# while.  I just eyeballed these values by trial and error.
batch_size = 10   # issues filed between two long sleeps
tsleep = 60       # length of the long sleep, in seconds

# Launchpad statuses whose GitHub issue is closed right after it is filed.
# NOTE(review): 'wont_fix' is not listed, so those issues stay open on
# GitHub -- confirm that this is intended.
gh_closed_statuses = ['fix_committed', 'fix_released', 'invalid']

for n, bug_id in enumerate(bids):
    bug = bugs[bug_id]
    title = format_title(bug)
    body = format_body(bug)

    # Progress line: LP id, position in the run, and the (truncated) title.
    print('')
    shown = title if len(title) < 65 else title[:65] + '...'
    print(u'{0} [{1}/{2}] {3}'.format(bug.id, n + 1, nbugs, shown))

    # still check bug.status, in case we manually added other bugs to the list
    # above (mostly during testing)
    if bug.status in to_skip:
        print('--- Skipping - status: {0}'.format(bug.status))
        continue

    # No newline here: the "created" message below completes this line.
    sys.stdout.write('+++ Filing... ')
    sys.stdout.flush()

    # Create github issue for this bug
    issue = gh.issues.open(repo, title=title, body=body)
    print('created GitHub # {0}'.format(issue.number))
    gh_issues.append(issue.number)
    sys.stdout.flush()

    # Mark status as a label
    #status = 'status-{0}'.format(bug.status)
    #gh.issues.add_label(repo, issue.number, status)

    # Mark any extra tags we might have as labels.  This must read the tags
    # of the bug being filed (`bug`), not of `b`, which is just the last bug
    # left over from the Launchpad download loop.
    for tag in bug.tags:
        label = 'tag-{0}'.format(tag)
        gh.issues.add_label(repo, issue.number, label)

    # If bug has assignee, add it as label
    if bug.assignee:
        # Github bug, gets confused with dots in labels.
        gh.issues.add_label(repo, issue.number,
                            bug.assignee.replace('.', '_'))

    if bug.importance in gh_importances:
        if bug.importance == 'wishlist':
            label = bug.importance
        else:
            label = 'prio-{0}'.format(bug.importance)
        gh.issues.add_label(repo, issue.number, label)

    if bug.milestone:
        label = 'milestone-{0}'.format(bug.milestone).replace('.', '_')
        gh.issues.add_label(repo, issue.number, label)

    # Add original message thread
    for num, message in enumerate(bug.messages):
        # Messages on LP are numbered from 1
        comment = format_message(num + 1, message)
        gh.issues.comment(repo, issue.number, comment)
        time.sleep(0.5)  # soft sleep after each message to prevent gh block

    if bug.status in gh_closed_statuses:
        gh.issues.close(repo, issue.number)

    time.sleep(1)  # soft sleep after each request
    # And longer one after every batch; pointless after the very last bug.
    if len(gh_issues) % batch_size == 0 and n + 1 < nbugs:
        print('')
        print('*** SLEEPING for {0} seconds to avoid github blocking... ***'.format(tsleep))
        sys.stdout.flush()
        time.sleep(tsleep)
|
|
|
|
# Summary report: a blank line, a separator, then the GitHub issue numbers
# created in this run and how many there are.
print('')
print('*' * 80)
print('Summary of GitHub issues filed:')
print(gh_issues)
print('Total: {0}'.format(len(gh_issues)))
|