You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
pywafo/statsmodels/tools/migrate_issues_gh.py

372 lines
13 KiB
Python

#!/usr/bin/env python
"""Launchpad to github bug migration script.
There's a ton of code from Hydrazine copied here:
https://launchpad.net/hydrazine
Usage
-----
This code is meant to port a bug database for a project from Launchpad to
GitHub. It was used to port the IPython bug history.
The code is meant to be used interactively. I ran it multiple times in one long
IPython session, until the data structures I was getting from Launchpad looked
right. Then I turned off (see 'if 0' markers below) the Launchpad part, and ran
it again with the github part executing and using the 'bugs' variable from my
interactive namespace (via "%run -i" in IPython).
This code is NOT fire and forget, it's meant to be used with some intelligent
supervision at the wheel. Start by making a test repository (I made one called
ipython/BugsTest) and upload only a few issues into that. Once you are sure
that everything is OK, run it against your real repo with all your issues.
You should read all the code below and roughly understand what's going on
before using this. Since I didn't intend to use this more than once, it's not
particularly robust or documented. It got the job done and I've never used it
again.
Configuration
-------------
To pull things off LP, you need to log in first (see the Hydrazine docs). Your
Hydrazine credentials will be cached locally and this script can reuse them.
To push to GH, you need to set below the GH repository owner, API token and
repository name you want to push issues into. See the GH section for the
necessary variables.
"""
import collections
import os.path
import subprocess
import sys
import time
from pprint import pformat
import launchpadlib
from launchpadlib.credentials import Credentials
from launchpadlib.launchpad import (
Launchpad, STAGING_SERVICE_ROOT, EDGE_SERVICE_ROOT )
#-----------------------------------------------------------------------------
# Launchpad configuration
#-----------------------------------------------------------------------------
# The official LP project name. It is used below both to look the project up
# on Launchpad and to build each bug's Launchpad URL.
PROJECT_NAME = 'statsmodels'
# How LP marks your bugs, I don't know where this is stored, but they use it to
# generate bug descriptions and we need to split on this string to create
# shorter Github bug titles
PROJECT_ID = 'statsmodels'
# Default Launchpad server, see their docs for details. Only create_session()
# reads this value.
service_root = EDGE_SERVICE_ROOT
#-----------------------------------------------------------------------------
# Code copied/modified from Hydrazine (https://launchpad.net/hydrazine)
#-----------------------------------------------------------------------------
# Constants for the names in LP of certain bug-task fields: the importance
# and status values. lp_status is also what the main script passes to
# searchTasks() to select which bugs to fetch.
lp_importances = ['Critical', 'High', 'Medium', 'Low', 'Wishlist', 'Undecided']
lp_status = ['Confirmed', 'Triaged', 'Fix Committed', 'Fix Released',
'In Progress',"Won't Fix", "Incomplete", "Invalid", "New"]
def squish(a):
    """Normalise a Launchpad label into a lowercase identifier.

    The string is lowercased, every space becomes an underscore and
    apostrophes are dropped, e.g. ``"Won't Fix"`` becomes ``'wont_fix'``.
    """
    lowered = a.lower()
    underscored = lowered.replace(' ', '_')
    return underscored.replace("'", '')
# Canonical (squished) spellings of the importance and status names, stored
# as sets for membership tests in canonical_enum().
lp_importances_c = set(squish(name) for name in lp_importances)
lp_status_c = set(squish(name) for name in lp_status)
def trace(s):
    """Write the string *s* to standard error, followed by a newline."""
    line = s + '\n'
    sys.stderr.write(line)
def create_session():
    """Return a logged-in Launchpad object, caching credentials on disk.

    The launchpadlib and Hydrazine cache directories under ``~/.cache`` are
    created (mode 0700) when missing. If a Hydrazine credentials file already
    exists it is loaded and used to build the session; otherwise an
    interactive token login is performed and the resulting credentials are
    saved to that file for the next run.

    Returns
    -------
    The ``Launchpad`` instance for ``service_root``.
    """
    lplib_cachedir = os.path.expanduser("~/.cache/launchpadlib/")
    hydrazine_cachedir = os.path.expanduser("~/.cache/hydrazine/")
    rrd_dir = os.path.expanduser("~/.cache/hydrazine/rrd")
    for d in [lplib_cachedir, hydrazine_cachedir, rrd_dir]:
        if not os.path.isdir(d):
            # ``0o700`` is accepted by Python 2.6+ and Python 3, unlike the
            # old ``0700`` spelling.
            os.makedirs(d, mode=0o700)

    hydrazine_credentials_filename = os.path.join(hydrazine_cachedir,
                                                  'credentials')
    if os.path.exists(hydrazine_credentials_filename):
        credentials = Credentials()
        # Use a context manager so the file handle is closed even if
        # loading fails.
        with open(hydrazine_credentials_filename, "r") as credentials_file:
            credentials.load(credentials_file)
        trace('loaded existing credentials')
        # TODO: handle the case of having credentials that have expired etc
        return Launchpad(credentials, service_root,
                         lplib_cachedir)

    launchpad = Launchpad.get_token_and_login(
        'Hydrazine',
        service_root,
        lplib_cachedir)
    trace('saving credentials...')
    # Closing the file promptly makes sure the credentials are flushed to
    # disk before we return.
    with open(hydrazine_credentials_filename, "w") as credentials_file:
        launchpad.credentials.save(credentials_file)
    return launchpad
def canonical_enum(entered, options):
    """Return the squished form of *entered* if it is one of *options*.

    *options* is a collection of already-squished names. ``None`` is
    returned when the squished value is not among them.
    """
    normalized = squish(entered)
    if normalized in options:
        return normalized
    return None
def canonical_importance(from_importance):
    """Return the canonical importance name, or None if it is not recognised."""
    known_importances = lp_importances_c
    return canonical_enum(from_importance, known_importances)
def canonical_status(entered):
    """Return the canonical status name, or None if it is not recognised."""
    known_statuses = lp_status_c
    return canonical_enum(entered, known_statuses)
#-----------------------------------------------------------------------------
# Functions and classes
#-----------------------------------------------------------------------------
class Base(object):
    """Base class giving subclasses a readable ``str``/``repr``.

    The representation is the pretty-printed dictionary of the instance's
    attributes, leaving out any whose name starts with an underscore.
    """

    def __str__(self):
        # ``items()`` exists on both Python 2 and Python 3 dictionaries,
        # whereas ``iteritems()`` is Python 2 only.
        public = dict((k, v) for (k, v) in self.__dict__.items()
                      if not k.startswith('_'))
        return pformat(public)

    __repr__ = __str__
class Message(Base):
    """Local copy of one message from a Launchpad bug discussion."""

    def __init__(self, m):
        """Copy the content, author and creation date out of message *m*."""
        self.content = m.content
        # Fetch the owner object once and read both of its names from it.
        author = m.owner
        self.owner, self.owner_name = author.name, author.display_name
        self.date = m.date_created
class Bug(Base):
    """Local snapshot of a Launchpad bug task and the bug behind it.

    Only plain data is stored on the public attributes (id, URL, title,
    description, owner, assignee, status, importance, tags, messages and
    milestone). The original launchpadlib objects are kept on the private
    ``_bt`` and ``_bug`` attributes for debugging.
    """

    def __init__(self, bt):
        """Build the snapshot from the launchpadlib bug task *bt*."""
        # Cache a few things for which launchpad will make a web request each
        # time.
        bug = bt.bug
        reporter = bt.owner
        assigned_to = bt.assignee
        dupe = bug.duplicate_of

        # Store from the launchpadlib bug objects only what we want, and as
        # local data
        self.id = bug.id
        url_parts = (PROJECT_NAME, self.id)
        self.lp_url = 'https://bugs.launchpad.net/%s/+bug/%i' % url_parts
        self.title = bt.title
        self.description = bug.description

        # Every bug has an owner (who created it)
        self.owner = reporter.name
        self.owner_name = reporter.display_name

        # Not all bugs have been assigned to someone yet; a missing attribute
        # on the assignee leaves both fields as None.
        try:
            assignee, assignee_name = (assigned_to.name,
                                       assigned_to.display_name)
        except AttributeError:
            assignee = assignee_name = None
        self.assignee = assignee
        self.assignee_name = assignee_name

        # Store status/importance in canonical format
        self.status = canonical_status(bt.status)
        self.importance = canonical_importance(bt.importance)
        self.tags = bug.tags

        # Store the bug discussion messages, but skip the first one, which is
        # the same as the bug description we already stored
        thread = list(bug.messages)
        self.messages = map(Message, thread[1:])
        self.milestone = getattr(bt.milestone, 'name', None)

        # Duplicate handling disabled, since the default query already filters
        # out the duplicates. Keep the code here in case we ever want to look
        # into this...
        if 0:
            # Track duplicates conveniently
            try:
                self.duplicate_of = dupe.id
                self.is_duplicate = True
            except AttributeError:
                self.duplicate_of = None
                self.is_duplicate = False
            # dbg dupe info
            if bug.number_of_duplicates > 0:
                self.duplicates = [b.id for b in bug.duplicates]
            else:
                self.duplicates = []

        # tmp - debug
        self._bt = bt
        self._bug = bug
#-----------------------------------------------------------------------------
# Main script
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Launchpad part
#-----------------------------------------------------------------------------
# launchpad = create_session()
launchpad = Launchpad.login_with('statsmodels', 'production')
project = launchpad.projects[PROJECT_NAME]
# Note: by default, this will give us all bugs except duplicates and those
# with status "won't fix" or 'invalid'
bug_tasks = project.searchTasks(status=lp_status)
bugs = {}
for bt in list(bug_tasks):
b = Bug(bt)
bugs[b.id] = b
print b.title
sys.stdout.flush()
#-----------------------------------------------------------------------------
# Github part
#-----------------------------------------------------------------------------
#http://pypi.python.org/pypi/github2
#http://github.com/ask/python-github2
# Github libraries
from github2 import core, issues, client
# Reload the github2 modules each time this script is executed; presumably
# this is for the repeated interactive runs in one session described in the
# module docstring.
for mod in (core, issues, client):
    reload(mod)
def format_title(bug):
    """Return a short GitHub issue title for *bug*.

    The Launchpad title is split on the first ``'<PROJECT_ID>: '`` marker and
    the text after it is returned with surrounding double quotes stripped.
    If the marker is absent, the whole title is used (also with quotes
    stripped) instead of raising ``IndexError``, so a single oddly-titled
    bug cannot abort a migration that is already partly filed.
    """
    marker = '{0}: '.format(PROJECT_ID)
    head, found, tail = bug.title.partition(marker)
    short_title = tail if found else head
    return short_title.strip('"')
def format_body(bug):
    """Return the GitHub issue body text for *bug*.

    The body names the original Launchpad bug id and URL, then the reporter,
    then the bug description. The reporter's display name and the
    description are UTF-8 encoded before being inserted.
    """
    template = (
        "Original Launchpad bug {bug.id}: {bug.lp_url}\n"
        "Reported by: {bug.owner} ({owner_name}).\n"
        "{description}"
    )
    return template.format(
        bug=bug,
        owner_name=bug.owner_name.encode('utf-8'),
        description=bug.description.encode('utf-8'))
def format_message(num, m):
    """Return the GitHub comment text for Launchpad message *m*.

    *num* is the comment number shown in the header line, next to the
    author's display name and the message date. The message content follows
    on the next line. Display name and content are UTF-8 encoded before
    being inserted.
    """
    template = (
        "[ LP comment {num} by: {owner_name}, on {m.date!s} ]\n"
        "{content}"
    )
    return template.format(
        num=num,
        m=m,
        owner_name=m.owner_name.encode('utf-8'),
        content=m.content.encode('utf-8'))
# Config: GitHub account, API token and target repository.
# NOTE(review): `token` is an API credential committed in the source file.
# It should be treated as compromised and supplied from outside the
# repository instead -- flagged here, value left unchanged.
user = 'wesm'
token= '12efaff85b8e17f63ee835c5632b8cf0'
repo = 'statsmodels/statsmodels'
#repo = 'ipython/ipython'

# Skip bugs with this status:
# to_skip = set([u'fix_committed', u'incomplete'])
to_skip = set()

# Only label these importance levels:
gh_importances = set([u'critical', u'high', u'low', u'medium', u'wishlist'])

# Start script
gh = client.Github(username=user, api_token=token)

# Filter out the full LP bug dict to process only the ones we want
bugs_todo = dict(
    (lp_id, candidate)
    for (lp_id, candidate) in bugs.iteritems()
    if candidate.status not in to_skip)

# Select which bug ids to run
#bids = bugs_todo.keys()[50:100]
# bids = bugs_todo.keys()[12:]
bids = bugs_todo.keys()
#bids = bids[:5]+[502787]

# Bookkeeping for the loop over bug ids that files them on Github
nbugs = len(bids)
gh_issues = [] # for reporting at the end
for n, bug_id in enumerate(bids):
bug = bugs[bug_id]
title = format_title(bug)
body = format_body(bug)
print
if len(title)<65:
print bug.id, '[{0}/{1}]'.format(n+1, nbugs), title
else:
print bug.id, title[:65]+'...'
# still check bug.status, in case we manually added other bugs to the list
# above (mostly during testing)
if bug.status in to_skip:
print '--- Skipping - status:',bug.status
continue
print '+++ Filing...',
sys.stdout.flush()
# Create github issue for this bug
issue = gh.issues.open(repo, title=title, body=body)
print 'created GitHub #', issue.number
gh_issues.append(issue.number)
sys.stdout.flush()
# Mark status as a label
#status = 'status-{0}'.format(b.status)
#gh.issues.add_label(repo, issue.number, status)
# Mark any extra tags we might have as labels
for tag in b.tags:
label = 'tag-{0}'.format(tag)
gh.issues.add_label(repo, issue.number, label)
# If bug has assignee, add it as label
if bug.assignee:
gh.issues.add_label(repo, issue.number,
#bug.assignee
# Github bug, gets confused with dots in labels.
bug.assignee.replace('.','_')
)
if bug.importance in gh_importances:
if bug.importance == 'wishlist':
label = bug.importance
else:
label = 'prio-{0}'.format(bug.importance)
gh.issues.add_label(repo, issue.number, label)
if bug.milestone:
label = 'milestone-{0}'.format(bug.milestone).replace('.','_')
gh.issues.add_label(repo, issue.number, label)
# Add original message thread
for num, message in enumerate(bug.messages):
# Messages on LP are numbered from 1
comment = format_message(num+1, message)
gh.issues.comment(repo, issue.number, comment)
time.sleep(0.5) # soft sleep after each message to prevent gh block
if bug.status in ['fix_committed', 'fix_released', 'invalid']:
gh.issues.close(repo, issue.number)
# too many fast requests and gh will block us, so sleep for a while
# I just eyeballed these values by trial and error.
time.sleep(1) # soft sleep after each request
# And longer one after every batch
batch_size = 10
tsleep = 60
if (len(gh_issues) % batch_size)==0:
print
print '*** SLEEPING for {0} seconds to avoid github blocking... ***'.format(tsleep)
sys.stdout.flush()
time.sleep(tsleep)
# Summary report: print a blank line, a row of 80 asterisks, then the list of
# GitHub issue numbers created during this run and how many there were.
print
print '*'*80
print 'Summary of GitHub issues filed:'
print gh_issues
print 'Total:', len(gh_issues)