| 273 | | plt.yticks([y for y in range(len(paths))], paths, size='xx-small') |
| 274 | | plt.grid(color='grey', linestyle='-', alpha=0.25, ydata=[y for y in range(0,len(paths)+1,2)], linewidth=20.0) |
| | 289 | plt.yticks([y for y in range(len(paths))], paths) |
| | 290 | plt.grid(color='grey', linestyle='-', alpha=0.25, ydata=[y for y in range(0,len(paths)+1,2)], linewidth=24.0) |
| | 295 | |
| | 296 | fig = plt.figure(1) |
| | 297 | xmin, xmax = plt.xlim() |
| | 298 | ymin, ymax = plt.ylim() |
| | 299 | ann = plt.annotate("default", (xmin + (xmax-xmin)*0.05, ymin + (ymax-ymin)*0.05), |
| | 300 | backgroundcolor="yellow", |
| | 301 | bbox=dict(boxstyle="round", fc="0.8"), |
| | 302 | visible=False, |
| | 303 | ) |
| | 304 | #ann.set_animated(True) |
| | 305 | current_rev = [-1] |
| | 306 | |
def onpick(event):
    """Toggle the annotation describing the data point that was picked.

    The picked artist's label is used as the user name, and the first
    picked index selects that user's entry in ``data``.  Picking the
    revision that is currently displayed hides the annotation; picking
    any other revision shows the annotation with that revision's text.
    """
    picked = event.artist
    idx = event.ind[0]
    x, y = picked.get_xdata()[idx], picked.get_ydata()[idx]
    user = picked.get_label()
    rev = data[user][idx][0]
    first_comment = list(rev.comments[user])[0]
    # Keep the annotation short: at most 150 characters are displayed.
    label = ("%s [%s]: %s" % (user, rev.revid, first_comment))[:150]

    showing_this_rev = ann.get_visible() and current_rev[0] == rev.revid
    if not showing_this_rev:
        ann.set_text(label)
        ann.set_visible(True)
        current_rev[0] = rev.revid
    else:
        # Second pick on the same revision: hide and reset the marker.
        ann.set_visible(False)
        current_rev[0] = -1

    pylab.draw()
| | 328 | |
| | 329 | cid = fig.canvas.mpl_connect('pick_event', onpick) |
| | 330 | |
| | 332 | |
def minimize_rev_separation(revs, paths, iterations=1000):
    """Reorder ``paths`` so paths touched by the same user sit close together.

    Each path is assigned a position (initially its index in ``paths``).
    For every user, the indices of the known paths they contributed to are
    collected across all revisions, and the objective is the sum of squared
    position differences between consecutive entries of each user's list.
    A random search shuffles the positions ``iterations`` times and keeps
    the assignment with the lowest objective.

    Args:
        revs: iterable of objects with a ``contributions`` mapping of
            user -> iterable of paths.
        paths: sequence of paths to order; contributed paths that are not
            in ``paths`` are ignored.
        iterations: number of random shuffles to try (default 1000).

    Returns:
        A new list with the elements of ``paths`` sorted by their best
        position found.  If no shuffle improves on the initial objective,
        the original order is returned.
    """
    import numpy

    path_index = dict((path, i) for i, path in enumerate(paths))

    # Per user, the indices of every known path they touched, in the order
    # the revisions were visited (duplicates are kept).
    user_indices = {}
    for rev in revs:
        for r_user, r_paths in rev.contributions.items():
            idxs = [path_index[p] for p in r_paths if p in path_index]
            if idxs:
                user_indices.setdefault(r_user, []).extend(idxs)
    index_groups = [numpy.array(idxs, dtype=int)
                    for idxs in user_indices.values()]

    def objective(positions):
        # Sum of squared gaps between consecutive positions in each group.
        total = 0.0
        for idxs in index_groups:
            steps = numpy.diff(positions.take(idxs))
            total += float(numpy.dot(steps, steps))
        return total

    candidate = numpy.arange(len(paths), dtype=float)
    best_total = objective(candidate)
    best_positions = candidate.copy()
    logging.info("Initial objective function value: %s", best_total)
    for _ in range(iterations):
        random.shuffle(candidate)
        total = objective(candidate)
        if total < best_total:
            best_total = total
            best_positions = candidate.copy()
    # The placeholder is required: without it logging cannot format the
    # extra argument and the message is lost.
    logging.info("Final objective function value: %s", best_total)

    # sorted() rather than list.sort(): zip() is only a list on Python 2.
    ranked = sorted(zip(paths, best_positions), key=lambda pair: pair[1])
    return [pair[0] for pair in ranked]
| 328 | | path_vecs.setdefault(r_path, {}) |
| 329 | | path_vecs[r_path].setdefault(r_user, 0.0) |
| 330 | | path_vecs[r_path][r_user] += 1.0 |
| | 426 | path_revs.setdefault(r_path, []) |
| | 427 | path_revs[r_path].append(i) |
| | 428 | |
| | 429 | # for r_path in r_paths: |
| | 430 | # path_vecs.setdefault(r_path, {}) |
| | 431 | # path_vecs[r_path].setdefault(r_user, 0.0) |
| | 432 | # path_vecs[r_path][r_user] += 1.0 |
| 333 | | paths = sorted(paths) |
| 334 | | |
| 335 | | path_vecs = path_vecs.items() |
| 336 | | path_names = [pv[0] for pv in path_vecs] |
| 337 | | path_dicts = [pv[1] for pv in path_vecs] |
| 338 | | path_vectors = [Vector([u in d and d[u] or 0.0 for u in users]) for d in path_dicts] |
| 339 | | path_vectors = [v*(1.0/sum(v)) for v in path_vectors] |
| 340 | | |
| 341 | | means, clusters = k_means(path_vectors, len(users)+2, 100, Vector.euclidian_distance) |
| 342 | | |
| 343 | | clusters.sort(key=lambda c: len(c)) |
| 344 | | |
| 345 | | ordered_paths = [] |
| 346 | | print "Clusters:" |
| 347 | | for cluster in clusters: |
| 348 | | print "\t", [path_names[i] for i in cluster] |
| 349 | | ordered_paths.extend(path_names[i] for i in cluster) |
| | 435 | |
| | 436 | # paths = sorted(paths, key=lambda p: (len(path_revs[p]), max(path_revs[p]))) |
| | 437 | # ordered_paths = paths |
| | 438 | |
| | 439 | ordered_paths = minimize_rev_separation(revs, paths) |
| | 440 | |
| | 441 | # path_vecs = path_vecs.items() |
| | 442 | # path_names = [pv[0] for pv in path_vecs] |
| | 443 | # path_dicts = [pv[1] for pv in path_vecs] |
| | 444 | # path_vectors = [Vector([u in d and d[u] or 0.0 for u in users]) for d in path_dicts] |
| | 445 | # path_vectors = [v*(1.0/sum(v)) for v in path_vectors] |
| | 446 | # |
| | 447 | # means, clusters = k_means(path_vectors, len(users)+2, 100, Vector.euclidian_distance) |
| | 448 | # |
| | 449 | # clusters.sort(key=lambda c: len(c)) |
| | 450 | # |
| | 451 | # ordered_paths = [] |
| | 452 | # print "Clusters:" |
| | 453 | # for cluster in clusters: |
| | 454 | # print "\t", [path_names[i] for i in cluster] |
| | 455 | # ordered_paths.extend(path_names[i] for i in cluster) |