Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(370)

Side by Side Diff: dashboard/dashboard/pinpoint/models/job.py

Issue 3013713002: [pinpoint] Calculate distances between Changes.
Patch Set: Created 3 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import collections 5 import collections
6 import logging 6 import logging
7 import os 7 import os
8 import traceback 8 import traceback
9 9
10 from google.appengine.api import taskqueue 10 from google.appengine.api import taskqueue
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after
206 repeat_count: The number of attempts to automatically run per Change. 206 repeat_count: The number of attempts to automatically run per Change.
207 """ 207 """
208 # _quests is mutable. Any modification should mutate the existing list 208 # _quests is mutable. Any modification should mutate the existing list
209 # in-place rather than assign a new list, because every Attempt references 209 # in-place rather than assign a new list, because every Attempt references
210 # this object and will be updated automatically if it's mutated. 210 # this object and will be updated automatically if it's mutated.
211 self._quests = list(quests) 211 self._quests = list(quests)
212 212
213 # _changes can be in arbitrary order. Client should not assume that the 213 # _changes can be in arbitrary order. Client should not assume that the
214 # list of Changes is sorted in any particular order. 214 # list of Changes is sorted in any particular order.
215 self._changes = [] 215 self._changes = []
216 # _change_distances[i] contains the distance, in commits, between
217 # _changes[i-1] and _changes[i]. _change_distances[0] is always 0.
218 self._change_distances = []
216 219
217 # A mapping from a Change to a list of Attempts on that Change. 220 # A mapping from a Change to a list of Attempts on that Change.
218 self._attempts = {} 221 self._attempts = {}
219 222
220 self._repeat_count = repeat_count 223 self._repeat_count = repeat_count
221 224
222 def AddAttempt(self, change): 225 def AddAttempt(self, change):
223 assert change in self._attempts 226 assert change in self._attempts
224 self._attempts[change].append(attempt_module.Attempt(self._quests, change)) 227 self._attempts[change].append(attempt_module.Attempt(self._quests, change))
225 228
226 def AddChange(self, change, index=None): 229 def AddChange(self, change, index=None, distances=(None, None)):
227 if index: 230 if index is None:
228 self._changes.insert(index, change) 231 index = len(self._changes)
229 else:
230 self._changes.append(change)
231 232
233 # Compute distances, if needed.
234 left, right = distances
235 if left is None:
236 left = _ChangeDistance(self._changes[index - 1], change) if index else 0
237 if right is None and index < len(self._changes):
238 right = _ChangeDistance(change, self._changes[index])
239
240 # Insert into _changes and _change_distances.
241 if right is not None:
242 self._change_distances[index] = right
243 self._change_distances.insert(index, left)
244 self._changes.insert(index, change)
245
246 # Add Attempts.
232 self._attempts[change] = [] 247 self._attempts[change] = []
233 for _ in xrange(self._repeat_count): 248 for _ in xrange(self._repeat_count):
234 self.AddAttempt(change) 249 self.AddAttempt(change)
235 250
236 def Explore(self): 251 def Explore(self):
237 """Compare Changes and bisect by adding additional Changes as needed. 252 """Compare Changes and bisect by adding additional Changes as needed.
238 253
239 For every pair of adjacent Changes, compare their results as probability 254 For every pair of adjacent Changes, compare their results as probability
240 distributions. If more information is needed to establish statistical 255 distributions. If more information is needed to establish statistical
241 confidence, add an additional Attempt. If the results are different, find 256 confidence, add an additional Attempt. If the results are different, find
242 the midpoint of the Changes and add it to the Job. 257 the midpoint of the Changes and add it to the Job.
243 258
244 The midpoint can only be added if the second Change represents a commit that 259 The midpoint can only be added if the second Change represents a commit that
245 comes after the first Change. Otherwise, this method won't explore further. 260 comes after the first Change. Otherwise, this method won't explore further.
246 For example, if Change A is repo@abc, and Change B is repo@abc + patch, 261 For example, if Change A is repo@abc, and Change B is repo@abc + patch,
247 there's no way to pick additional Changes to try. 262 there's no way to pick additional Changes to try.
248 """ 263 """
249 # Compare every pair of Changes. 264 # Compare every pair of Changes.
250 # TODO: The list may Change while iterating through it. 265 # TODO: The list may Change while iterating through it.
251 # TODO: Use JobState.Differences(). 266 # TODO: Use JobState.Differences().
252 for index in xrange(1, len(self._changes)): 267 for index in xrange(1, len(self._changes)):
253 change_a = self._changes[index - 1] 268 change_a = self._changes[index - 1]
254 change_b = self._changes[index] 269 change_b = self._changes[index]
255 270
256 comparison_result = self._Compare(change_a, change_b) 271 comparison_result = self._Compare(change_a, change_b)
257 if comparison_result == _DIFFERENT: 272 if comparison_result == _DIFFERENT:
258 # Different: Bisect and add an additional Change to the job. 273 # Different: Bisect and add an additional Change to the job.
259 try: 274 try:
260 midpoint = change_module.Change.Midpoint(change_a, change_b) 275 midpoint, distances = change_module.Change.Midpoint(change_a,
276 change_b)
261 except change_module.NonLinearError: 277 except change_module.NonLinearError:
262 continue 278 continue
279 left, right = distances
280 if not (left and right):
281 continue
282
263 logging.info('Adding Change %s.', midpoint) 283 logging.info('Adding Change %s.', midpoint)
264 self.AddChange(midpoint, index) 284 self.AddChange(midpoint, index, distances)
265 elif comparison_result == _SAME: 285 elif comparison_result == _SAME:
266 # The same: Do nothing. 286 # The same: Do nothing.
267 continue 287 continue
268 elif comparison_result == _UNKNOWN: 288 elif comparison_result == _UNKNOWN:
269 # Unknown: Add an Attempt to the Change with the fewest Attempts. 289 # Unknown: Add an Attempt to the Change with the fewest Attempts.
270 change = min(change_a, change_b, key=lambda c: len(self._attempts[c])) 290 change = min(change_a, change_b, key=lambda c: len(self._attempts[c]))
271 self.AddAttempt(change) 291 self.AddAttempt(change)
272 292
273 def ScheduleWork(self): 293 def ScheduleWork(self):
274 work_left = False 294 work_left = False
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
308 328
309 result_values.append(change_result_values) 329 result_values.append(change_result_values)
310 330
311 attempts = [] 331 attempts = []
312 for c in self._changes: 332 for c in self._changes:
313 attempts.append([attempt.AsDict() for attempt in self._attempts[c]]) 333 attempts.append([attempt.AsDict() for attempt in self._attempts[c]])
314 334
315 return { 335 return {
316 'quests': map(str, self._quests), 336 'quests': map(str, self._quests),
317 'changes': [change.AsDict() for change in self._changes], 337 'changes': [change.AsDict() for change in self._changes],
338 'change_distances': self._change_distances,
318 # TODO: Use JobState.Differences(). 339 # TODO: Use JobState.Differences().
319 'comparisons': comparisons, 340 'comparisons': comparisons,
320 'result_values': result_values, 341 'result_values': result_values,
321 'attempts': attempts, 342 'attempts': attempts,
322 } 343 }
323 344
324 def _Compare(self, change_a, change_b): 345 def _Compare(self, change_a, change_b):
325 attempts_a = self._attempts[change_a] 346 attempts_a = self._attempts[change_a]
326 attempts_b = self._attempts[change_b] 347 attempts_b = self._attempts[change_b]
327 348
(...skipping 18 matching lines...) Expand all
346 # Here, "the same" means that we fail to reject the null hypothesis. We can 367 # Here, "the same" means that we fail to reject the null hypothesis. We can
347 # never be completely sure that the two Changes have the same results, but 368 # never be completely sure that the two Changes have the same results, but
348 # we've run everything that we planned to, and didn't detect any difference. 369 # we've run everything that we planned to, and didn't detect any difference.
349 if (len(attempts_a) >= self._repeat_count and 370 if (len(attempts_a) >= self._repeat_count and
350 len(attempts_b) >= self._repeat_count): 371 len(attempts_b) >= self._repeat_count):
351 return _SAME 372 return _SAME
352 373
353 return _UNKNOWN 374 return _UNKNOWN
354 375
355 376
377 def _ChangeDistance(change_a, change_b):
378 try:
379 _, distances = change_module.Change.Midpoint(change_a, change_b)
380 return sum(distances)
381 except change_module.NonLinearError:
382 return 1
383
384
356 def _CombineResultsPerQuest(attempts): 385 def _CombineResultsPerQuest(attempts):
357 aggregate_results = collections.defaultdict(list) 386 aggregate_results = collections.defaultdict(list)
358 for attempt in attempts: 387 for attempt in attempts:
359 if not attempt.completed: 388 if not attempt.completed:
360 continue 389 continue
361 390
362 for quest, results in attempt.result_values.iteritems(): 391 for quest, results in attempt.result_values.iteritems():
363 aggregate_results[quest] += results 392 aggregate_results[quest] += results
364 393
365 return aggregate_results 394 return aggregate_results
366 395
367 396
368 def _CompareValues(values_a, values_b): 397 def _CompareValues(values_a, values_b):
369 if not (values_a and values_b): 398 if not (values_a and values_b):
370 return _UNKNOWN 399 return _UNKNOWN
371 400
372 try: 401 try:
373 p_value = mann_whitney_u.MannWhitneyU(values_a, values_b) 402 p_value = mann_whitney_u.MannWhitneyU(values_a, values_b)
374 except ValueError: 403 except ValueError:
375 return _UNKNOWN 404 return _UNKNOWN
376 405
377 if p_value < _SIGNIFICANCE_LEVEL: 406 if p_value < _SIGNIFICANCE_LEVEL:
378 return _DIFFERENT 407 return _DIFFERENT
379 else: 408 else:
380 return _UNKNOWN 409 return _UNKNOWN
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698