OLD | NEW |
1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import collections | 5 import collections |
6 import logging | 6 import logging |
7 import os | 7 import os |
8 import traceback | 8 import traceback |
9 | 9 |
10 from google.appengine.api import taskqueue | 10 from google.appengine.api import taskqueue |
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
206 repeat_count: The number of attempts to automatically run per Change. | 206 repeat_count: The number of attempts to automatically run per Change. |
207 """ | 207 """ |
208 # _quests is mutable. Any modification should mutate the existing list | 208 # _quests is mutable. Any modification should mutate the existing list |
209 # in-place rather than assign a new list, because every Attempt references | 209 # in-place rather than assign a new list, because every Attempt references |
210 # this object and will be updated automatically if it's mutated. | 210 # this object and will be updated automatically if it's mutated. |
211 self._quests = list(quests) | 211 self._quests = list(quests) |
212 | 212 |
213 # _changes can be in arbitrary order. Client should not assume that the | 213 # _changes can be in arbitrary order. Client should not assume that the |
214 # list of Changes is sorted in any particular order. | 214 # list of Changes is sorted in any particular order. |
215 self._changes = [] | 215 self._changes = [] |
| 216 # _change_distances[i] contains the distance, in commits, between |
| 217 # _changes[i-1] and _changes[i]. _change_distances[0] is always 0. |
| 218 self._change_distances = [] |
216 | 219 |
217 # A mapping from a Change to a list of Attempts on that Change. | 220 # A mapping from a Change to a list of Attempts on that Change. |
218 self._attempts = {} | 221 self._attempts = {} |
219 | 222 |
220 self._repeat_count = repeat_count | 223 self._repeat_count = repeat_count |
221 | 224 |
222 def AddAttempt(self, change): | 225 def AddAttempt(self, change): |
223 assert change in self._attempts | 226 assert change in self._attempts |
224 self._attempts[change].append(attempt_module.Attempt(self._quests, change)) | 227 self._attempts[change].append(attempt_module.Attempt(self._quests, change)) |
225 | 228 |
226 def AddChange(self, change, index=None): | 229 def AddChange(self, change, index=None, distances=(None, None)): |
227 if index: | 230 if index is None: |
228 self._changes.insert(index, change) | 231 index = len(self._changes) |
229 else: | |
230 self._changes.append(change) | |
231 | 232 |
| 233 # Compute distances, if needed. |
| 234 left, right = distances |
| 235 if left is None: |
| 236 left = _ChangeDistance(self._changes[index - 1], change) if index else 0 |
| 237 if right is None and index < len(self._changes): |
| 238 right = _ChangeDistance(change, self._changes[index]) |
| 239 |
| 240 # Insert into _changes and _change_distances. |
| 241 if right is not None: |
| 242 self._change_distances[index] = right |
| 243 self._change_distances.insert(index, left) |
| 244 self._changes.insert(index, change) |
| 245 |
| 246 # Add Attempts. |
232 self._attempts[change] = [] | 247 self._attempts[change] = [] |
233 for _ in xrange(self._repeat_count): | 248 for _ in xrange(self._repeat_count): |
234 self.AddAttempt(change) | 249 self.AddAttempt(change) |
235 | 250 |
236 def Explore(self): | 251 def Explore(self): |
237 """Compare Changes and bisect by adding additional Changes as needed. | 252 """Compare Changes and bisect by adding additional Changes as needed. |
238 | 253 |
239 For every pair of adjacent Changes, compare their results as probability | 254 For every pair of adjacent Changes, compare their results as probability |
240 distributions. If more information is needed to establish statistical | 255 distributions. If more information is needed to establish statistical |
241 confidence, add an additional Attempt. If the results are different, find | 256 confidence, add an additional Attempt. If the results are different, find |
242 the midpoint of the Changes and add it to the Job. | 257 the midpoint of the Changes and add it to the Job. |
243 | 258 |
244 The midpoint can only be added if the second Change represents a commit that | 259 The midpoint can only be added if the second Change represents a commit that |
245 comes after the first Change. Otherwise, this method won't explore further. | 260 comes after the first Change. Otherwise, this method won't explore further. |
246 For example, if Change A is repo@abc, and Change B is repo@abc + patch, | 261 For example, if Change A is repo@abc, and Change B is repo@abc + patch, |
247 there's no way to pick additional Changes to try. | 262 there's no way to pick additional Changes to try. |
248 """ | 263 """ |
249 # Compare every pair of Changes. | 264 # Compare every pair of Changes. |
250 # TODO: The list may Change while iterating through it. | 265 # TODO: The list may Change while iterating through it. |
251 # TODO: Use JobState.Differences(). | 266 # TODO: Use JobState.Differences(). |
252 for index in xrange(1, len(self._changes)): | 267 for index in xrange(1, len(self._changes)): |
253 change_a = self._changes[index - 1] | 268 change_a = self._changes[index - 1] |
254 change_b = self._changes[index] | 269 change_b = self._changes[index] |
255 | 270 |
256 comparison_result = self._Compare(change_a, change_b) | 271 comparison_result = self._Compare(change_a, change_b) |
257 if comparison_result == _DIFFERENT: | 272 if comparison_result == _DIFFERENT: |
258 # Different: Bisect and add an additional Change to the job. | 273 # Different: Bisect and add an additional Change to the job. |
259 try: | 274 try: |
260 midpoint = change_module.Change.Midpoint(change_a, change_b) | 275 midpoint, distances = change_module.Change.Midpoint(change_a, |
| 276 change_b) |
261 except change_module.NonLinearError: | 277 except change_module.NonLinearError: |
262 continue | 278 continue |
| 279 left, right = distances |
| 280 if not (left and right): |
| 281 continue |
| 282 |
263 logging.info('Adding Change %s.', midpoint) | 283 logging.info('Adding Change %s.', midpoint) |
264 self.AddChange(midpoint, index) | 284 self.AddChange(midpoint, index, distances) |
265 elif comparison_result == _SAME: | 285 elif comparison_result == _SAME: |
266 # The same: Do nothing. | 286 # The same: Do nothing. |
267 continue | 287 continue |
268 elif comparison_result == _UNKNOWN: | 288 elif comparison_result == _UNKNOWN: |
269 # Unknown: Add an Attempt to the Change with the fewest Attempts. | 289 # Unknown: Add an Attempt to the Change with the fewest Attempts. |
270 change = min(change_a, change_b, key=lambda c: len(self._attempts[c])) | 290 change = min(change_a, change_b, key=lambda c: len(self._attempts[c])) |
271 self.AddAttempt(change) | 291 self.AddAttempt(change) |
272 | 292 |
273 def ScheduleWork(self): | 293 def ScheduleWork(self): |
274 work_left = False | 294 work_left = False |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
308 | 328 |
309 result_values.append(change_result_values) | 329 result_values.append(change_result_values) |
310 | 330 |
311 attempts = [] | 331 attempts = [] |
312 for c in self._changes: | 332 for c in self._changes: |
313 attempts.append([attempt.AsDict() for attempt in self._attempts[c]]) | 333 attempts.append([attempt.AsDict() for attempt in self._attempts[c]]) |
314 | 334 |
315 return { | 335 return { |
316 'quests': map(str, self._quests), | 336 'quests': map(str, self._quests), |
317 'changes': [change.AsDict() for change in self._changes], | 337 'changes': [change.AsDict() for change in self._changes], |
| 338 'change_distances': self._change_distances, |
318 # TODO: Use JobState.Differences(). | 339 # TODO: Use JobState.Differences(). |
319 'comparisons': comparisons, | 340 'comparisons': comparisons, |
320 'result_values': result_values, | 341 'result_values': result_values, |
321 'attempts': attempts, | 342 'attempts': attempts, |
322 } | 343 } |
323 | 344 |
324 def _Compare(self, change_a, change_b): | 345 def _Compare(self, change_a, change_b): |
325 attempts_a = self._attempts[change_a] | 346 attempts_a = self._attempts[change_a] |
326 attempts_b = self._attempts[change_b] | 347 attempts_b = self._attempts[change_b] |
327 | 348 |
(...skipping 18 matching lines...) Expand all Loading... |
346 # Here, "the same" means that we fail to reject the null hypothesis. We can | 367 # Here, "the same" means that we fail to reject the null hypothesis. We can |
347 # never be completely sure that the two Changes have the same results, but | 368 # never be completely sure that the two Changes have the same results, but |
348 # we've run everything that we planned to, and didn't detect any difference. | 369 # we've run everything that we planned to, and didn't detect any difference. |
349 if (len(attempts_a) >= self._repeat_count and | 370 if (len(attempts_a) >= self._repeat_count and |
350 len(attempts_b) >= self._repeat_count): | 371 len(attempts_b) >= self._repeat_count): |
351 return _SAME | 372 return _SAME |
352 | 373 |
353 return _UNKNOWN | 374 return _UNKNOWN |
354 | 375 |
355 | 376 |
| 377 def _ChangeDistance(change_a, change_b): |
| 378 try: |
| 379 _, distances = change_module.Change.Midpoint(change_a, change_b) |
| 380 return sum(distances) |
| 381 except change_module.NonLinearError: |
| 382 return 1 |
| 383 |
| 384 |
356 def _CombineResultsPerQuest(attempts): | 385 def _CombineResultsPerQuest(attempts): |
357 aggregate_results = collections.defaultdict(list) | 386 aggregate_results = collections.defaultdict(list) |
358 for attempt in attempts: | 387 for attempt in attempts: |
359 if not attempt.completed: | 388 if not attempt.completed: |
360 continue | 389 continue |
361 | 390 |
362 for quest, results in attempt.result_values.iteritems(): | 391 for quest, results in attempt.result_values.iteritems(): |
363 aggregate_results[quest] += results | 392 aggregate_results[quest] += results |
364 | 393 |
365 return aggregate_results | 394 return aggregate_results |
366 | 395 |
367 | 396 |
368 def _CompareValues(values_a, values_b): | 397 def _CompareValues(values_a, values_b): |
369 if not (values_a and values_b): | 398 if not (values_a and values_b): |
370 return _UNKNOWN | 399 return _UNKNOWN |
371 | 400 |
372 try: | 401 try: |
373 p_value = mann_whitney_u.MannWhitneyU(values_a, values_b) | 402 p_value = mann_whitney_u.MannWhitneyU(values_a, values_b) |
374 except ValueError: | 403 except ValueError: |
375 return _UNKNOWN | 404 return _UNKNOWN |
376 | 405 |
377 if p_value < _SIGNIFICANCE_LEVEL: | 406 if p_value < _SIGNIFICANCE_LEVEL: |
378 return _DIFFERENT | 407 return _DIFFERENT |
379 else: | 408 else: |
380 return _UNKNOWN | 409 return _UNKNOWN |
OLD | NEW |