dashboard/dashboard/pinpoint/models/job.py - Issue 3013713002: [pinpoint] Calculate distances between Changes.

Side by Side Diff: dashboard/dashboard/pinpoint/models/job.py

Issue 3013713002: [pinpoint] Calculate distances between Changes.

Patch Set: Created 3 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import collections	5 import collections

6 import logging	6 import logging

7 import os	7 import os

8 import traceback	8 import traceback

9	9

10 from google.appengine.api import taskqueue	10 from google.appengine.api import taskqueue

(...skipping 195 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
206 repeat_count: The number of attempts to automatically run per Change.	206 repeat_count: The number of attempts to automatically run per Change.

207 """	207 """

208 # _quests is mutable. Any modification should mutate the existing list	208 # _quests is mutable. Any modification should mutate the existing list

209 # in-place rather than assign a new list, because every Attempt references	209 # in-place rather than assign a new list, because every Attempt references

210 # this object and will be updated automatically if it's mutated.	210 # this object and will be updated automatically if it's mutated.

211 self._quests = list(quests)	211 self._quests = list(quests)

212	212

213 # _changes can be in arbitrary order. Client should not assume that the	213 # _changes can be in arbitrary order. Client should not assume that the

214 # list of Changes is sorted in any particular order.	214 # list of Changes is sorted in any particular order.

215 self._changes = []	215 self._changes = []

	216 # _change_distances[i] contains the distance, in commits, between

	217 # _changes[i-1] and _changes[i]. _change_distances[0] is always 0.

	218 self._change_distances = []

216	219

217 # A mapping from a Change to a list of Attempts on that Change.	220 # A mapping from a Change to a list of Attempts on that Change.

218 self._attempts = {}	221 self._attempts = {}

219	222

220 self._repeat_count = repeat_count	223 self._repeat_count = repeat_count

221	224

222 def AddAttempt(self, change):	225 def AddAttempt(self, change):

223 assert change in self._attempts	226 assert change in self._attempts

224 self._attempts[change].append(attempt_module.Attempt(self._quests, change))	227 self._attempts[change].append(attempt_module.Attempt(self._quests, change))

225	228

226 def AddChange(self, change, index=None):	229 def AddChange(self, change, index=None, distances=(None, None)):

227 if index:	230 if index is None:

228 self._changes.insert(index, change)	231 index = len(self._changes)

229 else:

230 self._changes.append(change)

231	232

	233 # Compute distances, if needed.

	234 left, right = distances

	235 if left is None:

	236 left = _ChangeDistance(self._changes[index - 1], change) if index else 0

	237 if right is None and index < len(self._changes):

	238 right = _ChangeDistance(change, self._changes[index])

	239

	240 # Insert into _changes and _change_distances.

	241 if right is not None:

	242 self._change_distances[index] = right

	243 self._change_distances.insert(index, left)

	244 self._changes.insert(index, change)

	245

	246 # Add Attempts.

232 self._attempts[change] = []	247 self._attempts[change] = []

233 for _ in xrange(self._repeat_count):	248 for _ in xrange(self._repeat_count):

234 self.AddAttempt(change)	249 self.AddAttempt(change)

235	250

236 def Explore(self):	251 def Explore(self):

237 """Compare Changes and bisect by adding additional Changes as needed.	252 """Compare Changes and bisect by adding additional Changes as needed.

238	253

239 For every pair of adjacent Changes, compare their results as probability	254 For every pair of adjacent Changes, compare their results as probability

240 distributions. If more information is needed to establish statistical	255 distributions. If more information is needed to establish statistical

241 confidence, add an additional Attempt. If the results are different, find	256 confidence, add an additional Attempt. If the results are different, find

242 the midpoint of the Changes and add it to the Job.	257 the midpoint of the Changes and add it to the Job.

243	258

244 The midpoint can only be added if the second Change represents a commit that	259 The midpoint can only be added if the second Change represents a commit that

245 comes after the first Change. Otherwise, this method won't explore further.	260 comes after the first Change. Otherwise, this method won't explore further.

246 For example, if Change A is repo@abc, and Change B is repo@abc + patch,	261 For example, if Change A is repo@abc, and Change B is repo@abc + patch,

247 there's no way to pick additional Changes to try.	262 there's no way to pick additional Changes to try.

248 """	263 """

249 # Compare every pair of Changes.	264 # Compare every pair of Changes.

250 # TODO: The list may Change while iterating through it.	265 # TODO: The list may Change while iterating through it.

251 # TODO: Use JobState.Differences().	266 # TODO: Use JobState.Differences().

252 for index in xrange(1, len(self._changes)):	267 for index in xrange(1, len(self._changes)):

253 change_a = self._changes[index - 1]	268 change_a = self._changes[index - 1]

254 change_b = self._changes[index]	269 change_b = self._changes[index]

255	270

256 comparison_result = self._Compare(change_a, change_b)	271 comparison_result = self._Compare(change_a, change_b)

257 if comparison_result == _DIFFERENT:	272 if comparison_result == _DIFFERENT:

258 # Different: Bisect and add an additional Change to the job.	273 # Different: Bisect and add an additional Change to the job.

259 try:	274 try:

260 midpoint = change_module.Change.Midpoint(change_a, change_b)	275 midpoint, distances = change_module.Change.Midpoint(change_a,

	276 change_b)

261 except change_module.NonLinearError:	277 except change_module.NonLinearError:

262 continue	278 continue

	279 left, right = distances

	280 if not (left and right):

	281 continue

	282

263 logging.info('Adding Change %s.', midpoint)	283 logging.info('Adding Change %s.', midpoint)

264 self.AddChange(midpoint, index)	284 self.AddChange(midpoint, index, distances)

265 elif comparison_result == _SAME:	285 elif comparison_result == _SAME:

266 # The same: Do nothing.	286 # The same: Do nothing.

267 continue	287 continue

268 elif comparison_result == _UNKNOWN:	288 elif comparison_result == _UNKNOWN:

269 # Unknown: Add an Attempt to the Change with the fewest Attempts.	289 # Unknown: Add an Attempt to the Change with the fewest Attempts.

270 change = min(change_a, change_b, key=lambda c: len(self._attempts[c]))	290 change = min(change_a, change_b, key=lambda c: len(self._attempts[c]))

271 self.AddAttempt(change)	291 self.AddAttempt(change)

272	292

273 def ScheduleWork(self):	293 def ScheduleWork(self):

274 work_left = False	294 work_left = False

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
308	328

309 result_values.append(change_result_values)	329 result_values.append(change_result_values)

310	330

311 attempts = []	331 attempts = []

312 for c in self._changes:	332 for c in self._changes:

313 attempts.append([attempt.AsDict() for attempt in self._attempts[c]])	333 attempts.append([attempt.AsDict() for attempt in self._attempts[c]])

314	334

315 return {	335 return {

316 'quests': map(str, self._quests),	336 'quests': map(str, self._quests),

317 'changes': [change.AsDict() for change in self._changes],	337 'changes': [change.AsDict() for change in self._changes],

	338 'change_distances': self._change_distances,

318 # TODO: Use JobState.Differences().	339 # TODO: Use JobState.Differences().

319 'comparisons': comparisons,	340 'comparisons': comparisons,

320 'result_values': result_values,	341 'result_values': result_values,

321 'attempts': attempts,	342 'attempts': attempts,

322 }	343 }

323	344

324 def _Compare(self, change_a, change_b):	345 def _Compare(self, change_a, change_b):

325 attempts_a = self._attempts[change_a]	346 attempts_a = self._attempts[change_a]

326 attempts_b = self._attempts[change_b]	347 attempts_b = self._attempts[change_b]

327	348

(...skipping 18 matching lines...) Expand all Loading...
346 # Here, "the same" means that we fail to reject the null hypothesis. We can	367 # Here, "the same" means that we fail to reject the null hypothesis. We can

347 # never be completely sure that the two Changes have the same results, but	368 # never be completely sure that the two Changes have the same results, but

348 # we've run everything that we planned to, and didn't detect any difference.	369 # we've run everything that we planned to, and didn't detect any difference.

349 if (len(attempts_a) >= self._repeat_count and	370 if (len(attempts_a) >= self._repeat_count and

350 len(attempts_b) >= self._repeat_count):	371 len(attempts_b) >= self._repeat_count):

351 return _SAME	372 return _SAME

352	373

353 return _UNKNOWN	374 return _UNKNOWN

354	375

355	376

	377 def _ChangeDistance(change_a, change_b):

	378 try:

	379 _, distances = change_module.Change.Midpoint(change_a, change_b)

	380 return sum(distances)

	381 except change_module.NonLinearError:

	382 return 1

	383

	384

356 def _CombineResultsPerQuest(attempts):	385 def _CombineResultsPerQuest(attempts):

357 aggregate_results = collections.defaultdict(list)	386 aggregate_results = collections.defaultdict(list)

358 for attempt in attempts:	387 for attempt in attempts:

359 if not attempt.completed:	388 if not attempt.completed:

360 continue	389 continue

361	390

362 for quest, results in attempt.result_values.iteritems():	391 for quest, results in attempt.result_values.iteritems():

363 aggregate_results[quest] += results	392 aggregate_results[quest] += results

364	393

365 return aggregate_results	394 return aggregate_results

366	395

367	396

368 def _CompareValues(values_a, values_b):	397 def _CompareValues(values_a, values_b):

369 if not (values_a and values_b):	398 if not (values_a and values_b):

370 return _UNKNOWN	399 return _UNKNOWN

371	400

372 try:	401 try:

373 p_value = mann_whitney_u.MannWhitneyU(values_a, values_b)	402 p_value = mann_whitney_u.MannWhitneyU(values_a, values_b)

374 except ValueError:	403 except ValueError:

375 return _UNKNOWN	404 return _UNKNOWN

376	405

377 if p_value < _SIGNIFICANCE_LEVEL:	406 if p_value < _SIGNIFICANCE_LEVEL:

378 return _DIFFERENT	407 return _DIFFERENT

379 else:	408 else:

380 return _UNKNOWN	409 return _UNKNOWN

OLD	NEW

« dashboard/dashboard/pinpoint/models/change/commit_test.py ('K') | « dashboard/dashboard/pinpoint/models/change/commit_test.py ('k') | no next file » | no next file with comments »