Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(655)

Unified Diff: telemetry/telemetry/internal/snap_page_util.py

Issue 3017573002: Make sure snap_page combined iframe serialized dom
Patch Set: . Created 3 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | telemetry/telemetry/internal/snap_page_util_unittest.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: telemetry/telemetry/internal/snap_page_util.py
diff --git a/telemetry/telemetry/internal/snap_page_util.py b/telemetry/telemetry/internal/snap_page_util.py
index 57210030776187a0ebe0b2ee0350d5fc817607c3..721e35b431d93f37f3192747633fc5a62c5f6e4a 100644
--- a/telemetry/telemetry/internal/snap_page_util.py
+++ b/telemetry/telemetry/internal/snap_page_util.py
@@ -3,11 +3,35 @@
# found in the LICENSE file.
import os
+import json
+import sys
from telemetry.core import util
from telemetry.internal.browser import browser_finder
+def _TransmitLargeJSONToTab(tab, json_obj, js_holder_name):
+ tab.ExecuteJavaScript(
+ 'var {{ @js_holder_name }} = "";', js_holder_name=js_holder_name)
+
+ # To avoid crashing devtool connection (details in crbug.com/763119#c16),
+ # we break down the json string to chunks which each chunk has a maximum
+ # size of 100000 characters (100000 seems to not break the connection and
+ # makes sending data reasonably fast).
+ k = 0
+ step_size = 100000
+ json_obj_string = json.dumps(json_obj)
+ while k < len(json_obj_string):
+ sub_string_chunk = json_obj_string[k: k + step_size]
+ k += step_size
+ tab.ExecuteJavaScript(
+ '{{ @js_holder_name }} += {{ sub_string_chunk }};',
+ js_holder_name=js_holder_name, sub_string_chunk=sub_string_chunk)
+
+ tab.ExecuteJavaScript(
+ '{{ @js_holder_name }} = JSON.parse({{ @js_holder_name }});',
+ js_holder_name=js_holder_name)
+
def SnapPage(finder_options, url, interactive, snapshot_file):
""" Save the HTML snapshot of the page whose address is |url| to
|snapshot_file|.
@@ -17,28 +41,61 @@ def SnapPage(finder_options, url, interactive, snapshot_file):
try:
tab = browser.tabs[0]
tab.Navigate(url)
- tab.WaitForDocumentReadyStateToBeComplete()
if interactive:
raw_input(
'Activating interactive mode. Press enter after you finish '
"interacting with the page to snapshot the page's DOM content.")
- with open(
- os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it',
- 'HTMLSerializer.js')) as f:
+
+ sys.stdout.write(
+ 'Snapshotting content of %s. This could take a while...\n' % url)
+ tab.WaitForDocumentReadyStateToBeComplete()
+ tab.action_runner.WaitForNetworkQuiescence()
+
+ with open(os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it',
+ 'HTMLSerializer.js')) as f:
snapit_script = f.read()
- tab.ExecuteJavaScript(snapit_script)
- tab.ExecuteJavaScript(
- '''
- var serializedDomArray;
- var htmlSerializer = new HTMLSerializer();
- htmlSerializer.processDocument(document);
- htmlSerializer.fillHolesAsync(document, function(s) {
- serializedDomArray = s.html;
- });
- ''')
- print 'Snapshotting content of %s. This could take a while...' % url
- tab.WaitForJavaScriptCondition('serializedDomArray !== undefined')
- serialized_dom = ''.join(tab.EvaluateJavaScript('serializedDomArray'))
- snapshot_file.write(serialized_dom)
+
+ with open(os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it',
+ 'popup.js')) as f:
+ dom_combining_script = f.read()
+
+ serialized_doms = []
+
+ # Serialize the dom in each frame.
+ for context_id in tab.EnableAllContexts():
+ tab.ExecuteJavaScript(snapit_script, context_id=context_id)
+ tab.ExecuteJavaScript(
+ '''
+ var serializedDom;
+ var htmlSerializer = new HTMLSerializer();
+ htmlSerializer.processDocument(document);
+ htmlSerializer.fillHolesAsync(document, function(s) {
+ serializedDom = s.asDict();
+ });
+ ''', context_id=context_id)
+ tab.WaitForJavaScriptCondition(
+ 'serializedDom !== undefined', context_id=context_id)
+ serialized_doms.append(tab.EvaluateJavaScript(
+ 'serializedDom', context_id=context_id))
+
+ # Execute doms combining code in blank page to minimize the chance of V8
+ # OOM.
+ tab.Navigate('about:blank')
+ tab.WaitForDocumentReadyStateToBeComplete()
+
+ # Sending all the serialized doms back to tab execution context.
+ tab.ExecuteJavaScript('var serializedDoms = [];')
+ for i in xrange(len(serialized_doms)):
+ sys.stdout.write('Processing dom of frame #%i / %i\r' %
+ (i, len(serialized_doms)))
+ sys.stdout.flush()
+ _TransmitLargeJSONToTab(tab, serialized_doms[i], 'sub_dom')
+ tab.ExecuteJavaScript('serializedDoms.push(sub_dom);')
+
+ # Combine all the doms to one HTML string.
+ tab.EvaluateJavaScript(dom_combining_script)
+ page_snapshot = tab.EvaluateJavaScript('outputHTMLString(serializedDoms);')
+
+ snapshot_file.write(page_snapshot)
finally:
browser.Close()
« no previous file with comments | « no previous file | telemetry/telemetry/internal/snap_page_util_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698