Index: tools/py_event_log_analyzer/rtp_analyzer.py
diff --git a/tools/py_event_log_analyzer/rtp_analyzer.py b/tools/py_event_log_analyzer/rtp_analyzer.py
new file mode 100644
index 0000000000000000000000000000000000000000..efa86ffd935e8031e4d3d7d405012aa6f7da0565
--- /dev/null
+++ b/tools/py_event_log_analyzer/rtp_analyzer.py
@@ -0,0 +1,266 @@
+# Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+"""Displays statistics and plots graphs from RTC protobuf dump."""
+
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import sys
+import builtins
+import matplotlib.pyplot as plt
+import misc
+import numpy
+import pb_parse
+
+
+
+class RTPStatistics(object):
+  """Has methods for calculating and plotting statistics for RPT
hlundin-webrtc (2016/05/25 12:54:23): RPT -> RTP
+  packets.
+  """
+
+  BANDWIDTH_SMOOTHING_WINDOW_SIZE = 10
+
+  def __init__(self, data_points):
+    """Initializes object with data_points computes simple statistics:
kwiberg-webrtc (2016/05/25 12:42:42): and
aleloi2 (2016/05/30 14:57:56): Done.
+    percentages of packets and packet sizes by SSRC.
+
+    Args:
+      data_points: list of pb_parse.DataPoints on which statistics are
+      calculated.
+
+    """
+
+    self.data_points = data_points
+    self.ssrc_frequencies = misc.percent_table([x.ssrc for x in
+                                                self.data_points])
+    self.ssrc_size_table = misc.ssrc_size_table(self.data_points)
+    self.bandwidth_kbps = None
+    self.smooth_bw_kbps = None
+
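(Aside, not part of the patch: misc.percent_table and misc.ssrc_size_table come from the companion misc module, which is not shown here. A rough sketch of what a value-to-fraction table such as percent_table might compute, under that assumption only:)

  import collections

  def percent_table_sketch(values):
    # Hypothetical stand-in for misc.percent_table: map each distinct
    # value (e.g. an SSRC) to its fraction of the total count.
    counts = collections.Counter(values)
    total = sum(counts.values())
    return {value: count / float(total) for value, count in counts.items()}

  percent_table_sketch([0x12, 0x12, 0x34])  # {0x12: 2/3, 0x34: 1/3}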
+  def print_ssrc_info(self, ssrc_id, ssrc):
+    """Prints packet and size statistics for given SSRC.
peah-webrtc (2016/05/26 06:44:18): Maybe "the" or "a" should be added between "for" and "given".
aleloi2 (2016/05/30 14:57:56): Done.
+
+    Args:
+      ssrc_id: textual identifier of SSRC printed beside statistics for it.
+      ssrc: SSRC by which to filter data and display statistics
+
+    Raises:
+      Exception: when different payload types are present in data
hlundin-webrtc (2016/05/25 12:54:23): This will happen, and can be legit. When comfort noise …
aleloi2 (2016/05/30 14:57:55): Now it prints the different payload types instead.
+      for same SSRC
+    """
+    filtered_ssrc = [x for x in self.data_points if x.ssrc == ssrc]
+    payloads = misc.percent_table([x.payload_type for x in filtered_ssrc])
+
+    if len(payloads) == 1:
+      payload_info = "payload type {}".format(*list(payloads))
+    else:
+      raise Exception(
+          "This tool cannot yet handle changes in codec sample rate")
+    print("{} 0X{:X} {}, {:.2f}% packets, {:.2f}% data".format(
kwiberg-webrtc (2016/05/25 12:42:42): Consider using lowercase hexadecimal. It causes less …
aleloi2 (2016/05/30 14:57:56): Ok.
+        ssrc_id, ssrc, payload_info, self.ssrc_frequencies[ssrc]*100,
+        self.ssrc_size_table[ssrc]*100))
kwiberg-webrtc (2016/05/25 12:42:41): You still have some binary operators not surrounded by spaces.
aleloi2 (2016/05/30 14:57:56): Done.
+    print(" packet sizes:")
+    bin_counts, bin_bounds = numpy.histogram([x.size for x in
+                                              filtered_ssrc], bins=5,
+                                             density=False)
+    bin_proportions = bin_counts / sum(bin_counts)
+    print("\n".join([
+        " {:.1f} - {:.1f}: {:.2f}%".format(bin_bounds[i], bin_bounds[i+1],
+                                           bin_proportions[i]*100)
+        for i in builtins.range(len(bin_proportions))
+    ]))
+
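(Aside, not part of the patch: numpy.histogram with density=False returns raw bin counts together with the bin edges, and dividing by the total turns the counts into the per-bin percentages printed above. A small illustration with invented packet sizes:)

  import numpy

  sizes = [120, 118, 122, 60, 61, 119, 121]      # hypothetical payload sizes in bytes
  counts, edges = numpy.histogram(sizes, bins=5, density=False)
  proportions = counts / float(sum(counts))      # fraction of packets in each bin
  # len(edges) == len(counts) + 1; bin i spans edges[i]..edges[i+1]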
+  def choose_ssrc(self):
+    """Queries user for SSRC."""
+    ssrc_frequencies_lst = list(enumerate(self.ssrc_frequencies))
kwiberg-webrtc (2016/05/25 12:42:42): Eliminate this variable. You don't use it for anything …
aleloi2 (2016/05/30 14:57:56): It was used to pair ssrc with an integer counter, …
+
+    if len(self.ssrc_frequencies) == 1:
+      chosen_ssrc = self.ssrc_frequencies[0][-1]
+      self.print_ssrc_info("", chosen_ssrc)
+      return chosen_ssrc
+
+    for i, ssrc in enumerate(self.ssrc_frequencies):
+      self.print_ssrc_info(i, ssrc)
+
+    while True:
+      chosen_index = int(builtins.input("choose one> "))
+      if 0 <= chosen_index < len(ssrc_frequencies_lst):
+        return ssrc_frequencies_lst[chosen_index][-1]
+      else:
+        print("Invalid index!")
+
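(Aside on the ssrc_frequencies_lst discussion above: list(enumerate(...)) over the SSRC table simply pairs each key with a running index, in whatever order the dict happens to iterate, which is what the numeric "choose one>" prompt relies on. With hypothetical SSRCs:)

  ssrc_frequencies = {0x12345678: 0.75, 0xabcd: 0.25}   # made-up SSRCs and fractions
  list(enumerate(ssrc_frequencies))
  # -> [(0, 0x12345678), (1, 0xabcd)]  (order follows dict iteration order)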
+  def filter_ssrc(self, chosen_ssrc):
+    """Filters and wraps data points.
+
+    Removes data points with `ssrc != chosen_ssrc`. Unwraps sequence
+    numbers and time stamps for the chosen selection.
hlundin-webrtc (2016/05/25 12:54:23): Nit: 'timestamps' is typically written as one word.
aleloi2 (2016/05/30 14:57:56): Done.
+    """
+    self.data_points = [x for x in self.data_points if x.ssrc ==
+                        chosen_ssrc]
+    unwrapped_sequence_numbers = misc.unwrap([x.sequence_number for x in
+                                              self.data_points],
+                                             2**16-1)
+    for (data_point, sequence_number) in zip(self.data_points,
+                                              unwrapped_sequence_numbers):
+      data_point.sequence_number = sequence_number
+
+    unwrapped_timestamps = misc.unwrap([x.timestamp for x in self.data_points],
+                                       2**32-1)
+
+    for (data_point, timestamp) in zip(self.data_points,
+                                       unwrapped_timestamps):
+      data_point.timestamp = timestamp
+
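(Aside, not part of the patch: misc.unwrap is another helper from the companion module. The general idea of unwrapping a wrapping counter (RTP sequence numbers are 16-bit, RTP timestamps 32-bit) can be sketched like this; the exact convention of the real misc.unwrap, including why the patch passes 2**16-1 and 2**32-1, lives in misc.py:)

  def unwrap_sketch(values, mod):
    # Undo wrap-around of a modular counter by always taking the
    # shortest step (forward or backward) from the previous value.
    out = [values[0]]
    for value in values[1:]:
      step = (value - out[-1]) % mod
      if step > mod // 2:
        step -= mod
      out.append(out[-1] + step)
    return out

  unwrap_sketch([65534, 65535, 0, 1], 2**16)  # -> [65534, 65535, 65536, 65537]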
+  def print_sequence_number_statistics(self):
+    seq_no_set = set(x.sequence_number for x in self.data_points)
+    print("Missing sequence numbers: {} out of {}".format(
+        max(seq_no_set) - min(seq_no_set) + 1 - len(seq_no_set),
+        len(seq_no_set)
+    ))
+    packet_counter = collections.Counter(x.sequence_number for x in
+                                         self.data_points)
+    print("Duplicated packets: {}".format(sum(packet_counter.values())
+                                          - len(packet_counter)
+                                          ))
kwiberg-webrtc (2016/05/25 12:42:41): Unless I'm mistaken, it's simpler and more efficient …
aleloi2 (2016/05/30 14:57:56): I agree. Changed.
+    print("Reordered packets: {}".format(
+        misc.count_reordered([x.sequence_number for x in self.data_points])))
+
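(Aside on kwiberg's "simpler and more efficient" remark above: one simpler way to get the same duplicate count, not necessarily the one the reviewer had in mind, is to compare the list length with the number of distinct values:)

  seq_nos = [1, 2, 2, 3, 3, 3]                   # hypothetical sequence numbers
  duplicates = len(seq_nos) - len(set(seq_nos))  # 3, same as sum(Counter) - len(Counter)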
+  def print_frequency_duration_statistics(self):
peah-webrtc (2016/05/26 06:44:19): Suggestion: This is a fairly long method which does …
aleloi2 (2016/05/30 14:57:56): I agree. I divided it into a frequency related and …
+    """Estimates frequency and prints related statistics.
+
+    Guesses the most probable frequency by looking at changes in
+    timestamps (RFC 3550 section 5.1), calculates clock drifts and
+    sending time of packets. Updates `self.data_points` with changes
+    in delay and send time.
+
+    """
+    delta_timestamp = (self.data_points[-1].timestamp -
+                       self.data_points[0].timestamp)
+    delta_arr_timestamp = float((self.data_points[-1].arrival_timestamp_ms -
+                                 self.data_points[0].arrival_timestamp_ms))
+    fs_est = delta_timestamp / delta_arr_timestamp
+
+    fs_vec = [8, 16, 32, 48, 90] # TODO(aleloi) 90 is a hack for video
hlundin-webrtc (2016/05/25 12:54:23): Not really a "hack"; https://tools.ietf.org/html/r …
peah-webrtc (2016/05/26 06:44:18): The python style guide actually has an example where …
+    fs = None
+    for f in fs_vec:
+      if abs((fs_est - f)/float(f)) < 0.05:
kwiberg-webrtc (2016/05/25 12:42:42): No need for float, since you import division from __future__.
aleloi2 (2016/05/30 14:57:56): OK.
+        fs = f
+
+    print("Estimated frequency: {}".format(fs_est))
+    if fs is None:
+      fs = int(builtins.input(
+          "Frequency could not be guessed. Input frequency> "))
+
+    print("Guessed frequency: {}".format(fs))
kwiberg-webrtc (2016/05/25 12:42:41): Don't print this if the user entered a frequency.
aleloi2 (2016/05/30 14:57:56): Done.
+
+    for f in self.data_points:
+      f.real_send_time_ms = (f.timestamp -
+                             self.data_points[0].timestamp) / fs
+      f.delay = f.arrival_timestamp_ms - f.real_send_time_ms
+
+    min_delay = min(f.delay for f in self.data_points)
+
+    for f in self.data_points:
hlundin-webrtc (2016/05/25 12:54:23): Oh. Python cannot do "vector +/- scalar" without e…
kwiberg-webrtc (2016/05/25 13:00:31): No. list + list returns a new list that's a concatenation …
ivoc (2016/05/25 13:17:37): It could do it if it was a numpy array instead of a list.
kwiberg-webrtc (2016/05/25 13:29:04): Right, I'd forgotten about numpy, because I never …
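(Aside to make the list-versus-numpy point above concrete: a plain Python list needs an explicit comprehension for "vector minus scalar", while a numpy array broadcasts the scalar; list + list would concatenate instead:)

  import numpy

  delays = [12.0, 15.5, 11.0]
  rel_list = [d - min(delays) for d in delays]    # list: explicit comprehension
  rel_array = numpy.array(delays) - min(delays)   # ndarray: scalar is broadcast
  # both yield [1.0, 4.5, 0.0]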
+      f.absdelay = f.delay - min_delay
+
+    stream_duration_sender = self.data_points[-1].real_send_time_ms / 1000
+    print("Stream duration at sender: {:.1f} seconds".format(
+        stream_duration_sender
+    ))
+
+    arrival_timestamps_ms = [pt.arrival_timestamp_ms for pt in
+                             self.data_points]
+    stream_duration_receiver = (max(arrival_timestamps_ms) -
+                                min(arrival_timestamps_ms)) / 1000
+    print("Stream duration at receiver: {:.1f} seconds".format(
+        stream_duration_receiver
+    ))
+
+    print("Clock drift: {:.2f}%".format(
+        100* (stream_duration_receiver / stream_duration_sender - 1)
+    ))
+
+    print("Send average bitrate: {:.2f} kbps".format(
+        sum(x.size for x
+            in self.data_points) * 8 / stream_duration_sender / 1000))
+
+    print("Receive average bitrate: {:.2f} kbps".format(
+        sum(x.size
+            for x in self.data_points) * 8 / stream_duration_receiver /
+        1000))
+
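(Aside, a worked example of the frequency guess above with made-up numbers: the estimate is RTP timestamp progress divided by wall-clock arrival progress, snapped to a known rate when within 5%:)

  delta_timestamp = 960000                        # RTP timestamp units, first to last packet
  delta_arr_timestamp = 20150.0                   # ms between first and last arrival
  fs_est = delta_timestamp / delta_arr_timestamp  # ~47.6 units per ms, i.e. ~47.6 kHz
  # abs((47.6 - 48) / 48) is about 0.007 < 0.05, so fs snaps to 48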
+  def remove_reordered(self):
+    last = self.data_points[0]
+    data_points_ordered = [last]
+    for x in self.data_points[1:]:
+      if x.sequence_number > last.sequence_number and (x.real_send_time_ms >
+                                                       last.real_send_time_ms):
+        data_points_ordered.append(x)
+        last = x
+    self.data_points = data_points_ordered
+
+  def compute_bandwidth(self):
+    """Computes bandwidth averaged over several consecutive packets.
+
+    The number of consecutive packets used in the average is
+    BANDWIDTH_SMOOTHING_WINDOW_SIZE. Averaging is done with
+    numpy.correlate.
+    """
+    self.bandwidth_kbps = []
+    for i in range(len(self.data_points)-1):
+      self.bandwidth_kbps.append(
+          self.data_points[i].size*8 / (self.data_points[i+1].real_send_time_ms
+                                        - self.data_points[i].real_send_time_ms)
+      )
+    correlate_filter = (numpy.ones(
+        RTPStatistics.BANDWIDTH_SMOOTHING_WINDOW_SIZE) /
+                        RTPStatistics.BANDWIDTH_SMOOTHING_WINDOW_SIZE)
+    self.smooth_bw_kbps = numpy.correlate(self.bandwidth_kbps, correlate_filter)
+
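(Aside, not part of the patch: numpy.correlate with a ones(N)/N kernel in its default 'valid' mode is an N-point moving average, which is how the per-packet rates are smoothed here. A tiny standalone check with invented rates:)

  import numpy

  rates = [100.0, 110.0, 90.0, 105.0, 95.0]   # hypothetical per-packet kbps values
  window = numpy.ones(3) / 3
  smoothed = numpy.correlate(rates, window)   # 'valid' mode: 5 - 3 + 1 = 3 samples
  # smoothed[0] == (100.0 + 110.0 + 90.0) / 3 == 100.0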
+  def plot_statistics(self):
+    """Plots changes in delay and average bandwidth."""
+    plt.figure(1)
+    plt.plot([f.real_send_time_ms/1000 for f in self.data_points],
+             [f.absdelay for f in self.data_points])
+    plt.xlabel("Send time [s]")
+    plt.ylabel("Relative transport delay [ms]")
+
+    plt.figure(2)
+    plt.plot([f.real_send_time_ms / 1000 for f in
+              self.data_points][:len(self.smooth_bw_kbps)],
+             self.smooth_bw_kbps[:len(self.data_points)])
+    plt.xlabel("Send time [s]")
+    plt.ylabel("Bandwidth [kbps]")
+
+    plt.show()
+
+
+def main():
+
+  if len(sys.argv) < 2:
+    print("Usage: python rtp_analyzer.py <filename of rtc event log>")
+    sys.exit(0)
+
+  data_points = pb_parse.parse_protobuf(sys.argv[1])
+  rtp_stats = RTPStatistics(data_points)
+  chosen_ssrc = rtp_stats.choose_ssrc()
+  print("Chosen SSRC: 0X{:X}".format(chosen_ssrc))
+
+  rtp_stats.filter_ssrc(chosen_ssrc)
+  print("Statistics:")
+  rtp_stats.print_sequence_number_statistics()
+  rtp_stats.print_frequency_duration_statistics()
+  rtp_stats.remove_reordered()
+  rtp_stats.compute_bandwidth()
+  rtp_stats.plot_statistics()
+
+if __name__ == "__main__":
+  main()