Index: webrtc/media/base/videocapturer.cc |
diff --git a/webrtc/media/base/videocapturer.cc b/webrtc/media/base/videocapturer.cc |
index 96a605585509220b12875ecf993fc5010fe5e1cc..820d4200edea4dcf118688162f8975e7ef7f32d2 100644 |
--- a/webrtc/media/base/videocapturer.cc |
+++ b/webrtc/media/base/videocapturer.cc |
@@ -59,7 +59,8 @@ bool CapturedFrame::GetDataSize(uint32_t* size) const { |
///////////////////////////////////////////////////////////////////// |
// Implementation of class VideoCapturer |
///////////////////////////////////////////////////////////////////// |
-VideoCapturer::VideoCapturer() : apply_rotation_(false) { |
+VideoCapturer::VideoCapturer() |
+ : apply_rotation_(false), frames_seen_(0), offset_us_(0) { |
thread_checker_.DetachFromThread(); |
Construct(); |
} |
@@ -214,23 +215,113 @@ void VideoCapturer::OnSinkWantsChanged(const rtc::VideoSinkWants& wants) { |
} |
} |
+void VideoCapturer::UpdateOffset(int64_t camera_time_us, |
+                                 int64_t system_time_us) { |
+  // Maintains offset_us_, the running estimate of how far the system |
+  // monotonic clock is ahead of the camera's capture clock. |
+  // |
+  // Camera timestamps are taken to be more precise than our own |
+  // readings of the system clock, but the camera may run on a separate |
+  // free-running clock: far offset from the system clock and drifting |
+  // slowly relative to it. That gives the linear model |
+  // |
+  //   y_k = c_0 + c_1 * x_k + v_k |
+  // |
+  // with x_k the camera timestamp (trusted within its own time scale), |
+  // y_k the system clock reading, and v_k the measurement noise, i.e., |
+  // the latency between the actual capture and the moment the system |
+  // clock was sampled. |
+  // |
+  // A (weighted) least-squares fit of both constants gives |
+  // c_1 = Cov(x,y) / Var(x) and c_0 = mean(y) - c_1 * mean(x). |
+  // Inserting that c_0 into the model and regrouping yields |
+  // |
+  //   y_k = mean(y) + (x_k - mean(x)) + (c_1 - 1) * (x_k - mean(x)) + v_k |
+  // |
+  // When the mean is a weighted average that gradually forgets old |
+  // samples, x_k - mean(x) stays bounded, on the order of the averaging |
+  // time constant (and nearly constant at a steady frame rate). The |
+  // frequency error |c_1 - 1| should be small as well: up to 3000 ppm |
+  // (3 ms of drift per second) has been observed on cameras, while any |
+  // cheap crystal could be expected to stay below 100 ppm. |
+  // |
+  // The c_1 term is therefore dropped, leaving the estimator |
+  // |
+  //   x_k + mean(y - x) |
+  // |
+  // in which the mean is a plain average over the initial samples and |
+  // an exponential average afterwards. |
+ |
+  // A sample further than this from the current estimate restarts the |
+  // filter. 300 ms should rarely be exceeded in normal operation, and |
+  // for jumps in camera time below it, converging gradually instead of |
+  // restarting should be tolerable. |
+  static const int64_t kResetLimitUs = 300000; |
+  // Number of samples averaged plainly before the update turns into an |
+  // exponential average with weight 1 / kWindowSize. |
+  static const unsigned kWindowSize = 100; |
+ |
+  // The quantity being averaged: y_k - x_k in the notation above. |
+  const int64_t sample_us = system_time_us - camera_time_us; |
+  // How far this sample lies from the current estimate. |
+  const int64_t deviation_us = sample_us - offset_us_; |
+ |
+  // Large deviations can follow from, e.g., a reset of the camera |
+  // clock, cameras being plugged in and out, or the application |
+  // process having been suspended for a while. |
+  const bool restart_filter = std::abs(deviation_us) > kResetLimitUs; |
+  if (restart_filter) { |
+    LOG(LS_INFO) << "Resetting timestamp translation after averaging " |
+                 << frames_seen_ << " frames. Old offset: " |
+                 << offset_us_ << ", new offset: " << sample_us; |
+    frames_seen_ = 0; |
+    // With no previous translated time, ClipTimestamp starts over too. |
+    prev_translated_time_us_ = rtc::Optional<int64_t>(); |
+  } |
+ |
+  // The sample count saturates at the window size. Right after a |
+  // restart it is 1, so the update below sets offset_us_ to sample_us. |
+  if (frames_seen_ < kWindowSize) { |
+    ++frames_seen_; |
+  } |
+  offset_us_ += deviation_us / frames_seen_; |
+} |
sprang_webrtc
2016/06/17 13:10:52
Wow, quite extensive commenting there! Guess it wi
|
+ |
+int64_t VideoCapturer::ClipTimestamp(int64_t time_us, int64_t system_time_us) { |
+  // Post-processes a translated camera timestamp so that the returned |
+  // value never exceeds |system_time_us| and, provided |system_time_us| |
+  // itself does not go backwards, never precedes the previously |
+  // returned value. |
+  // |
+  //   time_us:        translated timestamp (camera time plus the |
+  //                   estimated offset). |
+  //   system_time_us: system clock reading for the same frame. |
+  // |
+  // Returns the clipped timestamp, which is also stored in |
+  // prev_translated_time_us_ for the next call. |
+  if (!prev_translated_time_us_) { |
+    // No previous output (e.g., UpdateOffset just reset the filter): |
+    // start over with no accumulated bias. |
+    clip_bias_us_ = 0; |
+  } |
+ |
+  // Apply the accumulated bias before comparing against the previous |
+  // output. The previous output already had the bias subtracted, so |
+  // clamping first and subtracting afterwards could return a timestamp |
+  // earlier than the previous one. |
+  time_us -= clip_bias_us_; |
+ |
+  // Make timestamps monotonic. |
+  if (prev_translated_time_us_ && time_us < *prev_translated_time_us_) { |
+    time_us = *prev_translated_time_us_; |
+  } |
+ |
+  // Clip to make sure we don't produce timestamps in the future. The |
+  // overshoot is added to the bias, so later timestamps are shifted |
+  // back by the same amount. |
+  if (time_us > system_time_us) { |
+    clip_bias_us_ += time_us - system_time_us; |
+    time_us = system_time_us; |
+  } |
+  prev_translated_time_us_ = rtc::Optional<int64_t>(time_us); |
+  return time_us; |
+} |
+ |
bool VideoCapturer::AdaptFrame(int width, |
int height, |
- // TODO(nisse): Switch to us unit. |
- int64_t capture_time_ns, |
+ int64_t camera_time_us, |
+ int64_t system_time_us, |
int* out_width, |
int* out_height, |
int* crop_width, |
int* crop_height, |
int* crop_x, |
- int* crop_y) { |
+ int* crop_y, |
+ int64_t* translated_camera_time_us) { |
+ if (translated_camera_time_us) { |
+ UpdateOffset(camera_time_us, system_time_us); |
+ } |
if (!broadcaster_.frame_wanted()) { |
return false; |
} |
if (enable_video_adapter_ && !IsScreencast()) { |
if (!video_adapter_.AdaptFrameResolution( |
- width, height, capture_time_ns, |
+ width, height, camera_time_us * rtc::kNumNanosecsPerMicrosec, |
crop_width, crop_height, out_width, out_height)) { |
// VideoAdapter dropped the frame. |
return false; |
@@ -245,6 +336,11 @@ bool VideoCapturer::AdaptFrame(int width, |
*crop_x = 0; |
*crop_y = 0; |
} |
+ |
+ if (translated_camera_time_us) { |
+ *translated_camera_time_us = |
+ ClipTimestamp(camera_time_us + offset_us_, system_time_us); |
+ } |
return true; |
} |
@@ -257,10 +353,17 @@ void VideoCapturer::OnFrameCaptured(VideoCapturer*, |
int crop_x; |
int crop_y; |
+ // TODO(nisse): We don't do timestamp translation on this input |
+ // path. Enabling translation looks straightforward, but it breaks |
+ // the WebRtcVideoEngine2Test.PropagatesInputFrameTimestamp test. |
+ // That is probably not worth the effort to fix; instead, try to |
+ // delete or refactor all code using VideoFrameFactory and |
+ // SignalCapturedFrame. |
if (!AdaptFrame(captured_frame->width, captured_frame->height, |
- captured_frame->time_stamp, |
+ captured_frame->time_stamp / rtc::kNumNanosecsPerMicrosec, |
+ 0, |
&out_width, &out_height, |
- &crop_width, &crop_height, &crop_x, &crop_y)) { |
+ &crop_width, &crop_height, &crop_x, &crop_y, nullptr)) { |
return; |
} |