OLD | NEW |
1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/subresource_filter/core/common/url_pattern_index.h" | 5 #include "components/subresource_filter/core/common/url_pattern_index.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <limits> | 8 #include <limits> |
9 #include <string> | 9 #include <string> |
10 | 10 |
11 #include "base/logging.h" | 11 #include "base/logging.h" |
12 #include "base/numerics/safe_conversions.h" | 12 #include "base/numerics/safe_conversions.h" |
13 #include "base/strings/string_piece.h" | 13 #include "base/strings/string_piece.h" |
14 #include "base/strings/string_util.h" | 14 #include "base/strings/string_util.h" |
15 #include "components/subresource_filter/core/common/ngram_extractor.h" | 15 #include "components/subresource_filter/core/common/ngram_extractor.h" |
16 #include "components/subresource_filter/core/common/url_pattern.h" | 16 #include "components/subresource_filter/core/common/url_pattern.h" |
17 #include "url/gurl.h" | 17 #include "url/gurl.h" |
18 #include "url/origin.h" | 18 #include "url/origin.h" |
19 | 19 |
20 namespace subresource_filter { | 20 namespace subresource_filter { |
21 | 21 |
22 namespace { | 22 namespace { |
23 | 23 |
24 using FlatStringOffset = flatbuffers::Offset<flatbuffers::String>; | 24 using FlatStringOffset = flatbuffers::Offset<flatbuffers::String>; |
25 using FlatDomains = flatbuffers::Vector<FlatStringOffset>; | 25 using FlatDomains = flatbuffers::Vector<FlatStringOffset>; |
26 using FlatDomainsOffset = flatbuffers::Offset<FlatDomains>; | 26 using FlatDomainsOffset = flatbuffers::Offset<FlatDomains>; |
| 27 using ActivationMaskPair = |
| 28 std::pair<proto::ActivationType, flat::ActivationType>; |
| 29 using ElementTypeMaskPair = std::pair<proto::ElementType, flat::ElementType>; |
| 30 |
| 31 const ActivationMaskPair kActivationMaskPairs[] = { |
| 32 {proto::ACTIVATION_TYPE_DOCUMENT, flat::ActivationType_DOCUMENT}, |
| 33 {proto::ACTIVATION_TYPE_ELEMHIDE, |
| 34 flat::ActivationType_NONE}, // ELEMHIDE is not supported. |
| 35 {proto::ACTIVATION_TYPE_GENERICHIDE, |
| 36 flat::ActivationType_NONE}, // GENERICHIDE is not supported. |
| 37 {proto::ACTIVATION_TYPE_GENERICBLOCK, flat::ActivationType_GENERIC_BLOCK}, |
| 38 }; |
| 39 |
| 40 const ElementTypeMaskPair kElementTypeMaskPairs[] = { |
| 41 {proto::ELEMENT_TYPE_OTHER, flat::ElementType_OTHER}, |
| 42 {proto::ELEMENT_TYPE_SCRIPT, flat::ElementType_SCRIPT}, |
| 43 {proto::ELEMENT_TYPE_IMAGE, flat::ElementType_IMAGE}, |
| 44 {proto::ELEMENT_TYPE_STYLESHEET, flat::ElementType_STYLESHEET}, |
| 45 {proto::ELEMENT_TYPE_OBJECT, flat::ElementType_OBJECT}, |
| 46 {proto::ELEMENT_TYPE_XMLHTTPREQUEST, flat::ElementType_XMLHTTPREQUEST}, |
| 47 {proto::ELEMENT_TYPE_OBJECT_SUBREQUEST, |
| 48 flat::ElementType_OBJECT}, // Normally we can not distinguish between the |
| 49 // main plugin resource and any other loads it |
| 50 // makes. We treat them both as OBJECT |
| 51 // requests. |
| 52 {proto::ELEMENT_TYPE_SUBDOCUMENT, flat::ElementType_SUBDOCUMENT}, |
| 53 {proto::ELEMENT_TYPE_PING, flat::ElementType_PING}, |
| 54 {proto::ELEMENT_TYPE_MEDIA, flat::ElementType_MEDIA}, |
| 55 {proto::ELEMENT_TYPE_FONT, flat::ElementType_FONT}, |
| 56 {proto::ELEMENT_TYPE_POPUP, |
| 57 flat::ElementType_NONE}, // Filtering popups is not supported. |
| 58 {proto::ELEMENT_TYPE_WEBSOCKET, flat::ElementType_WEBSOCKET}, |
| 59 }; |
27 | 60 |
28 base::StringPiece ToStringPiece(const flatbuffers::String* string) { | 61 base::StringPiece ToStringPiece(const flatbuffers::String* string) { |
29 DCHECK(string); | 62 DCHECK(string); |
30 return base::StringPiece(string->c_str(), string->size()); | 63 return base::StringPiece(string->c_str(), string->size()); |
31 } | 64 } |
32 | 65 |
33 // Performs three-way comparison between two domains. In the total order defined | 66 // Performs three-way comparison between two domains. In the total order defined |
34 // by this predicate, the lengths of domains will be monotonically decreasing. | 67 // by this predicate, the lengths of domains will be monotonically decreasing. |
35 int CompareDomains(base::StringPiece lhs_domain, base::StringPiece rhs_domain) { | 68 int CompareDomains(base::StringPiece lhs_domain, base::StringPiece rhs_domain) { |
36 if (lhs_domain.size() != rhs_domain.size()) | 69 if (lhs_domain.size() != rhs_domain.size()) |
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
135 case proto::ANCHOR_TYPE_SUBDOMAIN: | 168 case proto::ANCHOR_TYPE_SUBDOMAIN: |
136 *result = flat::AnchorType_SUBDOMAIN; | 169 *result = flat::AnchorType_SUBDOMAIN; |
137 break; | 170 break; |
138 default: | 171 default: |
139 return false; // Unsupported anchor type. | 172 return false; // Unsupported anchor type. |
140 } | 173 } |
141 return true; | 174 return true; |
142 } | 175 } |
143 | 176 |
144 bool InitializeOptions() { | 177 bool InitializeOptions() { |
| 178 static_assert(flat::OptionFlag_ANY <= std::numeric_limits<uint8_t>::max(), |
| 179 "Option flags can not be stored in uint8_t."); |
| 180 |
145 if (rule_.semantics() == proto::RULE_SEMANTICS_WHITELIST) { | 181 if (rule_.semantics() == proto::RULE_SEMANTICS_WHITELIST) { |
146 options_ |= flat::OptionFlag_IS_WHITELIST; | 182 options_ |= flat::OptionFlag_IS_WHITELIST; |
147 } else if (rule_.semantics() != proto::RULE_SEMANTICS_BLACKLIST) { | 183 } else if (rule_.semantics() != proto::RULE_SEMANTICS_BLACKLIST) { |
148 return false; // Unsupported semantics. | 184 return false; // Unsupported semantics. |
149 } | 185 } |
150 | 186 |
151 switch (rule_.source_type()) { | 187 switch (rule_.source_type()) { |
152 case proto::SOURCE_TYPE_ANY: | 188 case proto::SOURCE_TYPE_ANY: |
153 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; | 189 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; |
154 // Note: fall through here intentionally. | 190 // Note: fall through here intentionally. |
155 case proto::SOURCE_TYPE_FIRST_PARTY: | 191 case proto::SOURCE_TYPE_FIRST_PARTY: |
156 options_ |= flat::OptionFlag_APPLIES_TO_FIRST_PARTY; | 192 options_ |= flat::OptionFlag_APPLIES_TO_FIRST_PARTY; |
157 break; | 193 break; |
158 case proto::SOURCE_TYPE_THIRD_PARTY: | 194 case proto::SOURCE_TYPE_THIRD_PARTY: |
159 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; | 195 options_ |= flat::OptionFlag_APPLIES_TO_THIRD_PARTY; |
160 break; | 196 break; |
161 | 197 |
162 default: | 198 default: |
163 return false; // Unsupported source type. | 199 return false; // Unsupported source type. |
164 } | 200 } |
165 | 201 |
166 if (rule_.match_case()) | 202 if (rule_.match_case()) |
167 options_ |= flat::OptionFlag_IS_MATCH_CASE; | 203 options_ |= flat::OptionFlag_IS_MATCH_CASE; |
168 | 204 |
169 return true; | 205 return true; |
170 } | 206 } |
171 | 207 |
172 bool InitializeElementTypes() { | 208 bool InitializeElementTypes() { |
173 static_assert( | 209 static_assert(flat::ElementType_ANY <= std::numeric_limits<uint16_t>::max(), |
174 proto::ELEMENT_TYPE_ALL <= std::numeric_limits<uint16_t>::max(), | 210 "Element types can not be stored in uint16_t."); |
175 "Element types can not be stored in uint16_t."); | |
176 element_types_ = static_cast<uint16_t>(rule_.element_types()); | |
177 | 211 |
178 // Note: Normally we can not distinguish between the main plugin resource | 212 element_types_ = flat::ElementType_NONE; |
179 // and any other loads it makes. We treat them both as OBJECT requests. | |
180 if (element_types_ & proto::ELEMENT_TYPE_OBJECT_SUBREQUEST) | |
181 element_types_ |= proto::ELEMENT_TYPE_OBJECT; | |
182 | 213 |
183 // Ignore unknown element types. | 214 for (const auto& pair : kElementTypeMaskPairs) |
184 element_types_ &= proto::ELEMENT_TYPE_ALL; | 215 if (rule_.element_types() & pair.first) |
185 // Filtering popups is not supported. | 216 element_types_ |= pair.second; |
186 element_types_ &= ~proto::ELEMENT_TYPE_POPUP; | |
187 | 217 |
188 return true; | 218 return true; |
189 } | 219 } |
190 | 220 |
191 bool InitializeActivationTypes() { | 221 bool InitializeActivationTypes() { |
192 static_assert( | 222 static_assert( |
193 proto::ACTIVATION_TYPE_ALL <= std::numeric_limits<uint8_t>::max(), | 223 flat::ActivationType_ANY <= std::numeric_limits<uint8_t>::max(), |
194 "Activation types can not be stored in uint8_t."); | 224 "Activation types can not be stored in uint8_t."); |
195 activation_types_ = static_cast<uint8_t>(rule_.activation_types()); | 225 activation_types_ = flat::ActivationType_NONE; |
196 | 226 |
197 // Only the following activation types are supported, ignore the others. | 227 for (const auto& pair : kActivationMaskPairs) |
198 activation_types_ &= | 228 if (rule_.activation_types() & pair.first) |
199 proto::ACTIVATION_TYPE_DOCUMENT | proto::ACTIVATION_TYPE_GENERICBLOCK; | 229 activation_types_ |= pair.second; |
200 | 230 |
201 return true; | 231 return true; |
202 } | 232 } |
203 | 233 |
204 bool InitializeUrlPattern() { | 234 bool InitializeUrlPattern() { |
205 switch (rule_.url_pattern_type()) { | 235 switch (rule_.url_pattern_type()) { |
206 case proto::URL_PATTERN_TYPE_SUBSTRING: | 236 case proto::URL_PATTERN_TYPE_SUBSTRING: |
207 url_pattern_type_ = flat::UrlPatternType_SUBSTRING; | 237 url_pattern_type_ = flat::UrlPatternType_SUBSTRING; |
208 break; | 238 break; |
209 case proto::URL_PATTERN_TYPE_WILDCARDED: | 239 case proto::URL_PATTERN_TYPE_WILDCARDED: |
(...skipping 190 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
400 // the |origin|. Thus, domain filters with more domain components trump filters | 430 // the |origin|. Thus, domain filters with more domain components trump filters |
401 // with fewer domain components, i.e. the more specific a filter is, the higher | 431 // with fewer domain components, i.e. the more specific a filter is, the higher |
402 // the priority. | 432 // the priority. |
403 // | 433 // |
404 // A rule whose domain list is empty or contains only negative domains is still | 434 // A rule whose domain list is empty or contains only negative domains is still |
405 // considered a "generic" rule. Therefore, if |disable_generic_rules| is set, | 435 // considered a "generic" rule. Therefore, if |disable_generic_rules| is set, |
406 // this function will always return false for such rules. | 436 // this function will always return false for such rules. |
407 bool DoesOriginMatchDomainList(const url::Origin& origin, | 437 bool DoesOriginMatchDomainList(const url::Origin& origin, |
408 const flat::UrlRule& rule, | 438 const flat::UrlRule& rule, |
409 bool disable_generic_rules) { | 439 bool disable_generic_rules) { |
410 const bool is_generic = !rule.domains_included(); | 440 const bool is_generic = |
411 DCHECK(is_generic || rule.domains_included()->size()); | 441 !rule.domains_included() || !rule.domains_included()->size(); |
| 442 // An empty |domains_included| list is allowed and makes the rule generic. |
412 if (disable_generic_rules && is_generic) | 443 if (disable_generic_rules && is_generic) |
413 return false; | 444 return false; |
414 | 445 |
415 // Unique |origin| matches lists of exception domains only. | 446 // Unique |origin| matches lists of exception domains only. |
416 if (origin.unique()) | 447 if (origin.unique()) |
417 return is_generic; | 448 return is_generic; |
418 | 449 |
419 size_t longest_matching_included_domain_length = 1; | 450 size_t longest_matching_included_domain_length = 1; |
420 if (!is_generic) { | 451 if (!is_generic) { |
421 longest_matching_included_domain_length = | 452 longest_matching_included_domain_length = |
(...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
558 (activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED)) { | 589 (activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED)) { |
559 return nullptr; | 590 return nullptr; |
560 } | 591 } |
561 | 592 |
562 return FindMatchInFlatUrlPatternIndex(*flat_index_, url, first_party_origin, | 593 return FindMatchInFlatUrlPatternIndex(*flat_index_, url, first_party_origin, |
563 element_type, activation_type, | 594 element_type, activation_type, |
564 is_third_party, disable_generic_rules); | 595 is_third_party, disable_generic_rules); |
565 } | 596 } |
566 | 597 |
567 } // namespace subresource_filter | 598 } // namespace subresource_filter |
OLD | NEW |