Index: components/url_formatter/idn_spoof_checker.cc |
diff --git a/components/url_formatter/idn_spoof_checker.cc b/components/url_formatter/idn_spoof_checker.cc |
index c3209cc6d4a699c012225de86f165a7fd3203613..f6a1a51b3d43ef2c8b394e15d7d5da984fb48cfa 100644 |
--- a/components/url_formatter/idn_spoof_checker.cc |
+++ b/components/url_formatter/idn_spoof_checker.cc |
@@ -232,11 +232,10 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(base::StringPiece16 label, |
// - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters |
// (U+30D[8-A]) that look exactly like each other when they're used in a |
// label otherwise entirely in Katakana or Hiragana. |
- // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small |
- // Letter Co) to be next to Latin. |
- // - Disallow Latin 'o' and 'g' next to Armenian. |
+ // - Disallow mixing of Latin and Armenian. |
Peter Kasting
2017/08/29 04:08:30
Nit: Period at end (2 places)
|
// - Disallow mixing of Latin and Canadian Syllabary. |
// - Disallow mixing of Latin and Tifinagh. |
+ // - Disallow mixing of Latin and Miao. |
// - Disallow combining diacritical mark (U+0300-U+0339) after a non-LGC |
// character. Other combining diacritical marks are not in the allowed |
// character set. |
@@ -250,14 +249,11 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(base::StringPiece16 label, |
R"(^[\p{scx=kana}]+[\u3078-\u307a][\p{scx=kana}]+$|)" |
R"(^[\p{scx=hira}]+[\u30d8-\u30da][\p{scx=hira}]+$|)" |
R"([a-z]\u30fb|\u30fb[a-z]|)" |
- R"(^[\u0585\u0581]+[a-z]|[a-z][\u0585\u0581]+$|)" |
- R"([a-z][\u0585\u0581]+[a-z]|)" |
- R"(^[og]+[\p{scx=armn}]|[\p{scx=armn}][og]+$|)" |
- R"([\p{scx=armn}][og]+[\p{scx=armn}]|)" |
+ R"([\p{sc=armn}].*[a-z]|[a-z].*[\p{sc=armn}]|)" |
R"([\p{sc=cans}].*[a-z]|[a-z].*[\p{sc=cans}]|)" |
R"([\p{sc=tfng}].*[a-z]|[a-z].*[\p{sc=tfng}]|)" |
- R"([^\p{scx=latn}\p{scx=grek}\p{scx=cyrl}][\u0300-\u0339])", |
- -1, US_INV), |
Peter Kasting
2017/08/29 04:08:29
Were these just default values?
|
+ R"([\p{sc=miao}].*[a-z]|[a-z].*[\p{sc=miao}]|)" |
+ R"([^\p{scx=latn}\p{scx=grek}\p{scx=cyrl}][\u0300-\u0339])"), |
0, status); |
tls_index.Set(dangerous_pattern); |
} |
@@ -330,17 +326,16 @@ void IDNSpoofChecker::SetAllowedUnicodeSet(UErrorCode* status) { |
// identifiers. Therefore, only characters belonging to |
// [:Identifier_Type=Aspirational:] (listed in 'Status/Type=Aspirational' |
// section at |
-// http://www.unicode.org/Public/security/latest/xidmodifications.txt) are |
-// are added to the allowed set. The list has to be updated when a new |
+// http://www.unicode.org/Public/security/9.0.0/IdentifierType.txt) are |
+// added to the allowed set. The list has to be updated when a new |
// version of Unicode is released. The current version is 9.0.0 and ICU 60 |
// will have Unicode 10.0 data. |
+// Note that Mongolian is dropped because it's written vertically. |
#if U_ICU_VERSION_MAJOR_NUM < 60 |
const icu::UnicodeSet aspirational_scripts( |
icu::UnicodeString( |
// Unified Canadian Syllabics |
"[\\u1401-\\u166C\\u166F-\\u167F" |
- // Mongolian |
- "\\u1810-\\u1819\\u1820-\\u1877\\u1880-\\u18AA" |
// Unified Canadian Syllabics |
"\\u18B0-\\u18F5" |
// Tifinagh |