OLD | NEW |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/url_formatter/url_formatter.h" | 5 #include "components/url_formatter/url_formatter.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 #include <string.h> | 8 #include <string.h> |
9 | 9 |
10 #include <vector> | 10 #include <vector> |
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
114 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", true}, | 114 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", true}, |
115 // Thai + Common | 115 // Thai + Common |
116 {"xn---123-9goxcp8c9db2r.th", | 116 {"xn---123-9goxcp8c9db2r.th", |
117 L"\x0e20\x0e32\x0e29\x0e32\x0e44\x0e17\x0e22-123.th", true}, | 117 L"\x0e20\x0e32\x0e29\x0e32\x0e44\x0e17\x0e22-123.th", true}, |
118 // Devanagari (Hindi) | 118 // Devanagari (Hindi) |
119 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", true}, | 119 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", true}, |
120 // Devanagari + Common | 120 // Devanagari + Common |
121 {"xn---123-kbjl2j0bl2k.in", | 121 {"xn---123-kbjl2j0bl2k.in", |
122 L"\x0939\x093f\x0928\x094d\x0926\x0940-123.in", true}, | 122 L"\x0939\x093f\x0928\x094d\x0926\x0940-123.in", true}, |
123 | 123 |
124 // 5 Aspirational scripts | 124 // 4 Aspirational scripts |
125 // Unified Canadian Syllabary | 125 // Unified Canadian Syllabary |
126 {"xn--dfe0tte.ca", L"\x1456\x14c2\x14ef.ca", true}, | 126 {"xn--dfe0tte.ca", L"\x1456\x14c2\x14ef.ca", true}, |
127 // Tifinagh | 127 // Tifinagh |
128 {"xn--4ljxa2bb4a6bxb.ma", | 128 {"xn--4ljxa2bb4a6bxb.ma", |
129 L"\x2d5c\x2d49\x2d3c\x2d49\x2d4f\x2d30\x2d56.ma", true}, | 129 L"\x2d5c\x2d49\x2d3c\x2d49\x2d4f\x2d30\x2d56.ma", true}, |
130 // Tifinagh with a disallowed character(U+2D6F) | 130 // Tifinagh with a disallowed character(U+2D6F) |
131 {"xn--hmjzaby5d5f.ma", L"\x2d5c\x2d49\x2d3c\x2d6f\x2d49\x2d4f.ma", false}, | 131 {"xn--hmjzaby5d5f.ma", L"\x2d5c\x2d49\x2d3c\x2d6f\x2d49\x2d4f.ma", false}, |
132 // Yi | 132 // Yi |
133 {"xn--4o7a6e1x64c.cn", L"\xa188\xa320\xa071\xa0b7.cn", true}, | 133 {"xn--4o7a6e1x64c.cn", L"\xa188\xa320\xa071\xa0b7.cn", true}, |
134 // Mongolian - 'ordu' (place, camp) | |
135 {"xn--56ec8bp.cn", L"\x1823\x1837\x1833\x1824.cn", true}, | |
136 // Mongolian with a disallowed character | |
137 {"xn--95e5de3ds.cn", L"\x1823\x1837\x1804\x1833\x1824.cn", false}, | |
138 // Miao/Pollard | 134 // Miao/Pollard |
139 {"xn--2u0fpf0a.cn", L"\U00016f04\U00016f62\U00016f59.cn", true}, | 135 {"xn--2u0fpf0a.cn", L"\U00016f04\U00016f62\U00016f59.cn", true}, |
140 | 136 |
| 137 // Mongolian is disallowed because it's written vertically. |
| 138 // Mongolian - 'ordu' (place, camp) |
| 139 {"xn--56ec8bp.cn", L"\x1823\x1837\x1833\x1824.cn", false}, |
| 140 // Mongolian with a disallowed character |
| 141 {"xn--95e5de3ds.cn", L"\x1823\x1837\x1804\x1833\x1824.cn", false}, |
| 142 |
141 // Script mixing tests | 143 // Script mixing tests |
142 // The following script combinations are allowed. | 144 // The following script combinations are allowed. |
143 // MODERATELY_RESTRICTIVE with Latin limited to ASCII-Latin. | 145 // MODERATELY_RESTRICTIVE with Latin limited to ASCII-Latin. |
144 // ASCII-Latin + Japn (Kana + Han) | 146 // ASCII-Latin + Japn (Kana + Han) |
145 // ASCII-Latin + Kore (Hangul + Han) | 147 // ASCII-Latin + Kore (Hangul + Han) |
146 // ASCII-Latin + Han + Bopomofo | 148 // ASCII-Latin + Han + Bopomofo |
147 // ASCII-Latin + any allowed script other than Cyrillic, Greek, Cherokee | 149 // ASCII-Latin + any allowed script other than Cyrillic, Greek, Cherokee |
148 // and Unified Canadian Syllabary | 150 // and Unified Canadian Syllabary |
149 // "payp<alpha>l.com" | 151 // "payp<alpha>l.com" |
150 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", false}, | 152 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", false}, |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
202 // Canadian Syllabary + Latin | 204 // Canadian Syllabary + Latin |
203 {"xn--ab-lym.com", L"ab\x14BF.com", false}, | 205 {"xn--ab-lym.com", L"ab\x14BF.com", false}, |
204 {"xn--ab1-p6q.com", L"ab1\x14BF.com", false}, | 206 {"xn--ab1-p6q.com", L"ab1\x14BF.com", false}, |
205 {"xn--1ab-m6qd.com", L"\x14BF" L"1ab.com", false}, | 207 {"xn--1ab-m6qd.com", L"\x14BF" L"1ab.com", false}, |
206 {"xn--ab-jymc.com", L"\x14BF" L"ab.com", false}, | 208 {"xn--ab-jymc.com", L"\x14BF" L"ab.com", false}, |
207 // Tifinagh + Latin | 209 // Tifinagh + Latin |
208 {"xn--liy-go4a.com", L"li\u24dfy.com", false}, | 210 {"xn--liy-go4a.com", L"li\u24dfy.com", false}, |
209 {"xn--rol-ho4a.com", L"rol\u24df.com", false}, | 211 {"xn--rol-ho4a.com", L"rol\u24df.com", false}, |
210 {"xn--ily-eo4a.com", L"\u24dfily.com", false}, | 212 {"xn--ily-eo4a.com", L"\u24dfily.com", false}, |
211 {"xn--1ly-eo4a.com", L"\u24df1ly.com", false}, | 213 {"xn--1ly-eo4a.com", L"\u24df1ly.com", false}, |
| 214 // Miao + Latin |
| 215 {"xn--liy-rc12a.com", L"li\U00016FD8y.com", false}, |
| 216 {"xn--rol-sc12a.com", L"roll\U00016FD8.com", false}, |
| 217 {"xn--ily-pc12a.com", L"\U00016FD8ily.com", false}, |
| 218 {"xn--1ly-pc12a.com", L"\U00016FD81ly.com", false}, |
212 | 219 |
213 // Invisibility check | 220 // Invisibility check |
214 // Thai tone mark Mai Ek (U+0E48) repeated | 221 // Thai tone mark Mai Ek (U+0E48) repeated |
215 {"xn--03c0b3ca.th", L"\x0e23\x0e35\x0e48\x0e48.th", false}, | 222 {"xn--03c0b3ca.th", L"\x0e23\x0e35\x0e48\x0e48.th", false}, |
216 // Acute accent repeated | 223 // Acute accent repeated |
217 {"xn--a-xbba.com", L"a\x0301\x0301.com", false}, | 224 {"xn--a-xbba.com", L"a\x0301\x0301.com", false}, |
218 // 'a' with acute accent + another acute accent | 225 // 'a' with acute accent + another acute accent |
219 {"xn--1ca20i.com", L"\x00e1\x0301.com", false}, | 226 {"xn--1ca20i.com", L"\x00e1\x0301.com", false}, |
220 // Combining mark at the beginning | 227 // Combining mark at the beginning |
221 {"xn--abc-fdc.jp", L"\x0300" L"abc.jp", false}, | 228 {"xn--abc-fdc.jp", L"\x0300" L"abc.jp", false}, |
222 | 229 |
223 // Mixed script confusable | 230 // Mixed script confusable |
224 // google with Armenian Small Letter Oh(U+0585) | 231 // Armenian օ, ո, հ, and զ mixed with Latin |
225 {"xn--gogle-lkg.com", L"g\x0585ogle.com", false}, | 232 {"xn--gogle-lkg.com", L"g\x0585ogle.com", false}, |
226 {"xn--range-kkg.com", L"\x0585range.com", false}, | 233 {"xn--range-kkg.com", L"\x0585range.com", false}, |
227 {"xn--cucko-pkg.com", L"cucko\x0585.com", false}, | 234 {"xn--cucko-pkg.com", L"cucko\x0585.com", false}, |
228 // Latin 'o' in Armenian. | 235 {"xn--an-bed.com", L"\x0578" L"an.com", false}, |
| 236 {"xn--hig-tee.com", L"hig\x0570.com", false}, |
| 237 {"xn---ray-fef.com", L"\x0566-ray.com", false}, |
| 238 // Latin 'o', 'h' and 'n' in Armenian |
229 {"xn--o-ybcg0cu0cq.com", | 239 {"xn--o-ybcg0cu0cq.com", |
230 L"o\x0585\x0580\x0574\x0578\x0582\x0566\x0568.com", false}, | 240 L"o\x0585\x0580\x0574\x0578\x0582\x0566\x0568.com", false}, |
| 241 {"xn--h-qccm4a.com", L"\x0580\x0574\x0578h.com", false}, |
| 242 {"xn--n-rccm3a.com", L"\x0580n\x0574\x0578.com", false}, |
| 243 {"xn--n1-0ddq0b.com", L"\x0580n1\x0574\x0578.com", false}, |
231 // Hiragana HE(U+3078) mixed with Katakana | 244 // Hiragana HE(U+3078) mixed with Katakana |
232 {"xn--49jxi3as0d0fpc.com", | 245 {"xn--49jxi3as0d0fpc.com", |
233 L"\x30e2\x30d2\x30fc\x30c8\x3078\x30d6\x30f3.com", false}, | 246 L"\x30e2\x30d2\x30fc\x30c8\x3078\x30d6\x30f3.com", false}, |
234 | 247 |
235 // U+30FC should be preceded by a Hiragana/Katakana. | 248 // U+30FC should be preceded by a Hiragana/Katakana. |
236 // Katakana + U+30FC + Han | 249 // Katakana + U+30FC + Han |
237 {"xn--lck0ip02qw5ya.jp", L"\x30ab\x30fc\x91ce\x7403.jp", true}, | 250 {"xn--lck0ip02qw5ya.jp", L"\x30ab\x30fc\x91ce\x7403.jp", true}, |
238 // Hiragana + U+30FC + Han | 251 // Hiragana + U+30FC + Han |
239 {"xn--u8j5tr47nw5ya.jp", L"\x304b\x30fc\x91ce\x7403.jp", true}, | 252 {"xn--u8j5tr47nw5ya.jp", L"\x304b\x30fc\x91ce\x7403.jp", true}, |
240 // U+30FC + Han | 253 // U+30FC + Han |
(...skipping 808 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1049 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, | 1062 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, |
1050 0, 1, 2, 3, 4, 5, 6, 7 | 1063 0, 1, 2, 3, 4, 5, 6, 7 |
1051 }; | 1064 }; |
1052 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll, | 1065 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll, |
1053 net::UnescapeRule::NORMAL, omit_all_offsets); | 1066 net::UnescapeRule::NORMAL, omit_all_offsets); |
1054 } | 1067 } |
1055 | 1068 |
1056 } // namespace | 1069 } // namespace |
1057 | 1070 |
1058 } // namespace url_formatter | 1071 } // namespace url_formatter |
OLD | NEW |