From da41c19a86158527305ffd41ae12dfc5117b2661 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20Javier=20Merino=20Mor=C3=A1n?= Date: Fri, 3 Jun 2022 17:35:19 +0200 Subject: [PATCH] URI: be more strict with www. URIs We recognize URIs that start with a scheme and a possibly empty authority, and URI suffixes that start with "www." In the case of URIs starting with a scheme, they are of the form: scheme://[ userinfo "@" ] host ... while "www." URI suffixes are of the form: www. host ... where host is actually in reg-name form (not in IPv4address or IP-literal form). This commit allows more strict parsing of e.g. www.example.com:foo@bar.com as the URI "www.example.com" followed by ":foo@bar.com", instead of as one long URI. --- src/autotests/HotSpotFilterTest.cpp | 3 +++ src/filterHotSpots/UrlFilter.cpp | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/autotests/HotSpotFilterTest.cpp b/src/autotests/HotSpotFilterTest.cpp index d85f1932..28b59512 100644 --- a/src/autotests/HotSpotFilterTest.cpp +++ b/src/autotests/HotSpotFilterTest.cpp @@ -63,6 +63,9 @@ void HotSpotFilterTest::testUrlFilterRegex_data() << "http://example.com" << true; QTest::newRow("empty_fragment") << "http://example.com/#" << "http://example.com" << true; + + QTest::newRow("www_followed_by_colon") << "www.example.com:foo@bar.com" + << "www.example.com" << true; } void HotSpotFilterTest::testUrlFilterRegex() diff --git a/src/filterHotSpots/UrlFilter.cpp b/src/filterHotSpots/UrlFilter.cpp index e93b9b41..86acd8c0 100644 --- a/src/filterHotSpots/UrlFilter.cpp +++ b/src/filterHotSpots/UrlFilter.cpp @@ -37,7 +37,8 @@ using namespace Konsole; // scheme:// // - Must start with an ASCII letter, preceeded by any non-word character, // so "http" but not "mhttp" -static const char scheme_or_www[] = "(?<=^|[\\s\\[\\]()'\"])(?:www\\.|[a-z][a-z0-9+\\-.]*+://)"; +static const char scheme_or_www[] = "(?<=^|[\\s\\[\\]()'\"])(?:www\\.|[a-z][a-z0-9+\\-.]*+://"; +static const char scheme_or_www_end[] = ")"; // unreserved / pct-encoded / sub-delims #define 
COMMON_1 "a-z0-9\\-._~%!$&'()*+,;=" @@ -62,6 +63,7 @@ using LS1 = QLatin1String; const QRegularExpression UrlFilter::FullUrlRegExp( LS1(scheme_or_www) + LS1(userInfo) + + LS1(scheme_or_www_end) + LS1(host) + LS1(port) + LS1(path)