diff --git a/src/autotests/CMakeLists.txt b/src/autotests/CMakeLists.txt
index 0a381f85..f1ac634b 100644
--- a/src/autotests/CMakeLists.txt
+++ b/src/autotests/CMakeLists.txt
@@ -129,3 +129,8 @@ ecm_mark_nongui_executable(Vt102EmulationTest)
 add_test(NAME Vt102EmulationTest COMMAND Vt102EmulationTest)
 target_link_libraries(Vt102EmulationTest ${KONSOLE_TEST_LIBS})
 
+include(ECMAddTests)
+ecm_add_test(
+    HotSpotFilterTest.cpp
+    LINK_LIBRARIES ${KONSOLE_TEST_LIBS}
+)
diff --git a/src/autotests/HotSpotFilterTest.cpp b/src/autotests/HotSpotFilterTest.cpp
new file mode 100644
index 00000000..43ae68d3
--- /dev/null
+++ b/src/autotests/HotSpotFilterTest.cpp
@@ -0,0 +1,49 @@
+/*
+    SPDX-FileCopyrightText: 2022 Ahmad Samir
+
+    SPDX-License-Identifier: GPL-2.0-or-later
+*/
+
+#include "HotSpotFilterTest.h"
+#include <QTest>
+
+QTEST_GUILESS_MAIN(HotSpotFilterTest)
+
+// Data rows: a candidate string ("url") and whether FullUrlRegExp is expected to match it ("matchResult")
+void HotSpotFilterTest::testUrlFilterRegex_data()
+{
+    QTest::addColumn<QString>("url");
+    QTest::addColumn<bool>("matchResult");
+
+    // A space, \n, or \t before the url to match what happens at runtime,
+    // i.e. to match "http" but not "foohttp"
+    QTest::newRow("url_simple") << " https://api.kde.org" << true;
+    QTest::newRow("url_with_port") << "\nhttps://api.kde.org:2098" << true;
+    QTest::newRow("url_with_path") << "https://api.kde.org/path/to/somewhere" << true;
+    QTest::newRow("url_with_query") << "https://user:pass@api.kde.org?somequery=foo" << true;
+    QTest::newRow("url_with_port_path") << " https://api.kde.org:2098/path/to/somewhere" << true;
+    QTest::newRow("url_with_user_password") << "\thttps://user:blah@api.kde.org" << true;
+    QTest::newRow("url_with_user_password_port_fragment") << " https://user:blah@api.kde.org:2098#fragment" << true;
+    QTest::newRow("url_all_bells") << " https://user:pass@api.kde.org:2098/path/to/somewhere?somequery=foo#fragment" << true;
+    QTest::newRow("uppercase") << " https://invent.kde.org/frameworks/ktexteditor/-/blob/master/README.md" << true;
+    QTest::newRow("markup") << " [https://foobar](https://foobar)" << true;
+
+    QTest::newRow("bad_url_no_scheme") << QStringLiteral(" www.kde.org") << false;
+}
+
+// Runs FullUrlRegExp against the current data row and checks the match result and the captured text
+void HotSpotFilterTest::testUrlFilterRegex()
+{
+    QFETCH(QString, url);
+    QFETCH(bool, matchResult);
+
+    const QRegularExpression &regex = Konsole::UrlFilter::FullUrlRegExp;
+    const QRegularExpressionMatch match = regex.match(url);
+    QCOMPARE(match.hasMatch(), matchResult);
+    // For the "markup" row only the first URL (the one inside the square brackets) is expected to be captured
+    if (strcmp(QTest::currentDataTag(), "markup") == 0) {
+        QCOMPARE(match.capturedView(0), u"https://foobar");
+    } else if (matchResult) {
+        QCOMPARE(match.capturedView(0), url.trimmed());
+    }
+}
diff --git a/src/autotests/HotSpotFilterTest.h b/src/autotests/HotSpotFilterTest.h
new file mode 100644
index 00000000..728ebbe4
--- /dev/null
+++ b/src/autotests/HotSpotFilterTest.h
@@ -0,0 +1,21 @@
+/*
+    SPDX-FileCopyrightText: 2022 Ahmad Samir
+
+    SPDX-License-Identifier: GPL-2.0-or-later
+*/
+
+#ifndef HOTSPOTFILTERTEST_H
+#define HOTSPOTFILTERTEST_H
+
+#include "filterHotSpots/UrlFilter.h"
+
+class HotSpotFilterTest : public QObject
+{
+    Q_OBJECT
+
+private Q_SLOTS:
+    void testUrlFilterRegex_data();
+    void testUrlFilterRegex();
+};
+
+#endif // HOTSPOTFILTERTEST_H
diff --git a/src/filterHotSpots/UrlFilter.cpp b/src/filterHotSpots/UrlFilter.cpp
index 47518220..c3a1c039 100644
--- a/src/filterHotSpots/UrlFilter.cpp
+++ b/src/filterHotSpots/UrlFilter.cpp
@@ -15,9 +15,51 @@ using namespace Konsole;
 // used for finding URLs in the text, especially if they are very general and could match very long
 // pieces of text.
 // Please be careful when altering them.
-// protocolname:// or www. followed by anything other than whitespaces, <, >, ', ", ], !, ), : and comma, and ends before whitespaces, <, >, ', ", ], !, ), :, comma and dot
-// I. e. it can end with anything as a part of the URL except .
-const QRegularExpression UrlFilter::FullUrlRegExp(QStringLiteral("(www\\.(?!\\.)|[a-z][a-z0-9+.-]*://)[^!,\\s<>'\"\\]\\)\\:]+[^!,\\.\\s<>'\"\\]\\)\\:]"));
+
+// FullUrlRegExp is implemented based on:
+// https://datatracker.ietf.org/doc/html/rfc3986
+// See above URL for what "unreserved", "pct-encoded" ...etc mean, also
+// for the regex used for each part of the url being matched against
+
+// unreserved / pct-encoded / sub-delims
+// [a-z0-9\\-._~%!$&'()*+,;=]
+// The above string is used in various char[] below
+
+// All () groups are non-capturing (by using "(?:)" notation)
+// less bookkeeping on the PCRE engine side
+
+// scheme://
+// - Must start with an ASCII letter, preceded by any non-word character,
+//   so "http" but not "mhttp"
+static const char scheme[] = "(?<=^|\\s|\\W)(?:[a-z][a-z0-9+\\-.]*://)";
+
+// user:password@
+static const char userInfo[] =
+    "(?:"
+    "[a-z0-9\\-._~%!$&'()*+,;=]+?:?"
+    "[a-z0-9\\-._~%!$&'()*+,;=]+@"
+    ")?";
+static const char host[] = "(?:[a-z0-9\\-._~%!$&'()*+,;=]+)"; // www.foo.bar
+static const char port[] = "(?::[0-9]+)?"; // :1234
+static const char path[] = "(?:[a-zA-Z0-9\\-._~%!$&'()*+,;=:@/]+)?"; // /path/to/some/place
+static const char query[] = "(?:\\?[a-z0-9\\-._~%!$&'()*+,;=:@/]+)?"; // "?somequery=bar"
+static const char fragment[] = "(?:#[a-z0-9/?]+)?";
+
+using LS1 = QLatin1String;
+
+/* clang-format off */
+const QRegularExpression UrlFilter::FullUrlRegExp(
+    LS1(scheme)
+    + LS1(userInfo)
+    + LS1(host)
+    + LS1(port)
+    + LS1(path)
+    + LS1(query)
+    + LS1(fragment)
+    );
+/* clang-format on */
+
+/////////////////////////////////////////////
 
 // email address:
 // [word chars, dots or dashes]@[word chars, dots or dashes].[word chars]
diff --git a/src/filterHotSpots/UrlFilter.h b/src/filterHotSpots/UrlFilter.h
index 3779740c..75a2fe65 100644
--- a/src/filterHotSpots/UrlFilter.h
+++ b/src/filterHotSpots/UrlFilter.h
@@ -9,11 +9,14 @@
 #define URLFILTER_H
 
 #include "RegExpFilter.h"
+#include "konsoleprivate_export.h"
 
 namespace Konsole
 {
 /** A filter which matches URLs in blocks of text */
-class UrlFilter : public RegExpFilter
+
+// Exported for unittests
+class KONSOLEPRIVATE_EXPORT UrlFilter : public RegExpFilter
 {
 public:
     UrlFilter();