Add option set::spamfilter::utf8-support which defaults to 'no' for now.

When you set this to 'yes' you get more options...
See next (modified) copy-paste from April 2020, which had to be reverted
because PCRE2 was broken. Now it's an opt-in and hopefully matured a bit.

This means:
* Case insensitive matches work better in UTF8 now, such as extended Latin.
  For example, a spamfilter on "ę" now also matches "Ę", while previously
  it did not catch this.
* Other PCRE2 features such as https://www.pcre.org/current/doc/html/pcre2syntax.html#SEC5
  are now available. For example you can now set a spamfilter with the regex
  \p{Arabic} to block all Arabic script, or
  \p{Cyrillic} to block all Cyrillic script (such as Russian)
  Use these new tools with care, of course. Blocking an entire language,
  or script, is quite a drastic measure.

All of this was possible because of the new PCRE2_MATCH_INVALID_UTF
compile time option which was introduced in PCRE2 10.34. Now, that
version turned out to be buggy. As recent as PCRE 10.36 some major bugs
were fixed. This also means we now require at least PCRE2 10.36 version
so everyone can benefit from this new spamfilter UTF8 feature, IF they
enable set::spamfilter::utf8-support, that is.

Many systems come with older PCRE2 versions so this means we will
fall back to the shipped PCRE2 version in UnrealIRCd. This means
./Config will take a little longer to compile things.

For packagers (rpm/deb/ports): if you choose to patch configure to
not require such a recent PCRE2, then please do not allow enabling
of set::spamfilter::utf8-support since it will likely cause crashes
and misbehavior. Check PCRE2 changelog, CTRL+F at PCRE2_MATCH_INVALID_UTF
pull/41/head
Bram Matthys 1 year ago
parent b821aa419f
commit dedff543b5
No known key found for this signature in database
GPG Key ID: BF8116B163EAAE98

16
configure vendored

@ -7392,12 +7392,12 @@ if test -n "$PCRE2_CFLAGS"; then
pkg_cv_PCRE2_CFLAGS="$PCRE2_CFLAGS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
{ { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libpcre2-8 >= 10.34\""; } >&5
($PKG_CONFIG --exists --print-errors "libpcre2-8 >= 10.34") 2>&5
{ { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libpcre2-8 >= 10.36\""; } >&5
($PKG_CONFIG --exists --print-errors "libpcre2-8 >= 10.36") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
pkg_cv_PCRE2_CFLAGS=`$PKG_CONFIG --cflags "libpcre2-8 >= 10.34" 2>/dev/null`
pkg_cv_PCRE2_CFLAGS=`$PKG_CONFIG --cflags "libpcre2-8 >= 10.36" 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@ -7409,12 +7409,12 @@ if test -n "$PCRE2_LIBS"; then
pkg_cv_PCRE2_LIBS="$PCRE2_LIBS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
{ { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libpcre2-8 >= 10.34\""; } >&5
($PKG_CONFIG --exists --print-errors "libpcre2-8 >= 10.34") 2>&5
{ { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libpcre2-8 >= 10.36\""; } >&5
($PKG_CONFIG --exists --print-errors "libpcre2-8 >= 10.36") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
pkg_cv_PCRE2_LIBS=`$PKG_CONFIG --libs "libpcre2-8 >= 10.34" 2>/dev/null`
pkg_cv_PCRE2_LIBS=`$PKG_CONFIG --libs "libpcre2-8 >= 10.36" 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@ -7435,9 +7435,9 @@ else
_pkg_short_errors_supported=no
fi
if test $_pkg_short_errors_supported = yes; then
PCRE2_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libpcre2-8 >= 10.34" 2>&1`
PCRE2_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libpcre2-8 >= 10.36" 2>&1`
else
PCRE2_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libpcre2-8 >= 10.34" 2>&1`
PCRE2_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libpcre2-8 >= 10.36" 2>&1`
fi
# Put the nasty error message in config.log where it belongs
echo "$PCRE2_PKG_ERRORS" >&5

@ -578,7 +578,7 @@ export PATH_SEPARATOR
dnl Use system pcre2 when available, unless --without-system-pcre2.
has_system_pcre2="no"
AS_IF([test "x$with_system_pcre2" = "xyes"],[
PKG_CHECK_MODULES([PCRE2], libpcre2-8 >= 10.34,[has_system_pcre2=yes
PKG_CHECK_MODULES([PCRE2], libpcre2-8 >= 10.36,[has_system_pcre2=yes
AS_IF([test "x$PRIVATELIBDIR" != "x"], [rm -f "$PRIVATELIBDIR/"libpcre2*])],[has_system_pcre2=no])])
AS_IF([test "$has_system_pcre2" = "no"], [

@ -128,6 +128,7 @@ struct Configuration {
long spamfilter_detectslow_warn;
long spamfilter_detectslow_fatal;
int spamfilter_stop_on_first_match;
int spamfilter_utf8_support;
int maxbans;
int maxbanlength;
int watch_away_notification;

@ -7666,6 +7666,10 @@ int _conf_set(ConfigFile *conf, ConfigEntry *ce)
{
tempiConf.spamfilter_stop_on_first_match = config_checkval(cepp->value, CFG_YESNO);
}
else if (!strcmp(cepp->name, "utf8-support"))
{
tempiConf.spamfilter_utf8_support = config_checkval(cepp->value, CFG_YESNO);
}
}
}
else if (!strcmp(cep->name, "default-bantime"))
@ -8792,6 +8796,9 @@ int _test_set(ConfigFile *conf, ConfigEntry *ce)
if (!strcmp(cepp->name, "stop-on-first-match"))
{
} else
if (!strcmp(cepp->name, "utf8-support"))
{
} else
{
config_error_unknown(cepp->file->filename,
cepp->line_number, "set::spamfilter",

@ -405,8 +405,13 @@ Match *unreal_create_match(MatchType type, const char *str, char **error)
int options = 0;
char buf2[512];
options = PCRE2_CASELESS|PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
options = PCRE2_CASELESS;
if (iConf.spamfilter_utf8_support)
options |= PCRE2_MATCH_INVALID_UTF;
else
options |= PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
m->ext.pcre2_expr = pcre2_compile(str, PCRE2_ZERO_TERMINATED, options, &errorcode, &erroroffset, NULL);
if (m->ext.pcre2_expr == NULL)
{

@ -917,6 +917,7 @@ int stats_set(Client *client, const char *para)
sendtxtnumeric(client, "spamfilter::virus-help-channel: %s", SPAMFILTER_VIRUSCHAN);
if (SPAMFILTER_EXCEPT)
sendtxtnumeric(client, "spamfilter::except: %s", SPAMFILTER_EXCEPT);
sendtxtnumeric(client, "spamfilter::utf8-support: %s", iConf.spamfilter_utf8_support ? "yes" : "no");
sendtxtnumeric(client, "check-target-nick-bans: %s", CHECK_TARGET_NICK_BANS ? "yes" : "no");
sendtxtnumeric(client, "plaintext-policy::user: %s", policy_valtostr(iConf.plaintext_policy_user));
sendtxtnumeric(client, "plaintext-policy::oper: %s", policy_valtostr(iConf.plaintext_policy_oper));

Loading…
Cancel
Save