From 035368d363e31bd99efb21e1c121574718c81b5e Mon Sep 17 00:00:00 2001 From: rinpatch Date: Wed, 19 Jun 2019 00:31:30 +0300 Subject: [PATCH] Rich Media: Skip Microformats hashtags When fixing this problem I incorrectly assumed a.hashtag is the proper way for detecting hashtags, but it is just something Pleroma and Mastodon add. Per microformats it should be detected by the presence of rel=tag. This MR adds a check for rel=tag, but I still left a.hashtag just in case --- lib/pleroma/html.ex | 2 +- test/html_test.exs | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index 8c226c944..2fae7281c 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -89,7 +89,7 @@ def extract_first_external_url(object, content) do Cachex.fetch!(:scrubber_cache, key, fn _key -> result = content - |> Floki.filter_out("a.mention,a.hashtag") + |> Floki.filter_out("a.mention,a.hashtag,a[rel~=\"tag\"]") |> Floki.attribute("a", "href") |> Enum.at(0) diff --git a/test/html_test.exs b/test/html_test.exs index 64513980b..b8906c46a 100644 --- a/test/html_test.exs +++ b/test/html_test.exs @@ -212,5 +212,21 @@ test "skips hashtags" do assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140" end + + test "skips microformats hashtags" do + user = insert(:user) + + {:ok, activity} = + CommonAPI.post(user, %{ + "status" => + "#cofe https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140", + "content_type" => "text/html" + }) + + object = Object.normalize(activity) + {:ok, url} = HTML.extract_first_external_url(object, object.data["content"]) + + assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140" + end end end