From 3dadaa4432b442d75b0ac0425aa05527d52f0e7a Mon Sep 17 00:00:00 2001 From: William Pearson Date: Sun, 20 Jan 2019 01:44:00 +0000 Subject: [PATCH] robots.txt Add default robots.txt that allows bots access to all paths. Add mix task to generate robots.txt that allows bots access to no paths. Document custom emojis, MRF and static_dir static_dir documentation includes docs for the robots.txt Mix task. --- docs/Custom-Emoji.md | 16 +++ .../Message-Rewrite-Facility-configuration.md | 118 ++++++++++++++++++ docs/static_dir.md | 20 +++ lib/mix/tasks/pleroma/robotstxt.ex | 32 +++++ lib/pleroma/plugs/instance_static.ex | 3 +- lib/pleroma/web/endpoint.ex | 2 +- priv/static/robots.txt | 2 + 7 files changed, 191 insertions(+), 2 deletions(-) create mode 100644 docs/Custom-Emoji.md create mode 100644 docs/Message-Rewrite-Facility-configuration.md create mode 100644 docs/static_dir.md create mode 100644 lib/mix/tasks/pleroma/robotstxt.ex create mode 100644 priv/static/robots.txt diff --git a/docs/Custom-Emoji.md b/docs/Custom-Emoji.md new file mode 100644 index 000000000..d4af5c97c --- /dev/null +++ b/docs/Custom-Emoji.md @@ -0,0 +1,16 @@ +To add custom emoji: +* Add the image file(s) to `priv/static/emoji/custom` +* In case of conflicts: add the desired shortcode with the path to `config/custom_emoji.txt`, comma-separated and one per line +* Force recompilation (``mix clean && mix compile``) + +Example: + +image files (in `/priv/static/emoji/custom`): `happy.png` and `sad.png` + +content of `config/custom_emoji.txt`: +``` +happy, /emoji/custom/happy.png +sad, /emoji/custom/sad.png +``` + +The files should be PNG (APNG is okay with `.png` for `image/png` Content-type) and under 50kb for compatibility with mastodon. 
diff --git a/docs/Message-Rewrite-Facility-configuration.md b/docs/Message-Rewrite-Facility-configuration.md new file mode 100644 index 000000000..708098b41 --- /dev/null +++ b/docs/Message-Rewrite-Facility-configuration.md @@ -0,0 +1,118 @@ +The Message Rewrite Facility (MRF) is a subsystem that is implemented as a series of hooks that allows the administrator to rewrite or discard messages. + +Possible uses include: + +* marking incoming messages with media from a given account or instance as sensitive +* rejecting messages from a specific instance +* removing/unlisting messages from the public timelines +* removing media from messages +* sending only public messages to a specific instance + +The MRF provides user-configurable policies. The default policy is `NoOpPolicy`, which disables the MRF functionality. Pleroma also includes an easy to use policy called `SimplePolicy` which maps messages matching certain pre-defined criteria to actions built into the policy module. +It is possible to use multiple, active MRF policies at the same time. + +## Quarantine Instances + +You have the ability to prevent private / followers-only messages from federating with specific instances, which means they will only get the public or unlisted messages from your instance. + +If, for example, you're using `MIX_ENV=prod` aka using production mode, you would open your configuration file located in `config/prod.secret.exs` and edit or add the option under your `:instance` config object. Then you would specify the instance within quotes. +``` +config :pleroma, :instance, + [...] + quarantined_instances: ["instance.example", "other.example"] +``` + +## Using `SimplePolicy` + +`SimplePolicy` is capable of handling most common admin tasks. + +To use `SimplePolicy`, you must enable it. Do so by adding the following to your `:instance` config object, so that it looks like this: + +``` +config :pleroma, :instance, + [...] 
+ rewrite_policy: Pleroma.Web.ActivityPub.MRF.SimplePolicy +``` + +Once `SimplePolicy` is enabled, you can configure various groups in the `:mrf_simple` config object. These groups are: + +* `media_removal`: Servers in this group will have media stripped from incoming messages. +* `media_nsfw`: Servers in this group will have the #nsfw tag and sensitive setting injected into incoming messages which contain media. +* `reject`: Servers in this group will have their messages rejected. +* `federated_timeline_removal`: Servers in this group will have their messages unlisted from the public timelines by flipping the `to` and `cc` fields. + +Servers should be configured as lists. + +### Example + +This example will enable `SimplePolicy`, block media from `illegalporn.biz`, mark media as NSFW from `porn.biz` and `porn.business`, reject messages from `spam.com` and remove messages from `spam.university` from the federated timeline: + +``` +config :pleroma, :instance, + rewrite_policy: [Pleroma.Web.ActivityPub.MRF.SimplePolicy] + +config :pleroma, :mrf_simple, + media_removal: ["illegalporn.biz"], + media_nsfw: ["porn.biz", "porn.business"], + reject: ["spam.com"], + federated_timeline_removal: ["spam.university"] + +``` + +### Use with Care + +The effects of MRF policies can be very drastic. It is important to use this functionality carefully. Always try to talk to an admin before writing an MRF policy concerning their instance. + +## Writing your own MRF Policy + +As discussed above, the MRF system is a modular system that supports pluggable policies. This means that an admin may write a custom MRF policy in Elixir or any other language that runs on the Erlang VM, by specifying the module name in the `rewrite_policy` config setting. + +For example, here is a sample policy module which rewrites all messages to "new message content": + +```!elixir +# This is a sample MRF policy which rewrites all Notes to have "new message +# content." 
+defmodule Site.RewritePolicy do + @behaviour Pleroma.Web.ActivityPub.MRF + + # Catch messages which contain Note objects with actual data to filter. + # Capture the object as `object`, the message content as `content` and the + # message itself as `message`. + @impl true + def filter(%{"type" => "Create", "object" => %{"type" => "Note", "content" => content} = object} = message) + when is_binary(content) do + # Subject / CW is stored as summary instead of `name` like other AS2 objects + # because of Mastodon doing it that way. + summary = object["summary"] + + # Message edits go here. + content = "new message content" + + # Assemble the mutated object. + object = + object + |> Map.put("content", content) + |> Map.put("summary", summary) + + # Assemble the mutated message. + message = Map.put(message, "object", object) + {:ok, message} + end + + # Let all other messages through without modifying them. + @impl true + def filter(message), do: {:ok, message} +end +``` + +If you save this file as `lib/site/mrf/rewrite_policy.ex`, it will be included when you next rebuild Pleroma. You can enable it in the configuration like so: + +``` +config :pleroma, :instance, + rewrite_policy: [ + Pleroma.Web.ActivityPub.MRF.SimplePolicy, + Site.RewritePolicy + ] +``` + +Please note that the Pleroma developers consider custom MRF policy modules to fall under the purview of the AGPL. As such, you are obligated to release the sources to your custom MRF policy modules upon request. \ No newline at end of file diff --git a/docs/static_dir.md b/docs/static_dir.md new file mode 100644 index 000000000..0cc52b99a --- /dev/null +++ b/docs/static_dir.md @@ -0,0 +1,20 @@ +# Static Directory + +Static frontend files are shipped in `priv/static/` and tracked by version control in this repository. If you want to overwrite or update these without the possibility of merge conflicts, you can write your custom versions to `instance/static/`. 
+ +``` +config :pleroma, :instance, + static_dir: "instance/static/" +``` + +You can overwrite this value in your configuration to use a different static instance directory. + +## robots.txt + +By default, the `robots.txt` that ships in `priv/static/` is permissive. It allows well-behaved search engines to index all of your instance's URIs. + +If you want to generate a restrictive `robots.txt`, you can run the following mix task. The generated `robots.txt` will be written in your instance static directory. + +``` +mix pleroma.robots_txt disallow_all +``` diff --git a/lib/mix/tasks/pleroma/robotstxt.ex b/lib/mix/tasks/pleroma/robotstxt.ex new file mode 100644 index 000000000..2128e1cd6 --- /dev/null +++ b/lib/mix/tasks/pleroma/robotstxt.ex @@ -0,0 +1,32 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2019 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Mix.Tasks.Pleroma.RobotsTxt do + use Mix.Task + + @shortdoc "Generate robots.txt" + @moduledoc """ + Generates robots.txt + + ## Overwrite robots.txt to disallow all + + mix pleroma.robots_txt disallow_all + + This will write a robots.txt that will hide all paths on your instance + from search engines and other robots that obey robots.txt + + """ + def run(["disallow_all"]) do + static_dir = Pleroma.Config.get([:instance, :static_dir], "instance/static/") + + if !File.exists?(static_dir) do + File.mkdir_p!(static_dir) + end + + robots_txt_path = Path.join(static_dir, "robots.txt") + robots_txt_content = "User-Agent: *\nDisallow: /\n" + + File.write!(robots_txt_path, robots_txt_content, [:write]) + end +end diff --git a/lib/pleroma/plugs/instance_static.ex b/lib/pleroma/plugs/instance_static.ex index 41125921a..a64f1ea80 100644 --- a/lib/pleroma/plugs/instance_static.ex +++ b/lib/pleroma/plugs/instance_static.ex @@ -21,7 +21,8 @@ def file_path(path) do end end - @only ~w(index.html static emoji packs sounds images instance favicon.png sw.js sw-pleroma.js) + @only 
~w(index.html robots.txt static emoji packs sounds images instance favicon.png sw.js + sw-pleroma.js) def init(opts) do opts diff --git a/lib/pleroma/web/endpoint.ex b/lib/pleroma/web/endpoint.ex index 697b1bc3a..fa2d1cbe7 100644 --- a/lib/pleroma/web/endpoint.ex +++ b/lib/pleroma/web/endpoint.ex @@ -25,7 +25,7 @@ defmodule Pleroma.Web.Endpoint do at: "/", from: :pleroma, only: - ~w(index.html static finmoji emoji packs sounds images instance sw.js sw-pleroma.js favicon.png schemas doc) + ~w(index.html robots.txt static finmoji emoji packs sounds images instance sw.js sw-pleroma.js favicon.png schemas doc) # credo:disable-for-previous-line Credo.Check.Readability.MaxLineLength ) diff --git a/priv/static/robots.txt b/priv/static/robots.txt new file mode 100644 index 000000000..25781b7d7 --- /dev/null +++ b/priv/static/robots.txt @@ -0,0 +1,2 @@ +User-Agent: * +Disallow: