Merge branch 'rich_media_parsers_configurable' into 'develop'

parsers configurable

See merge request 
This commit is contained in:
kaniini 2019-07-14 09:53:42 +00:00
commit 9f211838ec
4 changed files with 13 additions and 8 deletions
CHANGELOG.md
config
docs
lib/pleroma/web/rich_media

View File

@ -39,6 +39,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
### Changed ### Changed
- Configuration: Filter.AnonymizeFilename added ability to retain file extension with custom text - Configuration: Filter.AnonymizeFilename added ability to retain file extension with custom text
- Admin API: changed json structure for saving config settings. - Admin API: changed json structure for saving config settings.
- RichMedia: parsers and their order are configured in `rich_media` config.
## [1.0.0] - 2019-06-29 ## [1.0.0] - 2019-06-29
### Security ### Security

View File

@ -339,7 +339,12 @@
config :pleroma, :rich_media, config :pleroma, :rich_media,
enabled: true, enabled: true,
ignore_hosts: [], ignore_hosts: [],
ignore_tld: ["local", "localdomain", "lan"] ignore_tld: ["local", "localdomain", "lan"],
parsers: [
Pleroma.Web.RichMedia.Parsers.TwitterCard,
Pleroma.Web.RichMedia.Parsers.OGP,
Pleroma.Web.RichMedia.Parsers.OEmbed
]
config :pleroma, :media_proxy, config :pleroma, :media_proxy,
enabled: false, enabled: false,

View File

@ -425,6 +425,7 @@ This config contains two queues: `federator_incoming` and `federator_outgoing`.
* `enabled`: if enabled the instance will parse metadata from attached links to generate link previews * `enabled`: if enabled the instance will parse metadata from attached links to generate link previews
* `ignore_hosts`: list of hosts which will be ignored by the metadata parser. For example `["accounts.google.com", "xss.website"]`, defaults to `[]`. * `ignore_hosts`: list of hosts which will be ignored by the metadata parser. For example `["accounts.google.com", "xss.website"]`, defaults to `[]`.
* `ignore_tld`: list of TLDs (top-level domains) which will be ignored by the metadata parser. Defaults to `["local", "localdomain", "lan"]`. * `ignore_tld`: list of TLDs (top-level domains) which will be ignored by the metadata parser. Defaults to `["local", "localdomain", "lan"]`.
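* `parsers`: ordered list of Rich Media parser modules. Parsers are tried in the listed order and the result of the first one that succeeds is used.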
## :fetch_initial_posts ## :fetch_initial_posts
* `enabled`: if enabled, when a new user is federated with, fetch some of their latest posts * `enabled`: if enabled, when a new user is federated with, fetch some of their latest posts

View File

@ -3,12 +3,6 @@
# SPDX-License-Identifier: AGPL-3.0-only # SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parser do defmodule Pleroma.Web.RichMedia.Parser do
@parsers [
Pleroma.Web.RichMedia.Parsers.OGP,
Pleroma.Web.RichMedia.Parsers.TwitterCard,
Pleroma.Web.RichMedia.Parsers.OEmbed
]
@hackney_options [ @hackney_options [
pool: :media, pool: :media,
recv_timeout: 2_000, recv_timeout: 2_000,
@ -16,6 +10,10 @@ defmodule Pleroma.Web.RichMedia.Parser do
with_body: true with_body: true
] ]
# Reads the list of Rich Media parser modules from the `:rich_media`
# configuration each time it is called.
defp parsers, do: Pleroma.Config.get([:rich_media, :parsers])
def parse(nil), do: {:error, "No URL provided"} def parse(nil), do: {:error, "No URL provided"}
if Pleroma.Config.get(:env) == :test do if Pleroma.Config.get(:env) == :test do
@ -48,7 +46,7 @@ defp parse_url(url) do
end end
defp maybe_parse(html) do defp maybe_parse(html) do
Enum.reduce_while(@parsers, %{}, fn parser, acc -> Enum.reduce_while(parsers(), %{}, fn parser, acc ->
case parser.parse(html, acc) do case parser.parse(html, acc) do
{:ok, data} -> {:halt, data} {:ok, data} -> {:halt, data}
{:error, _msg} -> {:cont, acc} {:error, _msg} -> {:cont, acc}