From ec1452fd1cdb9cb1db9b8bad872916d3213489e2 Mon Sep 17 00:00:00 2001 From: href Date: Thu, 14 May 2020 21:36:31 +0200 Subject: [PATCH] Pleroma.MIME: use gen_magic --- .gitlab-ci.yml | 2 + CHANGELOG.md | 1 + config/config.exs | 2 + config/description.exs | 14 +++++ docs/installation/alpine_linux_en.md | 1 + docs/installation/arch_linux_en.md | 1 + docs/installation/debian_based_en.md | 1 + docs/installation/gentoo_en.md | 1 + docs/installation/otp_en.md | 5 +- lib/pleroma/application.ex | 1 + lib/pleroma/mime.ex | 84 ++++++++-------------------- mix.exs | 1 + mix.lock | 2 + 13 files changed, 52 insertions(+), 64 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index aad28a2d8..685106969 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -24,6 +24,8 @@ stages: before_script: - mix local.hex --force - mix local.rebar --force + - apt-get -qq update + - apt-get install -y libmagic-dev build: stage: build diff --git a/CHANGELOG.md b/CHANGELOG.md index d2629bf84..8925f31f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Changed - MFR policy to set global expiration for all local Create activities - OGP rich media parser merged with TwitterCard +- **Breaking** Requires `libmagic` (or `file`) to guess file types.
API Changes - **Breaking:** Emoji API: changed methods and renamed routes. diff --git a/config/config.exs b/config/config.exs index 6a7bb9e06..65aa8643e 100644 --- a/config/config.exs +++ b/config/config.exs @@ -678,6 +678,8 @@ timeout: 300_000 ] +config :pleroma, :gen_magic_pool, size: 2 + config :pleroma, :restrict_unauthenticated, timelines: %{local: false, federated: false}, profiles: %{local: false, remote: false}, diff --git a/config/description.exs b/config/description.exs index b21d7840c..a318afbe6 100644 --- a/config/description.exs +++ b/config/description.exs @@ -3325,5 +3325,19 @@ suggestions: [false] } ] + }, + %{ + group: :pleroma, + key: :gen_magic_pool, + type: :group, + description: "GenMagic/libmagic configuration", + children: [ + %{ + key: :size, + type: :integer, + description: "Number of gen_magic workers to start.", + suggestions: [2] + } + ] } ] diff --git a/docs/installation/alpine_linux_en.md b/docs/installation/alpine_linux_en.md index 2a9b8f6ff..1d503ff9f 100644 --- a/docs/installation/alpine_linux_en.md +++ b/docs/installation/alpine_linux_en.md @@ -13,6 +13,7 @@ It assumes that you have administrative rights, either as root or a user with [s * `erlang-parsetools` * `erlang-xmerl` * `git` +* `file-dev` * Development Tools #### Optional packages used in this guide diff --git a/docs/installation/arch_linux_en.md b/docs/installation/arch_linux_en.md index 8370986ad..428ed6bd7 100644 --- a/docs/installation/arch_linux_en.md +++ b/docs/installation/arch_linux_en.md @@ -9,6 +9,7 @@ This guide will assume that you have administrative rights, either as root or a * `elixir` * `git` * `base-devel` +* `file` #### Optional packages used in this guide diff --git a/docs/installation/debian_based_en.md b/docs/installation/debian_based_en.md index 2c20d521a..21e89d1a0 100644 --- a/docs/installation/debian_based_en.md +++ b/docs/installation/debian_based_en.md @@ -10,6 +10,7 @@ This guide will assume you are on Debian Stretch. 
This guide should also work wi * `elixir` (1.8+, Follow the guide to install from the Erlang Solutions repo or use [asdf](https://github.com/asdf-vm/asdf) as the pleroma user) * `erlang-dev` * `erlang-nox` +* `libmagic-dev` * `git` * `build-essential` diff --git a/docs/installation/gentoo_en.md b/docs/installation/gentoo_en.md index 1e61373cc..e936469d0 100644 --- a/docs/installation/gentoo_en.md +++ b/docs/installation/gentoo_en.md @@ -28,6 +28,7 @@ Gentoo quite pointedly does not come with a cron daemon installed, and as such i * `dev-db/postgresql` * `dev-lang/elixir` * `dev-vcs/git` +* `sys-apps/file` #### Optional ebuilds used in this guide diff --git a/docs/installation/otp_en.md b/docs/installation/otp_en.md index 86135cd20..0cad29bc8 100644 --- a/docs/installation/otp_en.md +++ b/docs/installation/otp_en.md @@ -27,15 +27,16 @@ Other than things bundled in the OTP release Pleroma depends on: * PostgreSQL (also utilizes extensions in postgresql-contrib) * nginx (could be swapped with another reverse proxy but this guide covers only it) * certbot (for Let's Encrypt certificates, could be swapped with another ACME client, but this guide covers only it) +* libmagic/file ```sh tab="Alpine" echo "http://nl.alpinelinux.org/alpine/latest-stable/community" >> /etc/apk/repositories apk update -apk add curl unzip ncurses postgresql postgresql-contrib nginx certbot +apk add curl unzip ncurses postgresql postgresql-contrib nginx certbot libmagic ``` ```sh tab="Debian/Ubuntu" -apt install curl unzip libncurses5 postgresql postgresql-contrib nginx certbot +apt install curl unzip libncurses5 postgresql postgresql-contrib nginx certbot libmagic-dev ``` ## Setup diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex index 9d3d92b38..c74255629 100644 --- a/lib/pleroma/application.ex +++ b/lib/pleroma/application.ex @@ -80,6 +80,7 @@ def start(_type, _args) do [ Pleroma.Stats, Pleroma.JobQueueMonitor, + Pleroma.MIME, {Oban, Config.get(Oban)} ] ++ task_children(@env) 
++ diff --git a/lib/pleroma/mime.ex b/lib/pleroma/mime.ex index 6ee055f50..3b406630e 100644 --- a/lib/pleroma/mime.ex +++ b/lib/pleroma/mime.ex @@ -6,8 +6,21 @@ defmodule Pleroma.MIME do @moduledoc """ Returns the mime-type of a binary and optionally a normalized file-name. """ - @default "application/octet-stream" @read_bytes 35 + @pool __MODULE__.GenMagicPool + + def child_spec(_) do + pool_size = Pleroma.Config.get!([:gen_magic_pool, :size]) + name = @pool + + %{ + id: __MODULE__, + start: {GenMagic.Pool, :start_link, [[name: name, pool_size: pool_size]]}, + type: :worker, + restart: :permanent, + shutdown: 500 + } + end @spec file_mime_type(String.t(), String.t()) :: {:ok, content_type :: String.t(), filename :: String.t()} | {:error, any()} | :error @@ -20,9 +33,10 @@ def file_mime_type(path, filename) do @spec file_mime_type(String.t()) :: {:ok, String.t()} | {:error, any()} | :error def file_mime_type(filename) do - File.open(filename, [:read], fn f -> - check_mime_type(IO.binread(f, @read_bytes)) - end) + case GenMagic.Pool.perform(@pool, filename) do + {:ok, %GenMagic.Result{mime_type: content_type}} -> {:ok, content_type} + error -> error + end end def bin_mime_type(binary, filename) do @@ -34,13 +48,14 @@ def bin_mime_type(binary, filename) do @spec bin_mime_type(binary()) :: {:ok, String.t()} | :error def bin_mime_type(<<head::binary-size(@read_bytes), _::binary>>) do - {:ok, check_mime_type(head)} + case GenMagic.Pool.perform(@pool, {:bytes, head}) do + {:ok, %GenMagic.Result{mime_type: content_type}} -> {:ok, content_type} + error -> error + end end def bin_mime_type(_), do: :error - def mime_type(<<_::binary>>), do: {:ok, @default} - defp fix_extension(filename, content_type) do parts = String.split(filename, ".") @@ -62,59 +77,4 @@ defp fix_extension(filename, content_type) do Enum.join([new_filename, String.split(content_type, "/") |> List.last()], ".") end end - - defp check_mime_type(<<0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, _::binary>>) do - "image/png" - end - - defp 
check_mime_type(<<0x47, 0x49, 0x46, 0x38, _, 0x61, _::binary>>) do - "image/gif" - end - - defp check_mime_type(<<0xFF, 0xD8, 0xFF, _::binary>>) do - "image/jpeg" - end - - defp check_mime_type(<<0x1A, 0x45, 0xDF, 0xA3, _::binary>>) do - "video/webm" - end - - defp check_mime_type(<<0x00, 0x00, 0x00, _, 0x66, 0x74, 0x79, 0x70, _::binary>>) do - "video/mp4" - end - - defp check_mime_type(<<0x49, 0x44, 0x33, _::binary>>) do - "audio/mpeg" - end - - defp check_mime_type(<<255, 251, _, 68, 0, 0, 0, 0, _::binary>>) do - "audio/mpeg" - end - - defp check_mime_type( - <<0x4F, 0x67, 0x67, 0x53, 0x00, 0x02, 0x00, 0x00, _::size(160), 0x80, 0x74, 0x68, 0x65, - 0x6F, 0x72, 0x61, _::binary>> - ) do - "video/ogg" - end - - defp check_mime_type(<<0x4F, 0x67, 0x67, 0x53, 0x00, 0x02, 0x00, 0x00, _::binary>>) do - "audio/ogg" - end - - defp check_mime_type(<<"RIFF", _::binary-size(4), "WAVE", _::binary>>) do - "audio/wav" - end - - defp check_mime_type(<<"RIFF", _::binary-size(4), "WEBP", _::binary>>) do - "image/webp" - end - - defp check_mime_type(<<"RIFF", _::binary-size(4), "AVI.", _::binary>>) do - "video/avi" - end - - defp check_mime_type(_) do - @default - end end diff --git a/mix.exs b/mix.exs index 03b060bc0..2d7e16e94 100644 --- a/mix.exs +++ b/mix.exs @@ -197,6 +197,7 @@ defp deps do ref: "e0f16822d578866e186a0974d65ad58cddc1e2ab"}, {:mox, "~> 0.5", only: :test}, {:restarter, path: "./restarter"}, + {:gen_magic, git: "https://github.com/hrefhref/gen_magic", branch: "develop"}, {:open_api_spex, git: "https://git.pleroma.social/pleroma/elixir-libraries/open_api_spex.git", ref: "f296ac0924ba3cf79c7a588c4c252889df4c2edd"} diff --git a/mix.lock b/mix.lock index 5383c2c6e..7b7f3c934 100644 --- a/mix.lock +++ b/mix.lock @@ -45,6 +45,7 @@ "fast_sanitize": {:hex, :fast_sanitize, "0.1.7", "2a7cd8734c88a2de6de55022104f8a3b87f1fdbe8bbf131d9049764b53d50d0d", [:mix], [{:fast_html, "~> 1.0", [hex: :fast_html, repo: "hexpm", optional: false]}, {:plug, "~> 1.8", [hex: :plug, repo: 
"hexpm", optional: false]}], "hexpm", "f39fe8ea08fbac17487c30bf09b7d9f3e12472e51fb07a88ffeb8fd17da8ab67"}, "flake_id": {:hex, :flake_id, "0.1.0", "7716b086d2e405d09b647121a166498a0d93d1a623bead243e1f74216079ccb3", [:mix], [{:base62, "~> 1.2", [hex: :base62, repo: "hexpm", optional: false]}, {:ecto, ">= 2.0.0", [hex: :ecto, repo: "hexpm", optional: true]}], "hexpm", "31fc8090fde1acd267c07c36ea7365b8604055f897d3a53dd967658c691bd827"}, "floki": {:hex, :floki, "0.25.0", "b1c9ddf5f32a3a90b43b76f3386ca054325dc2478af020e87b5111c19f2284ac", [:mix], [{:html_entities, "~> 0.5.0", [hex: :html_entities, repo: "hexpm", optional: false]}], "hexpm", "631f4e627c46d5ecd347df5a2accdaf0621c77c3693c5b75a8ad58e84c61f242"}, + "gen_magic": {:git, "https://github.com/hrefhref/gen_magic", "eafdc2ea156433ccd87d1d99eaf1be758064a1db", [branch: "develop"]}, "gen_smtp": {:hex, :gen_smtp, "0.15.0", "9f51960c17769b26833b50df0b96123605a8024738b62db747fece14eb2fbfcc", [:rebar3], [], "hexpm", "29bd14a88030980849c7ed2447b8db6d6c9278a28b11a44cafe41b791205440f"}, "gen_stage": {:hex, :gen_stage, "0.14.3", "d0c66f1c87faa301c1a85a809a3ee9097a4264b2edf7644bf5c123237ef732bf", [:mix], [], "hexpm"}, "gen_state_machine": {:hex, :gen_state_machine, "2.0.5", "9ac15ec6e66acac994cc442dcc2c6f9796cf380ec4b08267223014be1c728a95", [:mix], [], "hexpm"}, @@ -74,6 +75,7 @@ "mox": {:hex, :mox, "0.5.1", "f86bb36026aac1e6f924a4b6d024b05e9adbed5c63e8daa069bd66fb3292165b", [:mix], [], "hexpm", "052346cf322311c49a0f22789f3698eea030eec09b8c47367f0686ef2634ae14"}, "myhtmlex": {:git, "https://git.pleroma.social/pleroma/myhtmlex.git", "ad0097e2f61d4953bfef20fb6abddf23b87111e6", [ref: "ad0097e2f61d4953bfef20fb6abddf23b87111e6", submodules: true]}, "nimble_parsec": {:hex, :nimble_parsec, "0.5.3", "def21c10a9ed70ce22754fdeea0810dafd53c2db3219a0cd54cf5526377af1c6", [:mix], [], "hexpm", "589b5af56f4afca65217a1f3eb3fee7e79b09c40c742fddc1c312b3ac0b3399f"}, + "nimble_pool": {:hex, :nimble_pool, "0.1.0", 
"ffa9d5be27eee2b00b0c634eb649aa27f97b39186fec3c493716c2a33e784ec6", [:mix], [], "hexpm", "343a1eaa620ddcf3430a83f39f2af499fe2370390d4f785cd475b4df5acaf3f9"}, "nodex": {:git, "https://git.pleroma.social/pleroma/nodex", "cb6730f943cfc6aad674c92161be23a8411f15d1", [ref: "cb6730f943cfc6aad674c92161be23a8411f15d1"]}, "oban": {:hex, :oban, "1.2.0", "7cca94d341be43d220571e28f69131c4afc21095b25257397f50973d3fc59b07", [:mix], [{:ecto_sql, "~> 3.1", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:postgrex, "~> 0.14", [hex: :postgrex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "ba5f8b3f7d76967b3e23cf8014f6a13e4ccb33431e4808f036709a7f822362ee"}, "open_api_spex": {:git, "https://git.pleroma.social/pleroma/elixir-libraries/open_api_spex.git", "f296ac0924ba3cf79c7a588c4c252889df4c2edd", [ref: "f296ac0924ba3cf79c7a588c4c252889df4c2edd"]},