From 580bca120ac1d35d59878be2d4c1b2f0fa0be2f2 Mon Sep 17 00:00:00 2001 From: Christine Dodrill Date: Wed, 2 Dec 2020 14:11:19 -0500 Subject: [PATCH] backend: support bridgy webmentions specially Signed-off-by: Christine Dodrill --- backend/Cargo.lock | 629 ++++++++++++++++-- backend/Cargo.toml | 2 + .../down.sql | 0 .../up.sql | 2 + backend/src/api/mod.rs | 6 + backend/src/api/posse.rs | 1 + backend/src/api/webmention.rs | 119 +++- backend/src/bin/bridgy_migrate.rs | 60 ++ backend/src/bin/import_blogposts.rs | 6 - backend/src/lib.rs | 8 +- backend/src/main.rs | 1 + backend/src/models.rs | 9 +- backend/src/schema.rs | 1 + backend/src/web/bridgy.rs | 36 + backend/src/web/mod.rs | 1 + 15 files changed, 827 insertions(+), 54 deletions(-) create mode 100644 backend/migrations/2020-12-02-171417_webmention-summary/down.sql create mode 100644 backend/migrations/2020-12-02-171417_webmention-summary/up.sql create mode 100644 backend/src/bin/bridgy_migrate.rs create mode 100644 backend/src/web/bridgy.rs diff --git a/backend/Cargo.lock b/backend/Cargo.lock index efb5510..c4ae0bb 100644 --- a/backend/Cargo.lock +++ b/backend/Cargo.lock @@ -123,9 +123,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1d7169690c4f56343dcd821ab834972a22570a2662a19a84fd7775d5e1c3881" dependencies = [ "askama_shared", - "proc-macro2 1.0.21", + "proc-macro2 1.0.24", "quote 1.0.7", - "syn 1.0.40", + "syn 1.0.53", ] [[package]] @@ -155,10 +155,10 @@ dependencies = [ "nom", "num-traits", "percent-encoding 2.1.0", - "proc-macro2 1.0.21", + "proc-macro2 1.0.24", "quote 1.0.7", "serde", - "syn 1.0.40", + "syn 1.0.53", "toml 0.5.7", ] @@ -173,6 +173,12 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "autocfg" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2" + [[package]] name = "autocfg" version = "1.0.1" @@ -326,6 +332,15 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7477065d45a8fe57167bf3cf8bcd3729b54cfcb81cca49bda2d038ea89ae82ca" +[[package]] +name = "cloudabi" +version = "0.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" +dependencies = [ + "bitflags", +] + [[package]] name = "cloudabi" version = "0.1.0" @@ -429,6 +444,44 @@ dependencies = [ "subtle 1.0.0", ] +[[package]] +name = "cssparser" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "matches", + "phf 0.8.0", + "proc-macro2 1.0.24", + "quote 1.0.7", + "smallvec", + "syn 1.0.53", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e" +dependencies = [ + "quote 1.0.7", + "syn 1.0.53", +] + +[[package]] +name = "derive_more" +version = "0.99.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41cb0e6161ad61ed084a36ba71fbba9e3ac5aee3606fb607fe08da6acbcf3d8c" +dependencies = [ + "proc-macro2 1.0.24", + "quote 1.0.7", + "syn 1.0.53", +] + [[package]] name = "devise" version = "0.2.0" @@ -480,9 +533,9 @@ version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45f5098f628d02a7a0f68ddba586fb61e80edec3bdc1be3b921f4ceec60858d3" dependencies = [ - "proc-macro2 1.0.21", + "proc-macro2 1.0.24", "quote 1.0.7", - "syn 1.0.40", + "syn 1.0.53", ] [[package]] @@ -516,6 +569,27 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "dtoa" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "134951f4028bdadb9b84baf4232681efbf277da25144b9b0ad65df75946c422b" + +[[package]] +name = "dtoa-short" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59020b8513b76630c49d918c33db9f4c91638e7d3404a28084083b87e33f76f2" +dependencies = [ + "dtoa", +] + +[[package]] +name = "ego-tree" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591" + [[package]] name = "encoding" version = "0.2.33" @@ -615,9 +689,9 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4" dependencies = [ - "proc-macro2 1.0.21", + "proc-macro2 1.0.24", "quote 1.0.7", - "syn 1.0.40", + "syn 1.0.53", "synstructure", ] @@ -711,12 +785,31 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" +[[package]] +name = "futf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures-io" version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de27142b013a8e869c14957e6d2edeef89e97c289e69d042ee3a49acd8b51789" +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder 1.3.4", +] + [[package]] name = "gcc" version = "0.3.55" @@ -732,6 +825,15 @@ dependencies = [ "typenum", ] +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.1.15" @@ -805,6 +907,34 @@ dependencies = [ "digest", ] +[[package]] +name = "html5ever" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "025483b0a1e4577bb28578318c886ee5f817dda6eb62473269349044406644cb" +dependencies = [ + "log 0.4.11", + "mac", + "markup5ever 0.9.0", + "proc-macro2 1.0.24", + "quote 1.0.7", + "syn 1.0.53", +] + +[[package]] +name = "html5ever" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b" +dependencies = [ + "log 0.4.11", + "mac", + "markup5ever 0.10.0", + "proc-macro2 1.0.24", + "quote 1.0.7", + "syn 1.0.53", +] + [[package]] name = "httparse" version = "1.3.4" @@ -870,7 +1000,7 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55e2e4c765aa53a0424761bf9f41aa7a6ac1efa87238f59560640e27fca028f2" dependencies = [ - "autocfg", + "autocfg 1.0.1", "hashbrown", ] @@ -1024,6 +1154,46 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65381d9d47506b8592b97c4efd936afcf673b09b059f2bef39c7211ee78b9d03" +dependencies = [ + "log 0.4.11", + "phf 0.7.24", + "phf_codegen 0.7.24", + "serde", + "serde_derive", + "serde_json", + "string_cache 0.7.5", + "string_cache_codegen 0.4.4", + "tendril", +] + +[[package]] +name = "markup5ever" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae38d669396ca9b707bfc3db254bc382ddb94f57cc5c235f34623a669a01dab" +dependencies = [ + "log 0.4.11", + "phf 0.8.0", + "phf_codegen 0.8.0", + "serde", + "serde_derive", + "serde_json", + "string_cache 0.8.1", + "string_cache_codegen 0.5.1", + "tendril", +] + [[package]] name = "matchers" version = "0.0.1" @@ -1062,12 +1232,14 @@ dependencies = [ "paseto", "prometheus", "rand 0.7.3", + "readability-fork", "ring", "rocket", "rocket_contrib", "rocket_cors", "rocket_prometheus", "rusty_ulid", + "scraper", "serde", "serde_json", "thiserror", @@ -1095,9 +1267,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9753f12909fd8d923f75ae5c3258cae1ed3c8ec052e1b38c93c21a6d157f789c" dependencies = [ "migrations_internals", - "proc-macro2 1.0.21", + "proc-macro2 1.0.24", "quote 1.0.7", - "syn 1.0.40", + "syn 1.0.53", ] [[package]] @@ -1188,6 +1360,18 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" + +[[package]] +name = "nodrop" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" + [[package]] name = "nom" version = "5.1.2" @@ -1222,7 +1406,7 @@ version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d59457e662d541ba17869cf51cf177c0b5f0cbf476c66bdc90bf1edac4f875b" dependencies = [ - "autocfg", + "autocfg 1.0.1", "num-traits", ] @@ -1232,7 +1416,7 @@ version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac267bcc07f48ee5f8935ab0d24f316fb722d7a1292e2913f0cc196b29ffd611" dependencies = [ - "autocfg", + "autocfg 1.0.1", ] [[package]] @@ -1283,7 +1467,7 @@ version = "0.9.58" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a842db4709b604f0fe5d1170ae3565899be2ad3d9cbc72dedc789ac0511f78de" dependencies = [ - "autocfg", + "autocfg 1.0.1", "cc", "libc", "pkg-config", @@ -1314,7 +1498,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c361aa727dd08437f2f1447be8b59a33b0edd15e0fcee698f935613d9efbca9b" dependencies = [ "cfg-if", - "cloudabi", + "cloudabi 0.1.0", "instant", "libc", "redox_syscall", @@ -1372,6 +1556,98 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" +[[package]] +name = "phf" +version = "0.7.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3da44b85f8e8dfaec21adae67f95d93244b2ecf6ad2a692320598dcc8e6dd18" +dependencies = [ + "phf_shared 0.7.24", +] + +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" +dependencies = [ + "phf_macros", + "phf_shared 0.8.0", + "proc-macro-hack", +] + +[[package]] +name = "phf_codegen" +version = "0.7.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b03e85129e324ad4166b06b2c7491ae27fe3ec353af72e72cd1654c7225d517e" +dependencies = [ + "phf_generator 0.7.24", + "phf_shared 0.7.24", +] + +[[package]] +name = "phf_codegen" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", +] + +[[package]] +name = "phf_generator" +version = "0.7.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09364cc93c159b8b06b1f4dd8a4398984503483891b0c26b867cf431fb132662" +dependencies = [ + "phf_shared 0.7.24", + "rand 0.6.5", +] + +[[package]] +name = "phf_generator" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +dependencies = [ + "phf_shared 0.8.0", + "rand 0.7.3", +] + +[[package]] +name = "phf_macros" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", + "proc-macro-hack", + "proc-macro2 1.0.24", + "quote 1.0.7", + "syn 1.0.53", +] + +[[package]] +name = "phf_shared" +version = "0.7.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234f71a15de2288bcb7e3b6515828d22af7ec8598ee6d24c3b526fa0a80b67a0" +dependencies = [ + "siphasher 0.2.3", +] + +[[package]] +name = "phf_shared" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" +dependencies = [ + "siphasher 0.3.3", +] + [[package]] name = "pkg-config" version = "0.3.18" @@ -1394,6 +1670,12 @@ version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c36fa947111f5c62a733b652544dd0016a43ce89619538a8ef92724a6f501a20" +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "proc-macro-hack" version = "0.5.18" @@ -1411,9 +1693,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.21" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36e28516df94f3dd551a587da5357459d9b36d945a7c37c3557928c1c2ff2a2c" +checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" dependencies = [ "unicode-xid 0.2.1", ] @@ -1485,7 +1767,7 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" dependencies = [ - "proc-macro2 1.0.21", + "proc-macro2 1.0.24", ] [[package]] @@ -1522,6 +1804,25 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "rand" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca" +dependencies = [ + "autocfg 0.1.7", + "libc", + "rand_chacha 0.1.1", + "rand_core 0.4.2", + "rand_hc 0.1.0", + "rand_isaac", + "rand_jitter", + "rand_os", + "rand_pcg 0.1.2", + "rand_xorshift", + "winapi 0.3.9", +] + [[package]] name = "rand" version = "0.7.3" @@ -1530,9 +1831,20 @@ checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" dependencies = [ "getrandom", "libc", - "rand_chacha", + "rand_chacha 0.2.2", "rand_core 0.5.1", - "rand_hc", + "rand_hc 0.2.0", + "rand_pcg 0.2.1", +] + +[[package]] +name = "rand_chacha" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef" +dependencies = [ + "autocfg 0.1.7", + "rand_core 0.3.1", ] [[package]] @@ -1569,6 +1881,15 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rand_hc" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4" +dependencies = [ + "rand_core 0.3.1", +] + [[package]] name = "rand_hc" version = "0.2.0" @@ -1578,6 +1899,68 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "rand_isaac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08" +dependencies = [ + "rand_core 0.3.1", +] + +[[package]] +name = "rand_jitter" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1166d5c91dc97b88d1decc3285bb0a99ed84b05cfd0bc2341bdf2d43fc41e39b" +dependencies = [ + "libc", + "rand_core 0.4.2", + "winapi 0.3.9", +] + +[[package]] +name = "rand_os" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071" +dependencies = [ + "cloudabi 0.0.3", + "fuchsia-cprng", + "libc", + "rand_core 0.4.2", + "rdrand", + "winapi 0.3.9", +] + +[[package]] +name = "rand_pcg" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44" +dependencies = [ + "autocfg 0.1.7", + "rand_core 0.4.2", +] + +[[package]] +name = "rand_pcg" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "rand_xorshift" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" +dependencies = [ + "rand_core 0.3.1", +] + [[package]] name = "rdrand" version = "0.4.0" @@ -1587,6 +1970,18 @@ dependencies = [ "rand_core 0.3.1", ] +[[package]] +name = "readability-fork" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ec2b0bd9b8bbab0eb27f1235e17e201ad31a2669bafe19e60924e11a113f42" +dependencies = [ + "html5ever 0.24.1", + "lazy_static", + "regex", + "url 2.1.1", +] + [[package]] name = "redox_syscall" version = "0.1.57" @@ -1845,6 +2240,22 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "scraper" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e02aa790c80c2e494130dec6a522033b6a23603ffc06360e9fe6c611ea2c12" +dependencies = [ + "cssparser", + "ego-tree", + "getopts", + "html5ever 0.25.1", + "matches", + "selectors", + "smallvec", + "tendril", +] + [[package]] name = "sct" version = "0.6.0" @@ -1855,6 +2266,26 @@ dependencies = [ "untrusted", ] +[[package]] +name = "selectors" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe" +dependencies = [ + "bitflags", + "cssparser", + "derive_more", + "fxhash", + "log 0.4.11", + "matches", + "phf 0.8.0", + "phf_codegen 0.8.0", + "precomputed-hash", + "servo_arc", + "smallvec", + "thin-slice", +] + [[package]] name = "semver" version = "0.9.0" @@ -1885,9 +2316,9 @@ version = "1.0.116" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f630a6370fd8e457873b4bd2ffdae75408bc291ba72be773772a4c2a065d9ae8" dependencies = [ - "proc-macro2 1.0.21", + "proc-macro2 1.0.24", "quote 1.0.7", - "syn 1.0.40", + "syn 1.0.53", ] [[package]] @@ -1913,6 +2344,16 @@ dependencies = [ "serde", ] +[[package]] +name = "servo_arc" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432" +dependencies = [ + "nodrop", + "stable_deref_trait", +] + [[package]] name = "sha1" version = "0.6.0" @@ -1940,6 +2381,18 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "siphasher" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac" + +[[package]] +name = "siphasher" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7" + [[package]] name = "slab" version = "0.4.2" @@ -1969,6 +2422,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "standback" version = "0.2.11" @@ -2004,11 +2463,11 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c87a60a40fccc84bef0652345bbbbbe20a605bf5d0ce81719fc476f5c03b50ef" dependencies = [ - "proc-macro2 1.0.21", + "proc-macro2 1.0.24", "quote 1.0.7", "serde", "serde_derive", - "syn 1.0.40", + "syn 1.0.53", ] [[package]] @@ -2018,13 +2477,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "58fa5ff6ad0d98d1ffa8cb115892b6e69d67799f6763e162a1c9db421dc22e11" dependencies = [ "base-x", - "proc-macro2 1.0.21", + "proc-macro2 1.0.24", "quote 1.0.7", "serde", "serde_derive", "serde_json", "sha1", - "syn 1.0.40", + "syn 1.0.53", ] [[package]] @@ -2033,6 +2492,65 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "213701ba3370744dcd1a12960caa4843b3d68b4d1c0a5d575e0d65b2ee9d16c0" +[[package]] +name = "string_cache" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89c058a82f9fd69b1becf8c274f412281038877c553182f1d02eb027045a2d67" +dependencies = [ + "lazy_static", + "new_debug_unreachable", + "phf_shared 0.7.24", + "precomputed-hash", + "serde", + "string_cache_codegen 0.4.4", + "string_cache_shared", +] + +[[package]] +name = "string_cache" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ddb1139b5353f96e429e1a5e19fbaf663bddedaa06d1dbd49f82e352601209a" +dependencies = [ + "lazy_static", + "new_debug_unreachable", + "phf_shared 0.8.0", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0f45ed1b65bf9a4bf2f7b7dc59212d1926e9eaf00fa998988e420fd124467c6" +dependencies = [ + "phf_generator 0.7.24", + "phf_shared 0.7.24", + "proc-macro2 1.0.24", + "quote 1.0.7", + "string_cache_shared", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f24c8e5e19d22a726626f1a5e16fe15b132dcf21d10177fa5a45ce7962996b97" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", + "proc-macro2 1.0.24", + "quote 1.0.7", +] + +[[package]] +name = "string_cache_shared" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1884d1bc09741d466d9b14e6d37ac89d6909cbcac41dd9ae982d4d063bbedfc" + [[package]] name = "subtle" version = "1.0.0" @@ -2058,11 +2576,11 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.40" +version = "1.0.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "963f7d3cc59b59b9325165add223142bbf1df27655d07789f109896d353d8350" +checksum = "8833e20724c24de12bbaba5ad230ea61c3eafb05b881c7c9d3cfe8638b187e68" dependencies = [ - "proc-macro2 1.0.21", + "proc-macro2 1.0.24", "quote 1.0.7", "unicode-xid 0.2.1", ] @@ -2073,12 +2591,29 @@ version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b834f2d66f734cb897113e34aaff2f1ab4719ca946f9a7358dba8f8064148701" dependencies = [ - "proc-macro2 1.0.21", + "proc-macro2 1.0.24", "quote 1.0.7", - "syn 1.0.40", + "syn 1.0.53", "unicode-xid 0.2.1", ] +[[package]] +name = "tendril" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707feda9f2582d5d680d733e38755547a3e8fb471e7ba11452ecfd9ce93a5d3b" +dependencies = [ + "futf", + "mac", + "utf-8", +] + +[[package]] +name = "thin-slice" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" + [[package]] name = "thiserror" version = "1.0.20" @@ -2094,9 +2629,9 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd80fc12f73063ac132ac92aceea36734f04a1d93c1240c6944e23a3b8841793" dependencies = [ - "proc-macro2 1.0.21", + "proc-macro2 1.0.24", "quote 1.0.7", - "syn 1.0.40", + "syn 1.0.53", ] [[package]] @@ -2151,10 +2686,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5c3be1edfad6027c69f5491cf4cb310d1a71ecd6af742788c6ff8bced86b8fa" dependencies = [ "proc-macro-hack", - "proc-macro2 1.0.21", + "proc-macro2 1.0.24", "quote 1.0.7", "standback", - "syn 1.0.40", + "syn 1.0.53", ] [[package]] @@ -2198,9 +2733,9 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80e0ccfc3378da0cce270c946b676a376943f5cd16aeba64568e7939806f4ada" dependencies = [ - "proc-macro2 1.0.21", + "proc-macro2 1.0.24", "quote 1.0.7", - "syn 1.0.40", + "syn 1.0.53", ] [[package]] @@ -2356,6 +2891,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-width" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" + [[package]] name = "unicode-xid" version = "0.1.0" @@ -2428,6 +2969,12 @@ dependencies = [ "percent-encoding 2.1.0", ] +[[package]] +name = "utf-8" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7" + [[package]] name = "vcpkg" version = "0.2.10" @@ -2488,9 +3035,9 @@ dependencies = [ "bumpalo", "lazy_static", "log 0.4.11", - "proc-macro2 1.0.21", + "proc-macro2 1.0.24", "quote 1.0.7", - "syn 1.0.40", + "syn 1.0.53", "wasm-bindgen-shared", ] @@ -2510,9 +3057,9 @@ version = "0.2.68" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f249f06ef7ee334cc3b8ff031bfc11ec99d00f34d86da7498396dc1e3b1498fe" dependencies = [ - "proc-macro2 1.0.21", + "proc-macro2 1.0.24", "quote 1.0.7", - "syn 1.0.40", + "syn 1.0.53", "wasm-bindgen-backend", "wasm-bindgen-shared", ] diff --git a/backend/Cargo.toml b/backend/Cargo.toml index ede9454..443d1bf 100644 --- a/backend/Cargo.toml +++ b/backend/Cargo.toml @@ -18,11 +18,13 @@ mime = "0.3.0" paseto = { version = "1.0", features = ["easy_tokens", "v2"] } prometheus = { version = "0.10", default-features = false, features = ["process"] } rand = "0" +readability-fork = { version = "0.2", default-features = false } ring = { version = "^0.16", features = ["std"] } rocket = "0.4" rocket_cors = "0.5" rocket_prometheus = "0.7.0" rusty_ulid = "0.10" +scraper = "0.12.0" serde_json = "^1" serde = { version = "1", features = ["derive"] } thiserror = "1" diff --git a/backend/migrations/2020-12-02-171417_webmention-summary/down.sql b/backend/migrations/2020-12-02-171417_webmention-summary/down.sql new file mode 100644 index 0000000..e69de29 diff --git a/backend/migrations/2020-12-02-171417_webmention-summary/up.sql b/backend/migrations/2020-12-02-171417_webmention-summary/up.sql new file mode 100644 index 0000000..c91ae62 --- /dev/null +++ b/backend/migrations/2020-12-02-171417_webmention-summary/up.sql @@ -0,0 +1,2 @@ +ALTER TABLE webmentions + ADD title TEXT; diff --git a/backend/src/api/mod.rs b/backend/src/api/mod.rs index 9f4209e..3688f09 100644 --- a/backend/src/api/mod.rs +++ b/backend/src/api/mod.rs @@ -90,6 +90,12 @@ pub enum Error { #[error("can't switch to the same fronter {0}")] SameFronter(String), + + #[error("target doesn't mention source")] + TargetDoesntMentionSource(String), + + #[error("readability error: {0}")] + Readability(#[from] readability_fork::error::Error), } pub type Result = std::result::Result; diff --git a/backend/src/api/posse.rs b/backend/src/api/posse.rs index 123645a..76e804a 100644 --- a/backend/src/api/posse.rs +++ b/backend/src/api/posse.rs @@ -22,6 +22,7 @@ pub struct Item { pub url: String, pub title: String, pub tags: Option>, + pub content_html: String, } impl Item { diff --git a/backend/src/api/webmention.rs b/backend/src/api/webmention.rs index 33efd19..93cc0e8 100644 --- a/backend/src/api/webmention.rs +++ b/backend/src/api/webmention.rs @@ -1,6 +1,11 @@ use super::{Error, Result}; -use crate::{models, paseto, schema, web::discord_webhook::Client as DiscordWebhook, MainDatabase}; +use crate::{ + models, paseto, schema, + web::{self, discord_webhook::Client as DiscordWebhook}, + MainDatabase, +}; use diesel::prelude::*; +use readability_fork::extractor::{self, Product}; use rocket::{ http::Status, request::Form, @@ -9,12 +14,14 @@ use rocket::{ }; use rocket_contrib::json::Json; use rusty_ulid::generate_ulid_string; +use serde::Serialize; use url::Url; -#[derive(FromForm, Debug)] +#[derive(FromForm, Debug, Serialize)] pub struct WebMention { source: String, target: String, + title: Option, } impl WebMention { @@ -48,6 +55,28 @@ impl WebMention { Ok(()) } + + fn extract(&self) -> Result { + let resp = ureq::get(&self.source) + .set("User-Agent", crate::APPLICATION_NAME) + .set("Mi-Mentioned-Url", &self.target) + .call(); + + if resp.ok() { + let body = resp + .into_string() + .map_err(|why| Error::Web(web::Error::FuturesIO(why)))?; + Ok(extractor::extract( + &mut body.as_bytes(), + &url::Url::parse(&self.source)?, + )?) + } else { + Err(match resp.synthetic_error() { + Some(why) => Error::Web(web::Error::UReq(why.to_string())), + None => Error::Web(web::Error::HttpStatus(resp.status())), + }) + } + } } impl Into for WebMention { @@ -56,6 +85,17 @@ impl Into for WebMention { id: generate_ulid_string(), source_url: self.source, target_url: self.target, + title: self.title, + } + } +} + +impl Into for models::WebMention { + fn into(self) -> WebMention { + WebMention { + source: self.source_url, + target: self.target_url, + title: self.title, } } } @@ -72,8 +112,24 @@ impl<'a> Responder<'a> for models::WebMention { } } +#[get("/webmention/for?")] +#[instrument(skip(conn), err)] +pub fn lookup_target(conn: MainDatabase, target: String) -> Result>> { + use schema::webmentions::dsl::*; + + Ok(Json( + webmentions + .filter(target_url.eq(target)) + .load::(&*conn) + .map_err(Error::Database)? + .into_iter() + .map(Into::into) + .collect::>(), + )) +} + #[post("/webmention/accept", data = "")] -#[instrument(skip(conn, mention, dw), err)] +#[instrument(skip(conn, dw), err)] pub fn accept( conn: MainDatabase, mention: Form, @@ -81,13 +137,26 @@ pub fn accept( ) -> Result { use schema::webmentions; - let mention = mention.into_inner(); + let mut mention = mention.into_inner(); mention.check()?; + mention + .extract() + .map_err(|why| { + error!( + "error extracting information from {}: {}", + mention.source, why + ); + + why + }) + .iter_mut() + .for_each(|info| mention.title = Some(info.title.clone())); info!( source = &mention.source[..], target = &mention.target[..], - "webmention received" + "webmention received: {:?}", + mention.title, ); let wm: models::WebMention = mention.into(); @@ -96,6 +165,8 @@ pub fn accept( .execute(&*conn) .map_err(Error::Database)?; + bridgy_expand(conn, wm.clone())?; + dw.send(format!( "<{}> mentioned <{}> ()", wm.source_url, wm.target_url, wm.id @@ -144,3 +215,41 @@ pub fn list( .map_err(Error::Database)?, )) } + +pub fn bridgy_expand(conn: MainDatabase, wm: models::WebMention) -> Result { + use crate::web::bridgy::parse; + use schema::webmentions::dsl::*; + + if !wm.source_url.contains("https://brid-gy.appspot.com") { + return Ok(()); + } + + if wm.source_url.contains("like/twitter") { + return Ok(()); + } + + let resp = ureq::get(&wm.source_url) + .set("User-Agent", crate::APPLICATION_NAME) + .set("Mi-Mentioned-Url", &wm.target_url) + .call(); + + if resp.ok() { + let body = resp.into_string().unwrap(); + let result = parse(&body).unwrap().unwrap(); + debug!("{:?}", result); + + diesel::update(webmentions.find(wm.id)) + .set(&models::UpdateWebMentionSource { + source_url: result.target, + }) + .execute(&*conn) + .map_err(Error::Database) + .unwrap(); + Ok(()) + } else { + Err(match resp.synthetic_error() { + Some(why) => Error::Web(web::Error::UReq(why.to_string())), + None => Error::Web(web::Error::HttpStatus(resp.status())), + }) + } +} diff --git a/backend/src/bin/bridgy_migrate.rs b/backend/src/bin/bridgy_migrate.rs new file mode 100644 index 0000000..22fb549 --- /dev/null +++ b/backend/src/bin/bridgy_migrate.rs @@ -0,0 +1,60 @@ +#[macro_use] +extern crate tracing; + +use color_eyre::eyre::Result; +use diesel::prelude::*; + +use mi::{web::bridgy::*, *}; + +fn main() -> Result<()> { + use schema::webmentions::{dsl::*, table}; + + color_eyre::install()?; + tracing_subscriber::fmt::init(); + + info!("{} bridgy migrator starting up", mi::APPLICATION_NAME); + + let conn = establish_connection(); + + table + .load::(&conn)? + .into_iter() + .for_each(|wm| { + if !wm.source_url.contains("https://brid-gy.appspot.com") { + return; + } + + if wm.source_url.contains("like/twitter") { + return; + } + + let resp = ureq::get(&wm.source_url) + .set("User-Agent", crate::APPLICATION_NAME) + .set("Mi-Mentioned-Url", &wm.target_url) + .call(); + + if resp.ok() { + let body = resp.into_string().unwrap(); + let result = parse(&body).unwrap().unwrap(); + info!("{:?}", result); + + diesel::update(webmentions.find(wm.id)) + .set(&models::UpdateWebMentionSource { + source_url: result.target, + }) + .execute(&conn) + .unwrap(); + } else { + error!( + "can't fetch {}: {}", + wm.source_url, + match resp.synthetic_error() { + Some(why) => web::Error::UReq(why.to_string()), + None => web::Error::HttpStatus(resp.status()), + } + ); + } + }); + + Ok(()) +} diff --git a/backend/src/bin/import_blogposts.rs b/backend/src/bin/import_blogposts.rs index 2545079..5949ec2 100644 --- a/backend/src/bin/import_blogposts.rs +++ b/backend/src/bin/import_blogposts.rs @@ -30,9 +30,3 @@ fn main() -> Result<()> { Ok(()) } - -pub fn establish_connection() -> SqliteConnection { - let database_url = env::var("DATABASE_URL").expect("DATABASE_URL must be set"); - SqliteConnection::establish(&database_url) - .expect(&format!("Error connecting to {}", database_url)) -} diff --git a/backend/src/lib.rs b/backend/src/lib.rs index 27d4bd7..eea3069 100644 --- a/backend/src/lib.rs +++ b/backend/src/lib.rs @@ -9,7 +9,7 @@ extern crate rocket_contrib; #[macro_use] extern crate tracing; -use diesel::sqlite::SqliteConnection; +use diesel::{prelude::*, SqliteConnection}; pub const APPLICATION_NAME: &str = concat!( env!("CARGO_PKG_NAME"), @@ -28,3 +28,9 @@ pub mod web; #[database("main_data")] pub struct MainDatabase(SqliteConnection); + +pub fn establish_connection() -> SqliteConnection { + let database_url = std::env::var("DATABASE_URL").expect("DATABASE_URL must be set"); + SqliteConnection::establish(&database_url) + .expect(&format!("Error connecting to {}", database_url)) +} diff --git a/backend/src/main.rs b/backend/src/main.rs index 8170f40..4e64350 100644 --- a/backend/src/main.rs +++ b/backend/src/main.rs @@ -72,6 +72,7 @@ fn main() -> Result<()> { api::switch::switch, api::webmention::accept, api::webmention::get, + api::webmention::lookup_target, api::webmention::list, api::get_members, api::token_info, diff --git a/backend/src/models.rs b/backend/src/models.rs index eb6e644..6b072ae 100644 --- a/backend/src/models.rs +++ b/backend/src/models.rs @@ -51,12 +51,19 @@ pub struct UpdateSwitchTime { pub ended_at: Option, } -#[derive(Queryable, Associations, Insertable, Serialize)] +#[derive(Queryable, Associations, Insertable, Serialize, Clone)] #[table_name = "webmentions"] pub struct WebMention { pub id: String, pub source_url: String, pub target_url: String, + pub title: Option, +} + +#[derive(AsChangeset)] +#[table_name = "webmentions"] +pub struct UpdateWebMentionSource { + pub source_url: String, } #[derive(Queryable, Associations, Insertable)] diff --git a/backend/src/schema.rs b/backend/src/schema.rs index 3075b35..aa54508 100644 --- a/backend/src/schema.rs +++ b/backend/src/schema.rs @@ -27,6 +27,7 @@ table! { id -> Text, source_url -> Text, target_url -> Text, + title -> Nullable, } } diff --git a/backend/src/web/bridgy.rs b/backend/src/web/bridgy.rs new file mode 100644 index 0000000..fa7dcda --- /dev/null +++ b/backend/src/web/bridgy.rs @@ -0,0 +1,36 @@ +use super::Result; +use scraper::{Html, Selector}; + +#[derive(Debug)] +pub struct Info { + pub target: String, + pub author: String, +} + +pub fn parse(body: &str) -> Result> { + let doc = Html::parse_document(body); + let target = doc + .select(&Selector::parse(r#"meta[http-equiv="refresh"]"#).unwrap()) + .next() + .unwrap() + .value() + .attr("content") + .map(|val| { + let mut val = val.clone().to_string(); + val.drain(6..val.len()).collect() + }); + let author = doc + .select(&Selector::parse(r#"span[class="p-nickname"]"#).unwrap()) + .next() + .unwrap() + .inner_html(); + + Ok(if target.is_some() { + Some(Info { + target: target.unwrap(), + author: author, + }) + } else { + None + }) +} diff --git a/backend/src/web/mod.rs b/backend/src/web/mod.rs index 091cb9f..b587101 100644 --- a/backend/src/web/mod.rs +++ b/backend/src/web/mod.rs @@ -1,3 +1,4 @@ +pub mod bridgy; pub mod discord_webhook; pub mod mastodon; pub mod pluralkit;