Start work on weather scraping from the Government of Canada API

Signed-off-by: Christine Dodrill <me@christine.website>
This commit is contained in:
Cadey Ratio 2020-12-22 20:17:21 -05:00
parent b0a15936a9
commit 76611d657a
9 changed files with 367 additions and 13 deletions

71
backend/Cargo.lock generated
View File

@ -192,7 +192,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46254cf2fdcdf1badb5934448c1bcbe046a56537b3987d96c51a7afc5d03f293" checksum = "46254cf2fdcdf1badb5934448c1bcbe046a56537b3987d96c51a7afc5d03f293"
dependencies = [ dependencies = [
"addr2line", "addr2line",
"cfg-if", "cfg-if 0.1.10",
"libc", "libc",
"miniz_oxide", "miniz_oxide",
"object", "object",
@ -314,6 +314,12 @@ version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]] [[package]]
name = "chrono" name = "chrono"
version = "0.4.15" version = "0.4.15"
@ -431,7 +437,7 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1" checksum = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1"
dependencies = [ dependencies = [
"cfg-if", "cfg-if 0.1.10",
] ]
[[package]] [[package]]
@ -654,6 +660,24 @@ version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569" checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569"
[[package]]
name = "encoding_rs"
version = "0.8.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "801bbab217d7f79c0062f4f7205b5d4427c6d1a7bd7aafdd1475f7c59d62b283"
dependencies = [
"cfg-if 1.0.0",
]
[[package]]
name = "encoding_rs_io"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cc3c5651fb62ab8aa3103998dade57efdd028544bd300516baa31840c252a83"
dependencies = [
"encoding_rs",
]
[[package]] [[package]]
name = "error-chain" name = "error-chain"
version = "0.12.4" version = "0.12.4"
@ -707,7 +731,7 @@ version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ed85775dcc68644b5c950ac06a2b23768d3bc9390464151aaf27136998dcf9e" checksum = "3ed85775dcc68644b5c950ac06a2b23768d3bc9390464151aaf27136998dcf9e"
dependencies = [ dependencies = [
"cfg-if", "cfg-if 0.1.10",
"libc", "libc",
"redox_syscall", "redox_syscall",
"winapi 0.3.9", "winapi 0.3.9",
@ -840,7 +864,7 @@ version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc587bc0ec293155d5bfa6b9891ec18a1e330c234f896ea47fbada4cadbe47e6" checksum = "fc587bc0ec293155d5bfa6b9891ec18a1e330c234f896ea47fbada4cadbe47e6"
dependencies = [ dependencies = [
"cfg-if", "cfg-if 0.1.10",
"libc", "libc",
"wasi 0.9.0+wasi-snapshot-preview1", "wasi 0.9.0+wasi-snapshot-preview1",
] ]
@ -1151,7 +1175,7 @@ version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b" checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b"
dependencies = [ dependencies = [
"cfg-if", "cfg-if 0.1.10",
] ]
[[package]] [[package]]
@ -1225,6 +1249,8 @@ dependencies = [
"color-eyre", "color-eyre",
"diesel", "diesel",
"diesel_migrations", "diesel_migrations",
"encoding_rs",
"encoding_rs_io",
"futures-io", "futures-io",
"hex", "hex",
"log 0.4.11", "log 0.4.11",
@ -1242,6 +1268,7 @@ dependencies = [
"scraper", "scraper",
"sdnotify", "sdnotify",
"serde", "serde",
"serde-xml-rs",
"serde_json", "serde_json",
"thiserror", "thiserror",
"tracing", "tracing",
@ -1313,7 +1340,7 @@ version = "0.6.22"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fce347092656428bc8eaf6201042cb551b8d67855af7374542a92a0fbfcac430" checksum = "fce347092656428bc8eaf6201042cb551b8d67855af7374542a92a0fbfcac430"
dependencies = [ dependencies = [
"cfg-if", "cfg-if 0.1.10",
"fuchsia-zircon", "fuchsia-zircon",
"fuchsia-zircon-sys", "fuchsia-zircon-sys",
"iovec", "iovec",
@ -1356,7 +1383,7 @@ version = "0.2.35"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ebc3ec692ed7c9a255596c67808dee269f64655d8baf7b4f0638e51ba1d6853" checksum = "3ebc3ec692ed7c9a255596c67808dee269f64655d8baf7b4f0638e51ba1d6853"
dependencies = [ dependencies = [
"cfg-if", "cfg-if 0.1.10",
"libc", "libc",
"winapi 0.3.9", "winapi 0.3.9",
] ]
@ -1455,7 +1482,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d575eff3665419f9b83678ff2815858ad9d11567e082f5ac1814baba4e2bcb4" checksum = "8d575eff3665419f9b83678ff2815858ad9d11567e082f5ac1814baba4e2bcb4"
dependencies = [ dependencies = [
"bitflags", "bitflags",
"cfg-if", "cfg-if 0.1.10",
"foreign-types", "foreign-types",
"lazy_static", "lazy_static",
"libc", "libc",
@ -1498,7 +1525,7 @@ version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c361aa727dd08437f2f1447be8b59a33b0edd15e0fcee698f935613d9efbca9b" checksum = "c361aa727dd08437f2f1447be8b59a33b0edd15e0fcee698f935613d9efbca9b"
dependencies = [ dependencies = [
"cfg-if", "cfg-if 0.1.10",
"cloudabi 0.1.0", "cloudabi 0.1.0",
"instant", "instant",
"libc", "libc",
@ -1661,7 +1688,7 @@ version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ec3341498978de3bfd12d1b22f1af1de22818f5473a11e8a6ef997989e3a212" checksum = "7ec3341498978de3bfd12d1b22f1af1de22818f5473a11e8a6ef997989e3a212"
dependencies = [ dependencies = [
"cfg-if", "cfg-if 0.1.10",
"universal-hash", "universal-hash",
] ]
@ -1721,7 +1748,7 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30d70cf4412832bcac9cffe27906f4a66e450d323525e977168c70d1b36120ae" checksum = "30d70cf4412832bcac9cffe27906f4a66e450d323525e977168c70d1b36120ae"
dependencies = [ dependencies = [
"cfg-if", "cfg-if 0.1.10",
"fnv", "fnv",
"lazy_static", "lazy_static",
"libc", "libc",
@ -2317,6 +2344,18 @@ dependencies = [
"serde_derive", "serde_derive",
] ]
[[package]]
name = "serde-xml-rs"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efe415925cf3d0bbb2fc47d09b56ce03eef51c5d56846468a39bcc293c7a846c"
dependencies = [
"log 0.4.11",
"serde",
"thiserror",
"xml-rs",
]
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.116" version = "1.0.116"
@ -2729,7 +2768,7 @@ version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d79ca061b032d6ce30c660fded31189ca0b9922bf483cd70759f13a2d86786c" checksum = "6d79ca061b032d6ce30c660fded31189ca0b9922bf483cd70759f13a2d86786c"
dependencies = [ dependencies = [
"cfg-if", "cfg-if 0.1.10",
"tracing-attributes", "tracing-attributes",
"tracing-core", "tracing-core",
] ]
@ -3029,7 +3068,7 @@ version = "0.2.68"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ac64ead5ea5f05873d7c12b545865ca2b8d28adfc50a49b84770a3a97265d42" checksum = "1ac64ead5ea5f05873d7c12b545865ca2b8d28adfc50a49b84770a3a97265d42"
dependencies = [ dependencies = [
"cfg-if", "cfg-if 0.1.10",
"wasm-bindgen-macro", "wasm-bindgen-macro",
] ]
@ -3159,6 +3198,12 @@ dependencies = [
"winapi-build", "winapi-build",
] ]
[[package]]
name = "xml-rs"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b07db065a5cf61a7e4ba64f29e67db906fb1787316516c4e6e5ff0fea1efcd8a"
[[package]] [[package]]
name = "yansi" name = "yansi"
version = "0.5.0" version = "0.5.0"

View File

@ -11,6 +11,8 @@ askama_rocket = "0.10"
chrono = { version = "0.4", features = ["serde"] } chrono = { version = "0.4", features = ["serde"] }
color-eyre = "0.5" color-eyre = "0.5"
diesel_migrations = "1" diesel_migrations = "1"
encoding_rs = "0.8.26"
encoding_rs_io = "0.1"
futures-io = "0.3" futures-io = "0.3"
hex = "0.4" hex = "0.4"
log = "0.4" log = "0.4"
@ -28,6 +30,7 @@ scraper = "0.12.0"
sdnotify = { version = "0.1", default-features = false } sdnotify = { version = "0.1", default-features = false }
serde_json = "^1" serde_json = "^1"
serde = { version = "1", features = ["derive"] } serde = { version = "1", features = ["derive"] }
serde-xml-rs = "0.4.0"
thiserror = "1" thiserror = "1"
tracing = "0.1" tracing = "0.1"
tracing-log = "0.1" tracing-log = "0.1"

View File

@ -0,0 +1,2 @@
-- Reverts the weather migration: removes the weather_ts_region index, then
-- the weather table itself.
DROP INDEX weather_ts_region;
DROP TABLE weather;

View File

@ -0,0 +1,8 @@
-- Stored weather reports, one row per timestamp.
-- NOTE(review): `ts` alone is the primary key, so two regions cannot share a
-- timestamp and the (ts, region) index below adds no extra uniqueness.
-- Confirm whether a composite key was intended.
CREATE TABLE IF NOT EXISTS weather
  ( ts     TIMESTAMP NOT NULL PRIMARY KEY
  , region TEXT      NOT NULL
  , body   BLOB      NOT NULL -- JSON-encoded weather data
  );

-- Guarded the same way as the table, so running this script against a
-- database that already has the index does not fail.
CREATE UNIQUE INDEX IF NOT EXISTS weather_ts_region
  ON weather(ts, region);

View File

@ -0,0 +1,40 @@
#[macro_use]
extern crate tracing;
use color_eyre::eyre::Result;
use encoding_rs::WINDOWS_1252;
use encoding_rs_io::DecodeReaderBytesBuilder;
use serde_xml_rs::from_reader;
use mi::*;
/// URL of the XML weather document that this importer downloads and parses.
pub const WEATHER_URL: &str = "https://dd.weather.gc.ca/citypage_weather/xml/QC/s0000635_e.xml";
/// Entry point of the weather importer.
///
/// Downloads the document at [`WEATHER_URL`], decodes the response body as
/// Windows-1252, deserializes it into `web::canada_weather::SiteData` and
/// pretty-prints the parsed value to stdout.
///
/// # Errors
///
/// Returns an error when the error reporter cannot be installed, when the
/// HTTP response is not OK (transport failure or error status), or when the
/// XML cannot be deserialized.
fn main() -> Result<()> {
    color_eyre::install()?;
    tracing_subscriber::fmt::init();

    info!("{} weather importer starting up", mi::APPLICATION_NAME);

    // NOTE(review): the User-Agent header is set to the feed URL itself;
    // confirm whether an application identifier was intended instead.
    let resp = ureq::get(WEATHER_URL).set("User-Agent", WEATHER_URL).call();

    if !resp.ok() {
        // Prefer the transport-level error when there is one; otherwise fall
        // back to the HTTP status line returned by the server.
        let why = match resp.synthetic_error() {
            Some(why) => why.to_string(),
            None => resp.status_line().to_string(),
        };
        // Report the failure through the Result instead of panicking, like
        // every other fallible step in this function.
        return Err(color_eyre::eyre::eyre!(
            "can't fetch {}: {}",
            WEATHER_URL,
            why
        ));
    }

    // The XML parser is handed a reader that decodes Windows-1252 bytes.
    let fin = DecodeReaderBytesBuilder::new()
        .encoding(Some(WINDOWS_1252))
        .build(resp.into_reader());
    let data: web::canada_weather::SiteData = from_reader(fin)?;

    println!("{:#?}", data);

    Ok(())
}

View File

@ -22,6 +22,14 @@ table! {
} }
} }
// Diesel mapping for the `weather` table, keyed on `ts`.
// `body` is declared as Binary; the migration that creates the table
// describes that column as JSON-encoded weather data.
table! {
weather (ts) {
ts -> Timestamp,
region -> Text,
body -> Binary,
}
}
table! { table! {
webmentions (id) { webmentions (id) {
id -> Text, id -> Text,
@ -37,5 +45,6 @@ allow_tables_to_appear_in_same_query!(
blogposts, blogposts,
members, members,
switches, switches,
weather,
webmentions, webmentions,
); );

View File

@ -0,0 +1,10 @@
pub mod types;
pub use types::SiteData;
/// Attribution line for data obtained from this API.
///
/// XXX(acli): the [general licence](https://dd.weather.gc.ca/doc/LICENCE_GENERAL.txt)
/// requires this string to appear somewhere in anything derived from this API.
/// Nothing inserts it automatically: it has to be added by hand to every
/// response for us to stay within the terms of the licence.
pub const DATA_SOURCE: &str = "Data Source: Environment and Climate Change Canada";

View File

@ -0,0 +1,236 @@
use serde::{Deserialize, Serialize};
/// Root of a weather document.
///
/// The importer binary deserializes the downloaded XML directly into this
/// type. Keys are matched by their camelCase spelling, so `date_time` is read
/// from `dateTime`, `current_conditions` from `currentConditions`, and so on.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct SiteData {
pub license: String,
// Collected into a Vec, so the `dateTime` key may occur more than once.
pub date_time: Vec<DateTime>,
pub location: Location,
// Optional: a document without this key yields `None`.
pub warnings: Option<Warnings>,
pub current_conditions: CurrentConditions,
pub forecast_group: ForecastGroup,
// TODO(acli): hourly forecasts are not implemented yet.
pub yesterday_conditions: Yesterday,
pub rise_set: RiseSet,
}
/// A small number paired with a `name`; used for the `month` and `day`
/// fields of [`DateTime`].
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Name {
pub name: String,
// `$value` is the key serde-xml-rs uses for an element's own text content.
#[serde(rename = "$value")]
pub value: u8,
}
/// One timestamp entry of the document, broken down into its components.
///
/// The components are stored as they are read; nothing here converts them
/// into a `chrono` type.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct DateTime {
pub name: String,
pub zone: String,
// Explicit rename: the camelCase rule would produce `utcOffset`, but the
// key is spelled `UTCOffset`.
#[serde(rename = "UTCOffset")]
pub utc_offset: String,
pub year: u16,
// `month` and `day` carry both a number and a name (see `Name`).
pub month: Name,
pub day: Name,
pub hour: u8,
pub minute: u8,
// Kept as unparsed strings.
pub time_stamp: String,
pub text_summary: String,
}
/// The place a document refers to.
///
/// `country`, `province` and `name` are each a code plus display text
/// (see [`CodeName`]); `continent` and `region` are plain strings.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Location {
pub continent: String,
pub country: CodeName,
pub province: CodeName,
pub name: CodeName,
pub region: String,
}
/// A coded entry with display text and optional coordinates.
///
/// Used for the country/province/name entries of [`Location`] and for the
/// station in [`CurrentConditions`].
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct CodeName {
pub code: String,
// Coordinates are optional and kept as unparsed strings.
pub lat: Option<String>,
pub lon: Option<String>,
// The display text is read from `$value`, the serde-xml-rs key for an
// element's own text content.
#[serde(rename = "$value")]
pub name: String,
}
/// An icon reference: a `format` string plus the code itself.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IconCode {
pub format: String,
// The code is read from `$value` (the element's text content) and kept as
// a string.
#[serde(rename = "$value")]
pub value: String,
}
/// A floating-point measurement with optional metadata.
///
/// Every field, including the value itself, is optional. This type is reused
/// for temperatures, pressure, humidity, wind components and similar readings
/// throughout this module.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct MetricWithUnits {
pub units: Option<String>,
pub unit_type: Option<String>,
pub change: Option<f64>,
pub tendency: Option<String>,
// The measurement is read from `$value`, the element's text content.
#[serde(rename = "$value")]
pub value: Option<f64>,
}
/// A wind reading: speed, gust and bearing as metrics, plus a textual
/// direction.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Wind {
pub speed: MetricWithUnits,
pub gust: MetricWithUnits,
pub direction: String,
pub bearing: MetricWithUnits,
}
/// The `currentConditions` section of a [`SiteData`] document.
///
/// Only `wind_chill` and `wind` are optional; every other key must be present
/// for deserialization to succeed.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct CurrentConditions {
pub station: CodeName,
pub date_time: Vec<DateTime>,
pub condition: String,
pub icon_code: IconCode,
pub temperature: MetricWithUnits,
pub dewpoint: MetricWithUnits,
pub wind_chill: Option<MetricWithUnits>,
pub pressure: MetricWithUnits,
pub visibility: MetricWithUnits,
pub relative_humidity: MetricWithUnits,
pub wind: Option<Wind>,
}
/// The `warnings` section: a URL plus the list of events.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Warnings {
pub url: String,
pub event: Vec<Event>,
}
/// A single entry in [`Warnings`].
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Event {
// Raw identifier, because `type` is a reserved word in Rust.
pub r#type: String,
pub priority: String,
pub description: String,
pub date_time: Vec<DateTime>,
}
/// The `forecastGroup` section: timestamps, regional normals and the list of
/// individual forecasts.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ForecastGroup {
pub date_time: Vec<DateTime>,
pub regional_normals: RegionalNormals,
pub forecast: Vec<Forecast>,
}
/// Regional normals within a [`ForecastGroup`]: a text summary and a list of
/// temperature metrics.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct RegionalNormals {
pub text_summary: String,
pub temperature: Vec<MetricWithUnits>,
}
/// The period a [`Forecast`] applies to, given as two strings.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Period {
pub text_forecast_name: String,
// Read from `$value`, the element's text content.
#[serde(rename = "$value")]
pub value: String,
}
/// The `cloudPrecip` part of a [`Forecast`]; only its text summary is kept.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct CloudPrecip {
pub text_summary: String,
}
/// One entry of [`ForecastGroup::forecast`].
///
/// `precipitation` and `uv` are optional; all other keys are required.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Forecast {
pub period: Period,
pub text_summary: String,
pub cloud_precip: CloudPrecip,
pub abbreviated_forecast: AbbreviatedForecast,
pub temperatures: Temperatures,
pub winds: Winds,
pub precipitation: Option<Precipitation>,
pub uv: Option<UVIndex>,
pub relative_humidity: MetricWithUnits,
}
/// Short form of a [`Forecast`]: an icon, a `pop` metric and a text summary.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct AbbreviatedForecast {
pub icon_code: IconCode,
// NOTE(review): presumably "probability of precipitation" — confirm
// against the feed documentation.
pub pop: MetricWithUnits,
pub text_summary: String,
}
/// The `temperatures` part of a [`Forecast`]: a text summary and a single
/// temperature metric.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Temperatures {
pub text_summary: String,
pub temperature: MetricWithUnits,
}
/// The `winds` part of a [`Forecast`]. Both the summary and the list of wind
/// readings are optional.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Winds {
pub text_summary: Option<String>,
pub wind: Option<Vec<Wind>>,
}
/// One `precipType` entry of [`Precipitation`]: `start` and `end` strings
/// with an optional text value.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct PrecipitationType {
pub start: String,
pub end: String,
// Read from `$value` (the element's text content); may be absent.
#[serde(rename = "$value")]
pub value: Option<String>,
}
/// A named accumulation amount inside [`Precipitation`].
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Accumulation {
pub name: String,
pub amount: MetricWithUnits,
}
/// The `precipitation` part of a [`Forecast`].
///
/// The summary and the accumulation are optional; `precip_type` is collected
/// into a list.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Precipitation {
pub text_summary: Option<String>,
pub precip_type: Vec<PrecipitationType>,
pub accumulation: Option<Accumulation>,
}
/// A visibility note made of a `cause` and a text summary.
///
/// NOTE(review): none of the other types visible in this module reference
/// this struct yet — confirm where it is meant to be attached.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Visibility {
pub cause: String,
pub text_summary: String,
}
/// The optional `uv` part of a [`Forecast`].
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct UVIndex {
pub category: String,
// Kept as a string; no numeric parsing is done here.
pub index: String,
pub text_summary: String,
}
/// The `yesterdayConditions` section of [`SiteData`]; only the list of
/// temperature metrics is kept.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Yesterday {
pub temperature: Vec<MetricWithUnits>,
}
/// The `riseSet` section of [`SiteData`]: a disclaimer string and a list of
/// timestamps.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct RiseSet {
pub disclaimer: String,
pub date_time: Vec<DateTime>,
}

View File

@ -1,4 +1,5 @@
pub mod bridgy; pub mod bridgy;
pub mod canada_weather;
pub mod discord_webhook; pub mod discord_webhook;
pub mod mastodon; pub mod mastodon;
pub mod pluralkit; pub mod pluralkit;