diff --git a/Cargo.lock b/Cargo.lock index 198282e..af64954 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -317,6 +317,7 @@ dependencies = [ "expect-test", "futures-util", "hex", + "html-escape", "http-body-util", "humantime", "nix", diff --git a/blahd/Cargo.toml b/blahd/Cargo.toml index 9c11f5d..b5185f0 100644 --- a/blahd/Cargo.toml +++ b/blahd/Cargo.toml @@ -11,6 +11,7 @@ clap = { version = "4", features = ["derive"] } ed25519-dalek = "2" futures-util = "0.3" hex = { version = "0.4", features = ["serde"] } +html-escape = "0.2" http-body-util = "0.1" humantime = "2" parking_lot = "0.12" # Maybe no better performance, just that we hate poisoning. ¯\_(ツ)_/¯ diff --git a/blahd/src/feed.rs b/blahd/src/feed.rs index 3a2ca5d..e2e2416 100644 --- a/blahd/src/feed.rs +++ b/blahd/src/feed.rs @@ -1,15 +1,19 @@ //! Room feed generation. +use std::fmt; use std::num::NonZero; use std::time::{Duration, SystemTime}; use axum::http::header; use axum::response::{IntoResponse, Response}; use axum::Json; -use blah_types::{SignedChatMsg, WithMsgId}; +use blah_types::{Id, SignedChatMsg, WithMsgId}; use serde::{Deserialize, Serialize}; use url::Url; +use crate::id::timestamp_of_id; + const JSON_FEED_MIME: &str = "application/feed+json"; +const ATOM_FEED_MIME: &str = "application/atom+xml"; #[derive(Debug, Clone, Deserialize)] #[serde(default)] @@ -25,10 +29,44 @@ impl Default for Config { } } +pub struct FeedData { + pub rid: Id, + pub title: String, + pub msgs: Vec>, + pub self_url: Url, + pub next_url: Option, +} + +pub trait FeedType { + fn to_feed_response(data: FeedData) -> Response; +} + +fn timestamp_to_rfc3339(timestamp: u64) -> impl fmt::Display { + humantime::format_rfc3339(SystemTime::UNIX_EPOCH + Duration::from_secs(timestamp)) +} + +/// See: +/// - +/// - +/// - +#[derive(Clone, Copy)] +struct TagUri<'a>(&'a Url, &'a str, Id); + +impl fmt::Display for TagUri<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let Self(url, typ, id) = *self; + let domain = url.domain().expect("base_url must have domain"); + let id_time = timestamp_to_rfc3339(timestamp_of_id(id)).to_string(); + // Because github.com also only uses year. + let id_year = &id_time[..4]; + write!(f, "tag:{domain},{id_year}:blah/{typ}/{id}") + } +} + /// Ref: #[derive(Debug, Serialize)] #[serde(tag = "version", rename = "https://jsonfeed.org/version/1.1")] -struct JsonFeed { +pub struct JsonFeed { title: String, feed_url: Url, #[serde(skip_serializing_if = "Option::is_none")] @@ -52,35 +90,125 @@ struct JsonFeedAuthor { name: String, } -pub fn to_json_feed( - title: String, - msgs: Vec>, - self_url: Url, - next_url: Option, -) -> Response { - let items = msgs - .into_iter() - .map(|WithMsgId { cid, msg }| { - let time = SystemTime::UNIX_EPOCH + Duration::from_secs(msg.signee.timestamp); - let author = JsonFeedAuthor { - // TODO: Retrieve id_url as name. - name: msg.signee.user.id_key.to_string(), - }; - JsonFeedItem { - id: cid.to_string(), - content_html: msg.signee.payload.rich_text.html().to_string(), - date_published: humantime::format_rfc3339(time).to_string(), - authors: (author,), - } - }) - .collect::>(); +impl FeedType for JsonFeed { + fn to_feed_response(data: FeedData) -> Response { + let FeedData { + title, + msgs, + self_url, + next_url, + .. + } = data; + let items = msgs + .into_iter() + .map(|WithMsgId { cid, msg }| { + let author = JsonFeedAuthor { + // TODO: Retrieve id_url as name. + name: msg.signee.user.id_key.to_string(), + }; + JsonFeedItem { + id: TagUri(&self_url, "msg", cid).to_string(), + content_html: msg.signee.payload.rich_text.html().to_string(), + date_published: timestamp_to_rfc3339(msg.signee.timestamp).to_string(), + authors: (author,), + } + }) + .collect::>(); - let feed = JsonFeed { - title, - items, - feed_url: self_url, - next_url, - }; + let feed = JsonFeed { + title, + items, + feed_url: self_url, + next_url, + }; - ([(header::CONTENT_TYPE, JSON_FEED_MIME)], Json(feed)).into_response() + ([(header::CONTENT_TYPE, JSON_FEED_MIME)], Json(feed)).into_response() + } +} + +pub struct AtomFeed(FeedData); + +// We write XML manually here, because existing crates (`feed-rs` and `atom_syndication`) +// pull in heavy `quick_xml` and `chrono` which overdoes too much. +// +// Ref: +impl fmt::Display for AtomFeed { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use html_escape::{encode_quoted_attribute, encode_text}; + + let FeedData { + rid, + title, + msgs, + self_url, + next_url, + } = &self.0; + + let room_id = TagUri(self_url, "room", *rid); + let esc_room_title = encode_text(title.trim()); + // TODO: This should track the latest msg even for non-first page. + let feed_timestamp = + timestamp_to_rfc3339(msgs.first().map_or(0, |msg| msg.msg.signee.timestamp)); + let esc_self_url = encode_quoted_attribute(self_url.as_str()); + + write!( + f, + // NB. XML requires no-newline at start. + r#" + + {room_id} + {esc_room_title} + {feed_timestamp} + +"# + )?; + if let Some(next_url) = &next_url { + let esc_next_url = encode_quoted_attribute(next_url.as_str()); + // + writeln!( + f, + r#" "# + )?; + } + // TODO: HTML links. + + for msg in msgs { + let content = &msg.msg.signee.payload.rich_text; + let msg_id = TagUri(self_url, "msg", msg.cid); + let plain_text = content.plain_text().to_string(); + let esc_msg_title = + encode_text(plain_text.lines().next().unwrap_or("(untitled)").trim()); + let msg_timestamp = timestamp_to_rfc3339(msg.msg.signee.timestamp); + let author = msg.msg.signee.user.id_key.to_string(); + let content = content.html().to_string(); + let esc_content = encode_text(&content); + write!( + f, + r#" + + {msg_id} + {esc_msg_title} + {msg_timestamp} + {msg_timestamp} + {author} + {esc_content} + + "# + )?; + } + + write!( + f, + r#" + +"# + ) + } +} + +impl FeedType for AtomFeed { + fn to_feed_response(data: FeedData) -> Response { + let body = AtomFeed(data).to_string(); + ([(header::CONTENT_TYPE, ATOM_FEED_MIME)], body).into_response() + } } diff --git a/blahd/src/id.rs b/blahd/src/id.rs index f5c94b1..01ed17d 100644 --- a/blahd/src/id.rs +++ b/blahd/src/id.rs @@ -5,6 +5,10 @@ use std::time::SystemTime; use blah_types::Id; +pub fn timestamp_of_id(id: Id) -> u64 { + (id.0 as u64 >> 16) / 1000 +} + pub trait IdExt { fn gen() -> Self; fn gen_peer_chat_rid() -> Self; diff --git a/blahd/src/lib.rs b/blahd/src/lib.rs index a09e0bc..be55ea6 100644 --- a/blahd/src/lib.rs +++ b/blahd/src/lib.rs @@ -17,6 +17,7 @@ use blah_types::{ X_BLAH_NONCE, }; use database::{Transaction, TransactionOps}; +use feed::FeedData; use id::IdExt; use middleware::{Auth, MaybeAuth, ResultExt as _, SignedJson}; use parking_lot::Mutex; @@ -69,6 +70,9 @@ fn de_base_url<'de, D: Deserializer<'de>>(de: D) -> Result { "base_url must be able to be a base", )); } + if url.domain().is_none() { + return Err(serde::de::Error::custom("base_url must have a domain")); + } Ok(url) } @@ -137,7 +141,8 @@ pub fn router(st: Arc) -> Router { .route("/room", get(room_list)) .route("/room/create", post(room_create)) .route("/room/:rid", get(room_get_metadata).delete(room_delete)) - .route("/room/:rid/feed.json", get(room_get_feed)) + .route("/room/:rid/feed.json", get(room_get_feed::)) + .route("/room/:rid/feed.atom", get(room_get_feed::)) .route("/room/:rid/msg", get(room_msg_list).post(room_msg_post)) .route("/room/:rid/msg/:cid/seen", post(room_msg_mark_seen)) .route("/room/:rid/admin", post(room_admin)) @@ -424,7 +429,7 @@ async fn room_get_metadata( })) } -async fn room_get_feed( +async fn room_get_feed( st: ArcState, R(OriginalUri(req_uri), _): RE, R(Path(rid), _): RE>, @@ -470,7 +475,13 @@ async fn room_get_feed( next_url }); - Ok(feed::to_json_feed(title, msgs, self_url, next_url)) + Ok(FT::to_feed_response(FeedData { + rid, + title, + msgs, + self_url, + next_url, + })) } /// Get room messages with pagination parameters, diff --git a/blahd/tests/webapi.rs b/blahd/tests/webapi.rs index 0becfe4..17eecbb 100644 --- a/blahd/tests/webapi.rs +++ b/blahd/tests/webapi.rs @@ -726,15 +726,17 @@ async fn room_chat_post_read(server: Server) { } #[rstest] +#[case::json("json")] +#[case::atom("atom")] #[tokio::test] -async fn room_feed(server: Server) { +async fn room_feed(server: Server, #[case] typ: &'static str) { // Only public readable rooms provides feed. Not even for public joinable ones. let rid_need_join = server .create_room(&ALICE, RoomAttrs::PUBLIC_JOINABLE, "not so public") .await .unwrap(); server - .get::(&format!("/room/{rid_need_join}/feed.json"), None) + .get::(&format!("/room/{rid_need_join}/feed.{typ}"), None) .await .expect_api_err(StatusCode::NOT_FOUND, "room_not_found"); @@ -754,30 +756,46 @@ async fn room_feed(server: Server) { let cid2 = server.post_chat(rid, &BOB, "b1").await.unwrap().cid; server.post_chat(rid, &BOB, "b2").await.unwrap(); - let feed = server - .get::(&format!("/room/{rid}/feed.json"), None) - .await - .unwrap(); - // TODO: Ideally we should assert on the result, but it contains time and random id currently. - assert_eq!(feed["title"].as_str().unwrap(), "public"); - assert_eq!(feed["items"].as_array().unwrap().len(), 2); - let feed_url = format!("{BASE_URL}/_blah/room/{rid}/feed.json"); - assert_eq!(feed["feed_url"].as_str().unwrap(), feed_url,); - assert_eq!( - feed["next_url"].as_str().unwrap(), - format!("{feed_url}?skipToken={cid2}&top=2"), - ); + if typ == "json" { + let feed = server + .get::(&format!("/room/{rid}/feed.json"), None) + .await + .unwrap(); + // TODO: Ideally we should assert on the result, but it contains time and random id currently. + assert_eq!(feed["title"].as_str().unwrap(), "public"); + assert_eq!(feed["items"].as_array().unwrap().len(), 2); + let feed_url = format!("{BASE_URL}/_blah/room/{rid}/feed.json"); + assert_eq!(feed["feed_url"].as_str().unwrap(), feed_url,); + assert_eq!( + feed["next_url"].as_str().unwrap(), + format!("{feed_url}?skipToken={cid2}&top=2"), + ); - let feed2 = server - .get::( - &format!("/room/{rid}/feed.json?skipToken={cid2}&top=2"), - None, - ) - .await - .unwrap(); - let items = feed2["items"].as_array().unwrap(); - assert_eq!(items.len(), 1); - assert_eq!(items[0]["content_html"].as_str().unwrap(), "a"); + let feed2 = server + .get::( + &format!("/room/{rid}/feed.json?skipToken={cid2}&top=2"), + None, + ) + .await + .unwrap(); + let items = feed2["items"].as_array().unwrap(); + assert_eq!(items.len(), 1); + assert_eq!(items[0]["content_html"].as_str().unwrap(), "a"); + } else { + let resp = server + .client + .get(server.url(format!("/room/{rid}/feed.atom"))) + .send() + .await + .unwrap() + .error_for_status() + .unwrap() + .text() + .await + .unwrap(); + assert!(resp.starts_with(r#""#)); + assert_eq!(resp.matches("").count(), 2); + } } #[rstest]