feat(webapi): impl atom feed and use tag URI for feed id

This commit is contained in:
oxalica 2024-09-23 20:29:24 -04:00
parent 94e5913513
commit 487099474f
6 changed files with 222 additions and 59 deletions

1
Cargo.lock generated
View file

@ -317,6 +317,7 @@ dependencies = [
"expect-test", "expect-test",
"futures-util", "futures-util",
"hex", "hex",
"html-escape",
"http-body-util", "http-body-util",
"humantime", "humantime",
"nix", "nix",

View file

@ -11,6 +11,7 @@ clap = { version = "4", features = ["derive"] }
ed25519-dalek = "2" ed25519-dalek = "2"
futures-util = "0.3" futures-util = "0.3"
hex = { version = "0.4", features = ["serde"] } hex = { version = "0.4", features = ["serde"] }
html-escape = "0.2"
http-body-util = "0.1" http-body-util = "0.1"
humantime = "2" humantime = "2"
parking_lot = "0.12" # Maybe no better performance, just that we hate poisoning. ¯\_(ツ)_/¯ parking_lot = "0.12" # Maybe no better performance, just that we hate poisoning. ¯\_(ツ)_/¯

View file

@ -1,15 +1,19 @@
//! Room feed generation. //! Room feed generation.
use std::fmt;
use std::num::NonZero; use std::num::NonZero;
use std::time::{Duration, SystemTime}; use std::time::{Duration, SystemTime};
use axum::http::header; use axum::http::header;
use axum::response::{IntoResponse, Response}; use axum::response::{IntoResponse, Response};
use axum::Json; use axum::Json;
use blah_types::{SignedChatMsg, WithMsgId}; use blah_types::{Id, SignedChatMsg, WithMsgId};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use url::Url; use url::Url;
use crate::id::timestamp_of_id;
const JSON_FEED_MIME: &str = "application/feed+json"; const JSON_FEED_MIME: &str = "application/feed+json";
const ATOM_FEED_MIME: &str = "application/atom+xml";
#[derive(Debug, Clone, Deserialize)] #[derive(Debug, Clone, Deserialize)]
#[serde(default)] #[serde(default)]
@ -25,10 +29,44 @@ impl Default for Config {
} }
} }
/// Everything a feed renderer needs to produce one page of a room's feed.
pub struct FeedData {
/// Id of the room the feed belongs to.
pub rid: Id,
/// Human-readable feed title.
pub title: String,
/// Messages rendered as the feed's items/entries, in the order given.
pub msgs: Vec<WithMsgId<SignedChatMsg>>,
/// URL of this feed page itself; its domain is also used when building tag URIs.
pub self_url: Url,
/// URL of the next page, if any.
pub next_url: Option<Url>,
}
/// A feed output format that can turn [`FeedData`] into an HTTP response.
pub trait FeedType {
/// Consume `data` and build the complete response for this format.
fn to_feed_response(data: FeedData) -> Response;
}
/// Wrap a Unix timestamp (whole seconds since the epoch) in a value that
/// displays as an RFC 3339 timestamp.
fn timestamp_to_rfc3339(timestamp: u64) -> impl fmt::Display {
    let since_epoch = Duration::from_secs(timestamp);
    let instant = SystemTime::UNIX_EPOCH + since_epoch;
    humantime::format_rfc3339(instant)
}
/// A tag URI used as an identifier for feeds and their entries.
///
/// Fields, in order: the URL whose domain becomes the tagging authority, a
/// type label, and the [`Id`] being identified. The `Display` impl renders
/// `tag:{domain},{year}:blah/{type}/{id}`, where the year is derived from the
/// timestamp encoded in the id.
///
/// See:
/// - <https://taguri.org/>
/// - <https://www.rfc-editor.org/rfc/rfc4151>
/// - <https://en.wikipedia.org/wiki/Tag_URI_scheme>
#[derive(Clone, Copy)]
struct TagUri<'a>(&'a Url, &'a str, Id);
impl fmt::Display for TagUri<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let Self(url, typ, id) = *self;
let domain = url.domain().expect("base_url must have domain");
let id_time = timestamp_to_rfc3339(timestamp_of_id(id)).to_string();
// Because github.com also only uses year.
let id_year = &id_time[..4];
write!(f, "tag:{domain},{id_year}:blah/{typ}/{id}")
}
}
/// Ref: <https://www.jsonfeed.org/version/1.1/> /// Ref: <https://www.jsonfeed.org/version/1.1/>
#[derive(Debug, Serialize)] #[derive(Debug, Serialize)]
#[serde(tag = "version", rename = "https://jsonfeed.org/version/1.1")] #[serde(tag = "version", rename = "https://jsonfeed.org/version/1.1")]
struct JsonFeed { pub struct JsonFeed {
title: String, title: String,
feed_url: Url, feed_url: Url,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
@ -52,24 +90,26 @@ struct JsonFeedAuthor {
name: String, name: String,
} }
pub fn to_json_feed( impl FeedType for JsonFeed {
title: String, fn to_feed_response(data: FeedData) -> Response {
msgs: Vec<WithMsgId<SignedChatMsg>>, let FeedData {
self_url: Url, title,
next_url: Option<Url>, msgs,
) -> Response { self_url,
next_url,
..
} = data;
let items = msgs let items = msgs
.into_iter() .into_iter()
.map(|WithMsgId { cid, msg }| { .map(|WithMsgId { cid, msg }| {
let time = SystemTime::UNIX_EPOCH + Duration::from_secs(msg.signee.timestamp);
let author = JsonFeedAuthor { let author = JsonFeedAuthor {
// TODO: Retrieve id_url as name. // TODO: Retrieve id_url as name.
name: msg.signee.user.id_key.to_string(), name: msg.signee.user.id_key.to_string(),
}; };
JsonFeedItem { JsonFeedItem {
id: cid.to_string(), id: TagUri(&self_url, "msg", cid).to_string(),
content_html: msg.signee.payload.rich_text.html().to_string(), content_html: msg.signee.payload.rich_text.html().to_string(),
date_published: humantime::format_rfc3339(time).to_string(), date_published: timestamp_to_rfc3339(msg.signee.timestamp).to_string(),
authors: (author,), authors: (author,),
} }
}) })
@ -83,4 +123,92 @@ pub fn to_json_feed(
}; };
([(header::CONTENT_TYPE, JSON_FEED_MIME)], Json(feed)).into_response() ([(header::CONTENT_TYPE, JSON_FEED_MIME)], Json(feed)).into_response()
}
}
/// Atom feed format: a wrapper around [`FeedData`] whose `Display` impl
/// writes the feed as an Atom XML document.
pub struct AtomFeed(FeedData);
// Renders the wrapped `FeedData` as an Atom XML document.
//
// We write XML manually here, because existing crates (`feed-rs` and `atom_syndication`)
// pull in the heavy `quick_xml` and `chrono` crates, which is overkill for our needs.
//
// Ref: <https://validator.w3.org/feed/docs/atom.html>
impl fmt::Display for AtomFeed {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use html_escape::{encode_quoted_attribute, encode_text};
let FeedData {
rid,
title,
msgs,
self_url,
next_url,
} = &self.0;
// The feed-level `<id>` is a tag URI built from the room id.
let room_id = TagUri(self_url, "room", *rid);
let esc_room_title = encode_text(title.trim());
// Feed `<updated>` comes from the first message in `msgs`, falling back to
// timestamp 0 (the Unix epoch) when this page has no messages.
// TODO: This should track the latest msg even for non-first page.
let feed_timestamp =
timestamp_to_rfc3339(msgs.first().map_or(0, |msg| msg.msg.signee.timestamp));
let esc_self_url = encode_quoted_attribute(self_url.as_str());
write!(
f,
// NB. XML requires no-newline at start.
r#"<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<id>{room_id}</id>
<title>{esc_room_title}</title>
<updated>{feed_timestamp}</updated>
<link rel="self" type="application/atom+xml" href="{esc_self_url}"/>
"#
)?;
// Pagination link, emitted only when a next-page URL was supplied.
if let Some(next_url) = &next_url {
let esc_next_url = encode_quoted_attribute(next_url.as_str());
// <https://www.rfc-editor.org/rfc/rfc5005>
writeln!(
f,
r#" <link rel="next" type="application/atom+xml" href="{esc_next_url}"/>"#
)?;
}
// TODO: HTML links.
// One `<entry>` per message, in the order they appear in `msgs`.
for msg in msgs {
let content = &msg.msg.signee.payload.rich_text;
let msg_id = TagUri(self_url, "msg", msg.cid);
// Entry title is the trimmed first line of the plain-text rendering;
// "(untitled)" is used only when the plain text has no lines at all.
let plain_text = content.plain_text().to_string();
let esc_msg_title =
encode_text(plain_text.lines().next().unwrap_or("(untitled)").trim());
// The same timestamp is written to both `<published>` and `<updated>`.
let msg_timestamp = timestamp_to_rfc3339(msg.msg.signee.timestamp);
// NOTE(review): `author` is interpolated without escaping — presumably
// `id_key` only renders XML-safe characters; confirm.
let author = msg.msg.signee.user.id_key.to_string();
// The HTML rendering is text-escaped so it can be embedded as the text
// of `<content type="html">`.
let content = content.html().to_string();
let esc_content = encode_text(&content);
write!(
f,
r#"
<entry>
<id>{msg_id}</id>
<title type="text">{esc_msg_title}</title>
<published>{msg_timestamp}</published>
<updated>{msg_timestamp}</updated>
<author><name>{author}</name></author>
<content type="html">{esc_content}</content>
</entry>
"#
)?;
}
// Close the root element.
write!(
f,
r#"
</feed>
"#
)
}
}
/// Serves the Atom rendering of the feed data with the `ATOM_FEED_MIME`
/// content type.
impl FeedType for AtomFeed {
fn to_feed_response(data: FeedData) -> Response {
// Render the whole document into a `String` through the `Display` impl.
let body = AtomFeed(data).to_string();
([(header::CONTENT_TYPE, ATOM_FEED_MIME)], body).into_response()
}
} }

View file

@ -5,6 +5,10 @@ use std::time::SystemTime;
use blah_types::Id; use blah_types::Id;
/// Extract the timestamp embedded in an [`Id`], in whole seconds.
///
/// The low 16 bits of the raw id are discarded and the remaining value is
/// divided by 1000, truncating any remainder.
pub fn timestamp_of_id(id: Id) -> u64 {
    // Cast first, then shift: `as` binds tighter than `>>`.
    let raw = id.0 as u64;
    let high_bits = raw >> 16;
    high_bits / 1000
}
pub trait IdExt { pub trait IdExt {
fn gen() -> Self; fn gen() -> Self;
fn gen_peer_chat_rid() -> Self; fn gen_peer_chat_rid() -> Self;

View file

@ -17,6 +17,7 @@ use blah_types::{
X_BLAH_NONCE, X_BLAH_NONCE,
}; };
use database::{Transaction, TransactionOps}; use database::{Transaction, TransactionOps};
use feed::FeedData;
use id::IdExt; use id::IdExt;
use middleware::{Auth, MaybeAuth, ResultExt as _, SignedJson}; use middleware::{Auth, MaybeAuth, ResultExt as _, SignedJson};
use parking_lot::Mutex; use parking_lot::Mutex;
@ -69,6 +70,9 @@ fn de_base_url<'de, D: Deserializer<'de>>(de: D) -> Result<Url, D::Error> {
"base_url must be able to be a base", "base_url must be able to be a base",
)); ));
} }
if url.domain().is_none() {
return Err(serde::de::Error::custom("base_url must have a domain"));
}
Ok(url) Ok(url)
} }
@ -137,7 +141,8 @@ pub fn router(st: Arc<AppState>) -> Router {
.route("/room", get(room_list)) .route("/room", get(room_list))
.route("/room/create", post(room_create)) .route("/room/create", post(room_create))
.route("/room/:rid", get(room_get_metadata).delete(room_delete)) .route("/room/:rid", get(room_get_metadata).delete(room_delete))
.route("/room/:rid/feed.json", get(room_get_feed)) .route("/room/:rid/feed.json", get(room_get_feed::<feed::JsonFeed>))
.route("/room/:rid/feed.atom", get(room_get_feed::<feed::AtomFeed>))
.route("/room/:rid/msg", get(room_msg_list).post(room_msg_post)) .route("/room/:rid/msg", get(room_msg_list).post(room_msg_post))
.route("/room/:rid/msg/:cid/seen", post(room_msg_mark_seen)) .route("/room/:rid/msg/:cid/seen", post(room_msg_mark_seen))
.route("/room/:rid/admin", post(room_admin)) .route("/room/:rid/admin", post(room_admin))
@ -424,7 +429,7 @@ async fn room_get_metadata(
})) }))
} }
async fn room_get_feed( async fn room_get_feed<FT: feed::FeedType>(
st: ArcState, st: ArcState,
R(OriginalUri(req_uri), _): RE<OriginalUri>, R(OriginalUri(req_uri), _): RE<OriginalUri>,
R(Path(rid), _): RE<Path<Id>>, R(Path(rid), _): RE<Path<Id>>,
@ -470,7 +475,13 @@ async fn room_get_feed(
next_url next_url
}); });
Ok(feed::to_json_feed(title, msgs, self_url, next_url)) Ok(FT::to_feed_response(FeedData {
rid,
title,
msgs,
self_url,
next_url,
}))
} }
/// Get room messages with pagination parameters, /// Get room messages with pagination parameters,

View file

@ -726,15 +726,17 @@ async fn room_chat_post_read(server: Server) {
} }
#[rstest] #[rstest]
#[case::json("json")]
#[case::atom("atom")]
#[tokio::test] #[tokio::test]
async fn room_feed(server: Server) { async fn room_feed(server: Server, #[case] typ: &'static str) {
// Only public readable rooms provides feed. Not even for public joinable ones. // Only public readable rooms provides feed. Not even for public joinable ones.
let rid_need_join = server let rid_need_join = server
.create_room(&ALICE, RoomAttrs::PUBLIC_JOINABLE, "not so public") .create_room(&ALICE, RoomAttrs::PUBLIC_JOINABLE, "not so public")
.await .await
.unwrap(); .unwrap();
server server
.get::<NoContent>(&format!("/room/{rid_need_join}/feed.json"), None) .get::<NoContent>(&format!("/room/{rid_need_join}/feed.{typ}"), None)
.await .await
.expect_api_err(StatusCode::NOT_FOUND, "room_not_found"); .expect_api_err(StatusCode::NOT_FOUND, "room_not_found");
@ -754,6 +756,7 @@ async fn room_feed(server: Server) {
let cid2 = server.post_chat(rid, &BOB, "b1").await.unwrap().cid; let cid2 = server.post_chat(rid, &BOB, "b1").await.unwrap().cid;
server.post_chat(rid, &BOB, "b2").await.unwrap(); server.post_chat(rid, &BOB, "b2").await.unwrap();
if typ == "json" {
let feed = server let feed = server
.get::<serde_json::Value>(&format!("/room/{rid}/feed.json"), None) .get::<serde_json::Value>(&format!("/room/{rid}/feed.json"), None)
.await .await
@ -778,6 +781,21 @@ async fn room_feed(server: Server) {
let items = feed2["items"].as_array().unwrap(); let items = feed2["items"].as_array().unwrap();
assert_eq!(items.len(), 1); assert_eq!(items.len(), 1);
assert_eq!(items[0]["content_html"].as_str().unwrap(), "a"); assert_eq!(items[0]["content_html"].as_str().unwrap(), "a");
} else {
let resp = server
.client
.get(server.url(format!("/room/{rid}/feed.atom")))
.send()
.await
.unwrap()
.error_for_status()
.unwrap()
.text()
.await
.unwrap();
assert!(resp.starts_with(r#"<?xml version="1.0" encoding="utf-8"?>"#));
assert_eq!(resp.matches("<entry>").count(), 2);
}
} }
#[rstest] #[rstest]