Skip to content

Commit

Permalink
feat(api): add support for CSV trackers
Browse files Browse the repository at this point in the history
  • Loading branch information
azasypkin committed Oct 26, 2024
1 parent d6cfcc4 commit bda862f
Show file tree
Hide file tree
Showing 6 changed files with 197 additions and 2 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ calamine = "0.26.1"
chrono = { version = "0.4.38", default-features = false }
clap = "4.5.20"
croner = "2.0.6"
csv = "1.3.0"
deno_core = "0.315.0"
dotenvy = "0.15.7"
figment = "0.10.19"
Expand Down
4 changes: 4 additions & 0 deletions dev/fixtures/csv_fixture.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Header N1,Header N2,
Some string,100500,
500100,Some string 2,100
,,Another string
85 changes: 84 additions & 1 deletion src/trackers/api_ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use crate::{
tasks::{EmailContent, EmailTaskType, EmailTemplate, HttpTaskType, TaskType},
trackers::{
database_ext::TrackersDatabaseExt,
parsers::XlsParser,
parsers::{CsvParser, XlsParser},
tracker_data_revisions_diff::tracker_data_revisions_diff,
web_scraper::{WebScraperContentRequest, WebScraperErrorResponse},
},
Expand Down Expand Up @@ -906,6 +906,9 @@ where
Some(ref media_type) if XlsParser::supports(media_type) => {
XlsParser::parse(&response_bytes)?
}
Some(ref media_type) if CsvParser::supports(media_type) => {
CsvParser::parse(&response_bytes)?
}
_ => response_bytes,
};

Expand Down Expand Up @@ -3457,6 +3460,86 @@ mod tests {
Ok(())
}

#[sqlx::test]
async fn properly_saves_api_target_revision_with_parser_csv(
pool: PgPool,
) -> anyhow::Result<()> {
let server = MockServer::start();
let config = mock_config()?;

let api = mock_api_with_config(pool, config).await?;

let trackers = api.trackers();
let tracker = trackers
.create_tracker(
TrackerCreateParams::new("name_one")
.with_schedule("0 0 * * * *")
.with_target(TrackerTarget::Api(ApiTarget {
url: server.url("/api/get-call").parse()?,
method: None,
headers: None,
body: None,
media_type: Some("text/csv".parse()?),
configurator: None,
extractor: None,
})),
)
.await?;

let tracker_data = trackers
.get_tracker_data(tracker.id, Default::default())
.await?;
assert!(tracker_data.is_empty());

let content_mock = server.mock(|when, then| {
when.method(httpmock::Method::GET).path("/api/get-call");
then.status(200)
.header("Content-Type", "text/csv;charset=UTF-8")
.body(load_fixture("csv_fixture.csv").unwrap());
});

trackers.create_tracker_data_revision(tracker.id).await?;
content_mock.assert();

let revs = trackers
.get_tracker_data(tracker.id, Default::default())
.await?;
assert_debug_snapshot!(
revs.into_iter().map(|rev| rev.data).collect::<Vec<_>>(),
@r###"
[
TrackerDataValue {
original: Array [
Array [
String("Header N1"),
String("Header N2"),
String(""),
],
Array [
String("Some string"),
String("100500"),
String(""),
],
Array [
String("500100"),
String("Some string 2"),
String("100"),
],
Array [
String(""),
String(""),
String("Another string"),
],
],
mods: None,
},
]
"###
);

Ok(())
}

#[sqlx::test]
async fn properly_saves_api_target_revision_with_remote_scripts(
pool: PgPool,
Expand Down
3 changes: 2 additions & 1 deletion src/trackers/parsers.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// Parser implementations for the supported downloadable content formats.
mod csv_parser;
mod xls_parser;

// Re-export both parsers so consumers can use `trackers::parsers::{CsvParser, XlsParser}`.
// NOTE: the pre-change `pub use xls_parser::XlsParser;` line must be removed — keeping
// it alongside this combined re-export would be a duplicate import of `XlsParser` (E0252).
pub use self::{csv_parser::CsvParser, xls_parser::XlsParser};
105 changes: 105 additions & 0 deletions src/trackers/parsers/csv_parser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
use bytes::{Buf, Bytes};
use mediatype::{
names::{CSV, TEXT},
MediaType,
};
use tracing::{debug, warn};

/// Parser of the CSV files. Returns JSON representation of the parsed data as a binary
/// data. The JSON structure is a list of rows, each row is a list of cells.
pub struct CsvParser;
impl CsvParser {
    /// Check if the given media type is supported by the parser (`text/csv`,
    /// with or without additional parameters such as `charset`).
    pub fn supports(media_type: &MediaType) -> bool {
        media_type.subty == CSV && media_type.ty == TEXT
    }

    /// Parse the CSV file content and return JSON representation of the parsed data.
    ///
    /// Parsing is lenient: rows may have differing lengths (`flexible`), there is no
    /// special treatment of a header row, and records that fail to parse are logged
    /// and skipped rather than failing the whole file.
    pub fn parse(content: &[u8]) -> anyhow::Result<Bytes> {
        let mut reader = csv::ReaderBuilder::new()
            .flexible(true)
            .has_headers(false)
            .from_reader(content.reader());

        // Collect every successfully parsed record as a row of owned string cells,
        // warning about (and dropping) any record the CSV reader rejects.
        let rows = reader
            .records()
            .enumerate()
            .filter_map(|(index, record)| match record {
                Ok(record) => Some(record.iter().map(str::to_owned).collect::<Vec<_>>()),
                Err(err) => {
                    warn!("Failed to parse CSV record with index {index}: {err:?}");
                    None
                }
            })
            .collect::<Vec<_>>();

        debug!("Parsed CSV file with {} rows.", rows.len());

        // Serialize the row matrix to JSON and hand it back as raw bytes.
        Ok(Bytes::from(serde_json::to_vec(&rows)?))
    }
}

#[cfg(test)]
mod tests {
    use super::CsvParser;
    use crate::tests::load_fixture;
    use insta::assert_json_snapshot;
    use mediatype::MediaTypeBuf;

    #[test]
    fn supports() -> anyhow::Result<()> {
        // Table-driven check: media type string paired with the expected verdict.
        // `text/csv` must match regardless of extra parameters; anything else must not.
        for (media_type, expected) in [
            ("text/csv", true),
            ("text/csv; charset=utf-8", true),
            ("application/json", false),
        ] {
            let media_type = MediaTypeBuf::from_string(media_type.to_string())?;
            assert_eq!(CsvParser::supports(&media_type.to_ref()), expected);
        }

        Ok(())
    }

    #[test]
    fn parse() -> anyhow::Result<()> {
        // Parse the shared fixture and compare the resulting JSON structure against
        // the inline snapshot: a list of rows, each a list of string cells, with
        // empty cells preserved and short rows left as-is (flexible parsing).
        let csv_content = load_fixture("csv_fixture.csv")?;
        let parsed = CsvParser::parse(&csv_content)?;

        assert_json_snapshot!(
            serde_json::from_slice::<serde_json::Value>(&parsed)?,
            @r###"
        [
          [
            "Header N1",
            "Header N2",
            ""
          ],
          [
            "Some string",
            "100500",
            ""
          ],
          [
            "500100",
            "Some string 2",
            "100"
          ],
          [
            "",
            "",
            "Another string"
          ]
        ]
        "###
        );

        Ok(())
    }
}

0 comments on commit bda862f

Please sign in to comment.