Skip to content

Commit

Permalink
Merge pull request #1 from scpike/master
Browse files Browse the repository at this point in the history
Merge PR for nullcount in stats
  • Loading branch information
jqnatividad authored Dec 27, 2020
2 parents 3de6c04 + 89639fb commit 1debdbd
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 0 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions src/cmd/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ stats options:
This requires storing all CSV data in memory.
--median Show the median.
This requires storing all CSV data in memory.
--nullcount Show the number of NULLs.
--nulls Include NULLs in the population size for computing
mean and standard deviation.
-j, --jobs <arg> The number of jobs to run in parallel.
Expand Down Expand Up @@ -76,6 +77,7 @@ struct Args {
flag_cardinality: bool,
flag_median: bool,
flag_nulls: bool,
flag_nullcount: bool,
flag_jobs: usize,
flag_output: Option<String>,
flag_no_headers: bool,
Expand Down Expand Up @@ -209,6 +211,7 @@ impl Args {
range: true,
dist: true,
cardinality: self.flag_cardinality || self.flag_everything,
nullcount: self.flag_nullcount || self.flag_everything,
median: self.flag_median || self.flag_everything,
mode: self.flag_mode || self.flag_everything,
})).take(record_len).collect()
Expand All @@ -223,6 +226,7 @@ impl Args {
if self.flag_median || all { fields.push("median"); }
if self.flag_mode || all { fields.push("mode"); }
if self.flag_cardinality || all { fields.push("cardinality"); }
if self.flag_nullcount || all { fields.push("nullcount"); }
csv::StringRecord::from(fields)
}
}
Expand All @@ -234,6 +238,7 @@ struct WhichStats {
range: bool,
dist: bool,
cardinality: bool,
nullcount: bool,
median: bool,
mode: bool,
}
Expand All @@ -252,6 +257,7 @@ struct Stats {
online: Option<OnlineStats>,
mode: Option<Unsorted<Vec<u8>>>,
median: Option<Unsorted<f64>>,
nullcount: u64,
which: WhichStats,
}

Expand All @@ -271,6 +277,7 @@ impl Stats {
online: online,
mode: mode,
median: median,
nullcount: 0,
which: which,
}
}
Expand All @@ -283,6 +290,7 @@ impl Stats {
self.sum.as_mut().map(|v| v.add(t, sample));
self.minmax.as_mut().map(|v| v.add(t, sample));
self.mode.as_mut().map(|v| v.add(sample.to_vec()));
if sample_type.is_null() { self.nullcount += 1; }
match self.typ {
TUnknown => {}
TNull => {
Expand Down Expand Up @@ -365,6 +373,9 @@ impl Stats {
}
}
}
if self.which.nullcount {
pieces.push(self.nullcount.to_string());
}
csv::StringRecord::from(pieces)
}
}
Expand All @@ -377,6 +388,7 @@ impl Commute for Stats {
self.online.merge(other.online);
self.mode.merge(other.mode);
self.median.merge(other.median);
self.nullcount += other.nullcount;
self.which.merge(other.which);
}
}
Expand Down
6 changes: 6 additions & 0 deletions tests/test_stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ fn setup<S>(name: S, rows: &[&str], headers: bool,

fn get_field_value(wrk: &Workdir, cmd: &mut process::Command, field: &str)
-> String {
if field == "nullcount" { cmd.arg("--nullcount"); }
if field == "median" { cmd.arg("--median"); }
if field == "cardinality" { cmd.arg("--cardinality"); }
if field == "mode" { cmd.arg("--mode"); }
Expand Down Expand Up @@ -181,6 +182,11 @@ stats_tests!(stats_median_even_null, "median",
&["", "1", "2", "3", "4"], "2.5");
stats_tests!(stats_median_mix, "median", &["1", "2.5", "3"], "2.5");

// Tests for the `nullcount` field (requested via `--nullcount`).
// NOTE(review): `stats_tests!` is defined outside this view; the arguments
// appear to be (test name, stats field, column values, expected output) —
// confirm against the macro definition.

// One empty field among three values yields a count of 1.
stats_tests!(stats_nullcount, "nullcount", &["", "1", "2"], "1");
// No empty fields (a non-numeric string is not NULL) yields 0.
stats_tests!(stats_nullcount_none, "nullcount", &["a", "1", "2"], "0");
// A field containing only a space is not counted as NULL.
stats_tests!(stats_nullcount_spacenotnull, "nullcount", &[" ", "1", "2"], "0");
// Every field empty: all three are counted.
stats_tests!(stats_nullcount_all, "nullcount", &["", "", ""], "3");

mod stats_infer_nothing {
// Only test CSV data with headers.
// Empty CSV data with no headers won't produce any statistical analysis.
Expand Down

0 comments on commit 1debdbd

Please sign in to comment.