diff --git a/Cargo.lock b/Cargo.lock index cf90261..d9e2c9a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -31,9 +31,9 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d20831bd004dda4c7c372c19cdabff369f794a95e955b3f13fe460e3e1ae95f" +checksum = "c98233c6673d8601ab23e77eb38f999c51100d46c5703b17288c57fddf3a1ffe" dependencies = [ "bstr", "doc-comment", @@ -62,9 +62,9 @@ checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" [[package]] name = "bitflags" -version = "1.2.1" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bstr" @@ -111,9 +111,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9" +checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" dependencies = [ "cfg-if", "crossbeam-epoch", @@ -164,6 +164,7 @@ dependencies = [ "clap", "lscolors", "rayon", + "regex", "stfu8", "tempfile", "terminal_size", @@ -215,9 +216,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.98" +version = "0.2.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790" +checksum = "3cb00336871be5ed2c8ed44b60ae9959dc5b9f08539422ed43f09e34ecaeba21" [[package]] name = "lscolors" @@ -230,9 +231,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" [[package]] name = "memoffset" @@ -261,9 +262,9 @@ checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" [[package]] name = "predicates" -version = "2.0.0" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6e46ca79eb4e21e2ec14430340c71250ab69332abf85521c95d3a8bc336aa76" +checksum = "c143348f141cc87aab5b950021bac6145d0e5ae754b0591de23244cee42c9308" dependencies = [ "difflib", "itertools", @@ -278,9 +279,9 @@ checksum = "57e35a3326b75e49aa85f5dc6ec15b41108cf5aee58eabb1f274dd18b73c2451" [[package]] name = "predicates-tree" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15f553275e5721409451eb85e15fd9a860a6e5ab4496eb215987502b5f5391f2" +checksum = "d7dd0fd014130206c9352efbdc92be592751b2b9274dff685348341082c6ea3d" dependencies = [ "predicates-core", "treeline", @@ -353,9 +354,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.2.9" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ab49abadf3f9e1c4bc499e8845e152ad87d2ad2d30371841171169e9d75feee" +checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" dependencies = [ "bitflags", ] diff --git a/Cargo.toml b/Cargo.toml index 3656aed..39040a0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ unicode-width = "0.1" rayon="1" thousands = "0.2" stfu8 = "0.2" +regex = "1" [target.'cfg(windows)'.dependencies] winapi-util = "0.1" diff --git a/README.md b/README.md index 14061aa..66fca61 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,8 @@ Usage: dust -b (do not show percentages or draw ASCII bars) Usage: dust -i (do not show hidden files) Usage: dust -c (No colors [monochrome]) Usage: dust -f (Count files instead of diskspace) +Usage: dust -t Group by filetype +Usage: dust -e regex Only include files matching this regex (eg dust -e "\.png$" would match png files) ``` diff --git a/src/dir_walker.rs b/src/dir_walker.rs index 7b6ba08..5dc4889 100644 --- a/src/dir_walker.rs +++ b/src/dir_walker.rs @@ -1,8 +1,10 @@ use std::fs; use crate::node::Node; +use crate::utils::is_filtered_out_due_to_regex; use rayon::iter::ParallelBridge; use rayon::prelude::ParallelIterator; +use regex::Regex; use std::path::PathBuf; use std::sync::atomic; @@ -17,6 +19,7 @@ use crate::platform::get_metadata; pub struct WalkData { pub ignore_directories: HashSet, + pub filter_regex: Option, pub allowed_filesystems: HashSet, pub use_apparent_size: bool, pub by_filecount: bool, @@ -84,6 +87,15 @@ fn ignore_file(entry: &DirEntry, walk_data: &WalkData) -> bool { } } } + + // Keeping `walk_data.filter_regex.is_some()` is important for performance reasons, it stops unnecessary work + if walk_data.filter_regex.is_some() + && entry.path().is_file() + && is_filtered_out_due_to_regex(&walk_data.filter_regex, &entry.path()) + { + return true; + } + (is_dot_file && walk_data.ignore_hidden) || is_ignored_path } @@ -110,8 +122,10 @@ fn walk(dir: PathBuf, permissions_flag: &AtomicBool, walk_data: &WalkData) -> Op return build_node( entry.path(), vec![], + &walk_data.filter_regex, walk_data.use_apparent_size, data.is_symlink(), + data.is_file(), walk_data.by_filecount, ); } @@ -128,8 +142,10 @@ fn walk(dir: PathBuf, permissions_flag: &AtomicBool, walk_data: &WalkData) -> Op build_node( dir, children, + &walk_data.filter_regex, walk_data.use_apparent_size, false, + false, walk_data.by_filecount, ) } diff --git a/src/display.rs b/src/display.rs index 5477212..e980673 100644 --- a/src/display.rs +++ b/src/display.rs @@ -107,7 +107,6 @@ impl DrawData<'_> { #[allow(clippy::too_many_arguments)] pub fn draw_it( - permission_error: bool, use_full_path: bool, is_reversed: bool, no_colors: bool, @@ -116,9 +115,6 @@ pub fn draw_it( by_filecount: bool, option_root_node: Option, ) { - if permission_error { - eprintln!("Did not have permissions for all directories"); - } if option_root_node.is_none() { return; } diff --git a/src/filter.rs b/src/filter.rs index 623abe3..754bbd8 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -1,6 +1,7 @@ use crate::display_node::DisplayNode; use crate::node::Node; use std::collections::BinaryHeap; +use std::collections::HashMap; use std::collections::HashSet; use std::path::PathBuf; @@ -13,7 +14,11 @@ pub fn get_by_depth(top_level_nodes: Vec, n: usize) -> Option Some(build_by_depth(&root, n - 1)) } -pub fn get_biggest(top_level_nodes: Vec, n: usize) -> Option { +pub fn get_biggest( + top_level_nodes: Vec, + n: usize, + using_file_type_filter: bool, +) -> Option { if top_level_nodes.is_empty() { // perhaps change this, bring back Error object? return None; @@ -22,18 +27,17 @@ pub fn get_biggest(top_level_nodes: Vec, n: usize) -> Option let mut heap = BinaryHeap::new(); let number_top_level_nodes = top_level_nodes.len(); let root = get_new_root(top_level_nodes); - - root.children.iter().for_each(|c| heap.push(c)); - let mut allowed_nodes = HashSet::new(); + allowed_nodes.insert(&root.name); + heap = add_children(using_file_type_filter, &root, heap); for _ in number_top_level_nodes..n { let line = heap.pop(); match line { Some(line) => { - line.children.iter().for_each(|c| heap.push(c)); allowed_nodes.insert(&line.name); + heap = add_children(using_file_type_filter, line, heap); } None => break, } @@ -41,6 +45,72 @@ pub fn get_biggest(top_level_nodes: Vec, n: usize) -> Option recursive_rebuilder(&allowed_nodes, &root) } +pub fn get_all_file_types(top_level_nodes: Vec, n: usize) -> Option { + let mut map: HashMap = HashMap::new(); + build_by_all_file_types(top_level_nodes, &mut map); + let mut by_types: Vec = map.into_iter().map(|(_k, v)| v).collect(); + by_types.sort(); + by_types.reverse(); + + let displayed = if by_types.len() <= n { + by_types + } else { + let (displayed, rest) = by_types.split_at(if n > 1 { n - 1 } else { 1 }); + let remaining = DisplayNode { + name: PathBuf::from("(others)"), + size: rest.iter().map(|a| a.size).sum(), + children: vec![], + }; + + let mut displayed = displayed.to_vec(); + displayed.push(remaining); + displayed + }; + + let result = DisplayNode { + name: PathBuf::from("(total)"), + size: displayed.iter().map(|a| a.size).sum(), + children: displayed, + }; + Some(result) +} + +fn add_children<'a>( + using_file_type_filter: bool, + line: &'a Node, + mut heap: BinaryHeap<&'a Node>, +) -> BinaryHeap<&'a Node> { + if using_file_type_filter { + line.children.iter().for_each(|c| { + if c.name.is_file() || c.size > 0 { + heap.push(c) + } + }); + } else { + line.children.iter().for_each(|c| heap.push(c)); + } + heap +} + +fn build_by_all_file_types(top_level_nodes: Vec, counter: &mut HashMap) { + for node in top_level_nodes { + if node.name.is_file() { + let ext = node.name.extension(); + let key: String = match ext { + Some(e) => ".".to_string() + &e.to_string_lossy(), + None => "(no extension)".into(), + }; + let mut display_node = counter.entry(key.clone()).or_insert(DisplayNode { + name: PathBuf::from(key), + size: 0, + children: vec![], + }); + display_node.size += node.size; + } + build_by_all_file_types(node.children, counter) + } +} + fn build_by_depth(node: &Node, depth: usize) -> DisplayNode { let new_children = { if depth == 0 { diff --git a/src/main.rs b/src/main.rs index 4601a3f..73cacf9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,15 +1,18 @@ #[macro_use] extern crate clap; extern crate rayon; +extern crate regex; extern crate unicode_width; use std::collections::HashSet; +use std::process; use self::display::draw_it; use clap::{App, AppSettings, Arg}; use dir_walker::walk_it; use dir_walker::WalkData; -use filter::{get_biggest, get_by_depth}; +use filter::{get_all_file_types, get_biggest, get_by_depth}; +use regex::Regex; use std::cmp::max; use std::path::PathBuf; use terminal_size::{terminal_size, Height, Width}; @@ -151,6 +154,23 @@ fn main() { .long("ignore_hidden") .help("Do not display hidden files"), ) + .arg( + Arg::with_name("filter") + .short("e") + .long("filter") + .takes_value(true) + .number_of_values(1) + .multiple(true) + .conflicts_with("types") + .help("Only include files matching this regex. For png files type: -e \"\\.png$\" "), + ) + .arg( + Arg::with_name("types") + .short("t") + .long("file_types") + .conflicts_with("depth") + .help("show only these file types"), + ) .arg( Arg::with_name("width") .short("w") @@ -169,6 +189,20 @@ fn main() { } }; + let summarize_file_types = options.is_present("types"); + + let maybe_filter = if options.is_present("filter") { + match Regex::new(options.value_of("filter").unwrap()) { + Ok(r) => Some(r), + Err(e) => { + eprintln!("Ignoring bad value for filter {:?}", e); + process::exit(1); + } + } + } else { + None + }; + let number_of_lines = match value_t!(options.value_of("number_of_lines"), usize) { Ok(v) => v, Err(_) => { @@ -217,23 +251,34 @@ fn main() { let walk_data = WalkData { ignore_directories: ignored_full_path, + filter_regex: maybe_filter, allowed_filesystems, use_apparent_size, by_filecount, ignore_hidden, }; - let (nodes, errors) = walk_it(simplified_dirs, walk_data); + let (top_level_nodes, has_errors) = walk_it(simplified_dirs, walk_data); let tree = { - match depth { - None => get_biggest(nodes, number_of_lines), - Some(depth) => get_by_depth(nodes, depth), + match (depth, summarize_file_types) { + (_, true) => get_all_file_types(top_level_nodes, number_of_lines), + (Some(depth), _) => get_by_depth(top_level_nodes, depth), + (_, _) => get_biggest( + top_level_nodes, + number_of_lines, + options.values_of("filter").is_some(), + ), } }; + if options.is_present("filter") { + println!("Filtering by: {}", options.value_of("filter").unwrap()); + } + if has_errors { + eprintln!("Did not have permissions for all directories"); + } draw_it( - errors, options.is_present("display_full_paths"), !options.is_present("reverse"), no_colors, diff --git a/src/node.rs b/src/node.rs index 320c077..ff391ab 100644 --- a/src/node.rs +++ b/src/node.rs @@ -1,5 +1,7 @@ use crate::platform::get_metadata; +use crate::utils::is_filtered_out_due_to_regex; +use regex::Regex; use std::cmp::Ordering; use std::path::PathBuf; @@ -14,18 +16,29 @@ pub struct Node { pub fn build_node( dir: PathBuf, children: Vec, + filter_regex: &Option, use_apparent_size: bool, is_symlink: bool, + is_file: bool, by_filecount: bool, ) -> Option { match get_metadata(&dir, use_apparent_size) { Some(data) => { - let (size, inode_device) = if by_filecount { - (1, data.1) - } else if is_symlink && !use_apparent_size { - (0, None) + let inode_device = if is_symlink && !use_apparent_size { + None } else { - data + data.1 + }; + + let size = if is_filtered_out_due_to_regex(filter_regex, &dir) + || (is_symlink && !use_apparent_size) + || by_filecount && !is_file + { + 0 + } else if by_filecount { + 1 + } else { + data.0 }; Some(Node { diff --git a/src/utils.rs b/src/utils.rs index 44052d1..cf91b86 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -3,12 +3,7 @@ use std::collections::HashSet; use std::path::{Path, PathBuf}; use crate::platform; - -fn is_a_parent_of>(parent: P, child: P) -> bool { - let parent = parent.as_ref(); - let child = child.as_ref(); - child.starts_with(parent) && !parent.starts_with(child) -} +use regex::Regex; pub fn simplify_dir_names>(filenames: Vec

) -> HashSet { let mut top_level_names: HashSet = HashSet::with_capacity(filenames.len()); @@ -62,6 +57,19 @@ pub fn normalize_path>(path: P) -> PathBuf { path.as_ref().components().collect::() } +pub fn is_filtered_out_due_to_regex(filter_regex: &Option, dir: &Path) -> bool { + match filter_regex { + Some(fr) => !fr.is_match(&dir.as_os_str().to_string_lossy()), + None => false, + } +} + +fn is_a_parent_of>(parent: P, child: P) -> bool { + let parent = parent.as_ref(); + let child = child.as_ref(); + child.starts_with(parent) && !parent.starts_with(child) +} + mod tests { #[allow(unused_imports)] use super::*; diff --git a/tests/test_flags.rs b/tests/test_flags.rs index 6e9db38..5518552 100644 --- a/tests/test_flags.rs +++ b/tests/test_flags.rs @@ -60,7 +60,7 @@ pub fn test_d_flag_works_and_still_recurses_down() { // We had a bug where running with '-d 1' would stop at the first directory and the code // would fail to recurse down let output = build_command(vec!["-d", "1", "-f", "-c", "tests/test_dir2/"]); - assert!(output.contains("7 ┌─┴ test_dir2")); + assert!(output.contains("4 ┌─┴ test_dir2")); } // Check against directories and files whos names are substrings of each other @@ -97,8 +97,8 @@ pub fn test_number_of_files() { let output = build_command(vec!["-c", "-f", "tests/test_dir"]); assert!(output.contains("1 ┌── a_file ")); assert!(output.contains("1 ├── hello_file")); - assert!(output.contains("3 ┌─┴ many")); - assert!(output.contains("4 ┌─┴ test_dir")); + assert!(output.contains("2 ┌─┴ many")); + assert!(output.contains("2 ┌─┴ test_dir")); } #[cfg_attr(target_os = "windows", ignore)] @@ -116,3 +116,27 @@ pub fn test_apparent_size() { let incorrect_apparent_size = "4.0K ├── hello_file"; assert!(!output.contains(incorrect_apparent_size)); } + +#[test] +pub fn test_show_files_by_type() { + // Check we can list files by type + let output = build_command(vec!["-c", "-t", "tests"]); + assert!(output.contains(" .unicode")); + assert!(output.contains(" .japan")); + assert!(output.contains(" .rs")); + assert!(output.contains(" (no extension)")); + assert!(output.contains("┌─┴ (total)")); +} + +#[test] +pub fn test_show_files_by_specific_type() { + // Check we can see '.rs' files in the tests directory + let output = build_command(vec!["-c", "-e", "\\.rs$", "tests"]); + assert!(output.contains(" ┌─┴ tests")); + assert!(!output.contains("0B ┌── tests")); + assert!(!output.contains("0B ┌─┴ tests")); + + // Check there are no '.bad_type' files in the tests directory + let output = build_command(vec!["-c", "-e", "bad_regex", "tests"]); + assert!(output.contains("0B ┌── tests")); +}