From 8a9b5e889df8795599d6c3db3abf90d6ef7ce418 Mon Sep 17 00:00:00 2001 From: "andy.boot" Date: Sun, 9 Feb 2020 12:07:05 +0000 Subject: [PATCH 1/5] Refactor path depth calculation Factor out duplicate code, Add comment explaining why filter is necessary (thanks to rivy) --- src/utils/mod.rs | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 5586e29..628c4e3 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -239,6 +239,17 @@ pub fn find_big_ones(new_l: Vec<(PathBuf, u64)>, max_to_show: usize) -> Vec<(Pat } } +fn depth_of_path(name: &PathBuf) -> usize { + // Filter required as paths can have some odd preliminary + // ("Prefix") bits (for example, from windows, "\\?\" or "\\UNC\") + name.components() + .filter(|&c| match c { + std::path::Component::Prefix(_) => false, + _ => true, + }) + .count() +} + pub fn trim_deep_ones( input: Vec<(PathBuf, u64)>, max_depth: u64, @@ -247,25 +258,10 @@ pub fn trim_deep_ones( let mut result: Vec<(PathBuf, u64)> = Vec::with_capacity(input.len() * top_level_names.len()); for name in top_level_names { - let my_max_depth = name - .components() - .filter(|&c| match c { - std::path::Component::Prefix(_) => false, - _ => true, - }) - .count() - + max_depth as usize; + let my_max_depth = depth_of_path(name) + max_depth as usize; for &(ref k, ref v) in input.iter() { - if k.starts_with(name) - && k.components() - .filter(|&c| match c { - std::path::Component::Prefix(_) => false, - _ => true, - }) - .count() - <= my_max_depth - { + if k.starts_with(name) && depth_of_path(k) <= my_max_depth { result.push((k.clone(), *v)); } } From 2082141dfc7aaf616c9af91851cf7b2c4f09b1ff Mon Sep 17 00:00:00 2001 From: "andy.boot" Date: Sun, 9 Feb 2020 13:36:13 +0000 Subject: [PATCH 2/5] Add tests for should_ignore_file function function currently has a bug that is highlighted by second test --- src/utils/mod.rs | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 
insertions(+) diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 628c4e3..609d522 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -349,4 +349,36 @@ mod tests { assert!(is_a_parent_of("/", "/usr")); assert!(!is_a_parent_of("/", "/")); } + + #[test] + fn test_should_ignore_file() { + let mut files = HashSet::new(); + files.insert((10, 20)); + + assert!(!should_ignore_file(true, &None, &mut files, None)); + + // New file is not known it will be inserted to the hashmp and should not be ignored + let new_fd = (11, 12); + assert!(!should_ignore_file(false, &None, &mut files, Some(new_fd))); + assert!(files.contains(&new_fd)); + + // The same file will be ignored the second time + assert!(should_ignore_file(false, &None, &mut files, Some(new_fd))); + } + + #[test] + fn test_should_ignore_file_on_different_device() { + let mut files = HashSet::new(); + files.insert((10, 20)); + + let mut devices = HashSet::new(); + devices.insert(99); + let od = Some(devices); + + // If we are looking at a different device (disk) and the device flag is set + // then apparent_size is irrelevant - we ignore files on other devices + let new_file = (11, 12); + assert!(should_ignore_file(false, &od, &mut files, Some(new_file))); + assert!(should_ignore_file(true, &od, &mut files, Some(new_file))); + } } From b6aa1378de8b8ba810caa600febf7f9f0c5f6b6f Mon Sep 17 00:00:00 2001 From: "andy.boot" Date: Sun, 9 Feb 2020 13:41:58 +0000 Subject: [PATCH 3/5] Fix bug for devices and apparent size If apparent_size was set and ignore files on other devices was set then the latter flag would not work. 
Fix this bug --- src/utils/mod.rs | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 609d522..0291c14 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -173,17 +173,19 @@ fn should_ignore_file( inodes: &mut HashSet<(u64, u64)>, maybe_inode: Option<(u64, u64)>, ) -> bool { - if !apparent_size { - if let Some(inode_dev_pair) = maybe_inode { - // Ignore files on different devices (if flag applied) - if restricted_filesystems.is_some() - && !restricted_filesystems - .as_ref() - .unwrap() - .contains(&inode_dev_pair.1) - { - return true; - } + + if let Some(inode_dev_pair) = maybe_inode { + // Ignore files on different devices (if flag applied) + if restricted_filesystems.is_some() + && !restricted_filesystems + .as_ref() + .unwrap() + .contains(&inode_dev_pair.1) + { + return true; + } + + if !apparent_size { // Ignore files already visited or symlinked if inodes.contains(&inode_dev_pair) { return true; From be2250d24143b31d528e06a0a91e38dcf82ba4db Mon Sep 17 00:00:00 2001 From: "andy.boot" Date: Sun, 9 Feb 2020 13:48:23 +0000 Subject: [PATCH 4/5] Refactor: use if let instead of is_some --- src/utils/mod.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 0291c14..2ed4dfe 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -173,16 +173,12 @@ fn should_ignore_file( inodes: &mut HashSet<(u64, u64)>, maybe_inode: Option<(u64, u64)>, ) -> bool { - if let Some(inode_dev_pair) = maybe_inode { // Ignore files on different devices (if flag applied) - if restricted_filesystems.is_some() - && !restricted_filesystems - .as_ref() - .unwrap() - .contains(&inode_dev_pair.1) - { - return true; + if let Some(rs) = restricted_filesystems { + if !rs.contains(&inode_dev_pair.1) { + return true; + } } if !apparent_size { @@ -382,5 +378,9 @@ mod tests { let new_file = (11, 12); assert!(should_ignore_file(false, &od, &mut 
files, Some(new_file))); assert!(should_ignore_file(true, &od, &mut files, Some(new_file))); + + // We do not ignore files on the same device + assert!(!should_ignore_file(false, &od, &mut files, Some((2, 99)))); + assert!(!should_ignore_file(true, &od, &mut files, Some((2, 99)))); } } From c408d8887dae315135e980d05653046245260b4f Mon Sep 17 00:00:00 2001 From: "andy.boot" Date: Sun, 9 Feb 2020 13:58:33 +0000 Subject: [PATCH 5/5] Update docs for -X flag. -X flag changed subtly with previous pull request. It now requires a directory name and doesn't work on substrings. --- README.md | 2 +- src/main.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 62b652e..ce7403e 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ Usage: dust -n 30 (Shows 30 directories not 20) Usage: dust -d 3 (Shows 3 levels of subdirectories) Usage: dust -r (Reverse order of output, with root at the lowest) Usage: dust -x (Only show directories on same filesystem) -Usage: dust -X ignore (Ignore all files and directories containing the string 'ignore') +Usage: dust -X ignore (Ignore all files and directories with the name 'ignore') ``` ``` diff --git a/src/main.rs b/src/main.rs index aec5656..583fdd8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -62,7 +62,7 @@ fn main() { .takes_value(true) .number_of_values(1) .multiple(true) - .help("Exclude any file or directory with contains this substring."), + .help("Exclude any file or directory with this name."), ) .arg( Arg::with_name("limit_filesystem")