From bdc3d404ef63db626d3c676ece92518f9a71a00f Mon Sep 17 00:00:00 2001 From: "andy.boot" Date: Wed, 15 Jan 2020 19:51:16 +0000 Subject: [PATCH 1/3] Support excluding filesystems with -x https://github.com/bootandy/dust/issues/50 Add optional -x flag to limit search to the current filesystem. Add (untested) support for windows for the equivalent of inode and device. --- src/main.rs | 14 +++++++++++++- src/utils/mod.rs | 33 +++++++++++++++++++++++++++++++-- src/utils/platform.rs | 31 ++++++++++++++++++++++++++++++- 3 files changed, 74 insertions(+), 4 deletions(-) diff --git a/src/main.rs b/src/main.rs index d8b2f56..9e63ede 100644 --- a/src/main.rs +++ b/src/main.rs @@ -54,6 +54,12 @@ fn main() { .long("full-paths") .help("If set sub directories will not have their path shortened"), ) + .arg( + Arg::with_name("limit_filesystem") + .short("x") + .long("limit-filesystem") + .help("Only count the files and directories in the same filesystem as the supplied directory"), + ) .arg( Arg::with_name("display_apparent_size") .short("s") @@ -110,9 +116,15 @@ fn main() { } let use_apparent_size = options.is_present("display_apparent_size"); + let limit_filesystem = options.is_present("limit_filesystem"); let simplified_dirs = simplify_dir_names(target_dirs); - let (permissions, nodes) = get_dir_tree(&simplified_dirs, use_apparent_size, threads); + let (permissions, nodes) = get_dir_tree( + &simplified_dirs, + use_apparent_size, + limit_filesystem, + threads, + ); let sorted_data = sort(nodes); let biggest_ones = { match depth { diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 0d337ea..dc742ea 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -37,7 +37,8 @@ impl PartialEq for Node { } pub fn is_a_parent_of(parent: &str, child: &str) -> bool { - (child.starts_with(parent) && child.chars().nth(parent.chars().count()) == Some('/')) || parent == "/" + (child.starts_with(parent) && child.chars().nth(parent.chars().count()) == Some('/')) + || parent == "/" } pub fn 
simplify_dir_names(filenames: Vec<&str>) -> HashSet { @@ -69,16 +70,23 @@ pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet { pub fn get_dir_tree( top_level_names: &HashSet, apparent_size: bool, + limit_filesystem: bool, threads: Option, ) -> (bool, HashMap) { let mut permissions = 0; let mut inodes: HashSet<(u64, u64)> = HashSet::new(); let mut data: HashMap = HashMap::new(); + let restricted_filesystems = if limit_filesystem { + get_allowed_filesystems(top_level_names) + } else { + None + }; for b in top_level_names.iter() { examine_dir( &b, apparent_size, + &restricted_filesystems, &mut inodes, &mut data, &mut permissions, @@ -88,6 +96,16 @@ pub fn get_dir_tree( (permissions == 0, data) } +fn get_allowed_filesystems(top_level_names: &HashSet) -> Option> { + let mut limit_filesystems: HashSet = HashSet::new(); + for file_name in top_level_names.iter() { + if let Some(a) = get_filesystem(file_name) { + limit_filesystems.insert(a); + } + } + Some(limit_filesystems) +} + pub fn strip_end_slash(mut new_name: &str) -> &str { while (new_name.ends_with('/') || new_name.ends_with("/.")) && new_name.len() > 1 { new_name = &new_name[..new_name.len() - 1]; @@ -98,6 +116,7 @@ pub fn strip_end_slash(mut new_name: &str) -> &str { fn examine_dir( top_dir: &str, apparent_size: bool, + restricted_filesystems: &Option>, inodes: &mut HashSet<(u64, u64)>, data: &mut HashMap, file_count_no_permission: &mut u64, @@ -117,6 +136,16 @@ fn examine_dir( Some((size, maybe_inode)) => { if !apparent_size { if let Some(inode_dev_pair) = maybe_inode { + // Ignore files on different devices (if flag applied) + if restricted_filesystems.is_some() + && !restricted_filesystems + .as_ref() + .unwrap() + .contains(&inode_dev_pair.1) + { + continue; + } + // Ignore files already visited or symlinked if inodes.contains(&inode_dev_pair) { continue; } @@ -128,7 +157,7 @@ fn examine_dir( // This is required due to bug in Jwalk that adds '/' to all sub dir lists // see: 
https://github.com/jessegrosjean/jwalk/issues/13 if path_name.to_string_lossy() == "/" && top_dir != "/" { - continue + continue; } let path_name = path_name.to_string_lossy(); let s = data.entry(path_name.to_string()).or_insert(0); diff --git a/src/utils/platform.rs b/src/utils/platform.rs index 86276dc..adb88fa 100644 --- a/src/utils/platform.rs +++ b/src/utils/platform.rs @@ -1,4 +1,5 @@ use jwalk::DirEntry; +use std::fs; #[cfg(target_family = "unix")] fn get_block_size() -> u64 { @@ -20,7 +21,16 @@ pub fn get_metadata(d: &DirEntry, use_apparent_size: bool) -> Option<(u64, Optio }) } -#[cfg(not(target_family = "unix"))] +#[cfg(target_family = "windows")] +pub fn get_metadata(d: &DirEntry, use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> { + use std::os::windows::fs::MetadataExt; + d.metadata.as_ref().unwrap().as_ref().ok().map(|md| { + let windows_equivalent_of_inode = Some((md.file_index(), md.volume_serial_number())); + (md.file_size(), windows_equivalent_of_inode) + }) +} + +#[cfg(all(not(target_family = "windows"), not(target_family = "unix")))] pub fn get_metadata(d: &DirEntry, _apparent: bool) -> Option<(u64, Option<(u64, u64)>)> { d.metadata .as_ref() @@ -29,3 +39,22 @@ pub fn get_metadata(d: &DirEntry, _apparent: bool) -> Option<(u64, Option<(u64, .ok() .map(|md| (md.len(), None)) } + +#[cfg(target_family = "unix")] +pub fn get_filesystem(file_path: &str) -> Option { + use std::os::unix::fs::MetadataExt; + let metadata = fs::metadata(file_path).unwrap(); + Some(metadata.dev()) +} + +#[cfg(target_family = "windows")] +pub fn get_device(file_path: &str) -> Option { + use std::os::windows::fs::MetadataExt; + let metadata = fs::metadata(file_path).unwrap(); + Some(metadata.volume_serial_number()) +} + +#[cfg(all(not(target_family = "windows"), not(target_family = "unix")))] +pub fn get_device(file_path: &str) -> Option { + None +} From 5541df6a73f3de3996be53d2f667d5debfa2ad02 Mon Sep 17 00:00:00 2001 From: "andy.boot" Date: Wed, 15 Jan 2020 
20:10:33 +0000 Subject: [PATCH 2/3] Refactor code Reduce complexity of examine_dir. No logic changes --- src/utils/mod.rs | 85 ++++++++++++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/src/utils/mod.rs b/src/utils/mod.rs index dc742ea..55d50d6 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,3 +1,4 @@ +use jwalk::DirEntry; use std::cmp::Ordering; use std::collections::HashMap; use std::collections::HashSet; @@ -116,7 +117,7 @@ pub fn strip_end_slash(mut new_name: &str) -> &str { fn examine_dir( top_dir: &str, apparent_size: bool, - restricted_filesystems: &Option>, + filesystems: &Option>, inodes: &mut HashSet<(u64, u64)>, data: &mut HashMap, file_count_no_permission: &mut u64, @@ -134,37 +135,8 @@ fn examine_dir( match maybe_size_and_inode { Some((size, maybe_inode)) => { - if !apparent_size { - if let Some(inode_dev_pair) = maybe_inode { - // Ignore files on different devices (if flag applied) - if restricted_filesystems.is_some() - && !restricted_filesystems - .as_ref() - .unwrap() - .contains(&inode_dev_pair.1) - { - continue; - } - // Ignore files already visited or symlinked - if inodes.contains(&inode_dev_pair) { - continue; - } - inodes.insert(inode_dev_pair); - } - } - // This path and all its parent paths have their counter incremented - for path_name in e.path().ancestors() { - // This is required due to bug in Jwalk that adds '/' to all sub dir lists - // see: https://github.com/jessegrosjean/jwalk/issues/13 - if path_name.to_string_lossy() == "/" && top_dir != "/" { - continue; - } - let path_name = path_name.to_string_lossy(); - let s = data.entry(path_name.to_string()).or_insert(0); - *s += size; - if path_name == top_dir { - break; - } + if !should_ignore_file(apparent_size, filesystems, inodes, maybe_inode) { + process_file_with_size_and_inode(top_dir, data, e, size) } } None => *file_count_no_permission += 1, @@ -175,6 +147,55 @@ fn examine_dir( } } +fn should_ignore_file( + 
apparent_size: bool, + restricted_filesystems: &Option>, + inodes: &mut HashSet<(u64, u64)>, + maybe_inode: Option<(u64, u64)>, +) -> bool { + if !apparent_size { + if let Some(inode_dev_pair) = maybe_inode { + // Ignore files on different devices (if flag applied) + if restricted_filesystems.is_some() + && !restricted_filesystems + .as_ref() + .unwrap() + .contains(&inode_dev_pair.1) + { + return true; + } + // Ignore files already visited or symlinked + if inodes.contains(&inode_dev_pair) { + return true; + } + inodes.insert(inode_dev_pair); + } + } + false +} + +fn process_file_with_size_and_inode( + top_dir: &str, + data: &mut HashMap, + e: DirEntry, + size: u64, +) { + // This path and all its parent paths have their counter incremented + for path_name in e.path().ancestors() { + // This is required due to bug in Jwalk that adds '/' to all sub dir lists + // see: https://github.com/jessegrosjean/jwalk/issues/13 + if path_name.to_string_lossy() == "/" && top_dir != "/" { + continue; + } + let path_name = path_name.to_string_lossy(); + let s = data.entry(path_name.to_string()).or_insert(0); + *s += size; + if path_name == top_dir { + break; + } + } +} + pub fn sort_by_size_first_name_second(a: &(String, u64), b: &(String, u64)) -> Ordering { let result = b.1.cmp(&a.1); if result == Ordering::Equal { From b9c27f98384567c7a71284742831ff1e9bc1256b Mon Sep 17 00:00:00 2001 From: "andy.boot" Date: Thu, 16 Jan 2020 23:50:56 +0000 Subject: [PATCH 3/3] get_filesystem returns Result instead of option Removes unwrap and returns a Result instead of panicking if an invalid path is given. 
Previously, if the '-x' flag was passed together with an invalid directory argument, the code would panic here --- src/utils/mod.rs | 2 +- src/utils/platform.rs | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 55d50d6..0f42788 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -100,7 +100,7 @@ pub fn get_dir_tree( fn get_allowed_filesystems(top_level_names: &HashSet) -> Option> { let mut limit_filesystems: HashSet = HashSet::new(); for file_name in top_level_names.iter() { - if let Some(a) = get_filesystem(file_name) { + if let Ok(a) = get_filesystem(file_name) { limit_filesystems.insert(a); } } diff --git a/src/utils/platform.rs b/src/utils/platform.rs index adb88fa..43d1a66 100644 --- a/src/utils/platform.rs +++ b/src/utils/platform.rs @@ -1,5 +1,6 @@ use jwalk::DirEntry; use std::fs; +use std::io; #[cfg(target_family = "unix")] fn get_block_size() -> u64 { @@ -41,20 +42,20 @@ pub fn get_metadata(d: &DirEntry, _apparent: bool) -> Option<(u64, Option<(u64, } #[cfg(target_family = "unix")] -pub fn get_filesystem(file_path: &str) -> Option { +pub fn get_filesystem(file_path: &str) -> Result { use std::os::unix::fs::MetadataExt; - let metadata = fs::metadata(file_path).unwrap(); - Some(metadata.dev()) + let metadata = fs::metadata(file_path)?; + Ok(metadata.dev()) } #[cfg(target_family = "windows")] -pub fn get_device(file_path: &str) -> Option { +pub fn get_device(file_path: &str) -> Result { use std::os::windows::fs::MetadataExt; - let metadata = fs::metadata(file_path).unwrap(); - Some(metadata.volume_serial_number()) + let metadata = fs::metadata(file_path)?; + Ok(metadata.volume_serial_number()) } #[cfg(all(not(target_family = "windows"), not(target_family = "unix")))] -pub fn get_device(file_path: &str) -> Option { +pub fn get_device(file_path: &str) -> Result { None }