diff --git a/src/main.rs b/src/main.rs
index d8b2f56..9e63ede 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -54,6 +54,12 @@ fn main() {
                 .long("full-paths")
                 .help("If set sub directories will not have their path shortened"),
         )
+        .arg(
+            Arg::with_name("limit_filesystem")
+                .short("x")
+                .long("limit-filesystem")
+                .help("Only count the files and directories in the same filesystem as the supplied directory"),
+        )
         .arg(
             Arg::with_name("display_apparent_size")
                 .short("s")
@@ -110,9 +116,15 @@ fn main() {
     }
 
     let use_apparent_size = options.is_present("display_apparent_size");
+    let limit_filesystem = options.is_present("limit_filesystem");
 
     let simplified_dirs = simplify_dir_names(target_dirs);
-    let (permissions, nodes) = get_dir_tree(&simplified_dirs, use_apparent_size, threads);
+    let (permissions, nodes) = get_dir_tree(
+        &simplified_dirs,
+        use_apparent_size,
+        limit_filesystem,
+        threads,
+    );
     let sorted_data = sort(nodes);
     let biggest_ones = {
         match depth {
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index 0d337ea..0f42788 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -1,3 +1,4 @@
+use jwalk::DirEntry;
 use std::cmp::Ordering;
 use std::collections::HashMap;
 use std::collections::HashSet;
@@ -37,7 +38,8 @@ impl PartialEq for Node {
 }
 
 pub fn is_a_parent_of(parent: &str, child: &str) -> bool {
-    (child.starts_with(parent) && child.chars().nth(parent.chars().count()) == Some('/')) || parent == "/"
+    (child.starts_with(parent) && child.chars().nth(parent.chars().count()) == Some('/'))
+        || parent == "/"
 }
 
 pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet<String> {
@@ -69,16 +71,23 @@ pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet<String> {
 pub fn get_dir_tree(
     top_level_names: &HashSet<String>,
     apparent_size: bool,
+    limit_filesystem: bool,
     threads: Option<usize>,
 ) -> (bool, HashMap<String, u64>) {
     let mut permissions = 0;
     let mut inodes: HashSet<(u64, u64)> = HashSet::new();
     let mut data: HashMap<String, u64> = HashMap::new();
+    let restricted_filesystems = if limit_filesystem {
+        get_allowed_filesystems(top_level_names)
+    } else {
+        None
+    };
 
     for b in top_level_names.iter() {
         examine_dir(
             &b,
             apparent_size,
+            &restricted_filesystems,
             &mut inodes,
             &mut data,
             &mut permissions,
@@ -88,6 +97,16 @@ pub fn get_dir_tree(
     (permissions == 0, data)
 }
 
+fn get_allowed_filesystems(top_level_names: &HashSet<String>) -> Option<HashSet<u64>> {
+    let mut limit_filesystems: HashSet<u64> = HashSet::new();
+    for file_name in top_level_names.iter() {
+        if let Ok(a) = get_filesystem(file_name) {
+            limit_filesystems.insert(a);
+        }
+    }
+    Some(limit_filesystems)
+}
+
 pub fn strip_end_slash(mut new_name: &str) -> &str {
     while (new_name.ends_with('/') || new_name.ends_with("/.")) && new_name.len() > 1 {
         new_name = &new_name[..new_name.len() - 1];
@@ -98,6 +117,7 @@ pub fn strip_end_slash(mut new_name: &str) -> &str {
 fn examine_dir(
     top_dir: &str,
     apparent_size: bool,
+    filesystems: &Option<HashSet<u64>>,
     inodes: &mut HashSet<(u64, u64)>,
     data: &mut HashMap<String, u64>,
     file_count_no_permission: &mut u64,
@@ -115,27 +135,8 @@ fn examine_dir(
 
             match maybe_size_and_inode {
                 Some((size, maybe_inode)) => {
-                    if !apparent_size {
-                        if let Some(inode_dev_pair) = maybe_inode {
-                            if inodes.contains(&inode_dev_pair) {
-                                continue;
-                            }
-                            inodes.insert(inode_dev_pair);
-                        }
-                    }
-                    // This path and all its parent paths have their counter incremented
-                    for path_name in e.path().ancestors() {
-                        // This is required due to bug in Jwalk that adds '/' to all sub dir lists
-                        // see: https://github.com/jessegrosjean/jwalk/issues/13
-                        if path_name.to_string_lossy() == "/" && top_dir != "/" {
-                            continue
-                        }
-                        let path_name = path_name.to_string_lossy();
-                        let s = data.entry(path_name.to_string()).or_insert(0);
-                        *s += size;
-                        if path_name == top_dir {
-                            break;
-                        }
+                    if !should_ignore_file(apparent_size, filesystems, inodes, maybe_inode) {
+                        process_file_with_size_and_inode(top_dir, data, e, size)
                     }
                 }
                 None => *file_count_no_permission += 1,
@@ -146,6 +147,58 @@ fn examine_dir(
     }
 }
 
+/// Decides whether an entry should be left out of the size totals.
+///
+/// An entry is skipped when `restricted_filesystems` is set and does not contain
+/// the entry's device id, or (unless `apparent_size` is set) when its
+/// (inode, device) pair is already recorded in `inodes`. Entries without inode
+/// information are never skipped.
+fn should_ignore_file(
+    apparent_size: bool,
+    restricted_filesystems: &Option<HashSet<u64>>,
+    inodes: &mut HashSet<(u64, u64)>,
+    maybe_inode: Option<(u64, u64)>,
+) -> bool {
+    if let Some(inode_dev_pair) = maybe_inode {
+        // Ignore files on different devices (if flag applied). This check must not
+        // depend on `apparent_size`, or `-x` would be silently dropped alongside `-s`.
+        if let Some(allowed) = restricted_filesystems {
+            if !allowed.contains(&inode_dev_pair.1) {
+                return true;
+            }
+        }
+        // Ignore files already visited or symlinked; `insert` returns false when
+        // the pair was already present.
+        if !apparent_size && !inodes.insert(inode_dev_pair) {
+            return true;
+        }
+    }
+    false
+}
+
+/// Adds `size` to the entry's own path and to each ancestor up to `top_dir`.
+fn process_file_with_size_and_inode(
+    top_dir: &str,
+    data: &mut HashMap<String, u64>,
+    e: DirEntry,
+    size: u64,
+) {
+    // This path and all its parent paths have their counter incremented
+    for path_name in e.path().ancestors() {
+        // This is required due to bug in Jwalk that adds '/' to all sub dir lists
+        // see: https://github.com/jessegrosjean/jwalk/issues/13
+        if path_name.to_string_lossy() == "/" && top_dir != "/" {
+            continue;
+        }
+        let path_name = path_name.to_string_lossy();
+        let s = data.entry(path_name.to_string()).or_insert(0);
+        *s += size;
+        if path_name == top_dir {
+            break;
+        }
+    }
+}
+
 pub fn sort_by_size_first_name_second(a: &(String, u64), b: &(String, u64)) -> Ordering {
     let result = b.1.cmp(&a.1);
     if result == Ordering::Equal {
diff --git a/src/utils/platform.rs b/src/utils/platform.rs
index 86276dc..43d1a66 100644
--- a/src/utils/platform.rs
+++ b/src/utils/platform.rs
@@ -1,4 +1,6 @@
 use jwalk::DirEntry;
+use std::fs;
+use std::io;
 
 #[cfg(target_family = "unix")]
 fn get_block_size() -> u64 {
@@ -20,7 +22,23 @@ pub fn get_metadata(d: &DirEntry, use_apparent_size: bool) -> Option<(u64, Optio
     })
 }
 
-#[cfg(not(target_family = "unix"))]
+#[cfg(target_family = "windows")]
+pub fn get_metadata(d: &DirEntry, use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> {
+    use std::os::windows::fs::MetadataExt;
+    d.metadata.as_ref().unwrap().as_ref().ok().map(|md| {
+        // `file_index` and `volume_serial_number` both return `Option`, so the
+        // identity pair is only built when both values are available.
+        // NOTE(review): both accessors sit behind the unstable `windows_by_handle`
+        // feature — confirm the Windows toolchain before merging.
+        let windows_equivalent_of_inode = match (md.file_index(), md.volume_serial_number()) {
+            (Some(index), Some(serial)) => Some((index, u64::from(serial))),
+            _ => None,
+        };
+        (md.file_size(), windows_equivalent_of_inode)
+    })
+}
+
+#[cfg(all(not(target_family = "windows"), not(target_family = "unix")))]
 pub fn get_metadata(d: &DirEntry, _apparent: bool) -> Option<(u64, Option<(u64, u64)>)> {
     d.metadata
         .as_ref()
@@ -29,3 +47,34 @@ pub fn get_metadata(d: &DirEntry, _apparent: bool) -> Option<(u64, 
         .ok()
         .map(|md| (md.len(), None))
 }
+
+/// Returns the id of the device (filesystem) that `file_path` lives on.
+#[cfg(target_family = "unix")]
+pub fn get_filesystem(file_path: &str) -> Result<u64, io::Error> {
+    use std::os::unix::fs::MetadataExt;
+    let metadata = fs::metadata(file_path)?;
+    Ok(metadata.dev())
+}
+
+/// Returns the serial number of the volume that `file_path` lives on.
+///
+/// Shares its name with the unix variant so callers compile on every target.
+#[cfg(target_family = "windows")]
+pub fn get_filesystem(file_path: &str) -> Result<u64, io::Error> {
+    use std::os::windows::fs::MetadataExt;
+    let metadata = fs::metadata(file_path)?;
+    // `volume_serial_number` is `Option<u32>`: widen it and report absence as an error.
+    metadata
+        .volume_serial_number()
+        .map(u64::from)
+        .ok_or_else(|| io::Error::new(io::ErrorKind::Other, "volume serial number unavailable"))
+}
+
+/// Fallback for targets with no known way to identify a filesystem: always errors.
+#[cfg(all(not(target_family = "windows"), not(target_family = "unix")))]
+pub fn get_filesystem(_file_path: &str) -> Result<u64, io::Error> {
+    Err(io::Error::new(
+        io::ErrorKind::Other,
+        "filesystem detection is not supported on this platform",
+    ))
+}