Support excluding filesystems with -x

https://github.com/bootandy/dust/issues/50

Add optional -x flag to limit search to the current filesystem.

Add (untested) Windows support for the equivalents of inode and
device.
This commit is contained in:
andy.boot
2020-01-15 19:51:16 +00:00
parent f395a7d768
commit bdc3d404ef
3 changed files with 74 additions and 4 deletions
+13 -1
View File
@@ -54,6 +54,12 @@ fn main() {
.long("full-paths") .long("full-paths")
.help("If set sub directories will not have their path shortened"), .help("If set sub directories will not have their path shortened"),
) )
.arg(
Arg::with_name("limit_filesystem")
.short("x")
.long("limit-filesystem")
.help("Only count the files and directories in the same filesystem as the supplied directory"),
)
.arg( .arg(
Arg::with_name("display_apparent_size") Arg::with_name("display_apparent_size")
.short("s") .short("s")
@@ -110,9 +116,15 @@ fn main() {
} }
let use_apparent_size = options.is_present("display_apparent_size"); let use_apparent_size = options.is_present("display_apparent_size");
let limit_filesystem = options.is_present("limit_filesystem");
let simplified_dirs = simplify_dir_names(target_dirs); let simplified_dirs = simplify_dir_names(target_dirs);
let (permissions, nodes) = get_dir_tree(&simplified_dirs, use_apparent_size, threads); let (permissions, nodes) = get_dir_tree(
&simplified_dirs,
use_apparent_size,
limit_filesystem,
threads,
);
let sorted_data = sort(nodes); let sorted_data = sort(nodes);
let biggest_ones = { let biggest_ones = {
match depth { match depth {
+31 -2
View File
@@ -37,7 +37,8 @@ impl PartialEq for Node {
} }
pub fn is_a_parent_of(parent: &str, child: &str) -> bool { pub fn is_a_parent_of(parent: &str, child: &str) -> bool {
(child.starts_with(parent) && child.chars().nth(parent.chars().count()) == Some('/')) || parent == "/" (child.starts_with(parent) && child.chars().nth(parent.chars().count()) == Some('/'))
|| parent == "/"
} }
pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet<String> { pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet<String> {
@@ -69,16 +70,23 @@ pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet<String> {
pub fn get_dir_tree( pub fn get_dir_tree(
top_level_names: &HashSet<String>, top_level_names: &HashSet<String>,
apparent_size: bool, apparent_size: bool,
limit_filesystem: bool,
threads: Option<usize>, threads: Option<usize>,
) -> (bool, HashMap<String, u64>) { ) -> (bool, HashMap<String, u64>) {
let mut permissions = 0; let mut permissions = 0;
let mut inodes: HashSet<(u64, u64)> = HashSet::new(); let mut inodes: HashSet<(u64, u64)> = HashSet::new();
let mut data: HashMap<String, u64> = HashMap::new(); let mut data: HashMap<String, u64> = HashMap::new();
let restricted_filesystems = if limit_filesystem {
get_allowed_filesystems(top_level_names)
} else {
None
};
for b in top_level_names.iter() { for b in top_level_names.iter() {
examine_dir( examine_dir(
&b, &b,
apparent_size, apparent_size,
&restricted_filesystems,
&mut inodes, &mut inodes,
&mut data, &mut data,
&mut permissions, &mut permissions,
@@ -88,6 +96,16 @@ pub fn get_dir_tree(
(permissions == 0, data) (permissions == 0, data)
} }
/// Gathers the filesystem identifiers of every supplied top-level path.
///
/// Each name is passed to `get_filesystem`; names for which it yields `None`
/// contribute nothing. The result is always `Some`, possibly wrapping an
/// empty set.
fn get_allowed_filesystems(top_level_names: &HashSet<String>) -> Option<HashSet<u64>> {
    let allowed = top_level_names
        .iter()
        .filter_map(|name| get_filesystem(name))
        .collect::<HashSet<u64>>();
    Some(allowed)
}
pub fn strip_end_slash(mut new_name: &str) -> &str { pub fn strip_end_slash(mut new_name: &str) -> &str {
while (new_name.ends_with('/') || new_name.ends_with("/.")) && new_name.len() > 1 { while (new_name.ends_with('/') || new_name.ends_with("/.")) && new_name.len() > 1 {
new_name = &new_name[..new_name.len() - 1]; new_name = &new_name[..new_name.len() - 1];
@@ -98,6 +116,7 @@ pub fn strip_end_slash(mut new_name: &str) -> &str {
fn examine_dir( fn examine_dir(
top_dir: &str, top_dir: &str,
apparent_size: bool, apparent_size: bool,
restricted_filesystems: &Option<HashSet<u64>>,
inodes: &mut HashSet<(u64, u64)>, inodes: &mut HashSet<(u64, u64)>,
data: &mut HashMap<String, u64>, data: &mut HashMap<String, u64>,
file_count_no_permission: &mut u64, file_count_no_permission: &mut u64,
@@ -117,6 +136,16 @@ fn examine_dir(
Some((size, maybe_inode)) => { Some((size, maybe_inode)) => {
if !apparent_size { if !apparent_size {
if let Some(inode_dev_pair) = maybe_inode { if let Some(inode_dev_pair) = maybe_inode {
// Ignore files on different devices (if flag applied)
if restricted_filesystems.is_some()
&& !restricted_filesystems
.as_ref()
.unwrap()
.contains(&inode_dev_pair.1)
{
continue;
}
// Ignore files already visited or symlinked
if inodes.contains(&inode_dev_pair) { if inodes.contains(&inode_dev_pair) {
continue; continue;
} }
@@ -128,7 +157,7 @@ fn examine_dir(
// This is required due to bug in Jwalk that adds '/' to all sub dir lists // This is required due to bug in Jwalk that adds '/' to all sub dir lists
// see: https://github.com/jessegrosjean/jwalk/issues/13 // see: https://github.com/jessegrosjean/jwalk/issues/13
if path_name.to_string_lossy() == "/" && top_dir != "/" { if path_name.to_string_lossy() == "/" && top_dir != "/" {
continue continue;
} }
let path_name = path_name.to_string_lossy(); let path_name = path_name.to_string_lossy();
let s = data.entry(path_name.to_string()).or_insert(0); let s = data.entry(path_name.to_string()).or_insert(0);
+30 -1
View File
@@ -1,4 +1,5 @@
use jwalk::DirEntry; use jwalk::DirEntry;
use std::fs;
#[cfg(target_family = "unix")] #[cfg(target_family = "unix")]
fn get_block_size() -> u64 { fn get_block_size() -> u64 {
@@ -20,7 +21,16 @@ pub fn get_metadata(d: &DirEntry, use_apparent_size: bool) -> Option<(u64, Optio
}) })
} }
#[cfg(not(target_family = "unix"))] #[cfg(target_family = "windows")]
/// Windows variant: returns the file size together with the Windows
/// equivalent of an `(inode, device)` pair, i.e. `(file index, volume serial
/// number)`.
///
/// Returns `None` when the entry's metadata holds an error. The second tuple
/// element is `None` unless both the file index and the volume serial number
/// are available. `use_apparent_size` is not consulted here; the reported
/// size is always `file_size()`.
pub fn get_metadata(d: &DirEntry, use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> {
    use std::os::windows::fs::MetadataExt;

    // NOTE(review): the `unwrap` assumes the walker always attaches metadata
    // to each entry; confirm against how the walk is configured.
    d.metadata.as_ref().unwrap().as_ref().ok().map(|md| {
        // Both accessors return `Option` (`Option<u64>` and `Option<u32>`), so
        // they must be combined and the serial widened to match the
        // `(u64, u64)` pair used for unix inode/device ids.
        // NOTE(review): both accessors are gated behind the unstable
        // `windows_by_handle` feature, and std documents them as `None` for
        // metadata obtained through a directory entry; confirm this yields
        // usable ids in practice.
        let windows_equivalent_of_inode = match (md.file_index(), md.volume_serial_number()) {
            (Some(index), Some(volume)) => Some((index, u64::from(volume))),
            _ => None,
        };
        (md.file_size(), windows_equivalent_of_inode)
    })
}
#[cfg(all(not(target_family = "windows"), not(target_family = "unix")))]
pub fn get_metadata(d: &DirEntry, _apparent: bool) -> Option<(u64, Option<(u64, u64)>)> { pub fn get_metadata(d: &DirEntry, _apparent: bool) -> Option<(u64, Option<(u64, u64)>)> {
d.metadata d.metadata
.as_ref() .as_ref()
@@ -29,3 +39,22 @@ pub fn get_metadata(d: &DirEntry, _apparent: bool) -> Option<(u64, Option<(u64,
.ok() .ok()
.map(|md| (md.len(), None)) .map(|md| (md.len(), None))
} }
/// Returns the id of the device that contains `file_path`.
///
/// Yields `None` when the path's metadata cannot be read (for example the
/// path does not exist or is not accessible) rather than panicking, so a bad
/// path cannot abort the whole run.
#[cfg(target_family = "unix")]
pub fn get_filesystem(file_path: &str) -> Option<u64> {
    use std::os::unix::fs::MetadataExt;

    fs::metadata(file_path).ok().map(|metadata| metadata.dev())
}
/// Returns the serial number of the volume that contains `file_path`, the
/// Windows counterpart of a unix device id.
///
/// Named `get_filesystem`, matching the unix variant, so the same call
/// resolves on every platform. Yields `None` when the metadata cannot be read
/// or the volume serial number is unavailable.
#[cfg(target_family = "windows")]
pub fn get_filesystem(file_path: &str) -> Option<u64> {
    use std::os::windows::fs::MetadataExt;

    // `volume_serial_number` returns `Option<u32>`; widen it so the value can
    // be compared with the `u64` ids used elsewhere.
    // NOTE(review): the accessor is gated behind the unstable
    // `windows_by_handle` feature; confirm the toolchain enables it.
    fs::metadata(file_path)
        .ok()
        .and_then(|metadata| metadata.volume_serial_number())
        .map(u64::from)
}
/// Fallback for targets that are neither unix nor windows: no filesystem
/// identifier is available, so this always returns `None`.
///
/// Named `get_filesystem`, matching the unix variant, so the same call
/// resolves on every platform.
#[cfg(all(not(target_family = "windows"), not(target_family = "unix")))]
pub fn get_filesystem(_file_path: &str) -> Option<u64> {
    None
}