Feature: Re-introduce -x flag to limit filesystem

The -x flag lets dust limit itself to the filesystem(s) of the supplied directories.
This commit is contained in:
andy.boot
2021-07-15 18:25:21 +01:00
parent c286b8ba97
commit f6e36aba52
3 changed files with 60 additions and 7 deletions
+25 -7
View File
@@ -13,9 +13,12 @@ use std::collections::HashSet;
use crate::node::build_node; use crate::node::build_node;
use std::fs::DirEntry; use std::fs::DirEntry;
use crate::platform::get_metadata;
pub fn walk_it( pub fn walk_it(
dirs: HashSet<PathBuf>, dirs: HashSet<PathBuf>,
ignore_directories: HashSet<PathBuf>, ignore_directories: HashSet<PathBuf>,
allowed_filesystems: HashSet<u64>,
use_apparent_size: bool, use_apparent_size: bool,
by_filecount: bool, by_filecount: bool,
ignore_hidden: bool, ignore_hidden: bool,
@@ -27,9 +30,9 @@ pub fn walk_it(
.filter_map(|d| { .filter_map(|d| {
let n = walk( let n = walk(
d, d,
false,
&permissions_flag, &permissions_flag,
&ignore_directories, &ignore_directories,
&allowed_filesystems,
use_apparent_size, use_apparent_size,
by_filecount, by_filecount,
ignore_hidden, ignore_hidden,
@@ -75,22 +78,32 @@ fn clean_inodes(
}); });
} }
// todo: check for filesystem too
fn ignore_file( fn ignore_file(
entry: &DirEntry, entry: &DirEntry,
ignore_hidden: bool, ignore_hidden: bool,
ignore_directories: &HashSet<PathBuf>, ignore_directories: &HashSet<PathBuf>,
allowed_filesystems: &HashSet<u64>,
) -> bool { ) -> bool {
let is_dot_file = entry.file_name().to_str().unwrap_or("").starts_with('.'); let is_dot_file = entry.file_name().to_str().unwrap_or("").starts_with('.');
let is_ignored_path = ignore_directories.contains(&entry.path()); let is_ignored_path = ignore_directories.contains(&entry.path());
if !allowed_filesystems.is_empty() {
let size_inode_device = get_metadata(&entry.path(), false);
if let Some((_size, Some((_id, dev)))) = size_inode_device {
if !allowed_filesystems.contains(&dev) {
return true;
}
}
}
(is_dot_file && ignore_hidden) || is_ignored_path (is_dot_file && ignore_hidden) || is_ignored_path
} }
fn walk( fn walk(
dir: PathBuf, dir: PathBuf,
is_symlink: bool,
permissions_flag: &AtomicBool, permissions_flag: &AtomicBool,
ignore_directories: &HashSet<PathBuf>, ignore_directories: &HashSet<PathBuf>,
allowed_filesystems: &HashSet<u64>,
use_apparent_size: bool, use_apparent_size: bool,
by_filecount: bool, by_filecount: bool,
ignore_hidden: bool, ignore_hidden: bool,
@@ -107,16 +120,21 @@ fn walk(
// rayon doesn't parallelise as well giving a 3X performance drop // rayon doesn't parallelise as well giving a 3X performance drop
// hence we unravel the recursion a bit // hence we unravel the recursion a bit
// return walk(entry.path(), permissions_flag, ignore_directories, use_apparent_size, by_filecount, ignore_hidden); // return walk(entry.path(), permissions_flag, ignore_directories, allowed_filesystems, use_apparent_size, by_filecount, ignore_hidden);
if !ignore_file(&entry, ignore_hidden, &ignore_directories) { if !ignore_file(
&entry,
ignore_hidden,
&ignore_directories,
&allowed_filesystems,
) {
if let Ok(data) = entry.file_type() { if let Ok(data) = entry.file_type() {
if data.is_dir() && !data.is_symlink() { if data.is_dir() && !data.is_symlink() {
return walk( return walk(
entry.path(), entry.path(),
data.is_symlink(),
permissions_flag, permissions_flag,
ignore_directories, ignore_directories,
allowed_filesystems,
use_apparent_size, use_apparent_size,
by_filecount, by_filecount,
ignore_hidden, ignore_hidden,
@@ -140,7 +158,7 @@ fn walk(
} else { } else {
permissions_flag.store(true, atomic::Ordering::Relaxed); permissions_flag.store(true, atomic::Ordering::Relaxed);
} }
build_node(dir, children, use_apparent_size, is_symlink, by_filecount) build_node(dir, children, use_apparent_size, false, by_filecount)
} }
mod tests { mod tests {
+16
View File
@@ -12,6 +12,7 @@ use filter::{get_biggest, get_by_depth};
use std::cmp::max; use std::cmp::max;
use std::path::PathBuf; use std::path::PathBuf;
use terminal_size::{terminal_size, Height, Width}; use terminal_size::{terminal_size, Height, Width};
use utils::get_filesystem_devices;
use utils::simplify_dir_names; use utils::simplify_dir_names;
mod dirwalker; mod dirwalker;
@@ -107,6 +108,12 @@ fn main() {
.multiple(true) .multiple(true)
.help("Exclude any file or directory with this name"), .help("Exclude any file or directory with this name"),
) )
.arg(
Arg::with_name("limit_filesystem")
.short("x")
.long("limit-filesystem")
.help("Only count the files and directories on the same filesystem as the supplied directory"),
)
.arg( .arg(
Arg::with_name("display_apparent_size") Arg::with_name("display_apparent_size")
.short("s") .short("s")
@@ -191,8 +198,16 @@ fn main() {
let by_filecount = options.is_present("by_filecount"); let by_filecount = options.is_present("by_filecount");
let ignore_hidden = options.is_present("ignore_hidden"); let ignore_hidden = options.is_present("ignore_hidden");
let limit_filesystem = options.is_present("limit_filesystem");
let simplified_dirs = simplify_dir_names(target_dirs); let simplified_dirs = simplify_dir_names(target_dirs);
let allowed_filesystems = {
if limit_filesystem {
get_filesystem_devices(simplified_dirs.iter())
} else {
HashSet::new()
}
};
let ignored_full_path: HashSet<PathBuf> = ignore_directories let ignored_full_path: HashSet<PathBuf> = ignore_directories
.into_iter() .into_iter()
@@ -202,6 +217,7 @@ fn main() {
let (nodes, errors) = walk_it( let (nodes, errors) = walk_it(
simplified_dirs, simplified_dirs,
ignored_full_path, ignored_full_path,
allowed_filesystems,
use_apparent_size, use_apparent_size,
by_filecount, by_filecount,
ignore_hidden, ignore_hidden,
+19
View File
@@ -1,6 +1,9 @@
use platform::get_metadata;
use std::collections::HashSet; use std::collections::HashSet;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use crate::platform;
fn is_a_parent_of<P: AsRef<Path>>(parent: P, child: P) -> bool { fn is_a_parent_of<P: AsRef<Path>>(parent: P, child: P) -> bool {
let parent = parent.as_ref(); let parent = parent.as_ref();
let child = child.as_ref(); let child = child.as_ref();
@@ -33,6 +36,22 @@ pub fn simplify_dir_names<P: AsRef<Path>>(filenames: Vec<P>) -> HashSet<PathBuf>
top_level_names top_level_names
} }
/// Collects the device ids of the filesystems that hold the given paths.
///
/// Each path is looked up with `get_metadata`; paths for which no device id
/// is reported are skipped. Because the result is a set, several paths on the
/// same device contribute a single entry.
pub fn get_filesystem_devices<'a, P: IntoIterator<Item = &'a PathBuf>>(paths: P) -> HashSet<u64> {
    let mut devices = HashSet::new();
    for path in paths {
        // NOTE(review): tuple is presumably (size, Some((inode, device))) —
        // confirm against platform::get_metadata.
        if let Some((_, Some((_, device)))) = get_metadata(&path, false) {
            devices.insert(device);
        }
    }
    devices
}
pub fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf { pub fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
// normalize path ... // normalize path ...
// 1. removing repeated separators // 1. removing repeated separators