mirror of
https://github.com/bootandy/dust.git
synced 2026-06-08 11:29:05 +03:00
Move everything to ignore instead of jwalk
This commit is contained in:
+9
-24
@@ -1,6 +1,9 @@
|
||||
#[macro_use]
|
||||
extern crate clap;
|
||||
extern crate crossbeam_channel as channel;
|
||||
extern crate ignore;
|
||||
extern crate unicode_width;
|
||||
extern crate walkdir;
|
||||
|
||||
use self::display::draw_it;
|
||||
use crate::utils::is_a_parent_of;
|
||||
@@ -8,7 +11,7 @@ use clap::{App, AppSettings, Arg};
|
||||
use std::cmp::max;
|
||||
use std::path::PathBuf;
|
||||
use terminal_size::{terminal_size, Height, Width};
|
||||
use utils::{find_big_ones, get_dir_tree, simplify_dir_names, sort, trim_deep_ones, Node};
|
||||
use utils::{find_big_ones, get_dir_tree, simplify_dir_names, sort, Node};
|
||||
|
||||
mod display;
|
||||
mod utils;
|
||||
@@ -137,27 +140,9 @@ fn main() {
|
||||
}
|
||||
};
|
||||
|
||||
let temp_threads = options.value_of("threads").and_then(|threads| {
|
||||
threads
|
||||
.parse::<usize>()
|
||||
.map_err(|_| eprintln!("Ignoring bad value for threads: {:?}", threads))
|
||||
.ok()
|
||||
});
|
||||
// Bug in JWalk
|
||||
// https://github.com/jessegrosjean/jwalk/issues/15
|
||||
// We force it to use 2 threads if there is only 1 cpu
|
||||
// as JWalk breaks if it tries to run on a single cpu
|
||||
let threads = {
|
||||
if temp_threads.is_none() && num_cpus::get() == 1 {
|
||||
Some(2)
|
||||
} else {
|
||||
temp_threads
|
||||
}
|
||||
};
|
||||
|
||||
let depth = options.value_of("depth").and_then(|depth| {
|
||||
depth
|
||||
.parse::<u64>()
|
||||
.parse::<usize>()
|
||||
.map(|v| v + 1)
|
||||
.map_err(|_| eprintln!("Ignoring bad value for depth"))
|
||||
.ok()
|
||||
@@ -181,13 +166,13 @@ fn main() {
|
||||
&ignore_directories,
|
||||
use_apparent_size,
|
||||
limit_filesystem,
|
||||
threads,
|
||||
depth,
|
||||
);
|
||||
let sorted_data = sort(nodes);
|
||||
let biggest_ones = {
|
||||
match depth {
|
||||
None => find_big_ones(sorted_data, number_of_lines + simplified_dirs.len()),
|
||||
Some(d) => trim_deep_ones(sorted_data, d, &simplified_dirs),
|
||||
Some(_) => sorted_data,
|
||||
}
|
||||
};
|
||||
let tree = build_tree(biggest_ones, depth);
|
||||
@@ -202,7 +187,7 @@ fn main() {
|
||||
);
|
||||
}
|
||||
|
||||
fn build_tree(biggest_ones: Vec<(PathBuf, u64)>, depth: Option<u64>) -> Node {
|
||||
fn build_tree(biggest_ones: Vec<(PathBuf, u64)>, depth: Option<usize>) -> Node {
|
||||
let mut top_parent = Node::default();
|
||||
|
||||
// assume sorted order
|
||||
@@ -217,7 +202,7 @@ fn build_tree(biggest_ones: Vec<(PathBuf, u64)>, depth: Option<u64>) -> Node {
|
||||
top_parent
|
||||
}
|
||||
|
||||
fn recursively_build_tree(parent_node: &mut Node, new_node: Node, depth: Option<u64>) {
|
||||
fn recursively_build_tree(parent_node: &mut Node, new_node: Node, depth: Option<usize>) {
|
||||
let new_depth = match depth {
|
||||
None => None,
|
||||
Some(0) => return,
|
||||
|
||||
+120
-166
@@ -1,14 +1,22 @@
|
||||
use jwalk::DirEntry;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
use std::iter::FromIterator;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::AtomicBool;
|
||||
|
||||
use jwalk::WalkDir;
|
||||
use channel::Receiver;
|
||||
use std::thread::JoinHandle;
|
||||
|
||||
use ignore::{WalkBuilder, WalkState};
|
||||
use std::sync::atomic;
|
||||
use std::thread;
|
||||
|
||||
mod platform;
|
||||
use self::platform::*;
|
||||
|
||||
type PathData = (PathBuf, u64, Option<(u64, u64)>);
|
||||
|
||||
#[derive(Debug, Default, Eq)]
|
||||
pub struct Node {
|
||||
pub name: PathBuf,
|
||||
@@ -70,46 +78,121 @@ pub fn simplify_dir_names<P: AsRef<Path>>(filenames: Vec<P>) -> HashSet<PathBuf>
|
||||
top_level_names
|
||||
}
|
||||
|
||||
fn prepare_walk_dir_builder<P: AsRef<Path>>(
|
||||
top_level_names: &HashSet<P>,
|
||||
limit_filesystem: bool,
|
||||
max_depth: Option<usize>,
|
||||
) -> WalkBuilder {
|
||||
let mut it = top_level_names.iter();
|
||||
let mut builder = WalkBuilder::new(it.next().unwrap());
|
||||
builder.follow_links(false);
|
||||
builder.ignore(false);
|
||||
builder.git_global(false);
|
||||
builder.git_ignore(false);
|
||||
builder.git_exclude(false);
|
||||
builder.hidden(false);
|
||||
|
||||
if limit_filesystem {
|
||||
builder.same_file_system(true);
|
||||
}
|
||||
|
||||
builder.max_depth(max_depth);
|
||||
|
||||
for b in it {
|
||||
builder.add(b);
|
||||
}
|
||||
builder
|
||||
}
|
||||
|
||||
pub fn get_dir_tree<P: AsRef<Path>>(
|
||||
top_level_names: &HashSet<P>,
|
||||
ignore_directories: &Option<Vec<PathBuf>>,
|
||||
apparent_size: bool,
|
||||
limit_filesystem: bool,
|
||||
threads: Option<usize>,
|
||||
max_depth: Option<usize>,
|
||||
) -> (bool, HashMap<PathBuf, u64>) {
|
||||
let mut permissions = 0;
|
||||
let mut data: HashMap<PathBuf, u64> = HashMap::new();
|
||||
let restricted_filesystems = if limit_filesystem {
|
||||
get_allowed_filesystems(top_level_names)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let (tx, rx) = channel::bounded::<PathData>(1000);
|
||||
|
||||
let mut examine_dir_args = ExamineDirMutArsg {
|
||||
data: &mut data,
|
||||
file_count_no_permission: &mut permissions,
|
||||
};
|
||||
for b in top_level_names.iter() {
|
||||
examine_dir(
|
||||
b,
|
||||
apparent_size,
|
||||
&restricted_filesystems,
|
||||
ignore_directories,
|
||||
threads,
|
||||
&mut examine_dir_args,
|
||||
);
|
||||
}
|
||||
(permissions == 0, data)
|
||||
let permissions_flag = AtomicBool::new(true);
|
||||
|
||||
let t2 = HashSet::from_iter(top_level_names.iter().map(|p| p.as_ref().to_path_buf()));
|
||||
|
||||
let t = create_reader_thread(rx, t2, apparent_size);
|
||||
let walk_dir_builder = prepare_walk_dir_builder(top_level_names, limit_filesystem, max_depth);
|
||||
|
||||
walk_dir_builder.build_parallel().run(|| {
|
||||
let txc = tx.clone();
|
||||
let pf = &permissions_flag;
|
||||
Box::new(move |path| {
|
||||
match path {
|
||||
Ok(p) => {
|
||||
if let Some(dirs) = ignore_directories {
|
||||
let path = p.path();
|
||||
let parts = path.components().collect::<Vec<std::path::Component>>();
|
||||
for d in dirs {
|
||||
let seq = d.components().collect::<Vec<std::path::Component>>();
|
||||
if parts
|
||||
.windows(seq.len())
|
||||
.any(|window| window.iter().collect::<PathBuf>() == *d)
|
||||
{
|
||||
return WalkState::Continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let maybe_size_and_inode = get_metadata(&p, apparent_size);
|
||||
|
||||
match maybe_size_and_inode {
|
||||
Some(data) => {
|
||||
let (size, inode_device) = data;
|
||||
txc.send((p.into_path(), size, inode_device)).unwrap();
|
||||
}
|
||||
None => {
|
||||
pf.store(false, atomic::Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
pf.store(false, atomic::Ordering::Relaxed);
|
||||
}
|
||||
};
|
||||
WalkState::Continue
|
||||
})
|
||||
});
|
||||
|
||||
drop(tx);
|
||||
let data = t.join().unwrap();
|
||||
(permissions_flag.load(atomic::Ordering::SeqCst), data)
|
||||
}
|
||||
|
||||
fn get_allowed_filesystems<P: AsRef<Path>>(top_level_names: &HashSet<P>) -> Option<HashSet<u64>> {
|
||||
let mut limit_filesystems: HashSet<u64> = HashSet::new();
|
||||
for file_name in top_level_names.iter() {
|
||||
if let Ok(a) = get_filesystem(file_name) {
|
||||
limit_filesystems.insert(a);
|
||||
fn create_reader_thread(
|
||||
rx: Receiver<PathData>,
|
||||
top_level_names: HashSet<PathBuf>,
|
||||
apparent_size: bool,
|
||||
) -> JoinHandle<HashMap<PathBuf, u64>> {
|
||||
// Receiver thread
|
||||
thread::spawn(move || {
|
||||
let mut hash: HashMap<PathBuf, u64> = HashMap::new();
|
||||
let mut inodes: HashSet<(u64, u64)> = HashSet::new();
|
||||
|
||||
for dent in rx {
|
||||
let (path, size, maybe_inode_device) = dent;
|
||||
|
||||
if should_ignore_file(apparent_size, &mut inodes, maybe_inode_device) {
|
||||
continue;
|
||||
} else {
|
||||
for p in path.ancestors() {
|
||||
let s = hash.entry(p.to_path_buf()).or_insert(0);
|
||||
*s += size;
|
||||
|
||||
if top_level_names.contains(p) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(limit_filesystems)
|
||||
hash
|
||||
})
|
||||
}
|
||||
|
||||
pub fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
|
||||
@@ -122,64 +205,8 @@ pub fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
|
||||
path.as_ref().components().collect::<PathBuf>()
|
||||
}
|
||||
|
||||
/// Mutable accumulators that `examine_dir` updates while walking a directory.
///
/// Both fields are borrowed so that several walks can add to the same totals.
struct ExamineDirMutArsg<'a> {
    /// Accumulated size recorded for each path.
    data: &'a mut HashMap<PathBuf, u64>,
    /// Number of entries that could not be read or had no metadata.
    file_count_no_permission: &'a mut u64,
}
|
||||
|
||||
fn examine_dir<P: AsRef<Path>>(
|
||||
top_dir: P,
|
||||
apparent_size: bool,
|
||||
filesystems: &Option<HashSet<u64>>,
|
||||
ignore_directories: &Option<Vec<PathBuf>>,
|
||||
threads: Option<usize>,
|
||||
mut_args: &mut ExamineDirMutArsg,
|
||||
) {
|
||||
let top_dir = top_dir.as_ref();
|
||||
let mut inodes: HashSet<(u64, u64)> = HashSet::new();
|
||||
let mut iter = WalkDir::new(top_dir)
|
||||
.preload_metadata(true)
|
||||
.skip_hidden(false);
|
||||
if let Some(threads_to_start) = threads {
|
||||
iter = iter.num_threads(threads_to_start);
|
||||
}
|
||||
|
||||
'entry: for entry in iter {
|
||||
if let Ok(e) = entry {
|
||||
let maybe_size_and_inode = get_metadata(&e, apparent_size);
|
||||
|
||||
if let Some(dirs) = ignore_directories {
|
||||
let path = e.path();
|
||||
let parts = path.components().collect::<Vec<std::path::Component>>();
|
||||
for d in dirs {
|
||||
let seq = d.components().collect::<Vec<std::path::Component>>();
|
||||
if parts
|
||||
.windows(seq.len())
|
||||
.any(|window| window.iter().collect::<PathBuf>() == *d)
|
||||
{
|
||||
continue 'entry;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match maybe_size_and_inode {
|
||||
Some(data) => {
|
||||
let (size, inode_device) = data;
|
||||
if !should_ignore_file(apparent_size, filesystems, &mut inodes, inode_device) {
|
||||
process_file_with_size_and_inode(top_dir, mut_args.data, e, size)
|
||||
}
|
||||
}
|
||||
None => *mut_args.file_count_no_permission += 1,
|
||||
}
|
||||
} else {
|
||||
*mut_args.file_count_no_permission += 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn should_ignore_file(
|
||||
apparent_size: bool,
|
||||
restricted_filesystems: &Option<HashSet<u64>>,
|
||||
inodes: &mut HashSet<(u64, u64)>,
|
||||
maybe_inode_device: Option<(u64, u64)>,
|
||||
) -> bool {
|
||||
@@ -187,13 +214,6 @@ fn should_ignore_file(
|
||||
None => false,
|
||||
Some(data) => {
|
||||
let (inode, device) = data;
|
||||
// Ignore files on different devices (if flag applied)
|
||||
if let Some(rs) = restricted_filesystems {
|
||||
if !rs.contains(&device) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if !apparent_size {
|
||||
// Ignore files already visited or symlinked
|
||||
if inodes.contains(&(inode, device)) {
|
||||
@@ -206,28 +226,6 @@ fn should_ignore_file(
|
||||
}
|
||||
}
|
||||
|
||||
fn process_file_with_size_and_inode<P: AsRef<Path>>(
|
||||
top_dir: P,
|
||||
data: &mut HashMap<PathBuf, u64>,
|
||||
e: DirEntry,
|
||||
size: u64,
|
||||
) {
|
||||
let top_dir = top_dir.as_ref();
|
||||
// This path and all its parent paths have their counter incremented
|
||||
for path in e.path().ancestors() {
|
||||
// This is required due to bug in Jwalk that adds '/' to all sub dir lists
|
||||
// see: https://github.com/jessegrosjean/jwalk/issues/13
|
||||
if path.to_string_lossy() == "/" && top_dir.to_string_lossy() != "/" {
|
||||
continue;
|
||||
}
|
||||
let s = data.entry(normalize_path(path)).or_insert(0);
|
||||
*s += size;
|
||||
if path.starts_with(top_dir) && top_dir.starts_with(path) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn sort_by_size_first_name_second(a: &(PathBuf, u64), b: &(PathBuf, u64)) -> Ordering {
|
||||
let result = b.1.cmp(&a.1);
|
||||
if result == Ordering::Equal {
|
||||
@@ -251,36 +249,6 @@ pub fn find_big_ones(new_l: Vec<(PathBuf, u64)>, max_to_show: usize) -> Vec<(Pat
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of components in `name`, not counting any
/// `Component::Prefix`.
///
/// The filter is required because paths can carry preliminary "Prefix" bits
/// (for example, from Windows, `\\?\` or `\\UNC\`) that must not count
/// towards the depth. A leading root (`/`) or `.` component does count.
fn depth_of_path<P: AsRef<Path>>(name: P) -> usize {
    name.as_ref()
        .components()
        .filter(|&c| match c {
            std::path::Component::Prefix(_) => false,
            _ => true,
        })
        .count()
}
|
||||
|
||||
pub fn trim_deep_ones(
|
||||
input: Vec<(PathBuf, u64)>,
|
||||
max_depth: u64,
|
||||
top_level_names: &HashSet<PathBuf>,
|
||||
) -> Vec<(PathBuf, u64)> {
|
||||
let mut result: Vec<(PathBuf, u64)> = Vec::with_capacity(input.len() * top_level_names.len());
|
||||
|
||||
for name in top_level_names {
|
||||
let my_max_depth = depth_of_path(name) + max_depth as usize;
|
||||
|
||||
for &(ref k, ref v) in input.iter() {
|
||||
if k.starts_with(name) && depth_of_path(k) <= my_max_depth {
|
||||
result.push((k.clone(), *v));
|
||||
}
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
mod tests {
|
||||
#[allow(unused_imports)]
|
||||
use super::*;
|
||||
@@ -367,19 +335,14 @@ mod tests {
|
||||
let mut files = HashSet::new();
|
||||
files.insert((10, 20));
|
||||
|
||||
assert!(!should_ignore_file(true, &None, &mut files, Some((0, 0))));
|
||||
assert!(!should_ignore_file(true, &mut files, Some((0, 0))));
|
||||
|
||||
// A new, unknown file is inserted into the hash set and must not be ignored
|
||||
assert!(!should_ignore_file(
|
||||
false,
|
||||
&None,
|
||||
&mut files,
|
||||
Some((11, 12))
|
||||
));
|
||||
assert!(!should_ignore_file(false, &mut files, Some((11, 12))));
|
||||
assert!(files.contains(&(11, 12)));
|
||||
|
||||
// The same file will be ignored the second time
|
||||
assert!(should_ignore_file(false, &None, &mut files, Some((11, 12))));
|
||||
assert!(should_ignore_file(false, &mut files, Some((11, 12))));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -387,17 +350,8 @@ mod tests {
|
||||
let mut files = HashSet::new();
|
||||
files.insert((10, 20));
|
||||
|
||||
let mut devices = HashSet::new();
|
||||
devices.insert(99);
|
||||
let od = Some(devices);
|
||||
|
||||
// If we are looking at a different device (disk) and the device flag is set
|
||||
// then apparent_size is irrelevant - we ignore files on other devices
|
||||
assert!(should_ignore_file(false, &od, &mut files, Some((11, 12))));
|
||||
assert!(should_ignore_file(true, &od, &mut files, Some((11, 12))));
|
||||
|
||||
// We do not ignore files on the same device
|
||||
assert!(!should_ignore_file(false, &od, &mut files, Some((2, 99))));
|
||||
assert!(!should_ignore_file(true, &od, &mut files, Some((2, 99))));
|
||||
assert!(!should_ignore_file(false, &mut files, Some((2, 99))));
|
||||
assert!(!should_ignore_file(true, &mut files, Some((2, 99))));
|
||||
}
|
||||
}
|
||||
|
||||
+12
-30
@@ -1,8 +1,6 @@
|
||||
use jwalk::DirEntry;
|
||||
use ignore::DirEntry;
|
||||
#[allow(unused_imports)]
|
||||
use std::fs;
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
|
||||
#[cfg(target_family = "unix")]
|
||||
fn get_block_size() -> u64 {
|
||||
@@ -14,13 +12,12 @@ fn get_block_size() -> u64 {
|
||||
#[cfg(target_family = "unix")]
|
||||
pub fn get_metadata(d: &DirEntry, use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> {
|
||||
use std::os::unix::fs::MetadataExt;
|
||||
d.metadata.as_ref().unwrap().as_ref().ok().map(|md| {
|
||||
if use_apparent_size {
|
||||
(md.len(), Some((md.ino(), md.dev())))
|
||||
} else {
|
||||
(md.blocks() * get_block_size(), Some((md.ino(), md.dev())))
|
||||
}
|
||||
})
|
||||
let md = d.metadata().unwrap();
|
||||
if use_apparent_size {
|
||||
Some((md.len(), Some((md.ino(), md.dev()))))
|
||||
} else {
|
||||
Some((md.blocks() * get_block_size(), Some((md.ino(), md.dev()))))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_family = "windows")]
|
||||
@@ -61,6 +58,8 @@ pub fn get_metadata(d: &DirEntry, _use_apparent_size: bool) -> Option<(u64, Opti
|
||||
// Consistently opening the file: 30 minutes.
|
||||
// With this optimization: 8 sec.
|
||||
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
use winapi_util::Handle;
|
||||
fn handle_from_path_limited<P: AsRef<Path>>(path: P) -> io::Result<Handle> {
|
||||
use std::fs::OpenOptions;
|
||||
@@ -99,9 +98,9 @@ pub fn get_metadata(d: &DirEntry, _use_apparent_size: bool) -> Option<(u64, Opti
|
||||
))
|
||||
}
|
||||
|
||||
match d.metadata {
|
||||
Some(Ok(ref md)) => {
|
||||
use std::os::windows::fs::MetadataExt;
|
||||
use std::os::windows::fs::MetadataExt;
|
||||
match d.metadata() {
|
||||
Ok(ref md) => {
|
||||
const FILE_ATTRIBUTE_ARCHIVE: u32 = 0x20u32;
|
||||
const FILE_ATTRIBUTE_READONLY: u32 = 0x1u32;
|
||||
const FILE_ATTRIBUTE_HIDDEN: u32 = 0x2u32;
|
||||
@@ -123,20 +122,3 @@ pub fn get_metadata(d: &DirEntry, _use_apparent_size: bool) -> Option<(u64, Opti
|
||||
_ => get_metadata_expensive(&d),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the device id of the filesystem that contains `file_path` (Unix).
///
/// # Errors
///
/// Returns the underlying `io::Error` when the metadata of `file_path` cannot
/// be read, for example because the path does not exist.
#[cfg(target_family = "unix")]
pub fn get_filesystem<P: AsRef<Path>>(file_path: P) -> Result<u64, io::Error> {
    use std::os::unix::fs::MetadataExt;
    fs::metadata(file_path).map(|metadata| metadata.dev())
}
|
||||
|
||||
/// Returns the volume serial number of the volume that contains `file_path`
/// (Windows).
///
/// # Errors
///
/// Returns the underlying `io::Error` when a handle to `file_path` cannot be
/// opened or its file information cannot be queried.
#[cfg(target_family = "windows")]
pub fn get_filesystem<P: AsRef<Path>>(file_path: P) -> Result<u64, io::Error> {
    use winapi_util::file::information;
    use winapi_util::Handle;

    let handle = Handle::from_path_any(file_path)?;
    information(&handle).map(|info| info.volume_serial_number())
}
|
||||
|
||||
Reference in New Issue
Block a user