mirror of
https://github.com/bootandy/dust.git
synced 2026-06-08 11:29:05 +03:00
Large refactor. Use rayon, 10X performance boost
Code changes: Removed the ignore & channel crates. Using a single receiver thread to build a hashmap to prevent duplicate inodes being reported gave a severe performance penalty. Using the rayon crate with some hand-crafted file traversal has improved performance approx. 10X. Behaviour changes: Removed the 'limit by filesystem' parameter - I don't think this is used, and I only added it as it was easy to add with the ignore crate. Symlinks will now not appear in the output tree unless using the '-s' 'apparent-size' flag. Changed the behaviour of multiple args so that it unifies them and compares them under one tree instead of treating them individually: https://github.com/bootandy/dust/issues/136
This commit is contained in:
@@ -0,0 +1,176 @@
|
||||
use std::fs;
|
||||
|
||||
use crate::node::Node;
|
||||
use rayon::iter::ParallelBridge;
|
||||
use rayon::prelude::ParallelIterator;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use std::sync::atomic;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
use crate::node::build_node;
|
||||
use std::fs::DirEntry;
|
||||
|
||||
pub fn walk_it(
|
||||
dirs: HashSet<PathBuf>,
|
||||
ignore_directories: HashSet<PathBuf>,
|
||||
use_apparent_size: bool,
|
||||
by_filecount: bool,
|
||||
ignore_hidden: bool,
|
||||
) -> (Vec<Node>, bool) {
|
||||
let permissions_flag = AtomicBool::new(false);
|
||||
|
||||
let top_level_nodes: Vec<_> = dirs
|
||||
.into_iter()
|
||||
.filter_map(|d| {
|
||||
let n = walk(
|
||||
d,
|
||||
&permissions_flag,
|
||||
&ignore_directories,
|
||||
use_apparent_size,
|
||||
by_filecount,
|
||||
ignore_hidden,
|
||||
);
|
||||
match n {
|
||||
Some(n) => {
|
||||
let mut inodes: HashSet<(u64, u64)> = HashSet::new();
|
||||
clean_inodes(n, &mut inodes, use_apparent_size)
|
||||
}
|
||||
None => None,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
(top_level_nodes, permissions_flag.into_inner())
|
||||
}
|
||||
|
||||
// Remove files which have the same inode, we don't want to double count them.
|
||||
fn clean_inodes(
|
||||
x: Node,
|
||||
inodes: &mut HashSet<(u64, u64)>,
|
||||
use_apparent_size: bool,
|
||||
) -> Option<Node> {
|
||||
if !use_apparent_size {
|
||||
if let Some(id) = x.inode_device {
|
||||
if inodes.contains(&id) {
|
||||
return None;
|
||||
}
|
||||
inodes.insert(id);
|
||||
}
|
||||
}
|
||||
|
||||
let new_children: Vec<_> = x
|
||||
.children
|
||||
.into_iter()
|
||||
.filter_map(|c| clean_inodes(c, inodes, use_apparent_size))
|
||||
.collect();
|
||||
|
||||
return Some(Node {
|
||||
name: x.name,
|
||||
size: x.size + new_children.iter().map(|c| c.size).sum::<u64>(),
|
||||
children: new_children,
|
||||
inode_device: x.inode_device,
|
||||
});
|
||||
}
|
||||
|
||||
/// Decides whether `entry` should be left out of the walk.
///
/// An entry is ignored when its full path is listed in `ignore_directories`,
/// or when `ignore_hidden` is set and its file name starts with `.`.
/// A file name that is not valid UTF-8 is never treated as a dot file.
fn ignore_file(
    entry: &DirEntry,
    ignore_hidden: bool,
    ignore_directories: &HashSet<PathBuf>,
) -> bool {
    if ignore_directories.contains(&entry.path()) {
        return true;
    }
    if !ignore_hidden {
        return false;
    }
    match entry.file_name().to_str() {
        Some(name) => name.starts_with('.'),
        None => false,
    }
}
|
||||
|
||||
fn walk(
|
||||
dir: PathBuf,
|
||||
permissions_flag: &AtomicBool,
|
||||
ignore_directories: &HashSet<PathBuf>,
|
||||
use_apparent_size: bool,
|
||||
by_filecount: bool,
|
||||
ignore_hidden: bool,
|
||||
) -> Option<Node> {
|
||||
let mut children = vec![];
|
||||
|
||||
if let Ok(entries) = fs::read_dir(dir.clone()) {
|
||||
children = entries
|
||||
.into_iter()
|
||||
.par_bridge()
|
||||
.filter_map(|entry| {
|
||||
if let Ok(ref entry) = entry {
|
||||
// uncommenting the below line gives simpler code but
|
||||
// rayon doesn't parallelise as well giving a 3X performance drop
|
||||
// hence we unravel the recursion a bit
|
||||
|
||||
// return walk(entry.path(), permissions_flag, ignore_directories, use_apparent_size, by_filecount, ignore_hidden);
|
||||
|
||||
if !ignore_file(&entry, ignore_hidden, &ignore_directories) {
|
||||
if let Ok(data) = entry.file_type() {
|
||||
if data.is_dir() && !data.is_symlink() {
|
||||
return walk(
|
||||
entry.path(),
|
||||
permissions_flag,
|
||||
ignore_directories,
|
||||
use_apparent_size,
|
||||
by_filecount,
|
||||
ignore_hidden,
|
||||
);
|
||||
}
|
||||
return build_node(
|
||||
entry.path(),
|
||||
vec![],
|
||||
use_apparent_size,
|
||||
by_filecount,
|
||||
);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
permissions_flag.store(true, atomic::Ordering::Relaxed);
|
||||
}
|
||||
None
|
||||
})
|
||||
.collect();
|
||||
} else {
|
||||
permissions_flag.store(true, atomic::Ordering::Relaxed);
|
||||
}
|
||||
build_node(dir, children, use_apparent_size, by_filecount)
|
||||
}
|
||||
|
||||
// Compiled only for test builds, so the glob import and the helper below do
// not need per-item `cfg`/`allow` attributes.
#[cfg(test)]
mod tests {
    use super::*;

    /// A childless node with a fixed size and inode/device id.
    fn create_node() -> Node {
        Node {
            name: PathBuf::new(),
            size: 10,
            children: vec![],
            inode_device: Some((5, 6)),
        }
    }

    #[test]
    fn test_should_ignore_file() {
        let mut inodes = HashSet::new();
        let n = create_node();

        // First time we insert the node
        assert_eq!(clean_inodes(n.clone(), &mut inodes, false), Some(n.clone()));

        // Second time is a duplicate - we ignore it
        assert_eq!(clean_inodes(n, &mut inodes, false), None);
    }

    #[test]
    fn test_should_not_ignore_files_if_using_apparent_size() {
        let mut inodes = HashSet::new();
        let n = create_node();

        // If using apparent size we include Nodes, even if duplicate inodes
        assert_eq!(clean_inodes(n.clone(), &mut inodes, true), Some(n.clone()));
        assert_eq!(clean_inodes(n.clone(), &mut inodes, true), Some(n));
    }
}
|
||||
+48
-40
@@ -1,6 +1,6 @@
|
||||
extern crate ansi_term;
|
||||
|
||||
use crate::utils::{Errors, Node};
|
||||
use crate::display_node::DisplayNode;
|
||||
|
||||
use self::ansi_term::Colour::Red;
|
||||
use lscolors::{LsColors, Style};
|
||||
@@ -60,7 +60,7 @@ impl DisplayData {
|
||||
}
|
||||
}
|
||||
|
||||
fn percent_size(&self, node: &Node) -> f32 {
|
||||
fn percent_size(&self, node: &DisplayNode) -> f32 {
|
||||
let result = node.size as f32 / self.base_size as f32;
|
||||
if result.is_normal() {
|
||||
result
|
||||
@@ -83,7 +83,7 @@ impl DrawData<'_> {
|
||||
}
|
||||
|
||||
// TODO: can we test this?
|
||||
fn generate_bar(&self, node: &Node, level: usize) -> String {
|
||||
fn generate_bar(&self, node: &DisplayNode, level: usize) -> String {
|
||||
let chars_in_bar = self.percent_bar.chars().count();
|
||||
let num_bars = chars_in_bar as f32 * self.display_data.percent_size(node);
|
||||
let mut num_not_my_bar = (chars_in_bar as i32) - num_bars as i32;
|
||||
@@ -107,21 +107,23 @@ impl DrawData<'_> {
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn draw_it(
|
||||
errors: Errors,
|
||||
permission_error: bool,
|
||||
use_full_path: bool,
|
||||
is_reversed: bool,
|
||||
no_colors: bool,
|
||||
no_percents: bool,
|
||||
terminal_width: usize,
|
||||
by_filecount: bool,
|
||||
root_node: Node,
|
||||
option_root_node: Option<DisplayNode>,
|
||||
) {
|
||||
if errors.permissions {
|
||||
if permission_error {
|
||||
eprintln!("Did not have permissions for all directories");
|
||||
}
|
||||
if errors.not_found {
|
||||
eprintln!("Not all directories were found");
|
||||
if option_root_node.is_none() {
|
||||
return;
|
||||
}
|
||||
let root_node = option_root_node.unwrap();
|
||||
|
||||
let num_chars_needed_on_left_most = if by_filecount {
|
||||
let max_size = root_node.children.iter().map(|n| n.size).fold(0, max);
|
||||
max_size.separate_with_commas().chars().count()
|
||||
@@ -131,11 +133,8 @@ pub fn draw_it(
|
||||
|
||||
let terminal_width = terminal_width - 9 - num_chars_needed_on_left_most;
|
||||
let num_indent_chars = 3;
|
||||
let longest_string_length = root_node
|
||||
.children
|
||||
.iter()
|
||||
.map(|c| find_longest_dir_name(&c, num_indent_chars, terminal_width, !use_full_path))
|
||||
.fold(0, max);
|
||||
let longest_string_length =
|
||||
find_longest_dir_name(&root_node, num_indent_chars, terminal_width, !use_full_path);
|
||||
|
||||
let max_bar_length = if no_percents || longest_string_length >= terminal_width as usize {
|
||||
0
|
||||
@@ -145,27 +144,30 @@ pub fn draw_it(
|
||||
|
||||
let first_size_bar = repeat(BLOCKS[0]).take(max_bar_length).collect::<String>();
|
||||
|
||||
for c in root_node.get_children_from_node(is_reversed) {
|
||||
let display_data = DisplayData {
|
||||
short_paths: !use_full_path,
|
||||
is_reversed,
|
||||
colors_on: !no_colors,
|
||||
by_filecount,
|
||||
num_chars_needed_on_left_most,
|
||||
base_size: c.size,
|
||||
longest_string_length,
|
||||
ls_colors: LsColors::from_env().unwrap_or_default(),
|
||||
};
|
||||
let draw_data = DrawData {
|
||||
indent: "".to_string(),
|
||||
percent_bar: first_size_bar.clone(),
|
||||
display_data: &display_data,
|
||||
};
|
||||
display_node(c, &draw_data, true, true);
|
||||
}
|
||||
let display_data = DisplayData {
|
||||
short_paths: !use_full_path,
|
||||
is_reversed,
|
||||
colors_on: !no_colors,
|
||||
by_filecount,
|
||||
num_chars_needed_on_left_most,
|
||||
base_size: root_node.size,
|
||||
longest_string_length,
|
||||
ls_colors: LsColors::from_env().unwrap_or_default(),
|
||||
};
|
||||
let draw_data = DrawData {
|
||||
indent: "".to_string(),
|
||||
percent_bar: first_size_bar,
|
||||
display_data: &display_data,
|
||||
};
|
||||
display_node(root_node, &draw_data, true, true);
|
||||
}
|
||||
|
||||
fn find_longest_dir_name(node: &Node, indent: usize, terminal: usize, long_paths: bool) -> usize {
|
||||
fn find_longest_dir_name(
|
||||
node: &DisplayNode,
|
||||
indent: usize,
|
||||
terminal: usize,
|
||||
long_paths: bool,
|
||||
) -> usize {
|
||||
let printable_name = get_printable_name(&node.name, long_paths);
|
||||
let longest = min(
|
||||
UnicodeWidthStr::width(&*printable_name) + 1 + indent,
|
||||
@@ -179,7 +181,7 @@ fn find_longest_dir_name(node: &Node, indent: usize, terminal: usize, long_paths
|
||||
.fold(longest, max)
|
||||
}
|
||||
|
||||
fn display_node(node: Node, draw_data: &DrawData, is_biggest: bool, is_last: bool) {
|
||||
fn display_node(node: DisplayNode, draw_data: &DrawData, is_biggest: bool, is_last: bool) {
|
||||
// hacky way of working out how deep we are in the tree
|
||||
let indent = draw_data.get_new_indent(!node.children.is_empty(), is_last);
|
||||
let level = ((indent.chars().count() - 1) / 2) - 1;
|
||||
@@ -254,11 +256,13 @@ fn get_printable_name<P: AsRef<Path>>(dir_name: &P, long_paths: bool) -> String
|
||||
encode_u8(printable_name.display().to_string().as_bytes())
|
||||
}
|
||||
|
||||
fn pad_or_trim_filename(node: &Node, indent: &str, display_data: &DisplayData) -> String {
|
||||
fn pad_or_trim_filename(node: &DisplayNode, indent: &str, display_data: &DisplayData) -> String {
|
||||
let name = get_printable_name(&node.name, display_data.short_paths);
|
||||
let indent_and_name = format!("{} {}", indent, name);
|
||||
let width = UnicodeWidthStr::width(&*indent_and_name);
|
||||
|
||||
assert!(display_data.longest_string_length >= width);
|
||||
|
||||
// Add spaces after the filename so we can draw the % used bar chart.
|
||||
let name_and_padding = name
|
||||
+ &(repeat(" ")
|
||||
@@ -281,7 +285,7 @@ fn maybe_trim_filename(name_in: String, display_data: &DisplayData) -> String {
|
||||
}
|
||||
|
||||
pub fn format_string(
|
||||
node: &Node,
|
||||
node: &DisplayNode,
|
||||
indent: &str,
|
||||
percent_bar: &str,
|
||||
is_biggest: bool,
|
||||
@@ -294,7 +298,7 @@ pub fn format_string(
|
||||
}
|
||||
|
||||
fn get_name_percent(
|
||||
node: &Node,
|
||||
node: &DisplayNode,
|
||||
indent: &str,
|
||||
bar_chart: &str,
|
||||
display_data: &DisplayData,
|
||||
@@ -311,7 +315,7 @@ fn get_name_percent(
|
||||
}
|
||||
}
|
||||
|
||||
fn get_pretty_size(node: &Node, is_biggest: bool, display_data: &DisplayData) -> String {
|
||||
fn get_pretty_size(node: &DisplayNode, is_biggest: bool, display_data: &DisplayData) -> String {
|
||||
let output = if display_data.by_filecount {
|
||||
let size_as_str = node.size.separate_with_commas();
|
||||
let spaces_to_add =
|
||||
@@ -328,7 +332,11 @@ fn get_pretty_size(node: &Node, is_biggest: bool, display_data: &DisplayData) ->
|
||||
}
|
||||
}
|
||||
|
||||
fn get_pretty_name(node: &Node, name_and_padding: String, display_data: &DisplayData) -> String {
|
||||
fn get_pretty_name(
|
||||
node: &DisplayNode,
|
||||
name_and_padding: String,
|
||||
display_data: &DisplayData,
|
||||
) -> String {
|
||||
if display_data.colors_on {
|
||||
let meta_result = fs::metadata(node.name.clone());
|
||||
let directory_color = display_data
|
||||
@@ -379,7 +387,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_format_str() {
|
||||
let n = Node {
|
||||
let n = DisplayNode {
|
||||
name: PathBuf::from("/short"),
|
||||
size: 2_u64.pow(12), // This is 4.0K
|
||||
children: vec![],
|
||||
@@ -401,7 +409,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_format_str_long_name() {
|
||||
let name = "very_long_name_longer_than_the_eighty_character_limit_very_long_name_this_bit_will_truncate";
|
||||
let n = Node {
|
||||
let n = DisplayNode {
|
||||
name: PathBuf::from(name),
|
||||
size: 2_u64.pow(12), // This is 4.0K
|
||||
children: vec![],
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// A tree node carrying only a path, a size and child nodes.
#[derive(Clone, Debug, Eq)]
pub struct DisplayNode {
    pub name: PathBuf, //todo: consider moving to a string?
    pub size: u64,
    pub children: Vec<DisplayNode>,
}

/// Nodes are ordered by `size` first; ties are broken by `name`.
/// `children` do not take part in the ordering.
impl Ord for DisplayNode {
    fn cmp(&self, other: &Self) -> Ordering {
        self.size
            .cmp(&other.size)
            .then_with(|| self.name.cmp(&other.name))
    }
}

impl PartialOrd for DisplayNode {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

/// Two nodes are equal when name, size and all children are equal.
impl PartialEq for DisplayNode {
    fn eq(&self, other: &Self) -> bool {
        (&self.name, self.size, &self.children) == (&other.name, other.size, &other.children)
    }
}

impl DisplayNode {
    /// Returns the number of direct children of this node (despite the name,
    /// it does not count nodes next to this one).
    pub fn num_siblings(&self) -> u64 {
        self.children.len() as u64
    }

    /// Returns an iterator over owned copies of the direct children, in
    /// stored order, or in the opposite order when `is_reversed` is `true`.
    pub fn get_children_from_node(&self, is_reversed: bool) -> impl Iterator<Item = DisplayNode> {
        let mut children = self.children.clone();
        if is_reversed {
            children.reverse();
        }
        children.into_iter()
    }
}
|
||||
+104
@@ -0,0 +1,104 @@
|
||||
use crate::display_node::DisplayNode;
|
||||
use crate::node::Node;
|
||||
use std::collections::BinaryHeap;
|
||||
use std::collections::HashSet;
|
||||
use std::path::PathBuf;
|
||||
|
||||
pub fn get_by_depth(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode> {
|
||||
if top_level_nodes.is_empty() {
|
||||
// perhaps change this, bring back Error object?
|
||||
return None;
|
||||
}
|
||||
let root = get_new_root(top_level_nodes);
|
||||
Some(build_by_depth(&root, n - 1))
|
||||
}
|
||||
|
||||
pub fn get_biggest(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode> {
|
||||
if top_level_nodes.is_empty() {
|
||||
// perhaps change this, bring back Error object?
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut heap = BinaryHeap::new();
|
||||
let number_top_level_nodes = top_level_nodes.len();
|
||||
let root = get_new_root(top_level_nodes);
|
||||
|
||||
root.children.iter().for_each(|c| heap.push(c));
|
||||
|
||||
let mut allowed_nodes = HashSet::new();
|
||||
allowed_nodes.insert(&root.name);
|
||||
|
||||
for _ in number_top_level_nodes..n {
|
||||
let line = heap.pop();
|
||||
match line {
|
||||
Some(line) => {
|
||||
line.children.iter().for_each(|c| heap.push(c));
|
||||
allowed_nodes.insert(&line.name);
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
recursive_rebuilder(&allowed_nodes, &root)
|
||||
}
|
||||
|
||||
fn build_by_depth(node: &Node, depth: usize) -> DisplayNode {
|
||||
let new_children = {
|
||||
if depth == 0 {
|
||||
vec![]
|
||||
} else {
|
||||
let mut new_children: Vec<_> = node
|
||||
.children
|
||||
.iter()
|
||||
.map(|c| build_by_depth(c, depth - 1))
|
||||
.collect();
|
||||
new_children.sort();
|
||||
new_children.reverse();
|
||||
new_children
|
||||
}
|
||||
};
|
||||
|
||||
DisplayNode {
|
||||
name: node.name.clone(),
|
||||
size: node.size,
|
||||
children: new_children,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_new_root(top_level_nodes: Vec<Node>) -> Node {
|
||||
if top_level_nodes.len() > 1 {
|
||||
let total_size = top_level_nodes.iter().map(|node| node.size).sum();
|
||||
Node {
|
||||
name: PathBuf::from("(total)"),
|
||||
size: total_size,
|
||||
children: top_level_nodes,
|
||||
inode_device: None,
|
||||
}
|
||||
} else {
|
||||
top_level_nodes.into_iter().next().unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
fn recursive_rebuilder<'a>(
|
||||
allowed_nodes: &'a HashSet<&PathBuf>,
|
||||
current: &Node,
|
||||
) -> Option<DisplayNode> {
|
||||
let mut new_children: Vec<_> = current
|
||||
.children
|
||||
.iter()
|
||||
.filter_map(|c| {
|
||||
if allowed_nodes.contains(&c.name) {
|
||||
recursive_rebuilder(allowed_nodes, c)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
new_children.sort();
|
||||
new_children.reverse();
|
||||
let newnode = DisplayNode {
|
||||
name: current.name.clone(),
|
||||
size: current.size,
|
||||
children: new_children,
|
||||
};
|
||||
Some(newnode)
|
||||
}
|
||||
+29
-57
@@ -1,19 +1,25 @@
|
||||
#[macro_use]
|
||||
extern crate clap;
|
||||
extern crate crossbeam_channel as channel;
|
||||
extern crate ignore;
|
||||
extern crate rayon;
|
||||
extern crate unicode_width;
|
||||
extern crate walkdir;
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
use self::display::draw_it;
|
||||
use crate::utils::is_a_parent_of;
|
||||
use clap::{App, AppSettings, Arg};
|
||||
use dirwalker::walk_it;
|
||||
use filter::{get_biggest, get_by_depth};
|
||||
use std::cmp::max;
|
||||
use std::path::PathBuf;
|
||||
use terminal_size::{terminal_size, Height, Width};
|
||||
use utils::{find_big_ones, get_dir_tree, simplify_dir_names, sort, Node};
|
||||
use utils::simplify_dir_names;
|
||||
|
||||
mod dirwalker;
|
||||
mod display;
|
||||
mod display_node;
|
||||
mod filter;
|
||||
mod node;
|
||||
mod platform;
|
||||
mod utils;
|
||||
|
||||
static DEFAULT_NUMBER_OF_LINES: usize = 30;
|
||||
@@ -101,12 +107,6 @@ fn main() {
|
||||
.multiple(true)
|
||||
.help("Exclude any file or directory with this name"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("limit_filesystem")
|
||||
.short("x")
|
||||
.long("limit-filesystem")
|
||||
.help("Only count the files and directories on the same filesystem as the supplied directory"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("display_apparent_size")
|
||||
.short("s")
|
||||
@@ -184,31 +184,35 @@ fn main() {
|
||||
|
||||
let no_colors = init_color(options.is_present("no_colors"));
|
||||
let use_apparent_size = options.is_present("display_apparent_size");
|
||||
let limit_filesystem = options.is_present("limit_filesystem");
|
||||
let ignore_directories = options
|
||||
let ignore_directories: Vec<PathBuf> = options
|
||||
.values_of("ignore_directory")
|
||||
.map(|i| i.map(PathBuf::from).collect());
|
||||
.map(|i| i.map(PathBuf::from).collect())
|
||||
.unwrap_or_default();
|
||||
|
||||
let by_filecount = options.is_present("by_filecount");
|
||||
let show_hidden = !options.is_present("ignore_hidden");
|
||||
let ignore_hidden = options.is_present("ignore_hidden");
|
||||
|
||||
let simplified_dirs = simplify_dir_names(target_dirs);
|
||||
let (errors, nodes) = get_dir_tree(
|
||||
&simplified_dirs,
|
||||
&ignore_directories,
|
||||
|
||||
let ignored_full_path: HashSet<PathBuf> = ignore_directories
|
||||
.into_iter()
|
||||
.flat_map(|x| simplified_dirs.iter().map(move |d| d.join(x.clone())))
|
||||
.collect();
|
||||
|
||||
let (nodes, errors) = walk_it(
|
||||
simplified_dirs,
|
||||
ignored_full_path,
|
||||
use_apparent_size,
|
||||
limit_filesystem,
|
||||
by_filecount,
|
||||
show_hidden,
|
||||
ignore_hidden,
|
||||
);
|
||||
let sorted_data = sort(nodes);
|
||||
let biggest_ones = {
|
||||
|
||||
let tree = {
|
||||
match depth {
|
||||
None => find_big_ones(sorted_data, number_of_lines),
|
||||
Some(_) => sorted_data,
|
||||
None => get_biggest(nodes, number_of_lines),
|
||||
Some(depth) => get_by_depth(nodes, depth),
|
||||
}
|
||||
};
|
||||
let tree = build_tree(biggest_ones, depth);
|
||||
|
||||
draw_it(
|
||||
errors,
|
||||
@@ -221,35 +225,3 @@ fn main() {
|
||||
tree,
|
||||
);
|
||||
}
|
||||
|
||||
fn build_tree(biggest_ones: Vec<(PathBuf, u64)>, depth: Option<usize>) -> Node {
|
||||
let mut top_parent = Node::default();
|
||||
|
||||
// assume sorted order
|
||||
for b in biggest_ones {
|
||||
let n = Node {
|
||||
name: b.0,
|
||||
size: b.1,
|
||||
children: Vec::default(),
|
||||
};
|
||||
recursively_build_tree(&mut top_parent, n, depth);
|
||||
}
|
||||
top_parent
|
||||
}
|
||||
|
||||
fn recursively_build_tree(parent_node: &mut Node, new_node: Node, depth: Option<usize>) {
|
||||
let new_depth = match depth {
|
||||
None => None,
|
||||
Some(0) => return,
|
||||
Some(d) => Some(d - 1),
|
||||
};
|
||||
if let Some(c) = parent_node
|
||||
.children
|
||||
.iter_mut()
|
||||
.find(|c| is_a_parent_of(&c.name, &new_node.name))
|
||||
{
|
||||
recursively_build_tree(c, new_node, new_depth);
|
||||
} else {
|
||||
parent_node.children.push(new_node);
|
||||
}
|
||||
}
|
||||
|
||||
+54
@@ -0,0 +1,54 @@
|
||||
use crate::platform::get_metadata;
|
||||
|
||||
use std::cmp::Ordering;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Debug, Eq, Clone)]
|
||||
pub struct Node {
|
||||
pub name: PathBuf,
|
||||
pub size: u64,
|
||||
pub children: Vec<Node>,
|
||||
pub inode_device: Option<(u64, u64)>,
|
||||
}
|
||||
|
||||
pub fn build_node(
|
||||
dir: PathBuf,
|
||||
children: Vec<Node>,
|
||||
use_apparent_size: bool,
|
||||
by_filecount: bool,
|
||||
) -> Option<Node> {
|
||||
match get_metadata(&dir, use_apparent_size) {
|
||||
Some(data) => {
|
||||
let (size, inode_device) = if by_filecount { (1, data.1) } else { data };
|
||||
Some(Node {
|
||||
name: dir,
|
||||
size,
|
||||
children,
|
||||
inode_device,
|
||||
})
|
||||
}
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for Node {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.name == other.name && self.size == other.size && self.children == other.children
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for Node {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
if self.size == other.size {
|
||||
self.name.cmp(&other.name)
|
||||
} else {
|
||||
self.size.cmp(&other.size)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for Node {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,8 @@
|
||||
use ignore::DirEntry;
|
||||
#[allow(unused_imports)]
|
||||
use std::fs;
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
#[cfg(target_family = "unix")]
|
||||
fn get_block_size() -> u64 {
|
||||
// All os specific implementations of MetatdataExt seem to define a block as 512 bytes
|
||||
@@ -10,7 +11,7 @@ fn get_block_size() -> u64 {
|
||||
}
|
||||
|
||||
#[cfg(target_family = "unix")]
|
||||
pub fn get_metadata(d: &DirEntry, use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> {
|
||||
pub fn get_metadata(d: &Path, use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> {
|
||||
use std::os::unix::fs::MetadataExt;
|
||||
match d.metadata() {
|
||||
Ok(md) => {
|
||||
@@ -25,7 +26,7 @@ pub fn get_metadata(d: &DirEntry, use_apparent_size: bool) -> Option<(u64, Optio
|
||||
}
|
||||
|
||||
#[cfg(target_family = "windows")]
|
||||
pub fn get_metadata(d: &DirEntry, _use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> {
|
||||
pub fn get_metadata(d: &Path, _use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> {
|
||||
// On windows opening the file to get size, file ID and volume can be very
|
||||
// expensive because 1) it causes a few system calls, and more importantly 2) it can cause
|
||||
// windows defender to scan the file.
|
||||
@@ -63,7 +64,6 @@ pub fn get_metadata(d: &DirEntry, _use_apparent_size: bool) -> Option<(u64, Opti
|
||||
// With this optimization: 8 sec.
|
||||
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
use winapi_util::Handle;
|
||||
fn handle_from_path_limited<P: AsRef<Path>>(path: P) -> io::Result<Handle> {
|
||||
use std::fs::OpenOptions;
|
||||
@@ -90,10 +90,10 @@ pub fn get_metadata(d: &DirEntry, _use_apparent_size: bool) -> Option<(u64, Opti
|
||||
Ok(Handle::from_file(file))
|
||||
}
|
||||
|
||||
fn get_metadata_expensive(d: &DirEntry) -> Option<(u64, Option<(u64, u64)>)> {
|
||||
fn get_metadata_expensive(d: &Path) -> Option<(u64, Option<(u64, u64)>)> {
|
||||
use winapi_util::file::information;
|
||||
|
||||
let h = handle_from_path_limited(d.path()).ok()?;
|
||||
let h = handle_from_path_limited(d).ok()?;
|
||||
let info = information(&h).ok()?;
|
||||
|
||||
Some((
|
||||
+126
@@ -0,0 +1,126 @@
|
||||
use std::collections::HashSet;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
/// Returns `true` when `parent` is a strict path ancestor of `child`:
/// `child` starts with `parent`, but the two are not the same path.
/// The comparison works on whole path components, so `/usr/folder` is not an
/// ancestor of `/usr/folder_not_a_child`.
fn is_a_parent_of<P: AsRef<Path>>(parent: P, child: P) -> bool {
    let (parent, child) = (parent.as_ref(), child.as_ref());
    // If `parent` starts with `child`, it is either the same path or lies
    // below `child`; in both cases it is not an ancestor.
    if parent.starts_with(child) {
        return false;
    }
    child.starts_with(parent)
}
|
||||
|
||||
pub fn simplify_dir_names<P: AsRef<Path>>(filenames: Vec<P>) -> HashSet<PathBuf> {
|
||||
let mut top_level_names: HashSet<PathBuf> = HashSet::with_capacity(filenames.len());
|
||||
let mut to_remove: Vec<PathBuf> = Vec::with_capacity(filenames.len());
|
||||
|
||||
for t in filenames {
|
||||
let top_level_name = normalize_path(t);
|
||||
let mut can_add = true;
|
||||
|
||||
for tt in top_level_names.iter() {
|
||||
if is_a_parent_of(&top_level_name, tt) {
|
||||
to_remove.push(tt.to_path_buf());
|
||||
} else if is_a_parent_of(tt, &top_level_name) {
|
||||
can_add = false;
|
||||
}
|
||||
}
|
||||
to_remove.sort_unstable();
|
||||
top_level_names.retain(|tr| to_remove.binary_search(tr).is_err());
|
||||
to_remove.clear();
|
||||
if can_add {
|
||||
top_level_names.insert(top_level_name);
|
||||
}
|
||||
}
|
||||
|
||||
top_level_names
|
||||
}
|
||||
|
||||
/// Rebuilds `path` from its components, which
///
/// 1. removes repeated separators,
/// 2. removes interior `.` ("current directory") segments,
/// 3. removes trailing separators and trailing `.` segments, and
/// 4. joins the components with the separator preferred by the OS.
///
/// Steps 1-3 are done by `Path::components()`; ref:
/// <https://doc.rust-lang.org/std/path/struct.Path.html#method.components>
pub fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
    let mut normalized = PathBuf::new();
    for component in path.as_ref().components() {
        normalized.push(component);
    }
    normalized
}
|
||||
|
||||
// Compiled only for test builds, so the glob import needs no
// `allow(unused_imports)`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_simplify_dir() {
        let mut correct = HashSet::new();
        correct.insert(PathBuf::from("a"));
        assert_eq!(simplify_dir_names(vec!["a"]), correct);
    }

    #[test]
    fn test_simplify_dir_rm_subdir() {
        let mut correct = HashSet::new();
        correct.insert(["a", "b"].iter().collect::<PathBuf>());
        assert_eq!(simplify_dir_names(vec!["a/b", "a/b/c", "a/b/d/f"]), correct);
    }

    #[test]
    fn test_simplify_dir_duplicates() {
        let mut correct = HashSet::new();
        correct.insert(["a", "b"].iter().collect::<PathBuf>());
        correct.insert(PathBuf::from("c"));
        assert_eq!(
            simplify_dir_names(vec![
                "a/b",
                "a/b//",
                "a/././b///",
                "c",
                "c/",
                "c/.",
                "c/././",
                "c/././."
            ]),
            correct
        );
    }

    #[test]
    fn test_simplify_dir_rm_subdir_and_not_substrings() {
        let mut correct = HashSet::new();
        correct.insert(PathBuf::from("b"));
        correct.insert(["c", "a", "b"].iter().collect::<PathBuf>());
        correct.insert(["a", "b"].iter().collect::<PathBuf>());
        assert_eq!(simplify_dir_names(vec!["a/b", "c/a/b/", "b"]), correct);
    }

    #[test]
    fn test_simplify_dir_dots() {
        let mut correct = HashSet::new();
        correct.insert(PathBuf::from("src"));
        assert_eq!(simplify_dir_names(vec!["src/."]), correct);
    }

    #[test]
    fn test_simplify_dir_substring_names() {
        let mut correct = HashSet::new();
        correct.insert(PathBuf::from("src"));
        correct.insert(PathBuf::from("src_v2"));
        assert_eq!(simplify_dir_names(vec!["src/", "src_v2"]), correct);
    }

    #[test]
    fn test_is_a_parent_of() {
        assert!(is_a_parent_of("/usr", "/usr/andy"));
        assert!(is_a_parent_of("/usr", "/usr/andy/i/am/descendant"));
        assert!(!is_a_parent_of("/usr", "/usr/."));
        assert!(!is_a_parent_of("/usr", "/usr/"));
        assert!(!is_a_parent_of("/usr", "/usr"));
        assert!(!is_a_parent_of("/usr/", "/usr"));
        assert!(!is_a_parent_of("/usr/andy", "/usr"));
        assert!(!is_a_parent_of("/usr/andy", "/usr/sibling"));
        assert!(!is_a_parent_of("/usr/folder", "/usr/folder_not_a_child"));
    }

    #[test]
    fn test_is_a_parent_of_root() {
        assert!(is_a_parent_of("/", "/usr/andy"));
        assert!(is_a_parent_of("/", "/usr"));
        assert!(!is_a_parent_of("/", "/"));
    }
}
|
||||
@@ -1,402 +0,0 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::AtomicBool;
|
||||
|
||||
use channel::Receiver;
|
||||
use std::thread::JoinHandle;
|
||||
|
||||
use ignore::{WalkBuilder, WalkState};
|
||||
use std::sync::atomic;
|
||||
use std::thread;
|
||||
|
||||
mod platform;
|
||||
use self::platform::*;
|
||||
|
||||
type PathData = (PathBuf, u64, Option<(u64, u64)>);
|
||||
|
||||
/// A tree node carrying a path, a size and child nodes.
#[derive(Clone, Debug, Default, Eq)]
pub struct Node {
    pub name: PathBuf,
    pub size: u64,
    pub children: Vec<Node>,
}

/// Nodes are ordered by `size` first; ties are broken by `name`.
/// `children` do not take part in the ordering.
impl Ord for Node {
    fn cmp(&self, other: &Self) -> Ordering {
        self.size
            .cmp(&other.size)
            .then_with(|| self.name.cmp(&other.name))
    }
}

impl PartialOrd for Node {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

/// Two nodes are equal when name, size and all children are equal.
impl PartialEq for Node {
    fn eq(&self, other: &Self) -> bool {
        (&self.name, self.size, &self.children) == (&other.name, other.size, &other.children)
    }
}

impl Node {
    /// Returns the number of direct children of this node (despite the name,
    /// it does not count nodes next to this one).
    pub fn num_siblings(&self) -> u64 {
        self.children.len() as u64
    }

    /// Returns an iterator over owned copies of the direct children, in
    /// stored order, or in the opposite order when `is_reversed` is `true`.
    pub fn get_children_from_node(&self, is_reversed: bool) -> impl Iterator<Item = Node> {
        let mut children = self.children.clone();
        if is_reversed {
            children.reverse();
        }
        children.into_iter()
    }
}
|
||||
|
||||
/// Flags describing problems met while collecting the directory tree.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct Errors {
    /// Set when some directories could not be read for lack of permissions.
    pub permissions: bool,
    /// Set when some of the requested directories were not found.
    pub not_found: bool,
}
|
||||
|
||||
/// Returns `true` when `parent` is a strict path ancestor of `child`:
/// `child` starts with `parent`, but the two are not the same path.
/// The comparison works on whole path components, not on string prefixes.
pub fn is_a_parent_of<P: AsRef<Path>>(parent: P, child: P) -> bool {
    let (parent, child) = (parent.as_ref(), child.as_ref());
    // If `parent` starts with `child`, it is either the same path or lies
    // below `child`; in both cases it is not an ancestor.
    if parent.starts_with(child) {
        return false;
    }
    child.starts_with(parent)
}
|
||||
|
||||
pub fn simplify_dir_names<P: AsRef<Path>>(filenames: Vec<P>) -> HashSet<PathBuf> {
|
||||
let mut top_level_names: HashSet<PathBuf> = HashSet::with_capacity(filenames.len());
|
||||
let mut to_remove: Vec<PathBuf> = Vec::with_capacity(filenames.len());
|
||||
|
||||
for t in filenames {
|
||||
let top_level_name = normalize_path(t);
|
||||
let mut can_add = true;
|
||||
|
||||
for tt in top_level_names.iter() {
|
||||
if is_a_parent_of(&top_level_name, tt) {
|
||||
to_remove.push(tt.to_path_buf());
|
||||
} else if is_a_parent_of(tt, &top_level_name) {
|
||||
can_add = false;
|
||||
}
|
||||
}
|
||||
to_remove.sort_unstable();
|
||||
top_level_names.retain(|tr| to_remove.binary_search(tr).is_err());
|
||||
to_remove.clear();
|
||||
if can_add {
|
||||
top_level_names.insert(top_level_name);
|
||||
}
|
||||
}
|
||||
|
||||
top_level_names
|
||||
}
|
||||
|
||||
fn prepare_walk_dir_builder<P: AsRef<Path>>(
|
||||
top_level_names: &HashSet<P>,
|
||||
limit_filesystem: bool,
|
||||
show_hidden: bool,
|
||||
) -> WalkBuilder {
|
||||
let mut it = top_level_names.iter();
|
||||
let mut builder = WalkBuilder::new(it.next().unwrap());
|
||||
builder.follow_links(false);
|
||||
if show_hidden {
|
||||
builder.hidden(false);
|
||||
builder.ignore(false);
|
||||
builder.git_global(false);
|
||||
builder.git_ignore(false);
|
||||
builder.git_exclude(false);
|
||||
}
|
||||
|
||||
if limit_filesystem {
|
||||
builder.same_file_system(true);
|
||||
}
|
||||
|
||||
for b in it {
|
||||
builder.add(b);
|
||||
}
|
||||
builder
|
||||
}
|
||||
|
||||
fn is_not_found(e: &ignore::Error) -> bool {
|
||||
use ignore::Error;
|
||||
if let Error::WithPath { err, .. } = e {
|
||||
if let Error::Io(e) = &**err {
|
||||
if e.kind() == std::io::ErrorKind::NotFound {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
pub fn get_dir_tree<P: AsRef<Path>>(
|
||||
top_level_names: &HashSet<P>,
|
||||
ignore_directories: &Option<Vec<PathBuf>>,
|
||||
apparent_size: bool,
|
||||
limit_filesystem: bool,
|
||||
by_filecount: bool,
|
||||
show_hidden: bool,
|
||||
) -> (Errors, HashMap<PathBuf, u64>) {
|
||||
let (tx, rx) = channel::bounded::<PathData>(1000);
|
||||
|
||||
let permissions_flag = AtomicBool::new(false);
|
||||
let not_found_flag = AtomicBool::new(false);
|
||||
|
||||
let t2 = top_level_names
|
||||
.iter()
|
||||
.map(|p| p.as_ref().to_path_buf())
|
||||
.collect();
|
||||
|
||||
let t = create_reader_thread(rx, t2, apparent_size);
|
||||
let walk_dir_builder = prepare_walk_dir_builder(top_level_names, limit_filesystem, show_hidden);
|
||||
|
||||
walk_dir_builder.build_parallel().run(|| {
|
||||
let txc = tx.clone();
|
||||
let pf = &permissions_flag;
|
||||
let nf = ¬_found_flag;
|
||||
Box::new(move |path| {
|
||||
match path {
|
||||
Ok(p) => {
|
||||
if let Some(dirs) = ignore_directories {
|
||||
let path = p.path();
|
||||
let parts = path.components().collect::<Vec<std::path::Component>>();
|
||||
for d in dirs {
|
||||
if parts
|
||||
.windows(d.components().count())
|
||||
.any(|window| window.iter().collect::<PathBuf>() == *d)
|
||||
{
|
||||
return WalkState::Continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let maybe_size_and_inode = get_metadata(&p, apparent_size);
|
||||
|
||||
match maybe_size_and_inode {
|
||||
Some(data) => {
|
||||
let (size, inode_device) =
|
||||
if by_filecount { (1, data.1) } else { data };
|
||||
txc.send((p.into_path(), size, inode_device)).unwrap();
|
||||
}
|
||||
None => {
|
||||
pf.store(true, atomic::Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
if is_not_found(&e) {
|
||||
nf.store(true, atomic::Ordering::Relaxed);
|
||||
} else {
|
||||
pf.store(true, atomic::Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
};
|
||||
WalkState::Continue
|
||||
})
|
||||
});
|
||||
|
||||
drop(tx);
|
||||
let data = t.join().unwrap();
|
||||
let errors = Errors {
|
||||
permissions: permissions_flag.load(atomic::Ordering::SeqCst),
|
||||
not_found: not_found_flag.load(atomic::Ordering::SeqCst),
|
||||
};
|
||||
(errors, data)
|
||||
}
|
||||
|
||||
fn create_reader_thread(
|
||||
rx: Receiver<PathData>,
|
||||
top_level_names: HashSet<PathBuf>,
|
||||
apparent_size: bool,
|
||||
) -> JoinHandle<HashMap<PathBuf, u64>> {
|
||||
// Receiver thread
|
||||
thread::spawn(move || {
|
||||
let mut hash: HashMap<PathBuf, u64> = HashMap::new();
|
||||
let mut inodes: HashSet<(u64, u64)> = HashSet::new();
|
||||
|
||||
for dent in rx {
|
||||
let (path, size, maybe_inode_device) = dent;
|
||||
|
||||
if should_ignore_file(apparent_size, &mut inodes, maybe_inode_device) {
|
||||
continue;
|
||||
} else {
|
||||
for p in path.ancestors() {
|
||||
let s = hash.entry(p.to_path_buf()).or_insert(0);
|
||||
*s += size;
|
||||
|
||||
if top_level_names.contains(p) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
hash
|
||||
})
|
||||
}
|
||||
|
||||
/// Normalizes `path` by:
///
/// 1. removing repeated separators,
/// 2. removing interior `.` ("current directory") segments,
/// 3. removing trailing separators and trailing `.` segments,
/// 4. switching to the OS-preferred separator.
///
/// `Path::components()` performs steps 1-3
/// (see <https://doc.rust-lang.org/std/path/struct.Path.html#method.components>);
/// step 4 happens automatically when the components are collected back into a `PathBuf`.
pub fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
    path.as_ref().components().collect::<PathBuf>()
}
|
||||
|
||||
/// Decides whether an entry must be skipped because it was already counted.
///
/// When `apparent_size` is `false` and the entry has an `(inode, device)` pair,
/// the pair is recorded in `inodes`; the function returns `true` if the pair
/// was already present. In every other case (no pair, or `apparent_size` set)
/// the entry is never ignored and `inodes` is left untouched.
fn should_ignore_file(
    apparent_size: bool,
    inodes: &mut HashSet<(u64, u64)>,
    maybe_inode_device: Option<(u64, u64)>,
) -> bool {
    match maybe_inode_device {
        // `insert` returns `false` when the pair was already in the set,
        // i.e. the file was visited before (e.g. through a hard link).
        Some(inode_device) if !apparent_size => !inodes.insert(inode_device),
        _ => false,
    }
}
|
||||
|
||||
/// Comparator that orders `(path, size)` pairs by size descending and,
/// for equal sizes, by path ascending.
pub fn sort_by_size_first_name_second(a: &(PathBuf, u64), b: &(PathBuf, u64)) -> Ordering {
    // The operands are swapped for the size comparison to get descending order.
    b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0))
}
|
||||
|
||||
pub fn sort(data: HashMap<PathBuf, u64>) -> Vec<(PathBuf, u64)> {
|
||||
let mut new_l: Vec<(PathBuf, u64)> = data.iter().map(|(a, b)| (a.clone(), *b)).collect();
|
||||
new_l.sort_unstable_by(sort_by_size_first_name_second);
|
||||
new_l
|
||||
}
|
||||
|
||||
/// Keeps at most the first `max_to_show` entries of `new_l`.
///
/// A `max_to_show` of `0` means "no limit": the list is returned unchanged.
pub fn find_big_ones(mut new_l: Vec<(PathBuf, u64)>, max_to_show: usize) -> Vec<(PathBuf, u64)> {
    if max_to_show > 0 {
        // `truncate` is a no-op when the list is already short enough.
        new_l.truncate(max_to_show);
    }
    new_l
}
|
||||
|
||||
// Unit tests for the path helpers and the inode de-duplication logic.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_simplify_dir() {
        let mut correct = HashSet::new();
        correct.insert(PathBuf::from("a"));
        assert_eq!(simplify_dir_names(vec!["a"]), correct);
    }

    #[test]
    fn test_simplify_dir_rm_subdir() {
        let mut correct = HashSet::new();
        correct.insert(["a", "b"].iter().collect::<PathBuf>());
        assert_eq!(simplify_dir_names(vec!["a/b", "a/b/c", "a/b/d/f"]), correct);
    }

    #[test]
    fn test_simplify_dir_duplicates() {
        let mut correct = HashSet::new();
        correct.insert(["a", "b"].iter().collect::<PathBuf>());
        correct.insert(PathBuf::from("c"));
        assert_eq!(
            simplify_dir_names(vec![
                "a/b",
                "a/b//",
                "a/././b///",
                "c",
                "c/",
                "c/.",
                "c/././",
                "c/././."
            ]),
            correct
        );
    }

    #[test]
    fn test_simplify_dir_rm_subdir_and_not_substrings() {
        let mut correct = HashSet::new();
        correct.insert(PathBuf::from("b"));
        correct.insert(["c", "a", "b"].iter().collect::<PathBuf>());
        correct.insert(["a", "b"].iter().collect::<PathBuf>());
        assert_eq!(simplify_dir_names(vec!["a/b", "c/a/b/", "b"]), correct);
    }

    #[test]
    fn test_simplify_dir_dots() {
        let mut correct = HashSet::new();
        correct.insert(PathBuf::from("src"));
        assert_eq!(simplify_dir_names(vec!["src/."]), correct);
    }

    #[test]
    fn test_simplify_dir_substring_names() {
        let mut correct = HashSet::new();
        correct.insert(PathBuf::from("src"));
        correct.insert(PathBuf::from("src_v2"));
        assert_eq!(simplify_dir_names(vec!["src/", "src_v2"]), correct);
    }

    #[test]
    fn test_is_a_parent_of() {
        assert!(is_a_parent_of("/usr", "/usr/andy"));
        assert!(is_a_parent_of("/usr", "/usr/andy/i/am/descendant"));
        assert!(!is_a_parent_of("/usr", "/usr/."));
        assert!(!is_a_parent_of("/usr", "/usr/"));
        assert!(!is_a_parent_of("/usr", "/usr"));
        assert!(!is_a_parent_of("/usr/", "/usr"));
        assert!(!is_a_parent_of("/usr/andy", "/usr"));
        assert!(!is_a_parent_of("/usr/andy", "/usr/sibling"));
        assert!(!is_a_parent_of("/usr/folder", "/usr/folder_not_a_child"));
    }

    #[test]
    fn test_is_a_parent_of_root() {
        assert!(is_a_parent_of("/", "/usr/andy"));
        assert!(is_a_parent_of("/", "/usr"));
        assert!(!is_a_parent_of("/", "/"));
    }

    #[test]
    fn test_should_ignore_file() {
        let mut files = HashSet::new();
        files.insert((10, 20));

        assert!(!should_ignore_file(true, &mut files, Some((0, 0))));

        // A file that is not yet known is inserted into the hash set and must not be ignored
        assert!(!should_ignore_file(false, &mut files, Some((11, 12))));
        assert!(files.contains(&(11, 12)));

        // The same file will be ignored the second time
        assert!(should_ignore_file(false, &mut files, Some((11, 12))));
    }

    #[test]
    fn test_should_ignore_file_on_different_device() {
        let mut files = HashSet::new();
        files.insert((10, 20));

        // A file whose (inode, device) pair has not been seen before is not ignored
        assert!(!should_ignore_file(false, &mut files, Some((2, 99))));
        assert!(!should_ignore_file(true, &mut files, Some((2, 99))));
    }
}
|
||||
Reference in New Issue
Block a user