Large refactor. Use rayon, 10X performance boost

Code changes: Removed ignore & channel crates. Using a single receiver thread to build a hashmap to prevent duplicate inodes being reported gave a severe performance penalty. Using the rayon crate with some hand-crafted file traversal has improved performance approx. 10X. Behaviour changes: Removed parameter 'limit by filesystem' - don't think this is used, and I only added it as it was easy to add with the ignore crate. Sym links will now not appear in the output tree unless using the '-s' 'apparent-size' flag. Changed behaviour of multiple args so that it unifies them and compares them under one tree instead of treating them individually: https://github.com/bootandy/dust/issues/136
2026-06-08 11:29:05 +03:00 · 2021-06-15 11:23:50 +01:00
parent c4a73d5921
commit 00c7ce8f15
14 changed files with 700 additions and 630 deletions
@@ -0,0 +1,176 @@
+use std::fs;
+
+use crate::node::Node;
+use rayon::iter::ParallelBridge;
+use rayon::prelude::ParallelIterator;
+use std::path::PathBuf;
+
+use std::sync::atomic;
+use std::sync::atomic::AtomicBool;
+
+use std::collections::HashSet;
+
+use crate::node::build_node;
+use std::fs::DirEntry;
+
+/// Walks every directory in `dirs` and returns one [`Node`] tree per directory
+/// that could be built, together with the flag that `walk` sets whenever a
+/// directory or one of its entries could not be read.
+///
+/// * `ignore_directories` - full paths that are skipped during traversal.
+/// * `use_apparent_size` - forwarded to node construction; it also disables
+///   the duplicate-inode filtering.
+/// * `by_filecount` - count each node as 1 instead of using its size.
+/// * `ignore_hidden` - skip entries whose file name starts with `.`.
+pub fn walk_it(
+    dirs: HashSet<PathBuf>,
+    ignore_directories: HashSet<PathBuf>,
+    use_apparent_size: bool,
+    by_filecount: bool,
+    ignore_hidden: bool,
+) -> (Vec<Node>, bool) {
+    let permissions_flag = AtomicBool::new(false);
+
+    // A single set shared by all top-level directories: a file hard-linked
+    // into two of the requested directories must only be counted once, just
+    // like a file hard-linked twice inside one directory.
+    let mut inodes: HashSet<(u64, u64)> = HashSet::new();
+
+    let top_level_nodes: Vec<_> = dirs
+        .into_iter()
+        .filter_map(|d| {
+            let n = walk(
+                d,
+                &permissions_flag,
+                &ignore_directories,
+                use_apparent_size,
+                by_filecount,
+                ignore_hidden,
+            )?;
+            clean_inodes(n, &mut inodes, use_apparent_size)
+        })
+        .collect();
+    (top_level_nodes, permissions_flag.into_inner())
+}
+
+// Remove files which have the same inode, we don't want to double count them.
+//
+// Returns `None` when `x` carries an `inode_device` id that is already in
+// `inodes` (and `use_apparent_size` is off). Otherwise the id is recorded, the
+// children are filtered recursively, and a new node is returned whose size is
+// the node's own size plus the sizes of the children that were kept.
+fn clean_inodes(
+    x: Node,
+    inodes: &mut HashSet<(u64, u64)>,
+    use_apparent_size: bool,
+) -> Option<Node> {
+    if !use_apparent_size {
+        if let Some(id) = x.inode_device {
+            // `insert` reports `false` when the id was already present.
+            if !inodes.insert(id) {
+                return None;
+            }
+        }
+    }
+
+    let mut kept_children = Vec::new();
+    for child in x.children {
+        if let Some(cleaned) = clean_inodes(child, inodes, use_apparent_size) {
+            kept_children.push(cleaned);
+        }
+    }
+    let children_size = kept_children.iter().map(|c| c.size).sum::<u64>();
+
+    Some(Node {
+        name: x.name,
+        size: x.size + children_size,
+        children: kept_children,
+        inode_device: x.inode_device,
+    })
+}
+
+/// Decides whether a directory entry should be left out of the walk.
+///
+/// An entry is ignored when its full path is listed in `ignore_directories`,
+/// or when `ignore_hidden` is set and its file name starts with `.`.
+/// File names that are not valid UTF-8 are never treated as dot files.
+fn ignore_file(
+    entry: &DirEntry,
+    ignore_hidden: bool,
+    ignore_directories: &HashSet<PathBuf>,
+) -> bool {
+    if ignore_directories.contains(&entry.path()) {
+        return true;
+    }
+    if !ignore_hidden {
+        return false;
+    }
+    entry
+        .file_name()
+        .to_str()
+        .map_or(false, |name| name.starts_with('.'))
+}
+
+/// Recursively builds the [`Node`] for `dir`, reading its entries in parallel.
+///
+/// Entries rejected by `ignore_file` are skipped. Entries whose file type is a
+/// directory (and not a symlink) are walked recursively; every other entry
+/// becomes a childless node via `build_node`. Entries whose file type cannot
+/// be read are dropped without setting the flag.
+///
+/// `permissions_flag` is set to `true` when `dir` itself cannot be read or
+/// when iterating it yields an error for an entry.
+///
+/// Returns whatever `build_node` returns for `dir` with the collected
+/// children, so the result is `None` when `build_node` yields `None`.
+fn walk(
+    dir: PathBuf,
+    permissions_flag: &AtomicBool,
+    ignore_directories: &HashSet<PathBuf>,
+    use_apparent_size: bool,
+    by_filecount: bool,
+    ignore_hidden: bool,
+) -> Option<Node> {
+    let mut children = vec![];
+
+    if let Ok(entries) = fs::read_dir(dir.clone()) {
+        children = entries
+            .into_iter()
+            .par_bridge()
+            .filter_map(|entry| {
+                if let Ok(ref entry) = entry {
+                    // uncommenting the below line gives simpler code but
+                    // rayon doesn't parallelise as well giving a 3X performance drop
+                    // hence we unravel the recursion a bit
+
+                    // return walk(entry.path(), permissions_flag, ignore_directories, use_apparent_size, by_filecount, ignore_hidden);
+
+                    if !ignore_file(&entry, ignore_hidden, &ignore_directories) {
+                        if let Ok(data) = entry.file_type() {
+                            // Only real directories are descended into.
+                            if data.is_dir() && !data.is_symlink() {
+                                return walk(
+                                    entry.path(),
+                                    permissions_flag,
+                                    ignore_directories,
+                                    use_apparent_size,
+                                    by_filecount,
+                                    ignore_hidden,
+                                );
+                            }
+                            // Anything else is a leaf of the tree.
+                            return build_node(
+                                entry.path(),
+                                vec![],
+                                use_apparent_size,
+                                by_filecount,
+                            );
+                        }
+                    }
+                } else {
+                    // The entry itself could not be read.
+                    permissions_flag.store(true, atomic::Ordering::Relaxed);
+                }
+                None
+            })
+            .collect();
+    } else {
+        // The directory could not be opened; it is still reported, without children.
+        permissions_flag.store(true, atomic::Ordering::Relaxed);
+    }
+    build_node(dir, children, use_apparent_size, by_filecount)
+}
+
+// Gate the whole module on `cfg(test)` so nothing in it is compiled into
+// normal builds; this removes the need for `allow(unused_imports)` and for a
+// separate `cfg(test)` on the helper.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Builds a childless node with a fixed `inode_device` id so the same
+    /// "file" can be passed to `clean_inodes` more than once.
+    fn create_node() -> Node {
+        Node {
+            name: PathBuf::new(),
+            size: 10,
+            children: vec![],
+            inode_device: Some((5, 6)),
+        }
+    }
+
+    #[test]
+    fn test_should_ignore_file() {
+        let mut inodes = HashSet::new();
+        let n = create_node();
+
+        // First time we insert the node
+        assert_eq!(clean_inodes(n.clone(), &mut inodes, false), Some(n.clone()));
+
+        // Second time is a duplicate - we ignore it
+        assert_eq!(clean_inodes(n.clone(), &mut inodes, false), None);
+    }
+
+    #[test]
+    fn test_should_not_ignore_files_if_using_apparent_size() {
+        let mut inodes = HashSet::new();
+        let n = create_node();
+
+        // If using apparent size we include Nodes, even if duplicate inodes
+        assert_eq!(clean_inodes(n.clone(), &mut inodes, true), Some(n.clone()));
+        assert_eq!(clean_inodes(n.clone(), &mut inodes, true), Some(n.clone()));
+    }
+}
@@ -1,6 +1,6 @@
 extern crate ansi_term;

-use crate::utils::{Errors, Node};
+use crate::display_node::DisplayNode;

 use self::ansi_term::Colour::Red;
 use lscolors::{LsColors, Style};
@@ -60,7 +60,7 @@ impl DisplayData {
        }
    }

-    fn percent_size(&self, node: &Node) -> f32 {
+    fn percent_size(&self, node: &DisplayNode) -> f32 {
        let result = node.size as f32 / self.base_size as f32;
        if result.is_normal() {
            result
@@ -83,7 +83,7 @@ impl DrawData<'_> {
    }

    // TODO: can we test this?
-    fn generate_bar(&self, node: &Node, level: usize) -> String {
+    fn generate_bar(&self, node: &DisplayNode, level: usize) -> String {
        let chars_in_bar = self.percent_bar.chars().count();
        let num_bars = chars_in_bar as f32 * self.display_data.percent_size(node);
        let mut num_not_my_bar = (chars_in_bar as i32) - num_bars as i32;
@@ -107,21 +107,23 @@ impl DrawData<'_> {

 #[allow(clippy::too_many_arguments)]
 pub fn draw_it(
-    errors: Errors,
+    permission_error: bool,
    use_full_path: bool,
    is_reversed: bool,
    no_colors: bool,
    no_percents: bool,
    terminal_width: usize,
    by_filecount: bool,
-    root_node: Node,
+    option_root_node: Option<DisplayNode>,
 ) {
-    if errors.permissions {
+    if permission_error {
        eprintln!("Did not have permissions for all directories");
    }
-    if errors.not_found {
-        eprintln!("Not all directories were found");
+    if option_root_node.is_none() {
+        return;
    }
+    let root_node = option_root_node.unwrap();
+
    let num_chars_needed_on_left_most = if by_filecount {
        let max_size = root_node.children.iter().map(|n| n.size).fold(0, max);
        max_size.separate_with_commas().chars().count()
@@ -131,11 +133,8 @@ pub fn draw_it(

    let terminal_width = terminal_width - 9 - num_chars_needed_on_left_most;
    let num_indent_chars = 3;
-    let longest_string_length = root_node
-        .children
-        .iter()
-        .map(|c| find_longest_dir_name(&c, num_indent_chars, terminal_width, !use_full_path))
-        .fold(0, max);
+    let longest_string_length =
+        find_longest_dir_name(&root_node, num_indent_chars, terminal_width, !use_full_path);

    let max_bar_length = if no_percents || longest_string_length >= terminal_width as usize {
        0
@@ -145,27 +144,30 @@ pub fn draw_it(

    let first_size_bar = repeat(BLOCKS[0]).take(max_bar_length).collect::<String>();

-    for c in root_node.get_children_from_node(is_reversed) {
-        let display_data = DisplayData {
-            short_paths: !use_full_path,
-            is_reversed,
-            colors_on: !no_colors,
-            by_filecount,
-            num_chars_needed_on_left_most,
-            base_size: c.size,
-            longest_string_length,
-            ls_colors: LsColors::from_env().unwrap_or_default(),
-        };
-        let draw_data = DrawData {
-            indent: "".to_string(),
-            percent_bar: first_size_bar.clone(),
-            display_data: &display_data,
-        };
-        display_node(c, &draw_data, true, true);
-    }
+    let display_data = DisplayData {
+        short_paths: !use_full_path,
+        is_reversed,
+        colors_on: !no_colors,
+        by_filecount,
+        num_chars_needed_on_left_most,
+        base_size: root_node.size,
+        longest_string_length,
+        ls_colors: LsColors::from_env().unwrap_or_default(),
+    };
+    let draw_data = DrawData {
+        indent: "".to_string(),
+        percent_bar: first_size_bar,
+        display_data: &display_data,
+    };
+    display_node(root_node, &draw_data, true, true);
 }

-fn find_longest_dir_name(node: &Node, indent: usize, terminal: usize, long_paths: bool) -> usize {
+fn find_longest_dir_name(
+    node: &DisplayNode,
+    indent: usize,
+    terminal: usize,
+    long_paths: bool,
+) -> usize {
    let printable_name = get_printable_name(&node.name, long_paths);
    let longest = min(
        UnicodeWidthStr::width(&*printable_name) + 1 + indent,
@@ -179,7 +181,7 @@ fn find_longest_dir_name(node: &Node, indent: usize, terminal: usize, long_paths
        .fold(longest, max)
 }

-fn display_node(node: Node, draw_data: &DrawData, is_biggest: bool, is_last: bool) {
+fn display_node(node: DisplayNode, draw_data: &DrawData, is_biggest: bool, is_last: bool) {
    // hacky way of working out how deep we are in the tree
    let indent = draw_data.get_new_indent(!node.children.is_empty(), is_last);
    let level = ((indent.chars().count() - 1) / 2) - 1;
@@ -254,11 +256,13 @@ fn get_printable_name<P: AsRef<Path>>(dir_name: &P, long_paths: bool) -> String
    encode_u8(printable_name.display().to_string().as_bytes())
 }

-fn pad_or_trim_filename(node: &Node, indent: &str, display_data: &DisplayData) -> String {
+fn pad_or_trim_filename(node: &DisplayNode, indent: &str, display_data: &DisplayData) -> String {
    let name = get_printable_name(&node.name, display_data.short_paths);
    let indent_and_name = format!("{} {}", indent, name);
    let width = UnicodeWidthStr::width(&*indent_and_name);

+    assert!(display_data.longest_string_length >= width);
+
    // Add spaces after the filename so we can draw the % used bar chart.
    let name_and_padding = name
        + &(repeat(" ")
@@ -281,7 +285,7 @@ fn maybe_trim_filename(name_in: String, display_data: &DisplayData) -> String {
 }

 pub fn format_string(
-    node: &Node,
+    node: &DisplayNode,
    indent: &str,
    percent_bar: &str,
    is_biggest: bool,
@@ -294,7 +298,7 @@ pub fn format_string(
 }

 fn get_name_percent(
-    node: &Node,
+    node: &DisplayNode,
    indent: &str,
    bar_chart: &str,
    display_data: &DisplayData,
@@ -311,7 +315,7 @@ fn get_name_percent(
    }
 }

-fn get_pretty_size(node: &Node, is_biggest: bool, display_data: &DisplayData) -> String {
+fn get_pretty_size(node: &DisplayNode, is_biggest: bool, display_data: &DisplayData) -> String {
    let output = if display_data.by_filecount {
        let size_as_str = node.size.separate_with_commas();
        let spaces_to_add =
@@ -328,7 +332,11 @@ fn get_pretty_size(node: &Node, is_biggest: bool, display_data: &DisplayData) ->
    }
 }

-fn get_pretty_name(node: &Node, name_and_padding: String, display_data: &DisplayData) -> String {
+fn get_pretty_name(
+    node: &DisplayNode,
+    name_and_padding: String,
+    display_data: &DisplayData,
+) -> String {
    if display_data.colors_on {
        let meta_result = fs::metadata(node.name.clone());
        let directory_color = display_data
@@ -379,7 +387,7 @@ mod tests {

    #[test]
    fn test_format_str() {
-        let n = Node {
+        let n = DisplayNode {
            name: PathBuf::from("/short"),
            size: 2_u64.pow(12), // This is 4.0K
            children: vec![],
@@ -401,7 +409,7 @@ mod tests {
    #[test]
    fn test_format_str_long_name() {
        let name = "very_long_name_longer_than_the_eighty_character_limit_very_long_name_this_bit_will_truncate";
-        let n = Node {
+        let n = DisplayNode {
            name: PathBuf::from(name),
            size: 2_u64.pow(12), // This is 4.0K
            children: vec![],
@@ -0,0 +1,46 @@
+use std::cmp::Ordering;
+use std::path::PathBuf;
+
+/// A node of the tree that is handed to the display code: a path, the size
+/// shown for it, and the child nodes selected for display.
+#[derive(Debug, Eq, Clone)]
+pub struct DisplayNode {
+    /// Path of the file or directory this node represents.
+    pub name: PathBuf, //todo: consider moving to a string?
+    /// Size value used for ordering nodes (see the `Ord` impl).
+    pub size: u64,
+    /// Child nodes kept for display.
+    pub children: Vec<DisplayNode>,
+}
+
+/// Orders nodes by size, using the name as a tie-breaker for equal sizes.
+impl Ord for DisplayNode {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.size
+            .cmp(&other.size)
+            .then_with(|| self.name.cmp(&other.name))
+    }
+}
+
+/// Partial ordering that always defers to the total order defined by `Ord`.
+impl PartialOrd for DisplayNode {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+/// Two nodes are equal when name, size and all children are equal.
+impl PartialEq for DisplayNode {
+    fn eq(&self, other: &Self) -> bool {
+        if self.name != other.name {
+            return false;
+        }
+        if self.size != other.size {
+            return false;
+        }
+        self.children == other.children
+    }
+}
+
+impl DisplayNode {
+    /// Returns the number of direct children of this node.
+    pub fn num_siblings(&self) -> u64 {
+        let count = self.children.len();
+        count as u64
+    }
+
+    /// Returns an iterator over owned copies of the children, in stored order
+    /// or in reverse order when `is_reversed` is set.
+    pub fn get_children_from_node(&self, is_reversed: bool) -> impl Iterator<Item = DisplayNode> {
+        let mut children: Vec<DisplayNode> = self.children.clone();
+        if is_reversed {
+            children.reverse();
+        }
+        children.into_iter()
+    }
+}
@@ -0,0 +1,104 @@
+use crate::display_node::DisplayNode;
+use crate::node::Node;
+use std::collections::BinaryHeap;
+use std::collections::HashSet;
+use std::path::PathBuf;
+
+/// Builds the display tree limited to `n` levels below the root.
+///
+/// Returns `None` when `top_level_nodes` is empty. Several top-level nodes are
+/// first combined under a single root by `get_new_root`. A depth of `0` is
+/// treated like a depth of `1` would be after the root level is accounted
+/// for: only the root itself is returned, without children.
+pub fn get_by_depth(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode> {
+    if top_level_nodes.is_empty() {
+        // perhaps change this, bring back Error object?
+        return None;
+    }
+    let root = get_new_root(top_level_nodes);
+    // `n - 1` would underflow for `n == 0` (panic in debug builds, unlimited
+    // depth in release builds), so clamp at zero instead.
+    Some(build_by_depth(&root, n.saturating_sub(1)))
+}
+
+/// Builds a display tree containing the root plus the largest nodes found by
+/// repeatedly taking the biggest candidate from a max-heap.
+///
+/// The heap starts with the root's children. Each round pops the largest
+/// node, marks its name as allowed and pushes its children as new
+/// candidates. The number of rounds is `n` minus the number of top-level
+/// nodes. Returns `None` when `top_level_nodes` is empty.
+pub fn get_biggest(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode> {
+    if top_level_nodes.is_empty() {
+        // perhaps change this, bring back Error object?
+        return None;
+    }
+
+    let number_top_level_nodes = top_level_nodes.len();
+    let root = get_new_root(top_level_nodes);
+
+    let mut candidates = BinaryHeap::new();
+    for child in root.children.iter() {
+        candidates.push(child);
+    }
+
+    let mut allowed_nodes = HashSet::new();
+    allowed_nodes.insert(&root.name);
+
+    for _ in number_top_level_nodes..n {
+        if let Some(biggest) = candidates.pop() {
+            for child in biggest.children.iter() {
+                candidates.push(child);
+            }
+            allowed_nodes.insert(&biggest.name);
+        } else {
+            // Nothing left to show.
+            break;
+        }
+    }
+    recursive_rebuilder(&allowed_nodes, &root)
+}
+
+/// Converts `node` into a `DisplayNode`, keeping `depth` further levels of
+/// children. Children at every level are ordered largest first.
+fn build_by_depth(node: &Node, depth: usize) -> DisplayNode {
+    let mut children: Vec<DisplayNode> = Vec::new();
+    if depth > 0 {
+        for child in node.children.iter() {
+            children.push(build_by_depth(child, depth - 1));
+        }
+        // Ascending sort followed by a reversal yields descending order.
+        children.sort();
+        children.reverse();
+    }
+
+    DisplayNode {
+        name: node.name.clone(),
+        size: node.size,
+        children,
+    }
+}
+
+/// Returns the single root of the tree.
+///
+/// One top-level node is returned unchanged. Several top-level nodes are
+/// placed under a synthetic `(total)` node whose size is their sum. Panics
+/// when `top_level_nodes` is empty.
+fn get_new_root(top_level_nodes: Vec<Node>) -> Node {
+    if top_level_nodes.len() <= 1 {
+        return top_level_nodes.into_iter().next().unwrap();
+    }
+
+    let total_size: u64 = top_level_nodes.iter().map(|node| node.size).sum();
+    Node {
+        name: PathBuf::from("(total)"),
+        size: total_size,
+        children: top_level_nodes,
+        inode_device: None,
+    }
+}
+
+/// Copies `current` into a `DisplayNode`, keeping only those descendants whose
+/// name is in `allowed_nodes`. Children are ordered largest first. The result
+/// is always `Some`.
+fn recursive_rebuilder<'a>(
+    allowed_nodes: &'a HashSet<&PathBuf>,
+    current: &Node,
+) -> Option<DisplayNode> {
+    let mut kept: Vec<DisplayNode> = Vec::new();
+    for child in current.children.iter() {
+        if !allowed_nodes.contains(&child.name) {
+            continue;
+        }
+        if let Some(rebuilt) = recursive_rebuilder(allowed_nodes, child) {
+            kept.push(rebuilt);
+        }
+    }
+    // Ascending sort followed by a reversal yields descending order.
+    kept.sort();
+    kept.reverse();
+
+    Some(DisplayNode {
+        name: current.name.clone(),
+        size: current.size,
+        children: kept,
+    })
+}
@@ -1,19 +1,25 @@
 #[macro_use]
 extern crate clap;
-extern crate crossbeam_channel as channel;
-extern crate ignore;
+extern crate rayon;
 extern crate unicode_width;
-extern crate walkdir;
+
+use std::collections::HashSet;

 use self::display::draw_it;
-use crate::utils::is_a_parent_of;
 use clap::{App, AppSettings, Arg};
+use dirwalker::walk_it;
+use filter::{get_biggest, get_by_depth};
 use std::cmp::max;
 use std::path::PathBuf;
 use terminal_size::{terminal_size, Height, Width};
-use utils::{find_big_ones, get_dir_tree, simplify_dir_names, sort, Node};
+use utils::simplify_dir_names;

+mod dirwalker;
 mod display;
+mod display_node;
+mod filter;
+mod node;
+mod platform;
 mod utils;

 static DEFAULT_NUMBER_OF_LINES: usize = 30;
@@ -101,12 +107,6 @@ fn main() {
                .multiple(true)
                .help("Exclude any file or directory with this name"),
        )
-        .arg(
-            Arg::with_name("limit_filesystem")
-                .short("x")
-                .long("limit-filesystem")
-                .help("Only count the files and directories on the same filesystem as the supplied directory"),
-        )
        .arg(
            Arg::with_name("display_apparent_size")
                .short("s")
@@ -184,31 +184,35 @@ fn main() {

    let no_colors = init_color(options.is_present("no_colors"));
    let use_apparent_size = options.is_present("display_apparent_size");
-    let limit_filesystem = options.is_present("limit_filesystem");
-    let ignore_directories = options
+    let ignore_directories: Vec<PathBuf> = options
        .values_of("ignore_directory")
-        .map(|i| i.map(PathBuf::from).collect());
+        .map(|i| i.map(PathBuf::from).collect())
+        .unwrap_or_default();

    let by_filecount = options.is_present("by_filecount");
-    let show_hidden = !options.is_present("ignore_hidden");
+    let ignore_hidden = options.is_present("ignore_hidden");

    let simplified_dirs = simplify_dir_names(target_dirs);
-    let (errors, nodes) = get_dir_tree(
-        &simplified_dirs,
-        &ignore_directories,
+
+    let ignored_full_path: HashSet<PathBuf> = ignore_directories
+        .into_iter()
+        .flat_map(|x| simplified_dirs.iter().map(move |d| d.join(x.clone())))
+        .collect();
+
+    let (nodes, errors) = walk_it(
+        simplified_dirs,
+        ignored_full_path,
        use_apparent_size,
-        limit_filesystem,
        by_filecount,
-        show_hidden,
+        ignore_hidden,
    );
-    let sorted_data = sort(nodes);
-    let biggest_ones = {
+
+    let tree = {
        match depth {
-            None => find_big_ones(sorted_data, number_of_lines),
-            Some(_) => sorted_data,
+            None => get_biggest(nodes, number_of_lines),
+            Some(depth) => get_by_depth(nodes, depth),
        }
    };
-    let tree = build_tree(biggest_ones, depth);

    draw_it(
        errors,
@@ -221,35 +225,3 @@ fn main() {
        tree,
    );
 }
-
-fn build_tree(biggest_ones: Vec<(PathBuf, u64)>, depth: Option<usize>) -> Node {
-    let mut top_parent = Node::default();
-
-    // assume sorted order
-    for b in biggest_ones {
-        let n = Node {
-            name: b.0,
-            size: b.1,
-            children: Vec::default(),
-        };
-        recursively_build_tree(&mut top_parent, n, depth);
-    }
-    top_parent
-}
-
-fn recursively_build_tree(parent_node: &mut Node, new_node: Node, depth: Option<usize>) {
-    let new_depth = match depth {
-        None => None,
-        Some(0) => return,
-        Some(d) => Some(d - 1),
-    };
-    if let Some(c) = parent_node
-        .children
-        .iter_mut()
-        .find(|c| is_a_parent_of(&c.name, &new_node.name))
-    {
-        recursively_build_tree(c, new_node, new_depth);
-    } else {
-        parent_node.children.push(new_node);
-    }
-}
@@ -0,0 +1,54 @@
+use crate::platform::get_metadata;
+
+use std::cmp::Ordering;
+use std::path::PathBuf;
+
+/// A file or directory found while walking the file system, together with
+/// the nodes found beneath it.
+#[derive(Debug, Eq, Clone)]
+pub struct Node {
+    /// Path of the file or directory.
+    pub name: PathBuf,
+    /// Size from `get_metadata`, or 1 when nodes are built with `by_filecount`.
+    pub size: u64,
+    /// Nodes found inside this one; empty for anything built without children.
+    pub children: Vec<Node>,
+    /// Identifier pair reported by `get_metadata`, if any. Not part of the
+    /// equality comparison implemented for `Node`.
+    pub inode_device: Option<(u64, u64)>,
+}
+
+/// Creates the [`Node`] for `dir` with the given `children`.
+///
+/// The size comes from `get_metadata`, unless `by_filecount` is set, in which
+/// case the node counts as 1. Returns `None` when `get_metadata` yields no
+/// data for the path.
+pub fn build_node(
+    dir: PathBuf,
+    children: Vec<Node>,
+    use_apparent_size: bool,
+    by_filecount: bool,
+) -> Option<Node> {
+    let (measured_size, inode_device) = get_metadata(&dir, use_apparent_size)?;
+    let size = if by_filecount { 1 } else { measured_size };
+    Some(Node {
+        name: dir,
+        size,
+        children,
+        inode_device,
+    })
+}
+
+/// Two nodes are equal when name, size and all children are equal;
+/// `inode_device` is not compared.
+impl PartialEq for Node {
+    fn eq(&self, other: &Self) -> bool {
+        (&self.name, self.size, &self.children) == (&other.name, other.size, &other.children)
+    }
+}
+
+/// Orders nodes by size, using the name as a tie-breaker for equal sizes.
+impl Ord for Node {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.size
+            .cmp(&other.size)
+            .then_with(|| self.name.cmp(&other.name))
+    }
+}
+
+/// Partial ordering that always defers to the total order defined by `Ord`.
+impl PartialOrd for Node {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
@@ -1,7 +1,8 @@
-use ignore::DirEntry;
 #[allow(unused_imports)]
 use std::fs;

+use std::path::Path;
+
 #[cfg(target_family = "unix")]
 fn get_block_size() -> u64 {
    // All os specific implementations of MetatdataExt seem to define a block as 512 bytes
@@ -10,7 +11,7 @@ fn get_block_size() -> u64 {
 }

 #[cfg(target_family = "unix")]
-pub fn get_metadata(d: &DirEntry, use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> {
+pub fn get_metadata(d: &Path, use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> {
    use std::os::unix::fs::MetadataExt;
    match d.metadata() {
        Ok(md) => {
@@ -25,7 +26,7 @@ pub fn get_metadata(d: &DirEntry, use_apparent_size: bool) -> Option<(u64, Optio
 }

 #[cfg(target_family = "windows")]
-pub fn get_metadata(d: &DirEntry, _use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> {
+pub fn get_metadata(d: &Path, _use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> {
    // On windows opening the file to get size, file ID and volume can be very
    // expensive because 1) it causes a few system calls, and more importantly 2) it can cause
    // windows defender to scan the file.
@@ -63,7 +64,6 @@ pub fn get_metadata(d: &DirEntry, _use_apparent_size: bool) -> Option<(u64, Opti
    // With this optimization:         8 sec.

    use std::io;
-    use std::path::Path;
    use winapi_util::Handle;
    fn handle_from_path_limited<P: AsRef<Path>>(path: P) -> io::Result<Handle> {
        use std::fs::OpenOptions;
@@ -90,10 +90,10 @@ pub fn get_metadata(d: &DirEntry, _use_apparent_size: bool) -> Option<(u64, Opti
        Ok(Handle::from_file(file))
    }

-    fn get_metadata_expensive(d: &DirEntry) -> Option<(u64, Option<(u64, u64)>)> {
+    fn get_metadata_expensive(d: &Path) -> Option<(u64, Option<(u64, u64)>)> {
        use winapi_util::file::information;

-        let h = handle_from_path_limited(d.path()).ok()?;
+        let h = handle_from_path_limited(d).ok()?;
        let info = information(&h).ok()?;

        Some((
@@ -0,0 +1,126 @@
+use std::collections::HashSet;
+use std::path::{Path, PathBuf};
+
+/// Returns `true` when `parent` is a strict ancestor of `child`, compared
+/// component by component. A path is never a parent of itself.
+fn is_a_parent_of<P: AsRef<Path>>(parent: P, child: P) -> bool {
+    let (ancestor, descendant) = (parent.as_ref(), child.as_ref());
+    if !descendant.starts_with(ancestor) {
+        return false;
+    }
+    // Both paths starting with each other means they are the same path.
+    !ancestor.starts_with(descendant)
+}
+
+/// Normalizes the given paths and reduces them to the set of top-level
+/// directories: duplicates collapse, and any path that lies inside another
+/// given path is dropped in favour of its ancestor.
+pub fn simplify_dir_names<P: AsRef<Path>>(filenames: Vec<P>) -> HashSet<PathBuf> {
+    let mut top_level_names: HashSet<PathBuf> = HashSet::with_capacity(filenames.len());
+
+    for t in filenames {
+        let top_level_name = normalize_path(t);
+
+        // An ancestor that is already present makes the new path redundant.
+        let can_add = !top_level_names
+            .iter()
+            .any(|existing| is_a_parent_of(existing, &top_level_name));
+
+        // Anything already present that lives under the new path is superseded.
+        top_level_names.retain(|existing| !is_a_parent_of(&top_level_name, existing));
+
+        if can_add {
+            top_level_names.insert(top_level_name);
+        }
+    }
+
+    top_level_names
+}
+
+/// Rebuilds `path` from its components, which
+/// 1. removes repeated separators,
+/// 2. removes interior '.' ("current directory") segments,
+/// 3. removes trailing separators and trailing '.' segments,
+/// 4. switches to the separator preferred by the operating system.
+///
+/// Steps 1-3 are done by `Path::components()`; ref:
+/// <https://doc.rust-lang.org/std/path/struct.Path.html#method.components>.
+/// Step 4 happens when the components are pushed back into a `PathBuf`.
+pub fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
+    let mut normalized = PathBuf::new();
+    for component in path.as_ref().components() {
+        normalized.push(component);
+    }
+    normalized
+}
+
+// Gate the whole module on `cfg(test)` so it is only compiled for test
+// builds; this removes the need for `allow(unused_imports)`.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_simplify_dir() {
+        let mut correct = HashSet::new();
+        correct.insert(PathBuf::from("a"));
+        assert_eq!(simplify_dir_names(vec!["a"]), correct);
+    }
+
+    #[test]
+    fn test_simplify_dir_rm_subdir() {
+        let mut correct = HashSet::new();
+        correct.insert(["a", "b"].iter().collect::<PathBuf>());
+        assert_eq!(simplify_dir_names(vec!["a/b", "a/b/c", "a/b/d/f"]), correct);
+    }
+
+    #[test]
+    fn test_simplify_dir_duplicates() {
+        let mut correct = HashSet::new();
+        correct.insert(["a", "b"].iter().collect::<PathBuf>());
+        correct.insert(PathBuf::from("c"));
+        assert_eq!(
+            simplify_dir_names(vec![
+                "a/b",
+                "a/b//",
+                "a/././b///",
+                "c",
+                "c/",
+                "c/.",
+                "c/././",
+                "c/././."
+            ]),
+            correct
+        );
+    }
+    #[test]
+    fn test_simplify_dir_rm_subdir_and_not_substrings() {
+        let mut correct = HashSet::new();
+        correct.insert(PathBuf::from("b"));
+        correct.insert(["c", "a", "b"].iter().collect::<PathBuf>());
+        correct.insert(["a", "b"].iter().collect::<PathBuf>());
+        assert_eq!(simplify_dir_names(vec!["a/b", "c/a/b/", "b"]), correct);
+    }
+
+    #[test]
+    fn test_simplify_dir_dots() {
+        let mut correct = HashSet::new();
+        correct.insert(PathBuf::from("src"));
+        assert_eq!(simplify_dir_names(vec!["src/."]), correct);
+    }
+
+    #[test]
+    fn test_simplify_dir_substring_names() {
+        let mut correct = HashSet::new();
+        correct.insert(PathBuf::from("src"));
+        correct.insert(PathBuf::from("src_v2"));
+        assert_eq!(simplify_dir_names(vec!["src/", "src_v2"]), correct);
+    }
+
+    #[test]
+    fn test_is_a_parent_of() {
+        assert!(is_a_parent_of("/usr", "/usr/andy"));
+        assert!(is_a_parent_of("/usr", "/usr/andy/i/am/descendant"));
+        assert!(!is_a_parent_of("/usr", "/usr/."));
+        assert!(!is_a_parent_of("/usr", "/usr/"));
+        assert!(!is_a_parent_of("/usr", "/usr"));
+        assert!(!is_a_parent_of("/usr/", "/usr"));
+        assert!(!is_a_parent_of("/usr/andy", "/usr"));
+        assert!(!is_a_parent_of("/usr/andy", "/usr/sibling"));
+        assert!(!is_a_parent_of("/usr/folder", "/usr/folder_not_a_child"));
+    }
+
+    #[test]
+    fn test_is_a_parent_of_root() {
+        assert!(is_a_parent_of("/", "/usr/andy"));
+        assert!(is_a_parent_of("/", "/usr"));
+        assert!(!is_a_parent_of("/", "/"));
+    }
+}
@@ -1,402 +0,0 @@
-use std::cmp::Ordering;
-use std::collections::HashMap;
-use std::collections::HashSet;
-use std::path::{Path, PathBuf};
-use std::sync::atomic::AtomicBool;
-
-use channel::Receiver;
-use std::thread::JoinHandle;
-
-use ignore::{WalkBuilder, WalkState};
-use std::sync::atomic;
-use std::thread;
-
-mod platform;
-use self::platform::*;
-
-/// Record passed over the channel from the directory walk to the reader thread:
-/// `(path, size, optional (inode, device) pair)`.
-type PathData = (PathBuf, u64, Option<(u64, u64)>);
-
-/// A tree node: a path, a size, and the nodes nested beneath it.
-#[derive(Debug, Default, Eq, Clone)]
-pub struct Node {
-    /// Path this node stands for.
-    pub name: PathBuf,
-    /// Size attributed to `name`; the primary key of the `Ord` impl.
-    /// NOTE(review): the unit (bytes vs. file count) is decided by whoever builds the node — confirm at the call site.
-    pub size: u64,
-    /// Nodes nested directly beneath this one.
-    pub children: Vec<Node>,
-}
-
-impl Ord for Node {
-    /// Orders nodes by `size`, breaking ties by `name`; `children` are not consulted.
-    fn cmp(&self, other: &Self) -> Ordering {
-        self.size
-            .cmp(&other.size)
-            .then_with(|| self.name.cmp(&other.name))
-    }
-}
-
-impl PartialOrd for Node {
-    /// Always `Some`: delegates to the total order defined by `Ord::cmp`.
-    fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
-        Some(Ord::cmp(self, rhs))
-    }
-}
-
-impl PartialEq for Node {
-    /// Two nodes are equal when name, size and the whole child list are equal.
-    fn eq(&self, other: &Self) -> bool {
-        // Destructure so that a newly added field cannot be forgotten here.
-        let Node {
-            name,
-            size,
-            children,
-        } = self;
-        *name == other.name && *size == other.size && *children == other.children
-    }
-}
-
-impl Node {
-    /// Returns the number of direct children of this node.
-    ///
-    /// Despite the name, this counts the entries in `self.children`, not the
-    /// siblings of `self`.
-    pub fn num_siblings(&self) -> u64 {
-        self.children.len() as u64
-    }
-
-    /// Returns owned clones of the direct children.
-    ///
-    /// The children come out in stored order, or in reverse stored order when
-    /// `is_reversed` is true.
-    pub fn get_children_from_node(&self, is_reversed: bool) -> impl Iterator<Item = Node> {
-        // Clone once and reverse in place rather than collecting a second Vec.
-        let mut children = self.children.clone();
-        if is_reversed {
-            children.reverse();
-        }
-        children.into_iter()
-    }
-}
-
-/// Problems noticed while walking the directory tree, as reported by `get_dir_tree`.
-pub struct Errors {
-    /// Set when metadata for an entry was unavailable, or when the walker
-    /// reported an error other than "not found".
-    pub permissions: bool,
-    /// Set when the walker reported an I/O "not found" error.
-    pub not_found: bool,
-}
-
-/// Returns true when `parent` is a strict ancestor of `child`.
-///
-/// The comparison is made on path components, so `/usr/folder` is not a
-/// parent of `/usr/folder_not_a_child`, and a path is never its own parent.
-pub fn is_a_parent_of<P: AsRef<Path>>(parent: P, child: P) -> bool {
-    let (ancestor, descendant) = (parent.as_ref(), child.as_ref());
-    if !descendant.starts_with(ancestor) {
-        return false;
-    }
-    // Equal paths are prefixes of each other; rule that case out.
-    !ancestor.starts_with(descendant)
-}
-
-/// Normalises the given paths and keeps only the outermost ones.
-///
-/// Every input goes through `normalize_path`. A path is dropped when another
-/// path in the list is its ancestor (per `is_a_parent_of`); spellings that
-/// normalise to the same path collapse into one entry of the returned set.
-pub fn simplify_dir_names<P: AsRef<Path>>(filenames: Vec<P>) -> HashSet<PathBuf> {
-    let mut top_level_names: HashSet<PathBuf> = HashSet::with_capacity(filenames.len());
-
-    for t in filenames {
-        let top_level_name = normalize_path(t);
-
-        // Entries already kept that sit beneath the new path are superseded by it.
-        top_level_names.retain(|kept| !is_a_parent_of(&top_level_name, kept));
-
-        // The new path is redundant when a kept entry already contains it.
-        let is_covered = top_level_names
-            .iter()
-            .any(|kept| is_a_parent_of(kept, &top_level_name));
-        if !is_covered {
-            top_level_names.insert(top_level_name);
-        }
-    }
-
-    top_level_names
-}
-
-/// Builds a `WalkBuilder` rooted at every path in `top_level_names`.
-///
-/// Symbolic links are never followed. With `show_hidden` the hidden-file
-/// filter and the ignore-file / git-ignore filters are switched off; with
-/// `limit_filesystem` the walk is restricted to the same file system.
-///
-/// # Panics
-///
-/// Panics when `top_level_names` is empty.
-fn prepare_walk_dir_builder<P: AsRef<Path>>(
-    top_level_names: &HashSet<P>,
-    limit_filesystem: bool,
-    show_hidden: bool,
-) -> WalkBuilder {
-    let mut roots = top_level_names.iter();
-    let first_root = roots.next().unwrap();
-
-    let mut builder = WalkBuilder::new(first_root);
-    builder.follow_links(false);
-
-    if show_hidden {
-        builder
-            .hidden(false)
-            .ignore(false)
-            .git_global(false)
-            .git_ignore(false)
-            .git_exclude(false);
-    }
-
-    if limit_filesystem {
-        builder.same_file_system(true);
-    }
-
-    // The remaining roots are walked by the same builder.
-    roots.for_each(|root| {
-        builder.add(root);
-    });
-    builder
-}
-
-fn is_not_found(e: &ignore::Error) -> bool {
-    use ignore::Error;
-    if let Error::WithPath { err, .. } = e {
-        if let Error::Io(e) = &**err {
-            if e.kind() == std::io::ErrorKind::NotFound {
-                return true;
-            }
-        }
-    }
-    false
-}
-
-/// Walks every path in `top_level_names` in parallel and totals sizes per path.
-///
-/// Each visited entry is sent over a bounded channel to a reader thread (see
-/// `create_reader_thread`), which builds the returned path-to-size map.
-///
-/// * `ignore_directories` - entries whose path contains one of these paths as
-///   a run of consecutive components are skipped. An ignore path with no
-///   components (for example an empty string) matches nothing.
-/// * `apparent_size` - forwarded to `get_metadata` and to the reader thread.
-/// * `limit_filesystem`, `show_hidden` - forwarded to `prepare_walk_dir_builder`.
-/// * `by_filecount` - every entry counts as size 1 instead of its reported size.
-///
-/// The returned `Errors` says whether a permissions-style or a not-found
-/// problem was seen during the walk.
-///
-/// # Panics
-///
-/// Panics when `top_level_names` is empty, when sending to the reader thread
-/// fails, or when the reader thread itself panicked.
-pub fn get_dir_tree<P: AsRef<Path>>(
-    top_level_names: &HashSet<P>,
-    ignore_directories: &Option<Vec<PathBuf>>,
-    apparent_size: bool,
-    limit_filesystem: bool,
-    by_filecount: bool,
-    show_hidden: bool,
-) -> (Errors, HashMap<PathBuf, u64>) {
-    let (tx, rx) = channel::bounded::<PathData>(1000);
-
-    let permissions_flag = AtomicBool::new(false);
-    let not_found_flag = AtomicBool::new(false);
-
-    let t2 = top_level_names
-        .iter()
-        .map(|p| p.as_ref().to_path_buf())
-        .collect();
-
-    let t = create_reader_thread(rx, t2, apparent_size);
-    let walk_dir_builder = prepare_walk_dir_builder(top_level_names, limit_filesystem, show_hidden);
-
-    walk_dir_builder.build_parallel().run(|| {
-        // Each invocation of this factory gets its own sender clone.
-        let txc = tx.clone();
-        let pf = &permissions_flag;
-        let nf = &not_found_flag;
-        Box::new(move |path| {
-            match path {
-                Ok(p) => {
-                    if let Some(dirs) = ignore_directories {
-                        let path = p.path();
-                        let parts = path.components().collect::<Vec<std::path::Component>>();
-                        for d in dirs {
-                            let width = d.components().count();
-                            // `slice::windows` panics on a zero width, so an
-                            // ignore path without components is skipped.
-                            if width == 0 {
-                                continue;
-                            }
-                            if parts
-                                .windows(width)
-                                .any(|window| window.iter().collect::<PathBuf>() == *d)
-                            {
-                                return WalkState::Continue;
-                            }
-                        }
-                    }
-
-                    let maybe_size_and_inode = get_metadata(&p, apparent_size);
-
-                    match maybe_size_and_inode {
-                        Some(data) => {
-                            let (size, inode_device) =
-                                if by_filecount { (1, data.1) } else { data };
-                            txc.send((p.into_path(), size, inode_device)).unwrap();
-                        }
-                        None => {
-                            // Missing metadata is reported as a permissions problem.
-                            pf.store(true, atomic::Ordering::Relaxed);
-                        }
-                    }
-                }
-                Err(e) => {
-                    if is_not_found(&e) {
-                        nf.store(true, atomic::Ordering::Relaxed);
-                    } else {
-                        pf.store(true, atomic::Ordering::Relaxed);
-                    }
-                }
-            };
-            WalkState::Continue
-        })
-    });
-
-    // Release the original sender before waiting for the reader thread's result.
-    drop(tx);
-    let data = t.join().unwrap();
-    let errors = Errors {
-        permissions: permissions_flag.load(atomic::Ordering::SeqCst),
-        not_found: not_found_flag.load(atomic::Ordering::SeqCst),
-    };
-    (errors, data)
-}
-
-/// Spawns the thread that drains `rx` and totals sizes per path.
-///
-/// For every received entry that `should_ignore_file` does not reject, the
-/// size is added to the entry's own path and to each of its ancestors, going
-/// upwards until a path in `top_level_names` has been updated. The map is
-/// handed back through the join handle once the channel is exhausted.
-fn create_reader_thread(
-    rx: Receiver<PathData>,
-    top_level_names: HashSet<PathBuf>,
-    apparent_size: bool,
-) -> JoinHandle<HashMap<PathBuf, u64>> {
-    thread::spawn(move || {
-        let mut totals: HashMap<PathBuf, u64> = HashMap::new();
-        let mut seen_inodes: HashSet<(u64, u64)> = HashSet::new();
-
-        for (path, size, maybe_inode_device) in rx {
-            if should_ignore_file(apparent_size, &mut seen_inodes, maybe_inode_device) {
-                continue;
-            }
-
-            for ancestor in path.ancestors() {
-                *totals.entry(ancestor.to_path_buf()).or_insert(0) += size;
-
-                // Do not attribute sizes above the requested roots.
-                if top_level_names.contains(ancestor) {
-                    break;
-                }
-            }
-        }
-        totals
-    })
-}
-
-/// Rebuilds `path` from its components, which normalises it by:
-///
-/// 1. removing repeated separators,
-/// 2. removing interior `.` ("current directory") segments,
-/// 3. removing trailing separators and trailing `.` segments,
-/// 4. switching to the OS-preferred separator.
-///
-/// Steps 1-3 are performed by `Path::components()`; ref:
-/// <https://doc.rust-lang.org/std/path/struct.Path.html#method.components>.
-/// Step 4 follows from pushing the components back into a fresh `PathBuf`.
-pub fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
-    let mut normalized = PathBuf::new();
-    normalized.extend(path.as_ref().components());
-    normalized
-}
-
-/// Decides whether an entry has already been counted.
-///
-/// Returns true only when `apparent_size` is false and the `(inode, device)`
-/// pair is already in `inodes`. In that mode a pair seen for the first time
-/// is recorded in `inodes`. With `apparent_size` set, or without a pair,
-/// nothing is recorded and nothing is ignored.
-fn should_ignore_file(
-    apparent_size: bool,
-    inodes: &mut HashSet<(u64, u64)>,
-    maybe_inode_device: Option<(u64, u64)>,
-) -> bool {
-    match maybe_inode_device {
-        // `insert` returns false when the pair was already present, i.e. the
-        // file was already visited (for example through a hard link).
-        Some(inode_device) if !apparent_size => !inodes.insert(inode_device),
-        _ => false,
-    }
-}
-
-/// Comparator that puts larger sizes first and breaks ties by ascending path.
-pub fn sort_by_size_first_name_second(a: &(PathBuf, u64), b: &(PathBuf, u64)) -> Ordering {
-    let (path_a, size_a) = a;
-    let (path_b, size_b) = b;
-    // The operands are swapped for the size so that bigger entries sort earlier.
-    size_b.cmp(size_a).then_with(|| path_a.cmp(path_b))
-}
-
-/// Flattens the path-to-size map into a vector ordered by
-/// `sort_by_size_first_name_second` (largest size first, then by path).
-pub fn sort(data: HashMap<PathBuf, u64>) -> Vec<(PathBuf, u64)> {
-    // `data` is owned, so its entries are moved out rather than cloned.
-    let mut new_l: Vec<(PathBuf, u64)> = data.into_iter().collect();
-    new_l.sort_unstable_by(sort_by_size_first_name_second);
-    new_l
-}
-
-/// Keeps at most the first `max_to_show` entries of `new_l`.
-///
-/// A `max_to_show` of 0 means "no limit": the list is returned unchanged.
-pub fn find_big_ones(mut new_l: Vec<(PathBuf, u64)>, max_to_show: usize) -> Vec<(PathBuf, u64)> {
-    if max_to_show > 0 {
-        // In-place and a no-op for short lists; avoids cloning the kept prefix.
-        new_l.truncate(max_to_show);
-    }
-    new_l
-}
-
-// Unit tests for the path helpers and the inode de-duplication logic.
-// Compiled only for test builds, so the glob import below is always used.
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_simplify_dir() {
-        let mut correct = HashSet::new();
-        correct.insert(PathBuf::from("a"));
-        assert_eq!(simplify_dir_names(vec!["a"]), correct);
-    }
-
-    #[test]
-    fn test_simplify_dir_rm_subdir() {
-        let mut correct = HashSet::new();
-        correct.insert(["a", "b"].iter().collect::<PathBuf>());
-        assert_eq!(simplify_dir_names(vec!["a/b", "a/b/c", "a/b/d/f"]), correct);
-    }
-
-    #[test]
-    fn test_simplify_dir_duplicates() {
-        let mut correct = HashSet::new();
-        correct.insert(["a", "b"].iter().collect::<PathBuf>());
-        correct.insert(PathBuf::from("c"));
-        assert_eq!(
-            simplify_dir_names(vec![
-                "a/b",
-                "a/b//",
-                "a/././b///",
-                "c",
-                "c/",
-                "c/.",
-                "c/././",
-                "c/././."
-            ]),
-            correct
-        );
-    }
-
-    #[test]
-    fn test_simplify_dir_rm_subdir_and_not_substrings() {
-        let mut correct = HashSet::new();
-        correct.insert(PathBuf::from("b"));
-        correct.insert(["c", "a", "b"].iter().collect::<PathBuf>());
-        correct.insert(["a", "b"].iter().collect::<PathBuf>());
-        assert_eq!(simplify_dir_names(vec!["a/b", "c/a/b/", "b"]), correct);
-    }
-
-    #[test]
-    fn test_simplify_dir_dots() {
-        let mut correct = HashSet::new();
-        correct.insert(PathBuf::from("src"));
-        assert_eq!(simplify_dir_names(vec!["src/."]), correct);
-    }
-
-    #[test]
-    fn test_simplify_dir_substring_names() {
-        let mut correct = HashSet::new();
-        correct.insert(PathBuf::from("src"));
-        correct.insert(PathBuf::from("src_v2"));
-        assert_eq!(simplify_dir_names(vec!["src/", "src_v2"]), correct);
-    }
-
-    #[test]
-    fn test_is_a_parent_of() {
-        assert!(is_a_parent_of("/usr", "/usr/andy"));
-        assert!(is_a_parent_of("/usr", "/usr/andy/i/am/descendant"));
-        assert!(!is_a_parent_of("/usr", "/usr/."));
-        assert!(!is_a_parent_of("/usr", "/usr/"));
-        assert!(!is_a_parent_of("/usr", "/usr"));
-        assert!(!is_a_parent_of("/usr/", "/usr"));
-        assert!(!is_a_parent_of("/usr/andy", "/usr"));
-        assert!(!is_a_parent_of("/usr/andy", "/usr/sibling"));
-        assert!(!is_a_parent_of("/usr/folder", "/usr/folder_not_a_child"));
-    }
-
-    #[test]
-    fn test_is_a_parent_of_root() {
-        assert!(is_a_parent_of("/", "/usr/andy"));
-        assert!(is_a_parent_of("/", "/usr"));
-        assert!(!is_a_parent_of("/", "/"));
-    }
-
-    #[test]
-    fn test_should_ignore_file() {
-        let mut files = HashSet::new();
-        files.insert((10, 20));
-
-        // With apparent size nothing is ever ignored.
-        assert!(!should_ignore_file(true, &mut files, Some((0, 0))));
-
-        // A file that is not yet known is inserted into the set and is not ignored.
-        assert!(!should_ignore_file(false, &mut files, Some((11, 12))));
-        assert!(files.contains(&(11, 12)));
-
-        // The same file is ignored the second time.
-        assert!(should_ignore_file(false, &mut files, Some((11, 12))));
-    }
-
-    #[test]
-    fn test_should_ignore_file_on_different_device() {
-        let mut files = HashSet::new();
-        files.insert((10, 20));
-
-        // A pair that differs from every recorded (inode, device) pair is not ignored.
-        assert!(!should_ignore_file(false, &mut files, Some((2, 99))));
-        // With apparent size a pair is not ignored even though it was just recorded.
-        assert!(!should_ignore_file(true, &mut files, Some((2, 99))));
-    }
-}