mirror of
https://github.com/bootandy/dust.git
synced 2026-06-08 11:29:05 +03:00
Rewrite to use walkdir instead of recursion
Advised to use walkdir by burntsushi, as using recursion on file systems can blow the stack. walkdir is slower but allows the code to be cleaner and more reliable. Also experimented with ignore, but locking the hashmap resulted in similar performance to walkdir with much uglier code.
This commit is contained in:
+74
-53
@@ -2,86 +2,107 @@ extern crate ansi_term;
|
||||
|
||||
use self::ansi_term::Colour::Fixed;
|
||||
|
||||
use lib::Node;
|
||||
|
||||
static UNITS: [char; 4] = ['T', 'G', 'M', 'K'];
|
||||
|
||||
pub fn draw_it(permissions: bool, heads: &Vec<Node>, to_display: &Vec<&Node>) -> () {
|
||||
pub fn draw_it(permissions: bool, base_dirs: Vec<&str>, to_display: Vec<(String, u64)>) -> () {
|
||||
if !permissions {
|
||||
eprintln!("Did not have permissions for all directories");
|
||||
}
|
||||
|
||||
for d in to_display {
|
||||
if heads.contains(d) {
|
||||
display_node(d, &to_display, true, "")
|
||||
}
|
||||
for f in base_dirs {
|
||||
display_node(f, &to_display, true, "")
|
||||
}
|
||||
}
|
||||
|
||||
/// Looks up the size recorded for `node_to_print` in `nodes`.
///
/// Returns the size paired with the first entry whose path string equals
/// `node_to_print`, or `None` when no entry matches.
fn get_size(nodes: &Vec<(String, u64)>, node_to_print: String) -> Option<u64> {
    nodes
        .iter()
        .find(|entry| entry.0 == node_to_print)
        .map(|entry| entry.1)
}
|
||||
|
||||
fn display_node<S: Into<String>>(
|
||||
node_to_print: &Node,
|
||||
to_display: &Vec<&Node>,
|
||||
is_first: bool,
|
||||
node_to_print: &str,
|
||||
to_display: &Vec<(String, u64)>,
|
||||
is_biggest: bool,
|
||||
indentation_str: S,
|
||||
) {
|
||||
let mut is = indentation_str.into();
|
||||
print_this_node(node_to_print, is_first, is.as_ref());
|
||||
let size = get_size(to_display, node_to_print.to_string());
|
||||
match size {
|
||||
None => println!("Can not find path: {}", node_to_print),
|
||||
Some(size) => {
|
||||
print_this_node(node_to_print, size, is_biggest, is.as_ref());
|
||||
|
||||
is = is.replace("└─┬", " ");
|
||||
is = is.replace("└──", " ");
|
||||
is = is.replace("├──", "│ ");
|
||||
is = is.replace("├─┬", "│ ");
|
||||
is = is.replace("└─┬", " ");
|
||||
is = is.replace("└──", " ");
|
||||
is = is.replace("├──", "│ ");
|
||||
is = is.replace("├─┬", "│ ");
|
||||
|
||||
let printable_node_slashes = node_to_print.name().matches('/').count();
|
||||
let printable_node_slashes = node_to_print.matches('/').count();
|
||||
|
||||
let mut num_siblings = to_display.iter().fold(0, |a, b| {
|
||||
if node_to_print.children().contains(b)
|
||||
&& b.name().matches('/').count() == printable_node_slashes + 1
|
||||
{
|
||||
a + 1
|
||||
} else {
|
||||
a
|
||||
}
|
||||
});
|
||||
let mut num_siblings = to_display.iter().fold(0, |a, b| {
|
||||
if b.0.starts_with(node_to_print)
|
||||
&& b.0.matches('/').count() == printable_node_slashes + 1
|
||||
{
|
||||
a + 1
|
||||
} else {
|
||||
a
|
||||
}
|
||||
});
|
||||
|
||||
let mut is_biggest = true;
|
||||
for node in to_display {
|
||||
if node_to_print.children().contains(node) {
|
||||
let has_display_children = node.children()
|
||||
.iter()
|
||||
.fold(false, |has_kids, n| has_kids || to_display.contains(&n));
|
||||
let mut is_biggest = true;
|
||||
for &(ref k, _) in to_display.iter() {
|
||||
if k.starts_with(node_to_print)
|
||||
&& k.matches('/').count() == printable_node_slashes + 1
|
||||
{
|
||||
num_siblings -= 1;
|
||||
|
||||
let has_children = node.children().len() > 0 && has_display_children;
|
||||
if node.name().matches('/').count() == printable_node_slashes + 1 {
|
||||
num_siblings -= 1;
|
||||
|
||||
let tree_chars = {
|
||||
if num_siblings == 0 {
|
||||
if has_children {
|
||||
"└─┬"
|
||||
} else {
|
||||
"└──"
|
||||
}
|
||||
} else {
|
||||
if has_children {
|
||||
"├─┬"
|
||||
} else {
|
||||
"├──"
|
||||
let mut has_children = false;
|
||||
for &(ref k2, _) in to_display.iter() {
|
||||
let kk :&str = k.as_ref();
|
||||
if k2.starts_with(kk)
|
||||
&& k2.matches('/').count() == printable_node_slashes + 2
|
||||
{
|
||||
has_children = true;
|
||||
}
|
||||
}
|
||||
};
|
||||
display_node(&node, to_display, is_biggest, is.to_string() + tree_chars);
|
||||
is_biggest = false;
|
||||
|
||||
display_node(
|
||||
&k,
|
||||
to_display,
|
||||
is_biggest,
|
||||
is.to_string() + get_tree_chars(num_siblings, has_children),
|
||||
);
|
||||
is_biggest = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn print_this_node(node: &Node, is_biggest: bool, indentation: &str) {
|
||||
let pretty_size = format!("{:>5}", human_readable_number(node.size()),);
|
||||
/// Picks the box-drawing prefix for an entry in the printed tree.
///
/// `num_siblings == 0` selects a corner piece (`└`), any other value selects
/// a tee piece (`├`). `has_children` selects the variant ending in `┬`
/// instead of `─`.
fn get_tree_chars(num_siblings: u64, has_children: bool) -> &'static str {
    match (num_siblings == 0, has_children) {
        (true, true) => "└─┬",
        (true, false) => "└──",
        (false, true) => "├─┬",
        (false, false) => "├──",
    }
}
|
||||
fn print_this_node(node_name: &str, size: u64, is_biggest: bool, indentation: &str) {
|
||||
let pretty_size = format!("{:>5}", human_readable_number(size),);
|
||||
println!(
|
||||
"{}",
|
||||
format_string(node.name(), is_biggest, pretty_size.as_ref(), indentation)
|
||||
format_string(node_name, is_biggest, pretty_size.as_ref(), indentation)
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
-68
@@ -1,68 +0,0 @@
|
||||
use std::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Node {
|
||||
name: String,
|
||||
size: u64,
|
||||
children: Vec<Node>,
|
||||
}
|
||||
|
||||
impl Node {
|
||||
pub fn new<S: Into<String>>(name: S, size: u64, children: Vec<Node>) -> Self {
|
||||
Node {
|
||||
children: children,
|
||||
name: name.into(),
|
||||
size: size,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn children(&self) -> &Vec<Node> {
|
||||
&self.children
|
||||
}
|
||||
|
||||
pub fn name(&self) -> &String {
|
||||
&self.name
|
||||
}
|
||||
|
||||
pub fn size(&self) -> u64 {
|
||||
self.size
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for Node {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
if self.size > other.size {
|
||||
Ordering::Less
|
||||
} else if self.size < other.size {
|
||||
Ordering::Greater
|
||||
} else {
|
||||
let my_slashes = self.name.matches('/').count();
|
||||
let other_slashes = other.name.matches('/').count();
|
||||
|
||||
if my_slashes > other_slashes {
|
||||
Ordering::Greater
|
||||
} else if my_slashes < other_slashes {
|
||||
Ordering::Less
|
||||
} else {
|
||||
if self.name < other.name {
|
||||
Ordering::Less
|
||||
} else if self.name > other.name {
|
||||
Ordering::Greater
|
||||
} else {
|
||||
Ordering::Equal
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
impl PartialOrd for Node {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
impl PartialEq for Node {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
(&self.name, self.size) == (&other.name, other.size)
|
||||
}
|
||||
}
|
||||
impl Eq for Node {}
|
||||
+4
-5
@@ -1,15 +1,14 @@
|
||||
#[macro_use]
|
||||
extern crate clap;
|
||||
extern crate assert_cli;
|
||||
extern crate walkdir;
|
||||
|
||||
use self::display::draw_it;
|
||||
use clap::{App, AppSettings, Arg};
|
||||
use utils::{find_big_ones, get_dir_tree};
|
||||
|
||||
|
||||
mod display;
|
||||
mod utils;
|
||||
mod lib;
|
||||
|
||||
static DEFAULT_NUMBER_OF_LINES: &'static str = "15";
|
||||
|
||||
@@ -40,9 +39,9 @@ fn main() {
|
||||
let number_of_lines = value_t!(options.value_of("number_of_lines"), usize).unwrap();
|
||||
let use_apparent_size = options.is_present("use_apparent_size");
|
||||
|
||||
let (permissions, node_per_top_level_dir) = get_dir_tree(&filenames, use_apparent_size);
|
||||
let slice_it = find_big_ones(&node_per_top_level_dir, number_of_lines);
|
||||
draw_it(permissions, &node_per_top_level_dir, &slice_it);
|
||||
let (permissions, nodes) = get_dir_tree(&filenames, use_apparent_size);
|
||||
let biggest_ones = find_big_ones(nodes, number_of_lines);
|
||||
draw_it(permissions, filenames, biggest_ones);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
+43
-25
@@ -40,22 +40,13 @@ fn main_output() -> String {
|
||||
{}
|
||||
{}
|
||||
{}",
|
||||
format_string("src/test_dir", true, " 8.0K", ""),
|
||||
format_string("src/test_dir/many", true, " 4.0K", "└─┬",),
|
||||
format_string("src/test_dir", true, " 12K", ""),
|
||||
format_string("src/test_dir/many", true, " 8.0K", "└─┬",),
|
||||
format_string("src/test_dir/many/hello_file", true, " 4.0K", " ├──",),
|
||||
format_string("src/test_dir/many/a_file", false, " 0B", " └──",),
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_main_extra_slash() {
|
||||
assert_cli::Assert::main_binary()
|
||||
.with_args(&["src/test_dir/"])
|
||||
.stdout()
|
||||
.is(main_output())
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_apparent_size() {
|
||||
let r = format!(
|
||||
@@ -117,7 +108,7 @@ fn soft_sym_link_output(dir: &str, file_path: &str, link_name: &str) -> String {
|
||||
"{}
|
||||
{}
|
||||
{}",
|
||||
format_string(dir, true, " 4.0K", ""),
|
||||
format_string(dir, true, " 8.0K", ""),
|
||||
format_string(file_path, true, " 4.0K", "├──",),
|
||||
format_string(link_name, false, " 0B", "└──",),
|
||||
)
|
||||
@@ -139,18 +130,7 @@ pub fn test_hard_sym_link() {
|
||||
.output();
|
||||
assert!(c.is_ok());
|
||||
|
||||
let r = format!(
|
||||
"{}
|
||||
{}",
|
||||
format_string(dir_s, true, " 4.0K", ""),
|
||||
format_string(file_path_s, true, " 4.0K", "└──")
|
||||
);
|
||||
let r2 = format!(
|
||||
"{}
|
||||
{}",
|
||||
format_string(dir_s, true, " 4.0K", ""),
|
||||
format_string(link_name_s, true, " 4.0K", "└──")
|
||||
);
|
||||
let (r, r2) = hard_link_output(dir_s, file_path_s, link_name_s);
|
||||
|
||||
// Because this is a hard link the file and hard link look identical. Therefore
|
||||
// we cannot guarantee which version will appear first.
|
||||
@@ -171,6 +151,41 @@ pub fn test_hard_sym_link() {
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the two acceptable outputs for the hard-link test on macOS.
///
/// Both strings start with the directory line; the first then lists
/// `file_path_s` and the second lists `link_name_s`.
#[cfg(target_os = "macos")]
fn hard_link_output(dir_s: &str, file_path_s: &str, link_name_s: &str) -> (String, String) {
    // The two variants differ only in which leaf entry follows the directory.
    let expected_with = |leaf: &str| {
        format!(
            "{}\n{}",
            format_string(dir_s, true, " 4.0K", ""),
            format_string(leaf, true, " 4.0K", "└──")
        )
    };
    let with_file = expected_with(file_path_s);
    let with_link = expected_with(link_name_s);
    (with_file, with_link)
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
fn hard_link_output(dir_s: &str, file_path_s: &str, link_name_s: &str) -> (String, String) {
|
||||
let r = format!(
|
||||
"{}
|
||||
{}",
|
||||
|
||||
format_string(dir_s, true, " 8.0K", ""),
|
||||
format_string(file_path_s, true, " 4.0K", "└──")
|
||||
);
|
||||
let r2 = format!(
|
||||
"{}
|
||||
{}",
|
||||
format_string(dir_s, true, true, " 8.0K", ""),
|
||||
format_string(link_name_s, true, true, " 4.0K", "└──")
|
||||
);
|
||||
(r, r2)
|
||||
}
|
||||
|
||||
//Check we don't recurse down an infinite symlink tree
|
||||
#[test]
|
||||
pub fn test_recursive_sym_link() {
|
||||
@@ -208,7 +223,10 @@ fn recursive_sym_link_output(dir: &str, link_name: &str) -> String {
|
||||
format!(
|
||||
"{}
|
||||
{}",
|
||||
format_string(dir, true, " 0B", ""),
|
||||
format_string(dir, true, " 4.0K", ""),
|
||||
format_string(link_name, true, " 0B", "└──",),
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
// TODO: add test for bad path
|
||||
+45
-88
@@ -1,117 +1,74 @@
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
|
||||
use std::fs;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use lib::Node;
|
||||
mod platform;
|
||||
use self::platform::*;
|
||||
|
||||
pub fn get_dir_tree(filenames: &Vec<&str>, apparent_size: bool) -> (bool, Vec<Node>) {
|
||||
let mut permissions = true;
|
||||
pub fn get_dir_tree(filenames: &Vec<&str>, apparent_size: bool) -> (bool, HashMap<String, u64>) {
|
||||
let mut permissions = 0;
|
||||
let mut inodes: HashSet<(u64, u64)> = HashSet::new();
|
||||
let mut results = vec![];
|
||||
for &b in filenames {
|
||||
let filename = strip_end_slashes(b);
|
||||
let (hp, data) = examine_dir(&Path::new(&filename), apparent_size, &mut inodes);
|
||||
permissions = permissions && hp;
|
||||
match data {
|
||||
Some(d) => results.push(d),
|
||||
None => permissions = false,
|
||||
}
|
||||
let mut data: HashMap<String, u64> = HashMap::new();
|
||||
for b in filenames {
|
||||
examine_dir(
|
||||
&Path::new(b).to_path_buf(),
|
||||
apparent_size,
|
||||
&mut inodes,
|
||||
&mut data,
|
||||
&mut permissions,
|
||||
);
|
||||
}
|
||||
(permissions, results)
|
||||
}
|
||||
|
||||
fn strip_end_slashes(s: &str) -> String {
|
||||
let mut new_name = String::from(s);
|
||||
while new_name.chars().last() == Some('/') && new_name.len() != 1 {
|
||||
new_name.pop();
|
||||
}
|
||||
new_name
|
||||
(permissions == 0, data)
|
||||
}
|
||||
|
||||
fn examine_dir(
|
||||
sdir: &Path,
|
||||
top_dir: &PathBuf,
|
||||
apparent_size: bool,
|
||||
inodes: &mut HashSet<(u64, u64)>,
|
||||
) -> (bool, Option<Node>) {
|
||||
match fs::read_dir(sdir) {
|
||||
Ok(file_iter) => {
|
||||
let mut result = vec![];
|
||||
let mut have_permission = true;
|
||||
let mut total_size = 0;
|
||||
data: &mut HashMap<String, u64>,
|
||||
permissions: &mut u64,
|
||||
) {
|
||||
for entry in WalkDir::new(top_dir) {
|
||||
match entry {
|
||||
Ok(e) => {
|
||||
let maybe_size_and_inode = get_metadata(&e, apparent_size);
|
||||
|
||||
for single_path in file_iter {
|
||||
match single_path {
|
||||
Ok(d) => {
|
||||
let file_type = d.file_type().ok();
|
||||
let maybe_size_and_inode = get_metadata(&d, apparent_size);
|
||||
|
||||
match (file_type, maybe_size_and_inode) {
|
||||
(Some(file_type), Some((size, maybe_inode))) => {
|
||||
if !apparent_size {
|
||||
if let Some(inode_dev_pair) = maybe_inode {
|
||||
if inodes.contains(&inode_dev_pair) {
|
||||
continue;
|
||||
}
|
||||
inodes.insert(inode_dev_pair);
|
||||
}
|
||||
}
|
||||
total_size += size;
|
||||
|
||||
if d.path().is_dir() && !file_type.is_symlink() {
|
||||
let (hp, child) = examine_dir(&d.path(), apparent_size, inodes);
|
||||
have_permission = have_permission && hp;
|
||||
|
||||
match child {
|
||||
Some(c) => {
|
||||
total_size += c.size();
|
||||
result.push(c);
|
||||
}
|
||||
None => (),
|
||||
}
|
||||
} else {
|
||||
let path_name = d.path().to_string_lossy().to_string();
|
||||
result.push(Node::new(path_name, size, vec![]))
|
||||
match maybe_size_and_inode {
|
||||
Some((size, maybe_inode)) => {
|
||||
if !apparent_size {
|
||||
if let Some(inode_dev_pair) = maybe_inode {
|
||||
if inodes.contains(&inode_dev_pair) {
|
||||
continue;
|
||||
}
|
||||
inodes.insert(inode_dev_pair);
|
||||
}
|
||||
(_, None) => have_permission = false,
|
||||
(_, _) => (),
|
||||
}
|
||||
let mut e_path = e.path().to_path_buf();
|
||||
loop {
|
||||
let path_name = e_path.to_string_lossy().to_string();
|
||||
let s = data.entry(path_name).or_insert(0);
|
||||
*s += size;
|
||||
if e_path == *top_dir {
|
||||
break;
|
||||
}
|
||||
e_path.pop();
|
||||
}
|
||||
}
|
||||
Err(_) => (),
|
||||
None => *permissions += 1,
|
||||
}
|
||||
}
|
||||
let n = Node::new(sdir.to_string_lossy().to_string(), total_size, result);
|
||||
(have_permission, Some(n))
|
||||
_ => {}
|
||||
}
|
||||
Err(_) => (false, None),
|
||||
}
|
||||
}
|
||||
|
||||
// We start with a list of root directories - these must be the biggest folders
|
||||
// We then repeatedly merge in the children of the biggest directory - Each iteration
|
||||
// the next biggest directory's children are merged in.
|
||||
pub fn find_big_ones<'a>(l: &'a Vec<Node>, max_to_show: usize) -> Vec<&Node> {
|
||||
let mut new_l: Vec<&Node> = l.iter().map(|a| a).collect();
|
||||
new_l.sort();
|
||||
|
||||
for processed_pointer in 0..max_to_show {
|
||||
if new_l.len() == processed_pointer {
|
||||
break;
|
||||
}
|
||||
// Must be a list of pointers into new_l otherwise b_list will go out of scope
|
||||
// when it is deallocated
|
||||
let mut b_list: Vec<&Node> = new_l[processed_pointer]
|
||||
.children()
|
||||
.iter()
|
||||
.map(|a| a)
|
||||
.collect();
|
||||
new_l.extend(b_list);
|
||||
new_l.sort();
|
||||
}
|
||||
pub fn find_big_ones<'a>(data: HashMap<String, u64>, max_to_show: usize) -> Vec<(String, u64)> {
|
||||
let mut new_l: Vec<(String, u64)> = data.iter().map(|(a, b)| (a.clone(), *b)).collect();
|
||||
new_l.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
|
||||
if new_l.len() > max_to_show {
|
||||
new_l[0..max_to_show + 1].to_vec()
|
||||
|
||||
+5
-14
@@ -1,4 +1,4 @@
|
||||
use std;
|
||||
use walkdir::DirEntry;
|
||||
|
||||
fn get_block_size() -> u64 {
|
||||
// All os specific implementations of MetadataExt seem to define a block as 512 bytes
|
||||
@@ -7,10 +7,7 @@ fn get_block_size() -> u64 {
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
pub fn get_metadata(
|
||||
d: &std::fs::DirEntry,
|
||||
use_apparent_size: bool,
|
||||
) -> Option<(u64, Option<(u64, u64)>)> {
|
||||
pub fn get_metadata(d: &DirEntry, use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> {
|
||||
use std::os::linux::fs::MetadataExt;
|
||||
match d.metadata().ok() {
|
||||
Some(md) => {
|
||||
@@ -26,10 +23,7 @@ pub fn get_metadata(
|
||||
}
|
||||
|
||||
#[cfg(target_os = "unix")]
|
||||
pub fn get_metadata(
|
||||
d: &std::fs::DirEntry,
|
||||
use_apparent_size: bool,
|
||||
) -> Option<(u64, Option<(u64, u64)>)> {
|
||||
pub fn get_metadata(d: &DirEntry, use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> {
|
||||
use std::os::unix::fs::MetadataExt;
|
||||
match d.metadata().ok() {
|
||||
Some(md) => {
|
||||
@@ -45,10 +39,7 @@ pub fn get_metadata(
|
||||
}
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
pub fn get_metadata(
|
||||
d: &std::fs::DirEntry,
|
||||
use_apparent_size: bool,
|
||||
) -> Option<(u64, Option<(u64, u64)>)> {
|
||||
pub fn get_metadata(d: &DirEntry, use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> {
|
||||
use std::os::macos::fs::MetadataExt;
|
||||
match d.metadata().ok() {
|
||||
Some(md) => {
|
||||
@@ -64,7 +55,7 @@ pub fn get_metadata(
|
||||
}
|
||||
|
||||
#[cfg(not(any(target_os = "linux", target_os = "unix", target_os = "macos")))]
|
||||
pub fn get_metadata(d: &std::fs::DirEntry, _apparent: bool) -> Option<(u64, Option<(u64, u64)>)> {
|
||||
pub fn get_metadata(d: &DirEntry, _apparent: bool) -> Option<(u64, Option<(u64, u64)>)> {
|
||||
match d.metadata().ok() {
|
||||
Some(md) => Some((md.len(), None)),
|
||||
None => None,
|
||||
|
||||
Reference in New Issue
Block a user