Rewrite to use walkdir instead of recursion

burntsushi advised using walkdir, because recursing over the file system
can blow the stack.

walkdir is slower but allows the code to be cleaner and more reliable

Also experimented with the ignore crate, but locking the hashmap resulted in
performance similar to walkdir with much uglier code.
This commit is contained in:
andy.boot
2018-04-19 15:43:32 +01:00
parent 270edf0a76
commit 24c97ef92f
10 changed files with 213 additions and 264 deletions
+45 -88
View File
@@ -1,117 +1,74 @@
use std::collections::HashMap;
use std::collections::HashSet;
use std::fs;
use walkdir::WalkDir;
use std::path::Path;
use std::path::PathBuf;
use lib::Node;
mod platform;
use self::platform::*;
pub fn get_dir_tree(filenames: &Vec<&str>, apparent_size: bool) -> (bool, Vec<Node>) {
let mut permissions = true;
pub fn get_dir_tree(filenames: &Vec<&str>, apparent_size: bool) -> (bool, HashMap<String, u64>) {
let mut permissions = 0;
let mut inodes: HashSet<(u64, u64)> = HashSet::new();
let mut results = vec![];
for &b in filenames {
let filename = strip_end_slashes(b);
let (hp, data) = examine_dir(&Path::new(&filename), apparent_size, &mut inodes);
permissions = permissions && hp;
match data {
Some(d) => results.push(d),
None => permissions = false,
}
let mut data: HashMap<String, u64> = HashMap::new();
for b in filenames {
examine_dir(
&Path::new(b).to_path_buf(),
apparent_size,
&mut inodes,
&mut data,
&mut permissions,
);
}
(permissions, results)
}
fn strip_end_slashes(s: &str) -> String {
let mut new_name = String::from(s);
while new_name.chars().last() == Some('/') && new_name.len() != 1 {
new_name.pop();
}
new_name
(permissions == 0, data)
}
fn examine_dir(
sdir: &Path,
top_dir: &PathBuf,
apparent_size: bool,
inodes: &mut HashSet<(u64, u64)>,
) -> (bool, Option<Node>) {
match fs::read_dir(sdir) {
Ok(file_iter) => {
let mut result = vec![];
let mut have_permission = true;
let mut total_size = 0;
data: &mut HashMap<String, u64>,
permissions: &mut u64,
) {
for entry in WalkDir::new(top_dir) {
match entry {
Ok(e) => {
let maybe_size_and_inode = get_metadata(&e, apparent_size);
for single_path in file_iter {
match single_path {
Ok(d) => {
let file_type = d.file_type().ok();
let maybe_size_and_inode = get_metadata(&d, apparent_size);
match (file_type, maybe_size_and_inode) {
(Some(file_type), Some((size, maybe_inode))) => {
if !apparent_size {
if let Some(inode_dev_pair) = maybe_inode {
if inodes.contains(&inode_dev_pair) {
continue;
}
inodes.insert(inode_dev_pair);
}
}
total_size += size;
if d.path().is_dir() && !file_type.is_symlink() {
let (hp, child) = examine_dir(&d.path(), apparent_size, inodes);
have_permission = have_permission && hp;
match child {
Some(c) => {
total_size += c.size();
result.push(c);
}
None => (),
}
} else {
let path_name = d.path().to_string_lossy().to_string();
result.push(Node::new(path_name, size, vec![]))
match maybe_size_and_inode {
Some((size, maybe_inode)) => {
if !apparent_size {
if let Some(inode_dev_pair) = maybe_inode {
if inodes.contains(&inode_dev_pair) {
continue;
}
inodes.insert(inode_dev_pair);
}
(_, None) => have_permission = false,
(_, _) => (),
}
let mut e_path = e.path().to_path_buf();
loop {
let path_name = e_path.to_string_lossy().to_string();
let s = data.entry(path_name).or_insert(0);
*s += size;
if e_path == *top_dir {
break;
}
e_path.pop();
}
}
Err(_) => (),
None => *permissions += 1,
}
}
let n = Node::new(sdir.to_string_lossy().to_string(), total_size, result);
(have_permission, Some(n))
_ => {}
}
Err(_) => (false, None),
}
}
// We start with a list of root directories - these must be the biggest folders
// We then repeatedly merge in the children of the biggest directory - Each iteration
// the next biggest directory's children are merged in.
pub fn find_big_ones<'a>(l: &'a Vec<Node>, max_to_show: usize) -> Vec<&Node> {
let mut new_l: Vec<&Node> = l.iter().map(|a| a).collect();
new_l.sort();
for processed_pointer in 0..max_to_show {
if new_l.len() == processed_pointer {
break;
}
// Must be a list of pointers into new_l otherwise b_list will go out of scope
// when it is deallocated
let mut b_list: Vec<&Node> = new_l[processed_pointer]
.children()
.iter()
.map(|a| a)
.collect();
new_l.extend(b_list);
new_l.sort();
}
pub fn find_big_ones<'a>(data: HashMap<String, u64>, max_to_show: usize) -> Vec<(String, u64)> {
let mut new_l: Vec<(String, u64)> = data.iter().map(|(a, b)| (a.clone(), *b)).collect();
new_l.sort_by(|a, b| b.1.cmp(&a.1));
if new_l.len() > max_to_show {
new_l[0..max_to_show + 1].to_vec()