perf(IO): use parallel walkdir (jwalk) for much faster traversal

This commit is contained in:
Xavier L'Heureux
2019-11-24 13:26:14 -05:00
parent 3c920431fa
commit 86b3cccaf6
4 changed files with 185 additions and 24 deletions
+4 -4
View File
@@ -2,7 +2,7 @@ use std::cmp::Ordering;
use std::collections::HashMap;
use std::collections::HashSet;
use walkdir::WalkDir;
use jwalk::WalkDir;
mod platform;
use self::platform::*;
@@ -78,7 +78,7 @@ fn examine_dir(
data: &mut HashMap<String, u64>,
file_count_no_permission: &mut u64,
) {
for entry in WalkDir::new(top_dir) {
for entry in WalkDir::new(top_dir).preload_metadata(true) {
if let Ok(e) = entry {
let maybe_size_and_inode = get_metadata(&e, apparent_size);
@@ -93,12 +93,12 @@ fn examine_dir(
}
}
// This path and all its parent paths have their counter incremented
let mut e_path = e.path().to_path_buf();
let mut e_path = e.path();
loop {
let path_name = e_path.to_string_lossy().to_string();
let s = data.entry(path_name.clone()).or_insert(0);
*s += size;
if path_name == *top_dir {
if path_name == top_dir || path_name == "/" {
break;
}
assert!(path_name != "");
+10 -5
View File
@@ -1,4 +1,4 @@
use walkdir::DirEntry;
use jwalk::DirEntry;
fn get_block_size() -> u64 {
// All OS-specific implementations of MetadataExt seem to define a block as 512 bytes
@@ -9,17 +9,22 @@ fn get_block_size() -> u64 {
#[cfg(target_family = "unix")]
pub fn get_metadata(d: &DirEntry, use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> {
use std::os::unix::fs::MetadataExt;
d.metadata().ok().and_then(|md| {
d.metadata.as_ref().unwrap().as_ref().ok().map(|md| {
let inode = Some((md.ino(), md.dev()));
if use_apparent_size {
Some((md.len(), inode))
(md.len(), inode)
} else {
Some((md.blocks() * get_block_size(), inode))
(md.blocks() * get_block_size(), inode)
}
})
}
#[cfg(not(target_family = "unix"))]
pub fn get_metadata(d: &DirEntry, _apparent: bool) -> Option<(u64, Option<(u64, u64)>)> {
d.metadata().ok().map_or(None, |md| Some((md.len(), None)))
d.metadata
.as_ref()
.unwrap()
.as_ref()
.ok()
.map(|md| (md.len(), None))
}