Files
archived-dust/src/utils.rs
T
andy.boot 00c7ce8f15 Large refactor. Use rayon, 10X performance boost
Code changes:
Removed ignore & channel crates. Using a single reciever thread to build
a hashmap to prevend duplicate inodes being reported gave a severe
performance penalty

Using rayon crate with some hand crafted file traversal has improved
performance aprox 10X

Behaviour changes:

Removed parameter 'limit by filesystem' - don't think this is used, and
I only added it as it was easy to add with the ignore crate.

Sym links will now not appear in the output tree unless using '-s'
'apparent-size' flag

Change behaviour of multiple args so that it unifies them and
compares them under one tree instead of treating them
individually: https://github.com/bootandy/dust/issues/136
2021-06-22 12:20:48 +01:00

127 lines
4.2 KiB
Rust

use std::collections::HashSet;
use std::path::{Path, PathBuf};
fn is_a_parent_of<P: AsRef<Path>>(parent: P, child: P) -> bool {
let parent = parent.as_ref();
let child = child.as_ref();
child.starts_with(parent) && !parent.starts_with(child)
}
pub fn simplify_dir_names<P: AsRef<Path>>(filenames: Vec<P>) -> HashSet<PathBuf> {
let mut top_level_names: HashSet<PathBuf> = HashSet::with_capacity(filenames.len());
let mut to_remove: Vec<PathBuf> = Vec::with_capacity(filenames.len());
for t in filenames {
let top_level_name = normalize_path(t);
let mut can_add = true;
for tt in top_level_names.iter() {
if is_a_parent_of(&top_level_name, tt) {
to_remove.push(tt.to_path_buf());
} else if is_a_parent_of(tt, &top_level_name) {
can_add = false;
}
}
to_remove.sort_unstable();
top_level_names.retain(|tr| to_remove.binary_search(tr).is_err());
to_remove.clear();
if can_add {
top_level_names.insert(top_level_name);
}
}
top_level_names
}
pub fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
// normalize path ...
// 1. removing repeated separators
// 2. removing interior '.' ("current directory") path segments
// 3. removing trailing extra separators and '.' ("current directory") path segments
// * `Path.components()` does all the above work; ref: <https://doc.rust-lang.org/std/path/struct.Path.html#method.components>
// 4. changing to os preferred separator (automatically done by recollecting components back into a PathBuf)
path.as_ref().components().collect::<PathBuf>()
}
mod tests {
#[allow(unused_imports)]
use super::*;
#[test]
fn test_simplify_dir() {
let mut correct = HashSet::new();
correct.insert(PathBuf::from("a"));
assert_eq!(simplify_dir_names(vec!["a"]), correct);
}
#[test]
fn test_simplify_dir_rm_subdir() {
let mut correct = HashSet::new();
correct.insert(["a", "b"].iter().collect::<PathBuf>());
assert_eq!(simplify_dir_names(vec!["a/b", "a/b/c", "a/b/d/f"]), correct);
}
#[test]
fn test_simplify_dir_duplicates() {
let mut correct = HashSet::new();
correct.insert(["a", "b"].iter().collect::<PathBuf>());
correct.insert(PathBuf::from("c"));
assert_eq!(
simplify_dir_names(vec![
"a/b",
"a/b//",
"a/././b///",
"c",
"c/",
"c/.",
"c/././",
"c/././."
]),
correct
);
}
#[test]
fn test_simplify_dir_rm_subdir_and_not_substrings() {
let mut correct = HashSet::new();
correct.insert(PathBuf::from("b"));
correct.insert(["c", "a", "b"].iter().collect::<PathBuf>());
correct.insert(["a", "b"].iter().collect::<PathBuf>());
assert_eq!(simplify_dir_names(vec!["a/b", "c/a/b/", "b"]), correct);
}
#[test]
fn test_simplify_dir_dots() {
let mut correct = HashSet::new();
correct.insert(PathBuf::from("src"));
assert_eq!(simplify_dir_names(vec!["src/."]), correct);
}
#[test]
fn test_simplify_dir_substring_names() {
let mut correct = HashSet::new();
correct.insert(PathBuf::from("src"));
correct.insert(PathBuf::from("src_v2"));
assert_eq!(simplify_dir_names(vec!["src/", "src_v2"]), correct);
}
#[test]
fn test_is_a_parent_of() {
assert!(is_a_parent_of("/usr", "/usr/andy"));
assert!(is_a_parent_of("/usr", "/usr/andy/i/am/descendant"));
assert!(!is_a_parent_of("/usr", "/usr/."));
assert!(!is_a_parent_of("/usr", "/usr/"));
assert!(!is_a_parent_of("/usr", "/usr"));
assert!(!is_a_parent_of("/usr/", "/usr"));
assert!(!is_a_parent_of("/usr/andy", "/usr"));
assert!(!is_a_parent_of("/usr/andy", "/usr/sibling"));
assert!(!is_a_parent_of("/usr/folder", "/usr/folder_not_a_child"));
}
#[test]
fn test_is_a_parent_of_root() {
assert!(is_a_parent_of("/", "/usr/andy"));
assert!(is_a_parent_of("/", "/usr"));
assert!(!is_a_parent_of("/", "/"));
}
}