mirror of
https://github.com/bootandy/dust.git
synced 2026-06-08 11:29:05 +03:00
Feature: Adding file types filter & F flag changed
-t = Show a summary of file types. -e = Filter by regex; this lets you select a file type, e.g. -e "\.txt$". Changed the behaviour of the '-f' flag: it now counts only files, whereas before it counted files & directories. This was needed for compatibility with the new '-e' filter flag.
This commit is contained in:
@@ -1,8 +1,10 @@
|
||||
use std::fs;
|
||||
|
||||
use crate::node::Node;
|
||||
use crate::utils::is_filtered_out_due_to_regex;
|
||||
use rayon::iter::ParallelBridge;
|
||||
use rayon::prelude::ParallelIterator;
|
||||
use regex::Regex;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use std::sync::atomic;
|
||||
@@ -17,6 +19,7 @@ use crate::platform::get_metadata;
|
||||
|
||||
pub struct WalkData {
|
||||
pub ignore_directories: HashSet<PathBuf>,
|
||||
pub filter_regex: Option<Regex>,
|
||||
pub allowed_filesystems: HashSet<u64>,
|
||||
pub use_apparent_size: bool,
|
||||
pub by_filecount: bool,
|
||||
@@ -84,6 +87,15 @@ fn ignore_file(entry: &DirEntry, walk_data: &WalkData) -> bool {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checking `walk_data.filter_regex.is_some()` first is important for performance: when no filter is set it short-circuits before the `is_file()` check and the regex match are attempted.
|
||||
if walk_data.filter_regex.is_some()
|
||||
&& entry.path().is_file()
|
||||
&& is_filtered_out_due_to_regex(&walk_data.filter_regex, &entry.path())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
(is_dot_file && walk_data.ignore_hidden) || is_ignored_path
|
||||
}
|
||||
|
||||
@@ -110,8 +122,10 @@ fn walk(dir: PathBuf, permissions_flag: &AtomicBool, walk_data: &WalkData) -> Op
|
||||
return build_node(
|
||||
entry.path(),
|
||||
vec![],
|
||||
&walk_data.filter_regex,
|
||||
walk_data.use_apparent_size,
|
||||
data.is_symlink(),
|
||||
data.is_file(),
|
||||
walk_data.by_filecount,
|
||||
);
|
||||
}
|
||||
@@ -128,8 +142,10 @@ fn walk(dir: PathBuf, permissions_flag: &AtomicBool, walk_data: &WalkData) -> Op
|
||||
build_node(
|
||||
dir,
|
||||
children,
|
||||
&walk_data.filter_regex,
|
||||
walk_data.use_apparent_size,
|
||||
false,
|
||||
false,
|
||||
walk_data.by_filecount,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -107,7 +107,6 @@ impl DrawData<'_> {
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn draw_it(
|
||||
permission_error: bool,
|
||||
use_full_path: bool,
|
||||
is_reversed: bool,
|
||||
no_colors: bool,
|
||||
@@ -116,9 +115,6 @@ pub fn draw_it(
|
||||
by_filecount: bool,
|
||||
option_root_node: Option<DisplayNode>,
|
||||
) {
|
||||
if permission_error {
|
||||
eprintln!("Did not have permissions for all directories");
|
||||
}
|
||||
if option_root_node.is_none() {
|
||||
return;
|
||||
}
|
||||
|
||||
+75
-5
@@ -1,6 +1,7 @@
|
||||
use crate::display_node::DisplayNode;
|
||||
use crate::node::Node;
|
||||
use std::collections::BinaryHeap;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
use std::path::PathBuf;
|
||||
|
||||
@@ -13,7 +14,11 @@ pub fn get_by_depth(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode>
|
||||
Some(build_by_depth(&root, n - 1))
|
||||
}
|
||||
|
||||
pub fn get_biggest(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode> {
|
||||
pub fn get_biggest(
|
||||
top_level_nodes: Vec<Node>,
|
||||
n: usize,
|
||||
using_file_type_filter: bool,
|
||||
) -> Option<DisplayNode> {
|
||||
if top_level_nodes.is_empty() {
|
||||
// perhaps change this, bring back Error object?
|
||||
return None;
|
||||
@@ -22,18 +27,17 @@ pub fn get_biggest(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode>
|
||||
let mut heap = BinaryHeap::new();
|
||||
let number_top_level_nodes = top_level_nodes.len();
|
||||
let root = get_new_root(top_level_nodes);
|
||||
|
||||
root.children.iter().for_each(|c| heap.push(c));
|
||||
|
||||
let mut allowed_nodes = HashSet::new();
|
||||
|
||||
allowed_nodes.insert(&root.name);
|
||||
heap = add_children(using_file_type_filter, &root, heap);
|
||||
|
||||
for _ in number_top_level_nodes..n {
|
||||
let line = heap.pop();
|
||||
match line {
|
||||
Some(line) => {
|
||||
line.children.iter().for_each(|c| heap.push(c));
|
||||
allowed_nodes.insert(&line.name);
|
||||
heap = add_children(using_file_type_filter, line, heap);
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
@@ -41,6 +45,72 @@ pub fn get_biggest(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode>
|
||||
recursive_rebuilder(&allowed_nodes, &root)
|
||||
}
|
||||
|
||||
pub fn get_all_file_types(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode> {
|
||||
let mut map: HashMap<String, DisplayNode> = HashMap::new();
|
||||
build_by_all_file_types(top_level_nodes, &mut map);
|
||||
let mut by_types: Vec<DisplayNode> = map.into_iter().map(|(_k, v)| v).collect();
|
||||
by_types.sort();
|
||||
by_types.reverse();
|
||||
|
||||
let displayed = if by_types.len() <= n {
|
||||
by_types
|
||||
} else {
|
||||
let (displayed, rest) = by_types.split_at(if n > 1 { n - 1 } else { 1 });
|
||||
let remaining = DisplayNode {
|
||||
name: PathBuf::from("(others)"),
|
||||
size: rest.iter().map(|a| a.size).sum(),
|
||||
children: vec![],
|
||||
};
|
||||
|
||||
let mut displayed = displayed.to_vec();
|
||||
displayed.push(remaining);
|
||||
displayed
|
||||
};
|
||||
|
||||
let result = DisplayNode {
|
||||
name: PathBuf::from("(total)"),
|
||||
size: displayed.iter().map(|a| a.size).sum(),
|
||||
children: displayed,
|
||||
};
|
||||
Some(result)
|
||||
}
|
||||
|
||||
/// Pushes the children of `line` onto `heap` and returns the heap.
///
/// When `using_file_type_filter` is set, a child is only pushed if its size is
/// non-zero or its path is a regular file; otherwise every child is pushed.
///
/// The size comparison is evaluated before `is_file()` because the latter queries
/// the filesystem: for every child with a non-zero size the stat call is skipped,
/// while the resulting set of pushed nodes is unchanged.
fn add_children<'a>(
    using_file_type_filter: bool,
    line: &'a Node,
    mut heap: BinaryHeap<&'a Node>,
) -> BinaryHeap<&'a Node> {
    for child in &line.children {
        if !using_file_type_filter || child.size > 0 || child.name.is_file() {
            heap.push(child);
        }
    }
    heap
}
|
||||
|
||||
fn build_by_all_file_types(top_level_nodes: Vec<Node>, counter: &mut HashMap<String, DisplayNode>) {
|
||||
for node in top_level_nodes {
|
||||
if node.name.is_file() {
|
||||
let ext = node.name.extension();
|
||||
let key: String = match ext {
|
||||
Some(e) => ".".to_string() + &e.to_string_lossy(),
|
||||
None => "(no extension)".into(),
|
||||
};
|
||||
let mut display_node = counter.entry(key.clone()).or_insert(DisplayNode {
|
||||
name: PathBuf::from(key),
|
||||
size: 0,
|
||||
children: vec![],
|
||||
});
|
||||
display_node.size += node.size;
|
||||
}
|
||||
build_by_all_file_types(node.children, counter)
|
||||
}
|
||||
}
|
||||
|
||||
fn build_by_depth(node: &Node, depth: usize) -> DisplayNode {
|
||||
let new_children = {
|
||||
if depth == 0 {
|
||||
|
||||
+51
-6
@@ -1,15 +1,18 @@
|
||||
#[macro_use]
|
||||
extern crate clap;
|
||||
extern crate rayon;
|
||||
extern crate regex;
|
||||
extern crate unicode_width;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::process;
|
||||
|
||||
use self::display::draw_it;
|
||||
use clap::{App, AppSettings, Arg};
|
||||
use dir_walker::walk_it;
|
||||
use dir_walker::WalkData;
|
||||
use filter::{get_biggest, get_by_depth};
|
||||
use filter::{get_all_file_types, get_biggest, get_by_depth};
|
||||
use regex::Regex;
|
||||
use std::cmp::max;
|
||||
use std::path::PathBuf;
|
||||
use terminal_size::{terminal_size, Height, Width};
|
||||
@@ -151,6 +154,23 @@ fn main() {
|
||||
.long("ignore_hidden")
|
||||
.help("Do not display hidden files"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("filter")
|
||||
.short("e")
|
||||
.long("filter")
|
||||
.takes_value(true)
|
||||
.number_of_values(1)
|
||||
.multiple(true)
|
||||
.conflicts_with("types")
|
||||
.help("Only include files matching this regex. For png files type: -e \"\\.png$\" "),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("types")
|
||||
.short("t")
|
||||
.long("file_types")
|
||||
.conflicts_with("depth")
|
||||
.help("show only these file types"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("width")
|
||||
.short("w")
|
||||
@@ -169,6 +189,20 @@ fn main() {
|
||||
}
|
||||
};
|
||||
|
||||
let summarize_file_types = options.is_present("types");
|
||||
|
||||
let maybe_filter = if options.is_present("filter") {
|
||||
match Regex::new(options.value_of("filter").unwrap()) {
|
||||
Ok(r) => Some(r),
|
||||
Err(e) => {
|
||||
eprintln!("Ignoring bad value for filter {:?}", e);
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let number_of_lines = match value_t!(options.value_of("number_of_lines"), usize) {
|
||||
Ok(v) => v,
|
||||
Err(_) => {
|
||||
@@ -217,23 +251,34 @@ fn main() {
|
||||
|
||||
let walk_data = WalkData {
|
||||
ignore_directories: ignored_full_path,
|
||||
filter_regex: maybe_filter,
|
||||
allowed_filesystems,
|
||||
use_apparent_size,
|
||||
by_filecount,
|
||||
ignore_hidden,
|
||||
};
|
||||
|
||||
let (nodes, errors) = walk_it(simplified_dirs, walk_data);
|
||||
let (top_level_nodes, has_errors) = walk_it(simplified_dirs, walk_data);
|
||||
|
||||
let tree = {
|
||||
match depth {
|
||||
None => get_biggest(nodes, number_of_lines),
|
||||
Some(depth) => get_by_depth(nodes, depth),
|
||||
match (depth, summarize_file_types) {
|
||||
(_, true) => get_all_file_types(top_level_nodes, number_of_lines),
|
||||
(Some(depth), _) => get_by_depth(top_level_nodes, depth),
|
||||
(_, _) => get_biggest(
|
||||
top_level_nodes,
|
||||
number_of_lines,
|
||||
options.values_of("filter").is_some(),
|
||||
),
|
||||
}
|
||||
};
|
||||
|
||||
if options.is_present("filter") {
|
||||
println!("Filtering by: {}", options.value_of("filter").unwrap());
|
||||
}
|
||||
if has_errors {
|
||||
eprintln!("Did not have permissions for all directories");
|
||||
}
|
||||
draw_it(
|
||||
errors,
|
||||
options.is_present("display_full_paths"),
|
||||
!options.is_present("reverse"),
|
||||
no_colors,
|
||||
|
||||
+18
-5
@@ -1,5 +1,7 @@
|
||||
use crate::platform::get_metadata;
|
||||
use crate::utils::is_filtered_out_due_to_regex;
|
||||
|
||||
use regex::Regex;
|
||||
use std::cmp::Ordering;
|
||||
use std::path::PathBuf;
|
||||
|
||||
@@ -14,18 +16,29 @@ pub struct Node {
|
||||
pub fn build_node(
|
||||
dir: PathBuf,
|
||||
children: Vec<Node>,
|
||||
filter_regex: &Option<Regex>,
|
||||
use_apparent_size: bool,
|
||||
is_symlink: bool,
|
||||
is_file: bool,
|
||||
by_filecount: bool,
|
||||
) -> Option<Node> {
|
||||
match get_metadata(&dir, use_apparent_size) {
|
||||
Some(data) => {
|
||||
let (size, inode_device) = if by_filecount {
|
||||
(1, data.1)
|
||||
} else if is_symlink && !use_apparent_size {
|
||||
(0, None)
|
||||
let inode_device = if is_symlink && !use_apparent_size {
|
||||
None
|
||||
} else {
|
||||
data
|
||||
data.1
|
||||
};
|
||||
|
||||
let size = if is_filtered_out_due_to_regex(filter_regex, &dir)
|
||||
|| (is_symlink && !use_apparent_size)
|
||||
|| by_filecount && !is_file
|
||||
{
|
||||
0
|
||||
} else if by_filecount {
|
||||
1
|
||||
} else {
|
||||
data.0
|
||||
};
|
||||
|
||||
Some(Node {
|
||||
|
||||
+14
-6
@@ -3,12 +3,7 @@ use std::collections::HashSet;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use crate::platform;
|
||||
|
||||
/// Returns `true` when `parent` is a strict ancestor of `child`.
///
/// `child` must start with `parent`, and `parent` must not start with `child`;
/// the second condition rejects the case where both paths are the same.
fn is_a_parent_of<P: AsRef<Path>>(parent: P, child: P) -> bool {
    let (ancestor, descendant) = (parent.as_ref(), child.as_ref());
    if !descendant.starts_with(ancestor) {
        return false;
    }
    !ancestor.starts_with(descendant)
}
|
||||
use regex::Regex;
|
||||
|
||||
pub fn simplify_dir_names<P: AsRef<Path>>(filenames: Vec<P>) -> HashSet<PathBuf> {
|
||||
let mut top_level_names: HashSet<PathBuf> = HashSet::with_capacity(filenames.len());
|
||||
@@ -62,6 +57,19 @@ pub fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
|
||||
path.as_ref().components().collect::<PathBuf>()
|
||||
}
|
||||
|
||||
pub fn is_filtered_out_due_to_regex(filter_regex: &Option<Regex>, dir: &Path) -> bool {
|
||||
match filter_regex {
|
||||
Some(fr) => !fr.is_match(&dir.as_os_str().to_string_lossy()),
|
||||
None => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when `parent` is a strict ancestor of `child`.
///
/// `child` must start with `parent`, and `parent` must not start with `child`;
/// the second condition rejects the case where both paths are the same.
fn is_a_parent_of<P: AsRef<Path>>(parent: P, child: P) -> bool {
    let (ancestor, descendant) = (parent.as_ref(), child.as_ref());
    if !descendant.starts_with(ancestor) {
        return false;
    }
    !ancestor.starts_with(descendant)
}
|
||||
|
||||
mod tests {
|
||||
#[allow(unused_imports)]
|
||||
use super::*;
|
||||
|
||||
Reference in New Issue
Block a user