Feature: Add file type filter & change behaviour of the '-f' flag

-t = Show summary of types

-e = Filter by regex
	Only files whose path matches the regex are included, so you can select a file type with e.g. -e "\.txt$"

Change behaviour of the '-f' flag - it now counts only files. Previously it
counted both files & directories. This was needed for compatibility with
the new '-e' filter flag.
This commit is contained in:
andy.boot
2021-08-12 12:21:35 +01:00
parent d8a334df3b
commit 124c19b5c9
10 changed files with 221 additions and 45 deletions
+16
View File
@@ -1,8 +1,10 @@
use std::fs;
use crate::node::Node;
use crate::utils::is_filtered_out_due_to_regex;
use rayon::iter::ParallelBridge;
use rayon::prelude::ParallelIterator;
use regex::Regex;
use std::path::PathBuf;
use std::sync::atomic;
@@ -17,6 +19,7 @@ use crate::platform::get_metadata;
pub struct WalkData {
pub ignore_directories: HashSet<PathBuf>,
pub filter_regex: Option<Regex>,
pub allowed_filesystems: HashSet<u64>,
pub use_apparent_size: bool,
pub by_filecount: bool,
@@ -84,6 +87,15 @@ fn ignore_file(entry: &DirEntry, walk_data: &WalkData) -> bool {
}
}
}
// Checking `walk_data.filter_regex.is_some()` first is important for performance: when no
// filter is set it short-circuits, so the `is_file()` check and the regex match are skipped.
if walk_data.filter_regex.is_some()
&& entry.path().is_file()
&& is_filtered_out_due_to_regex(&walk_data.filter_regex, &entry.path())
{
return true;
}
(is_dot_file && walk_data.ignore_hidden) || is_ignored_path
}
@@ -110,8 +122,10 @@ fn walk(dir: PathBuf, permissions_flag: &AtomicBool, walk_data: &WalkData) -> Op
return build_node(
entry.path(),
vec![],
&walk_data.filter_regex,
walk_data.use_apparent_size,
data.is_symlink(),
data.is_file(),
walk_data.by_filecount,
);
}
@@ -128,8 +142,10 @@ fn walk(dir: PathBuf, permissions_flag: &AtomicBool, walk_data: &WalkData) -> Op
build_node(
dir,
children,
&walk_data.filter_regex,
walk_data.use_apparent_size,
false,
false,
walk_data.by_filecount,
)
}
-4
View File
@@ -107,7 +107,6 @@ impl DrawData<'_> {
#[allow(clippy::too_many_arguments)]
pub fn draw_it(
permission_error: bool,
use_full_path: bool,
is_reversed: bool,
no_colors: bool,
@@ -116,9 +115,6 @@ pub fn draw_it(
by_filecount: bool,
option_root_node: Option<DisplayNode>,
) {
if permission_error {
eprintln!("Did not have permissions for all directories");
}
if option_root_node.is_none() {
return;
}
+75 -5
View File
@@ -1,6 +1,7 @@
use crate::display_node::DisplayNode;
use crate::node::Node;
use std::collections::BinaryHeap;
use std::collections::HashMap;
use std::collections::HashSet;
use std::path::PathBuf;
@@ -13,7 +14,11 @@ pub fn get_by_depth(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode>
Some(build_by_depth(&root, n - 1))
}
pub fn get_biggest(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode> {
pub fn get_biggest(
top_level_nodes: Vec<Node>,
n: usize,
using_file_type_filter: bool,
) -> Option<DisplayNode> {
if top_level_nodes.is_empty() {
// perhaps change this, bring back Error object?
return None;
@@ -22,18 +27,17 @@ pub fn get_biggest(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode>
let mut heap = BinaryHeap::new();
let number_top_level_nodes = top_level_nodes.len();
let root = get_new_root(top_level_nodes);
root.children.iter().for_each(|c| heap.push(c));
let mut allowed_nodes = HashSet::new();
allowed_nodes.insert(&root.name);
heap = add_children(using_file_type_filter, &root, heap);
for _ in number_top_level_nodes..n {
let line = heap.pop();
match line {
Some(line) => {
line.children.iter().for_each(|c| heap.push(c));
allowed_nodes.insert(&line.name);
heap = add_children(using_file_type_filter, line, heap);
}
None => break,
}
@@ -41,6 +45,72 @@ pub fn get_biggest(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode>
recursive_rebuilder(&allowed_nodes, &root)
}
/// Builds a summary tree with one child per file type found under `top_level_nodes`.
///
/// Sizes are accumulated per extension by `build_by_all_file_types`, then the
/// per-type rows are sorted in descending order (per `DisplayNode`'s `Ord`).
/// When there are more than `n` rows, only the first `n - 1` (at least one) are
/// kept and the remainder is folded into a single "(others)" row.
///
/// Always returns `Some`: a "(total)" node whose size is the sum of the rows
/// shown and whose children are those rows.
pub fn get_all_file_types(top_level_nodes: Vec<Node>, n: usize) -> Option<DisplayNode> {
    let mut totals_by_type: HashMap<String, DisplayNode> = HashMap::new();
    build_by_all_file_types(top_level_nodes, &mut totals_by_type);

    let mut rows: Vec<DisplayNode> = totals_by_type.into_iter().map(|(_, row)| row).collect();
    rows.sort();
    rows.reverse();

    if rows.len() > n {
        // Reserve one line for the "(others)" row, but always keep at least one real row.
        let keep = if n > 1 { n - 1 } else { 1 };
        let overflow = rows.split_off(keep);
        rows.push(DisplayNode {
            name: PathBuf::from("(others)"),
            size: overflow.iter().map(|row| row.size).sum(),
            children: vec![],
        });
    }

    Some(DisplayNode {
        name: PathBuf::from("(total)"),
        size: rows.iter().map(|row| row.size).sum(),
        children: rows,
    })
}
/// Pushes the children of `line` onto `heap` and hands the heap back.
///
/// With `using_file_type_filter` set, a child is only queued when its path is
/// an existing file or its size is non-zero; without the filter every child is
/// queued unconditionally (and no filesystem check is made).
fn add_children<'a>(
    using_file_type_filter: bool,
    line: &'a Node,
    mut heap: BinaryHeap<&'a Node>,
) -> BinaryHeap<&'a Node> {
    for child in line.children.iter() {
        // Short-circuits: `is_file()` is only consulted when the filter is active.
        let keep = !using_file_type_filter || child.name.is_file() || child.size > 0;
        if keep {
            heap.push(child);
        }
    }
    heap
}
/// Recursively accumulates node sizes into `counter`, keyed by file extension.
///
/// For every node whose path is an existing file, its size is added to the
/// entry for ".<extension>" (or "(no extension)" when the path has none); the
/// entry is created with size 0 on first use. The function then recurses into
/// the node's children regardless of whether the node itself was counted.
fn build_by_all_file_types(top_level_nodes: Vec<Node>, counter: &mut HashMap<String, DisplayNode>) {
    for node in top_level_nodes {
        if node.name.is_file() {
            let key: String = match node.name.extension() {
                Some(ext) => format!(".{}", ext.to_string_lossy()),
                None => "(no extension)".into(),
            };
            // `or_insert_with` only builds the placeholder (and its `PathBuf`) when the
            // extension has not been seen before, instead of on every file.
            let display_node = counter.entry(key.clone()).or_insert_with(|| DisplayNode {
                name: PathBuf::from(key),
                size: 0,
                children: vec![],
            });
            display_node.size += node.size;
        }
        build_by_all_file_types(node.children, counter)
    }
}
fn build_by_depth(node: &Node, depth: usize) -> DisplayNode {
let new_children = {
if depth == 0 {
+51 -6
View File
@@ -1,15 +1,18 @@
#[macro_use]
extern crate clap;
extern crate rayon;
extern crate regex;
extern crate unicode_width;
use std::collections::HashSet;
use std::process;
use self::display::draw_it;
use clap::{App, AppSettings, Arg};
use dir_walker::walk_it;
use dir_walker::WalkData;
use filter::{get_biggest, get_by_depth};
use filter::{get_all_file_types, get_biggest, get_by_depth};
use regex::Regex;
use std::cmp::max;
use std::path::PathBuf;
use terminal_size::{terminal_size, Height, Width};
@@ -151,6 +154,23 @@ fn main() {
.long("ignore_hidden")
.help("Do not display hidden files"),
)
.arg(
Arg::with_name("filter")
.short("e")
.long("filter")
.takes_value(true)
.number_of_values(1)
.multiple(true)
.conflicts_with("types")
.help("Only include files matching this regex. For png files type: -e \"\\.png$\" "),
)
.arg(
Arg::with_name("types")
.short("t")
.long("file_types")
.conflicts_with("depth")
.help("show only these file types"),
)
.arg(
Arg::with_name("width")
.short("w")
@@ -169,6 +189,20 @@ fn main() {
}
};
let summarize_file_types = options.is_present("types");
let maybe_filter = if options.is_present("filter") {
match Regex::new(options.value_of("filter").unwrap()) {
Ok(r) => Some(r),
Err(e) => {
eprintln!("Ignoring bad value for filter {:?}", e);
process::exit(1);
}
}
} else {
None
};
let number_of_lines = match value_t!(options.value_of("number_of_lines"), usize) {
Ok(v) => v,
Err(_) => {
@@ -217,23 +251,34 @@ fn main() {
let walk_data = WalkData {
ignore_directories: ignored_full_path,
filter_regex: maybe_filter,
allowed_filesystems,
use_apparent_size,
by_filecount,
ignore_hidden,
};
let (nodes, errors) = walk_it(simplified_dirs, walk_data);
let (top_level_nodes, has_errors) = walk_it(simplified_dirs, walk_data);
let tree = {
match depth {
None => get_biggest(nodes, number_of_lines),
Some(depth) => get_by_depth(nodes, depth),
match (depth, summarize_file_types) {
(_, true) => get_all_file_types(top_level_nodes, number_of_lines),
(Some(depth), _) => get_by_depth(top_level_nodes, depth),
(_, _) => get_biggest(
top_level_nodes,
number_of_lines,
options.values_of("filter").is_some(),
),
}
};
if options.is_present("filter") {
println!("Filtering by: {}", options.value_of("filter").unwrap());
}
if has_errors {
eprintln!("Did not have permissions for all directories");
}
draw_it(
errors,
options.is_present("display_full_paths"),
!options.is_present("reverse"),
no_colors,
+18 -5
View File
@@ -1,5 +1,7 @@
use crate::platform::get_metadata;
use crate::utils::is_filtered_out_due_to_regex;
use regex::Regex;
use std::cmp::Ordering;
use std::path::PathBuf;
@@ -14,18 +16,29 @@ pub struct Node {
pub fn build_node(
dir: PathBuf,
children: Vec<Node>,
filter_regex: &Option<Regex>,
use_apparent_size: bool,
is_symlink: bool,
is_file: bool,
by_filecount: bool,
) -> Option<Node> {
match get_metadata(&dir, use_apparent_size) {
Some(data) => {
let (size, inode_device) = if by_filecount {
(1, data.1)
} else if is_symlink && !use_apparent_size {
(0, None)
let inode_device = if is_symlink && !use_apparent_size {
None
} else {
data
data.1
};
let size = if is_filtered_out_due_to_regex(filter_regex, &dir)
|| (is_symlink && !use_apparent_size)
|| by_filecount && !is_file
{
0
} else if by_filecount {
1
} else {
data.0
};
Some(Node {
+14 -6
View File
@@ -3,12 +3,7 @@ use std::collections::HashSet;
use std::path::{Path, PathBuf};
use crate::platform;
/// Returns `true` when `parent` is a strict ancestor of `child`.
///
/// The comparison is component-wise (via `Path::starts_with`), so "/usr" is not
/// a parent of "/usrx", and a path is never considered a parent of itself.
fn is_a_parent_of<P: AsRef<Path>>(parent: P, child: P) -> bool {
    let (ancestor, descendant) = (parent.as_ref(), child.as_ref());
    // A prefix relation in both directions means the two paths have the same components.
    let same_path = ancestor.starts_with(descendant) && descendant.starts_with(ancestor);
    !same_path && descendant.starts_with(ancestor)
}
use regex::Regex;
pub fn simplify_dir_names<P: AsRef<Path>>(filenames: Vec<P>) -> HashSet<PathBuf> {
let mut top_level_names: HashSet<PathBuf> = HashSet::with_capacity(filenames.len());
@@ -62,6 +57,19 @@ pub fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
path.as_ref().components().collect::<PathBuf>()
}
/// Reports whether `dir` should be excluded because it does not match the filter.
///
/// Returns `false` when no filter is configured. Otherwise the path is converted
/// to text lossily and the result is `true` exactly when the regex does not match.
pub fn is_filtered_out_due_to_regex(filter_regex: &Option<Regex>, dir: &Path) -> bool {
    if let Some(pattern) = filter_regex {
        let path_text = dir.as_os_str().to_string_lossy();
        return !pattern.is_match(&path_text);
    }
    false
}
/// Returns `true` when `parent` is a strict ancestor of `child`.
///
/// Paths are compared component by component, and a path is not treated as a
/// parent of itself.
fn is_a_parent_of<P: AsRef<Path>>(parent: P, child: P) -> bool {
    let ancestor = parent.as_ref();
    let descendant = child.as_ref();
    if !descendant.starts_with(ancestor) {
        return false;
    }
    // If the prefix relation also holds in reverse, the two paths are the same.
    !ancestor.starts_with(descendant)
}
mod tests {
#[allow(unused_imports)]
use super::*;