mirror of
https://github.com/bootandy/dust.git
synced 2026-06-08 11:29:05 +03:00
feat: Added the ability to filter the corresponding files based on the access time, modify time, and change time of the file for statistics
This commit is contained in:
+27
@@ -259,4 +259,31 @@ pub fn build_cli() -> Command {
|
||||
.action(clap::ArgAction::SetTrue)
|
||||
.help("Output the directory tree as json to the current directory"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("mtime")
|
||||
.short('M')
|
||||
.long("mtime")
|
||||
.num_args(1)
|
||||
.allow_hyphen_values(true)
|
||||
.value_parser(value_parser!(String))
|
||||
.help("+/-n matches files modified more/less than n days ago , and n matches files modified exactly n days ago, days are rounded down.That is +n => (−∞, curr−(n+1)), n => [curr−(n+1), curr−n), and -n => (𝑐𝑢𝑟𝑟−𝑛, +∞)")
|
||||
)
|
||||
.arg(
|
||||
Arg::new("atime")
|
||||
.short('A')
|
||||
.long("atime")
|
||||
.num_args(1)
|
||||
.allow_hyphen_values(true)
|
||||
.value_parser(value_parser!(String))
|
||||
.help("just like -mtime, but based on file access time")
|
||||
)
|
||||
.arg(
|
||||
Arg::new("ctime")
|
||||
.short('y')
|
||||
.long("ctime")
|
||||
.num_args(1)
|
||||
.allow_hyphen_values(true)
|
||||
.value_parser(value_parser!(String))
|
||||
.help("just like -mtime, but based on file change time")
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use chrono::{Local, TimeZone};
|
||||
use clap::ArgMatches;
|
||||
use config_file::FromConfigFile;
|
||||
use regex::Regex;
|
||||
@@ -6,8 +7,11 @@ use std::io::IsTerminal;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::dir_walker::Operater;
|
||||
use crate::display::get_number_format;
|
||||
|
||||
pub static DAY_SECEONDS: i64 = 24 * 60 * 60;
|
||||
|
||||
#[derive(Deserialize, Default)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
#[serde(deny_unknown_fields)]
|
||||
@@ -132,6 +136,61 @@ impl Config {
|
||||
pub fn get_output_json(&self, options: &ArgMatches) -> bool {
|
||||
Some(true) == self.output_json || options.get_flag("output_json")
|
||||
}
|
||||
|
||||
pub fn get_modified_time_operator(&self, options: &ArgMatches) -> (Operater, i64) {
|
||||
get_filter_time_operator(
|
||||
options.get_one::<String>("mtime"),
|
||||
get_current_date_epoch_seconds(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn get_accessed_time_operator(&self, options: &ArgMatches) -> (Operater, i64) {
|
||||
get_filter_time_operator(
|
||||
options.get_one::<String>("atime"),
|
||||
get_current_date_epoch_seconds(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn get_created_time_operator(&self, options: &ArgMatches) -> (Operater, i64) {
|
||||
get_filter_time_operator(
|
||||
options.get_one::<String>("ctime"),
|
||||
get_current_date_epoch_seconds(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn get_current_date_epoch_seconds() -> i64 {
|
||||
// calcurate current date epoch seconds
|
||||
let now = Local::now();
|
||||
let current_date = now.date_naive();
|
||||
|
||||
let current_date_time = current_date.and_hms_opt(0, 0, 0).unwrap();
|
||||
Local
|
||||
.from_local_datetime(¤t_date_time)
|
||||
.unwrap()
|
||||
.timestamp()
|
||||
}
|
||||
|
||||
fn get_filter_time_operator(
|
||||
option_value: Option<&String>,
|
||||
current_date_epoch_seconds: i64,
|
||||
) -> (Operater, i64) {
|
||||
match option_value {
|
||||
Some(val) => {
|
||||
let time = current_date_epoch_seconds
|
||||
- val
|
||||
.parse::<i64>()
|
||||
.unwrap_or_else(|_| panic!("invalid data format"))
|
||||
.abs()
|
||||
* DAY_SECEONDS;
|
||||
match val.chars().next().expect("Value should not be empty") {
|
||||
'+' => (Operater::LessThan, time - DAY_SECEONDS),
|
||||
'-' => (Operater::GreaterThan, time),
|
||||
_ => (Operater::Equal, time - DAY_SECEONDS),
|
||||
}
|
||||
}
|
||||
None => (Operater::GreaterThan, 0),
|
||||
}
|
||||
}
|
||||
|
||||
fn convert_min_size(input: &str) -> Option<usize> {
|
||||
@@ -191,8 +250,22 @@ pub fn get_config() -> Config {
|
||||
mod tests {
|
||||
#[allow(unused_imports)]
|
||||
use super::*;
|
||||
use chrono::{Datelike, Timelike};
|
||||
use clap::{value_parser, Arg, ArgMatches, Command};
|
||||
|
||||
#[test]
|
||||
fn test_get_current_date_epoch_seconds() {
|
||||
let epoch_seconds = get_current_date_epoch_seconds();
|
||||
let dt = Local.timestamp_opt(epoch_seconds, 0).unwrap();
|
||||
|
||||
assert_eq!(dt.hour(), 0);
|
||||
assert_eq!(dt.minute(), 0);
|
||||
assert_eq!(dt.second(), 0);
|
||||
assert_eq!(dt.date_naive().day(), Local::now().date_naive().day());
|
||||
assert_eq!(dt.date_naive().month(), Local::now().date_naive().month());
|
||||
assert_eq!(dt.date_naive().year(), Local::now().date_naive().year());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_conversion() {
|
||||
assert_eq!(convert_min_size("55"), Some(55));
|
||||
|
||||
+36
-22
@@ -7,6 +7,7 @@ use crate::progress::Operation;
|
||||
use crate::progress::PAtomicInfo;
|
||||
use crate::progress::RuntimeErrors;
|
||||
use crate::progress::ORDERING;
|
||||
use crate::utils::is_filtered_out_due_to_file_time;
|
||||
use crate::utils::is_filtered_out_due_to_invert_regex;
|
||||
use crate::utils::is_filtered_out_due_to_regex;
|
||||
use rayon::iter::ParallelBridge;
|
||||
@@ -20,11 +21,22 @@ use crate::node::build_node;
|
||||
use std::fs::DirEntry;
|
||||
|
||||
use crate::platform::get_metadata;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Operater {
|
||||
Equal = 0,
|
||||
LessThan = 1,
|
||||
GreaterThan = 2,
|
||||
}
|
||||
|
||||
pub struct WalkData<'a> {
|
||||
pub ignore_directories: HashSet<PathBuf>,
|
||||
pub filter_regex: &'a [Regex],
|
||||
pub invert_filter_regex: &'a [Regex],
|
||||
pub allowed_filesystems: HashSet<u64>,
|
||||
pub filter_modified_time: (Operater, i64),
|
||||
pub filter_accessed_time: (Operater, i64),
|
||||
pub filter_changed_time: (Operater, i64),
|
||||
pub use_apparent_size: bool,
|
||||
pub by_filecount: bool,
|
||||
pub ignore_hidden: bool,
|
||||
@@ -99,13 +111,27 @@ fn ignore_file(entry: &DirEntry, walk_data: &WalkData) -> bool {
|
||||
let is_dot_file = entry.file_name().to_str().unwrap_or("").starts_with('.');
|
||||
let is_ignored_path = walk_data.ignore_directories.contains(&entry.path());
|
||||
|
||||
if !walk_data.allowed_filesystems.is_empty() {
|
||||
let size_inode_device = get_metadata(&entry.path(), false);
|
||||
|
||||
if let Some((_size, Some((_id, dev)))) = size_inode_device {
|
||||
if !walk_data.allowed_filesystems.contains(&dev) {
|
||||
return true;
|
||||
}
|
||||
let size_inode_device = get_metadata(&entry.path(), false);
|
||||
if let Some((_size, Some((_id, dev)), (modified_time, accessed_time, changed_time))) =
|
||||
size_inode_device
|
||||
{
|
||||
if !walk_data.allowed_filesystems.is_empty()
|
||||
&& !walk_data.allowed_filesystems.contains(&dev)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
if entry.path().is_file()
|
||||
&& [
|
||||
(&walk_data.filter_modified_time, modified_time),
|
||||
(&walk_data.filter_accessed_time, accessed_time),
|
||||
(&walk_data.filter_changed_time, changed_time),
|
||||
]
|
||||
.iter()
|
||||
.any(|(filter_time, actual_time)| {
|
||||
is_filtered_out_due_to_file_time(filter_time, *actual_time)
|
||||
})
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -130,6 +156,7 @@ fn ignore_file(entry: &DirEntry, walk_data: &WalkData) -> bool {
|
||||
fn walk(dir: PathBuf, walk_data: &WalkData, depth: usize) -> Option<Node> {
|
||||
let prog_data = &walk_data.progress_data;
|
||||
let errors = &walk_data.errors;
|
||||
|
||||
if errors.lock().unwrap().abort {
|
||||
return None;
|
||||
}
|
||||
@@ -161,13 +188,10 @@ fn walk(dir: PathBuf, walk_data: &WalkData, depth: usize) -> Option<Node> {
|
||||
let node = build_node(
|
||||
entry.path(),
|
||||
vec![],
|
||||
walk_data.filter_regex,
|
||||
walk_data.invert_filter_regex,
|
||||
walk_data.use_apparent_size,
|
||||
data.is_symlink(),
|
||||
data.is_file(),
|
||||
walk_data.by_filecount,
|
||||
depth,
|
||||
walk_data,
|
||||
);
|
||||
|
||||
prog_data.num_files.fetch_add(1, ORDERING);
|
||||
@@ -216,17 +240,7 @@ fn walk(dir: PathBuf, walk_data: &WalkData, depth: usize) -> Option<Node> {
|
||||
}
|
||||
vec![]
|
||||
};
|
||||
build_node(
|
||||
dir,
|
||||
children,
|
||||
walk_data.filter_regex,
|
||||
walk_data.invert_filter_regex,
|
||||
walk_data.use_apparent_size,
|
||||
false,
|
||||
false,
|
||||
walk_data.by_filecount,
|
||||
depth,
|
||||
)
|
||||
build_node(dir, children, false, false, depth, walk_data)
|
||||
}
|
||||
|
||||
mod tests {
|
||||
|
||||
@@ -211,11 +211,18 @@ fn main() {
|
||||
indicator.spawn(output_format.clone())
|
||||
}
|
||||
|
||||
let filter_modified_time = config.get_modified_time_operator(&options);
|
||||
let filter_accessed_time = config.get_accessed_time_operator(&options);
|
||||
let filter_changed_time = config.get_created_time_operator(&options);
|
||||
|
||||
let walk_data = WalkData {
|
||||
ignore_directories: ignored_full_path,
|
||||
filter_regex: &filter_regexs,
|
||||
invert_filter_regex: &invert_filter_regexs,
|
||||
allowed_filesystems,
|
||||
filter_modified_time,
|
||||
filter_accessed_time,
|
||||
filter_changed_time,
|
||||
use_apparent_size: config.get_apparent_size(&options),
|
||||
by_filecount,
|
||||
ignore_hidden,
|
||||
|
||||
+17
-8
@@ -1,8 +1,9 @@
|
||||
use crate::dir_walker::WalkData;
|
||||
use crate::platform::get_metadata;
|
||||
use crate::utils::is_filtered_out_due_to_file_time;
|
||||
use crate::utils::is_filtered_out_due_to_invert_regex;
|
||||
use crate::utils::is_filtered_out_due_to_regex;
|
||||
|
||||
use regex::Regex;
|
||||
use std::cmp::Ordering;
|
||||
use std::path::PathBuf;
|
||||
|
||||
@@ -19,14 +20,14 @@ pub struct Node {
|
||||
pub fn build_node(
|
||||
dir: PathBuf,
|
||||
children: Vec<Node>,
|
||||
filter_regex: &[Regex],
|
||||
invert_filter_regex: &[Regex],
|
||||
use_apparent_size: bool,
|
||||
is_symlink: bool,
|
||||
is_file: bool,
|
||||
by_filecount: bool,
|
||||
depth: usize,
|
||||
walk_data: &WalkData,
|
||||
) -> Option<Node> {
|
||||
let use_apparent_size = walk_data.use_apparent_size;
|
||||
let by_filecount = walk_data.by_filecount;
|
||||
|
||||
get_metadata(&dir, use_apparent_size).map(|data| {
|
||||
let inode_device = if is_symlink && !use_apparent_size {
|
||||
None
|
||||
@@ -34,11 +35,19 @@ pub fn build_node(
|
||||
data.1
|
||||
};
|
||||
|
||||
let size = if is_filtered_out_due_to_regex(filter_regex, &dir)
|
||||
|| is_filtered_out_due_to_invert_regex(invert_filter_regex, &dir)
|
||||
let size = if is_filtered_out_due_to_regex(walk_data.filter_regex, &dir)
|
||||
|| is_filtered_out_due_to_invert_regex(walk_data.invert_filter_regex, &dir)
|
||||
|| (is_symlink && !use_apparent_size)
|
||||
|| by_filecount && !is_file
|
||||
{
|
||||
|| [
|
||||
(&walk_data.filter_modified_time, data.2 .0),
|
||||
(&walk_data.filter_accessed_time, data.2 .1),
|
||||
(&walk_data.filter_changed_time, data.2 .2),
|
||||
]
|
||||
.iter()
|
||||
.any(|(filter_time, actual_time)| {
|
||||
is_filtered_out_due_to_file_time(filter_time, *actual_time)
|
||||
}) {
|
||||
0
|
||||
} else if by_filecount {
|
||||
1
|
||||
|
||||
+41
-6
@@ -10,15 +10,29 @@ fn get_block_size() -> u64 {
|
||||
512
|
||||
}
|
||||
|
||||
type InodeAndDevice = (u64, u64);
|
||||
type FileTime = (i64, i64, i64);
|
||||
|
||||
#[cfg(target_family = "unix")]
|
||||
pub fn get_metadata(d: &Path, use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> {
|
||||
pub fn get_metadata(
|
||||
d: &Path,
|
||||
use_apparent_size: bool,
|
||||
) -> Option<(u64, Option<InodeAndDevice>, FileTime)> {
|
||||
use std::os::unix::fs::MetadataExt;
|
||||
match d.metadata() {
|
||||
Ok(md) => {
|
||||
if use_apparent_size {
|
||||
Some((md.len(), Some((md.ino(), md.dev()))))
|
||||
Some((
|
||||
md.len(),
|
||||
Some((md.ino(), md.dev())),
|
||||
(md.mtime(), md.atime(), md.ctime()),
|
||||
))
|
||||
} else {
|
||||
Some((md.blocks() * get_block_size(), Some((md.ino(), md.dev()))))
|
||||
Some((
|
||||
md.blocks() * get_block_size(),
|
||||
Some((md.ino(), md.dev())),
|
||||
(md.mtime(), md.atime(), md.ctime()),
|
||||
))
|
||||
}
|
||||
}
|
||||
Err(_e) => None,
|
||||
@@ -26,7 +40,10 @@ pub fn get_metadata(d: &Path, use_apparent_size: bool) -> Option<(u64, Option<(u
|
||||
}
|
||||
|
||||
#[cfg(target_family = "windows")]
|
||||
pub fn get_metadata(d: &Path, use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> {
|
||||
pub fn get_metadata(
|
||||
d: &Path,
|
||||
use_apparent_size: bool,
|
||||
) -> Option<(u64, Option<InodeAndDevice>, FileTime)> {
|
||||
// On windows opening the file to get size, file ID and volume can be very
|
||||
// expensive because 1) it causes a few system calls, and more importantly 2) it can cause
|
||||
// windows defender to scan the file.
|
||||
@@ -93,7 +110,7 @@ pub fn get_metadata(d: &Path, use_apparent_size: bool) -> Option<(u64, Option<(u
|
||||
fn get_metadata_expensive(
|
||||
d: &Path,
|
||||
use_apparent_size: bool,
|
||||
) -> Option<(u64, Option<(u64, u64)>)> {
|
||||
) -> Option<(u64, Option<InodeAndDevice>, FileTime)> {
|
||||
use winapi_util::file::information;
|
||||
|
||||
let h = handle_from_path_limited(d).ok()?;
|
||||
@@ -104,11 +121,21 @@ pub fn get_metadata(d: &Path, use_apparent_size: bool) -> Option<(u64, Option<(u
|
||||
Some((
|
||||
d.size_on_disk().ok()?,
|
||||
Some((info.file_index(), info.volume_serial_number())),
|
||||
(
|
||||
info.last_write_time().unwrap() as i64,
|
||||
info.last_access_time().unwrap() as i64,
|
||||
info.creation_time().unwrap() as i64,
|
||||
),
|
||||
))
|
||||
} else {
|
||||
Some((
|
||||
info.file_size(),
|
||||
Some((info.file_index(), info.volume_serial_number())),
|
||||
(
|
||||
info.last_write_time().unwrap() as i64,
|
||||
info.last_access_time().unwrap() as i64,
|
||||
info.creation_time().unwrap() as i64,
|
||||
),
|
||||
))
|
||||
}
|
||||
}
|
||||
@@ -142,7 +169,15 @@ pub fn get_metadata(d: &Path, use_apparent_size: bool) -> Option<(u64, Option<(u
|
||||
|| md.file_attributes() == FILE_ATTRIBUTE_NORMAL)
|
||||
&& !((attr_filtered & IS_PROBABLY_ONEDRIVE != 0) && use_apparent_size)
|
||||
{
|
||||
Some((md.len(), None))
|
||||
Some((
|
||||
md.len(),
|
||||
None,
|
||||
(
|
||||
md.last_write_time() as i64,
|
||||
md.last_access_time() as i64,
|
||||
md.creation_time() as i64,
|
||||
),
|
||||
))
|
||||
} else {
|
||||
get_metadata_expensive(d, use_apparent_size)
|
||||
}
|
||||
|
||||
+14
-1
@@ -2,6 +2,9 @@ use platform::get_metadata;
|
||||
use std::collections::HashSet;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use crate::config::DAY_SECEONDS;
|
||||
|
||||
use crate::dir_walker::Operater;
|
||||
use crate::platform;
|
||||
use regex::Regex;
|
||||
|
||||
@@ -36,7 +39,7 @@ pub fn get_filesystem_devices<'a, P: IntoIterator<Item = &'a PathBuf>>(paths: P)
|
||||
paths
|
||||
.into_iter()
|
||||
.filter_map(|p| match get_metadata(p, false) {
|
||||
Some((_size, Some((_id, dev)))) => Some(dev),
|
||||
Some((_size, Some((_id, dev)), _time)) => Some(dev),
|
||||
_ => None,
|
||||
})
|
||||
.collect()
|
||||
@@ -62,6 +65,16 @@ pub fn is_filtered_out_due_to_regex(filter_regex: &[Regex], dir: &Path) -> bool
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_filtered_out_due_to_file_time(filter_time: &(Operater, i64), actual_time: i64) -> bool {
|
||||
match filter_time {
|
||||
(Operater::Equal, bound_time) => {
|
||||
!(actual_time >= *bound_time && actual_time < *bound_time + DAY_SECEONDS)
|
||||
}
|
||||
(Operater::GreaterThan, bound_time) => actual_time < *bound_time,
|
||||
(Operater::LessThan, bound_time) => actual_time > *bound_time,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_filtered_out_due_to_invert_regex(filter_regex: &[Regex], dir: &Path) -> bool {
|
||||
filter_regex
|
||||
.iter()
|
||||
|
||||
Reference in New Issue
Block a user