From 86b3cccaf6f28314d251f17ff49afbb75dd58735 Mon Sep 17 00:00:00 2001 From: Xavier L'Heureux Date: Sun, 24 Nov 2019 13:26:14 -0500 Subject: [PATCH 1/6] perf(IO): use parallel walkdir (jwalk) for super faster traversal --- Cargo.lock | 184 ++++++++++++++++++++++++++++++++++++++---- Cargo.toml | 2 +- src/utils/mod.rs | 8 +- src/utils/platform.rs | 15 ++-- 4 files changed, 185 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3f0167f..82732a0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,7 +17,7 @@ dependencies = [ "difference 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "environment 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)", "skeptic 0.13.4 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -30,6 +30,11 @@ dependencies = [ "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "autocfg" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "backtrace" version = "0.3.40" @@ -75,9 +80,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "error-chain 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)", "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.42 
(registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -114,6 +119,67 @@ dependencies = [ "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "crossbeam" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-channel 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-epoch 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-queue 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "crossbeam-channel" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crossbeam-utils 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "crossbeam-deque" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crossbeam-epoch 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "memoffset 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", + "scopeguard 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = 
"crossbeam-queue" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crossbeam-utils 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "crossbeam-utils" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "difference" version = "1.0.0" @@ -126,10 +192,15 @@ dependencies = [ "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "assert_cli 0.5.4 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", + "jwalk 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "walkdir 2.2.9 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "either" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "environment" version = "0.1.1" @@ -172,11 +243,28 @@ name = "glob" version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "hermit-abi" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "itoa" version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "jwalk" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crossbeam 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", + "rayon 1.2.1 
(registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -187,6 +275,23 @@ name = "libc" version = "0.2.65" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "memoffset" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num_cpus" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "hermit-abi 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "ppv-lite86" version = "0.2.6" @@ -278,6 +383,28 @@ dependencies = [ "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "rayon" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", + "either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)", + "rayon-core 1.6.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rayon-core" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-queue 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "num_cpus 1.11.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "rdrand" version = "0.4.0" @@ -304,6 +431,14 @@ name = "rustc-demangle" version = "0.1.16" source = 
"registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "ryu" version = "1.0.2" @@ -317,13 +452,18 @@ dependencies = [ "winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "scopeguard" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "semver" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -333,12 +473,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "serde" -version = "1.0.102" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "serde_derive" -version = "1.0.102" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", @@ -348,12 +488,12 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.41" +version = "1.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", "ryu 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -366,7 +506,7 @@ dependencies = [ "error-chain 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)", 
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "pulldown-cmark 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)", "tempdir 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", "walkdir 2.2.9 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -482,6 +622,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" "checksum assert_cli 0.5.4 (registry+https://github.com/rust-lang/crates.io-index)" = "72342c21057a3cb5f7c2d849bf7999a83795434dd36d74fa8c24680581bd1930" "checksum atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)" = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90" +"checksum autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2" "checksum backtrace 0.3.40 (registry+https://github.com/rust-lang/crates.io-index)" = "924c76597f0d9ca25d762c25a4d369d51267536465dc5064bdf0eb073ed477ea" "checksum backtrace-sys 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)" = "5d6575f128516de27e3ce99689419835fce9643a9b215a14d2b5b685be018491" "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" @@ -492,16 +633,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" "checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" 
"checksum colored 1.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "433e7ac7d511768127ed85b0c4947f47a254131e37864b2dc13f52aa32cd37e5" +"checksum crossbeam 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "69323bff1fb41c635347b8ead484a5ca6c3f11914d784170b158d8449ab07f8e" +"checksum crossbeam-channel 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "acec9a3b0b3559f15aee4f90746c4e5e293b701c0f7d3925d24e01645267b68c" +"checksum crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c3aa945d63861bfe624b55d153a39684da1e8c0bc8fba932f7ee3a3c16cea3ca" +"checksum crossbeam-epoch 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5064ebdbf05ce3cb95e45c8b086f72263f4166b29b97f6baff7ef7fe047b55ac" +"checksum crossbeam-queue 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "dfd6515864a82d2f877b42813d4553292c6659498c9a2aa31bab5a15243c2700" +"checksum crossbeam-utils 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ce446db02cdc3165b94ae73111e570793400d0794e46125cc4056c81cbb039f4" "checksum difference 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b3304d19798a8e067e48d8e69b2c37f0b5e9b4e462504ad9e27e9f3fce02bba8" +"checksum either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" "checksum environment 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1f4b14e20978669064c33b4c1e0fb4083412e40fe56cbea2eae80fd7591503ee" "checksum error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff511d5dc435d703f4971bc399647c9bc38e20cb41452e3b9feb4765419ed3f3" "checksum error-chain 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3ab49e9dcb602294bc42f9a7dfc9bc6e936fca4418ea300dbfb84fe16de0b7d9" "checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = 
"a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" "checksum getrandom 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "e7db7ca94ed4cd01190ceee0d8a8052f08a247aa1b469a7f68c6a3b71afcf407" "checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb" +"checksum hermit-abi 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "307c3c9f937f38e3534b1d6447ecf090cafcc9744e4a6360e8b037b2cf5af120" "checksum itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f" +"checksum jwalk 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2b3dbf0a8f61baee43a2918ff50ac6a2d3b2c105bc08ed53bc298779f1263409" "checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" "checksum libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)" = "1a31a0627fdf1f6a39ec0dd577e101440b7db22672c0901fe00a9a6fbb5c24e8" +"checksum memoffset 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "75189eb85871ea5c2e2c15abbdd541185f63b408415e5051f5cac122d8c774b9" +"checksum num_cpus 1.11.1 (registry+https://github.com/rust-lang/crates.io-index)" = "76dac5ed2a876980778b8b85f75a71b6cbf0db0b1232ee12f826bccb00d09d72" "checksum ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b" "checksum proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9c9e470a8dc4aeae2dee2f335e8f533e2d4b347e1434e5671afc49b054592f27" "checksum pulldown-cmark 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "eef52fac62d0ea7b9b4dc7da092aa64ea7ec3d90af6679422d3d7e0e14b6ee15" @@ -513,17 +665,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum rand_core 
0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" "checksum rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" "checksum rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +"checksum rayon 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "43739f8831493b276363637423d3622d4bd6394ab6f0a9c4a552e208aeb7fddd" +"checksum rayon-core 1.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f8bf17de6f23b05473c437eb958b9c850bfc8af0961fe17b4cc92d5a627b4791" "checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" "checksum redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)" = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" "checksum remove_dir_all 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e" "checksum rustc-demangle 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783" +"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" "checksum ryu 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "bfa8506c1de11c9c4e4c38863ccbe02a305c8188e85a05a784c9e11e1c3910c8" "checksum same-file 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "585e8ddcedc187886a30fa705c47985c3fa88d06624095856b36ca0b82ff4421" +"checksum scopeguard 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b42e15e59b18a828bbf5c58ea01debb36b9b096346de35d941dcb89009f24a0d" "checksum semver 0.9.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" "checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" -"checksum serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)" = "0c4b39bd9b0b087684013a792c59e3e07a46a01d2322518d8a1104641a0b1be0" -"checksum serde_derive 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)" = "ca13fc1a832f793322228923fbb3aba9f3f44444898f835d31ad1b74fa0a2bf8" -"checksum serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)" = "2f72eb2a68a7dc3f9a691bfda9305a1c017a6215e5a4545c258500d2099a37c2" +"checksum serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)" = "1217f97ab8e8904b57dd22eb61cde455fa7446a9c1cf43966066da047c1f3702" +"checksum serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)" = "a8c6faef9a2e64b0064f48570289b4bf8823b7581f1d6157c1b52152306651d0" +"checksum serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)" = "1a3351dcbc1f067e2c92ab7c3c1f288ad1a4cffc470b5aaddb4c2e0a3ae80043" "checksum skeptic 0.13.4 (registry+https://github.com/rust-lang/crates.io-index)" = "d6fb8ed853fdc19ce09752d63f3a2e5b5158aeb261520cd75eb618bd60305165" "checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" "checksum syn 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)" = "661641ea2aa15845cddeb97dad000d22070bb5c1fb456b96c1cba883ec691e92" diff --git a/Cargo.toml b/Cargo.toml index cd4f5d2..47f6ce4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,7 @@ path = "src/main.rs" [dependencies] ansi_term = "=0.11" clap = "=2.33" -walkdir = "=2" +jwalk = "0.4" [dev-dependencies] assert_cli = "=0.5" diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 6ab741d..ff91454 100644 --- 
a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -2,7 +2,7 @@ use std::cmp::Ordering; use std::collections::HashMap; use std::collections::HashSet; -use walkdir::WalkDir; +use jwalk::WalkDir; mod platform; use self::platform::*; @@ -78,7 +78,7 @@ fn examine_dir( data: &mut HashMap, file_count_no_permission: &mut u64, ) { - for entry in WalkDir::new(top_dir) { + for entry in WalkDir::new(top_dir).preload_metadata(true) { if let Ok(e) = entry { let maybe_size_and_inode = get_metadata(&e, apparent_size); @@ -93,12 +93,12 @@ fn examine_dir( } } // This path and all its parent paths have their counter incremented - let mut e_path = e.path().to_path_buf(); + let mut e_path = e.path(); loop { let path_name = e_path.to_string_lossy().to_string(); let s = data.entry(path_name.clone()).or_insert(0); *s += size; - if path_name == *top_dir { + if path_name == top_dir || path_name == "/" { break; } assert!(path_name != ""); diff --git a/src/utils/platform.rs b/src/utils/platform.rs index 49cba52..e694ac6 100644 --- a/src/utils/platform.rs +++ b/src/utils/platform.rs @@ -1,4 +1,4 @@ -use walkdir::DirEntry; +use jwalk::DirEntry; fn get_block_size() -> u64 { // All os specific implementations of MetatdataExt seem to define a block as 512 bytes @@ -9,17 +9,22 @@ fn get_block_size() -> u64 { #[cfg(target_family = "unix")] pub fn get_metadata(d: &DirEntry, use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> { use std::os::unix::fs::MetadataExt; - d.metadata().ok().and_then(|md| { + d.metadata.as_ref().unwrap().as_ref().ok().map(|md| { let inode = Some((md.ino(), md.dev())); if use_apparent_size { - Some((md.len(), inode)) + (md.len(), inode) } else { - Some((md.blocks() * get_block_size(), inode)) + (md.blocks() * get_block_size(), inode) } }) } #[cfg(not(target_family = "unix"))] pub fn get_metadata(d: &DirEntry, _apparent: bool) -> Option<(u64, Option<(u64, u64)>)> { - d.metadata().ok().map_or(None, |md| Some((md.len(), None))) + d.metadata + .as_ref() + .unwrap() + .as_ref() 
+ .ok() + .map(|md| (md.len(), None)) } From f8ce6c97bf54afa5183b7d3c5fe6f79e1e6a1532 Mon Sep 17 00:00:00 2001 From: Xavier L'Heureux Date: Sun, 24 Nov 2019 21:57:16 -0500 Subject: [PATCH 2/6] Use more rusty patterns and preallocate enough space --- src/main.rs | 23 +++++++++++------------ src/utils/mod.rs | 40 ++++++++++++++++++---------------------- 2 files changed, 29 insertions(+), 34 deletions(-) diff --git a/src/main.rs b/src/main.rs index e91161c..ce4fcfa 100644 --- a/src/main.rs +++ b/src/main.rs @@ -109,18 +109,14 @@ fn main() { } fn build_tree(biggest_ones: Vec<(String, u64)>, depth: Option) -> Node { - let mut top_parent = Node { - name: "".to_string(), - size: 0, - children: vec![], - }; + let mut top_parent = Node::default(); // assume sorted order for b in biggest_ones { let n = Node { name: b.0, size: b.1, - children: vec![], + children: Vec::default(), }; recursively_build_tree(&mut top_parent, n, depth) } @@ -133,13 +129,16 @@ fn recursively_build_tree(parent_node: &mut Node, new_node: Node, depth: Option< Some(0) => return, Some(d) => Some(d - 1), }; - for c in parent_node.children.iter_mut() { - if new_node.name.starts_with(&c.name) { - return recursively_build_tree(&mut *c, new_node, new_depth); - } + if let Some(c) = parent_node + .children + .iter_mut() + .find(|c| new_node.name.starts_with(&c.name)) + { + recursively_build_tree(&mut *c, new_node, new_depth); + } else { + let temp = Box::::new(new_node); + parent_node.children.push(temp); } - let temp = Box::::new(new_node); - parent_node.children.push(temp); } #[cfg(test)] diff --git a/src/utils/mod.rs b/src/utils/mod.rs index ff91454..5253783 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,13 +1,14 @@ use std::cmp::Ordering; use std::collections::HashMap; use std::collections::HashSet; +use std::path::PathBuf; use jwalk::WalkDir; mod platform; use self::platform::*; -#[derive(Debug)] +#[derive(Debug, Default)] pub struct Node { pub name: String, pub size: u64, @@ -15,26 +16,25 @@ 
pub struct Node { } pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet { - let mut top_level_names: HashSet = HashSet::new(); + let mut top_level_names: HashSet = HashSet::with_capacity(filenames.len()); + let mut to_remove: Vec = Vec::with_capacity(filenames.len()); for t in filenames { let top_level_name = ensure_end_slash(t); let mut can_add = true; - let mut to_remove: Vec = Vec::new(); for tt in top_level_names.iter() { - let temp = tt.to_string(); - if top_level_name.starts_with(&temp) { + if top_level_name.starts_with(tt) { can_add = false; } else if tt.starts_with(&top_level_name) { - to_remove.push(temp); + to_remove.push(tt.to_string()); } } - for tr in to_remove { - top_level_names.remove(&tr); - } + to_remove.sort_unstable(); + top_level_names.retain(|tr| to_remove.binary_search(tr).is_err()); + to_remove.clear(); if can_add { - top_level_names.insert(strip_end_slash(t)); + top_level_names.insert(strip_end_slash(t).to_owned()); } } @@ -63,10 +63,9 @@ pub fn ensure_end_slash(s: &str) -> String { new_name + "/" } -pub fn strip_end_slash(s: &str) -> String { - let mut new_name = String::from(s); +pub fn strip_end_slash(mut new_name: &str) -> &str { while (new_name.ends_with('/') || new_name.ends_with("/.")) && new_name.len() > 1 { - new_name.pop(); + new_name = &new_name[..new_name.len() - 1]; } new_name } @@ -93,16 +92,13 @@ fn examine_dir( } } // This path and all its parent paths have their counter incremented - let mut e_path = e.path(); - loop { - let path_name = e_path.to_string_lossy().to_string(); - let s = data.entry(path_name.clone()).or_insert(0); + for path_name in PathBuf::from(e.path()).ancestors() { + let path_name = path_name.to_string_lossy(); + let s = data.entry(path_name.to_string()).or_insert(0); *s += size; - if path_name == top_dir || path_name == "/" { + if path_name == top_dir { break; } - assert!(path_name != ""); - e_path.pop(); } } None => *file_count_no_permission += 1, @@ -124,7 +120,7 @@ pub fn 
sort_by_size_first_name_second(a: &(String, u64), b: &(String, u64)) -> O pub fn sort(data: HashMap) -> Vec<(String, u64)> { let mut new_l: Vec<(String, u64)> = data.iter().map(|(a, b)| (a.clone(), *b)).collect(); - new_l.sort_by(|a, b| sort_by_size_first_name_second(&a, &b)); + new_l.sort_unstable_by(sort_by_size_first_name_second); new_l } @@ -141,7 +137,7 @@ pub fn trim_deep_ones( max_depth: u64, top_level_names: &HashSet, ) -> Vec<(String, u64)> { - let mut result: Vec<(String, u64)> = vec![]; + let mut result: Vec<(String, u64)> = Vec::with_capacity(input.len() * top_level_names.len()); for name in top_level_names { let my_max_depth = name.matches('/').count() + max_depth as usize; From bf28d424837cba93d3c7676dcfe0d6f9daf523d2 Mon Sep 17 00:00:00 2001 From: Xavier L'Heureux Date: Sun, 24 Nov 2019 22:19:49 -0500 Subject: [PATCH 3/6] Update Performance section of README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 767ab85..55eca92 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ djin:git/dust> dust ## Performance -Dust is currently about 4 times slower than du. +Dust uses a parallel fetching implementation that greatly improves performance compared to du for directory trees with a reasonable number of files (read: more than 20). This can be as much as 7x faster than du on a clean cache. 
## Alternatives From 62ac9b623adcbc7cc148c523fa8e19ca36c287e8 Mon Sep 17 00:00:00 2001 From: Xavier L'Heureux Date: Tue, 26 Nov 2019 08:25:38 -0500 Subject: [PATCH 4/6] Make sure to count the hidden directories --- src/utils/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 5253783..55f9487 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -77,7 +77,10 @@ fn examine_dir( data: &mut HashMap, file_count_no_permission: &mut u64, ) { - for entry in WalkDir::new(top_dir).preload_metadata(true) { + for entry in WalkDir::new(top_dir) + .preload_metadata(true) + .skip_hidden(false) + { if let Ok(e) = entry { let maybe_size_and_inode = get_metadata(&e, apparent_size); From 19a41aa3825a93b64e5b6f2dd719e4df4603e469 Mon Sep 17 00:00:00 2001 From: Xavier L'Heureux Date: Tue, 3 Dec 2019 18:27:02 -0500 Subject: [PATCH 5/6] Add CLI option for the number of threads to spawn --- src/main.rs | 36 ++++++++++++++++++++++-------------- src/utils/mod.rs | 20 ++++++++++++++++---- 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/src/main.rs b/src/main.rs index ce4fcfa..4a596fe 100644 --- a/src/main.rs +++ b/src/main.rs @@ -23,6 +23,13 @@ fn main() { .help("Depth to show") .takes_value(true), ) + .arg( + Arg::with_name("threads") + .short("t") + .long("threads") + .help("Number of threads to spawn simultaneously") + .takes_value(true), + ) .arg( Arg::with_name("number_of_lines") .short("n") @@ -67,19 +74,20 @@ fn main() { } }; - let depth = { - if options.is_present("depth") { - match value_t!(options.value_of("depth"), u64) { - Ok(v) => Some(v + 1), - Err(_) => { - eprintln!("Ignoring bad value for depth"); - None - } - } - } else { - None - } - }; + let threads = options.value_of("threads").and_then(|threads| { + threads + .parse::() + .map_err(|_| eprintln!("Ignoring bad value for threads: {:?}", threads)) + .ok() + }); + + let depth = options.value_of("depth").and_then(|depth| { + depth + 
.parse::() + .map(|v| v + 1) + .map_err(|_| eprintln!("Ignoring bad value for depth")) + .ok() + }); if options.is_present("depth") && number_of_lines != DEFAULT_NUMBER_OF_LINES { eprintln!("Use either -n or -d. Not both"); return; @@ -89,7 +97,7 @@ fn main() { let use_full_path = options.is_present("display_full_paths"); let simplified_dirs = simplify_dir_names(target_dirs); - let (permissions, nodes) = get_dir_tree(&simplified_dirs, use_apparent_size); + let (permissions, nodes) = get_dir_tree(&simplified_dirs, use_apparent_size, threads); let sorted_data = sort(nodes); let biggest_ones = { match depth { diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 55f9487..5b63be8 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -44,13 +44,21 @@ pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet { pub fn get_dir_tree( top_level_names: &HashSet, apparent_size: bool, + threads: Option, ) -> (bool, HashMap) { let mut permissions = 0; let mut inodes: HashSet<(u64, u64)> = HashSet::new(); let mut data: HashMap = HashMap::new(); for b in top_level_names.iter() { - examine_dir(&b, apparent_size, &mut inodes, &mut data, &mut permissions); + examine_dir( + &b, + apparent_size, + &mut inodes, + &mut data, + &mut permissions, + threads, + ); } (permissions == 0, data) } @@ -76,11 +84,15 @@ fn examine_dir( inodes: &mut HashSet<(u64, u64)>, data: &mut HashMap, file_count_no_permission: &mut u64, + cpus: Option, ) { - for entry in WalkDir::new(top_dir) + let mut iter = WalkDir::new(top_dir) .preload_metadata(true) - .skip_hidden(false) - { + .skip_hidden(false); + if let Some(cpus) = cpus { + iter = iter.num_threads(cpus); + } + for entry in iter { if let Ok(e) = entry { let maybe_size_and_inode = get_metadata(&e, apparent_size); From b66523cff3fc871b378d1f3ca4d3a970f2ab98dc Mon Sep 17 00:00:00 2001 From: Xavier L'Heureux Date: Tue, 3 Dec 2019 18:34:47 -0500 Subject: [PATCH 6/6] Apply clippy lints --- src/display.rs | 14 +++++++------- src/main.rs | 5 ++--- 
src/utils/mod.rs | 5 ++--- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/display.rs b/src/display.rs index 343ab81..f7d6a49 100644 --- a/src/display.rs +++ b/src/display.rs @@ -61,12 +61,12 @@ impl DisplayData { } } - fn get_children_from_node(&self, node: Node) -> impl Iterator> { + fn get_children_from_node(&self, node: Node) -> impl Iterator { if self.is_reversed { - let n: Vec> = node.children.into_iter().rev().map(|a| a).collect(); - return n.into_iter(); + let n: Vec = node.children.into_iter().rev().map(|a| a).collect(); + n.into_iter() } else { - return node.children.into_iter(); + node.children.into_iter() } } } @@ -82,7 +82,7 @@ pub fn draw_it(permissions: bool, use_full_path: bool, is_reversed: bool, root_n for c in display_data.get_children_from_node(root_node) { let first_tree_chars = display_data.get_first_chars(); - display_node(*c, true, first_tree_chars, &display_data) + display_node(c, true, first_tree_chars, &display_data) } } @@ -101,10 +101,10 @@ fn display_node(node: Node, is_biggest: bool, indent: &str, display_data: &Displ for c in display_data.get_children_from_node(node) { num_siblings -= 1; - let chars = display_data.get_tree_chars(num_siblings, max_sibling, c.children.len() > 0); + let chars = display_data.get_tree_chars(num_siblings, max_sibling, !c.children.is_empty()); let is_biggest = display_data.is_biggest(num_siblings, max_sibling); let full_indent = new_indent.clone() + chars; - display_node(*c, is_biggest, &*full_indent, display_data) + display_node(c, is_biggest, &*full_indent, display_data) } if display_data.is_reversed { diff --git a/src/main.rs b/src/main.rs index 4a596fe..fb28148 100644 --- a/src/main.rs +++ b/src/main.rs @@ -142,10 +142,9 @@ fn recursively_build_tree(parent_node: &mut Node, new_node: Node, depth: Option< .iter_mut() .find(|c| new_node.name.starts_with(&c.name)) { - recursively_build_tree(&mut *c, new_node, new_depth); + recursively_build_tree(c, new_node, new_depth); } else { - let 
temp = Box::::new(new_node); - parent_node.children.push(temp); + parent_node.children.push(new_node); } } diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 5b63be8..5e649d2 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,7 +1,6 @@ use std::cmp::Ordering; use std::collections::HashMap; use std::collections::HashSet; -use std::path::PathBuf; use jwalk::WalkDir; @@ -12,7 +11,7 @@ use self::platform::*; pub struct Node { pub name: String, pub size: u64, - pub children: Vec>, + pub children: Vec, } pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet { @@ -107,7 +106,7 @@ fn examine_dir( } } // This path and all its parent paths have their counter incremented - for path_name in PathBuf::from(e.path()).ancestors() { + for path_name in e.path().ancestors() { let path_name = path_name.to_string_lossy(); let s = data.entry(path_name.to_string()).or_insert(0); *s += size;