1 ! Copyright (C) 2022 Doug Coleman.
2 ! See https://factorcode.org/license.txt for BSD license.
3 USING: accessors assocs assocs.extras combinators
4 combinators.short-circuit formatting io io.backend
5 io.directories io.encodings.binary io.files io.files.info
6 io.files.types io.pathnames kernel math math.statistics prettyprint
7 sequences sets sorting splitting toml tools.memory.private
11 : file-sizes ( paths -- assoc )
12 [ dup file-info size>> ] { } map>assoc ;
14 : binary-file? ( path -- ? )
15 binary [ 1024 read ] with-file-reader [ 0 = ] any? ;
17 : binary-files ( paths -- ? ) [ binary-file? ] filter ;
19 : partition-binary ( paths -- binary text )
20 [ binary-file? ] partition ;
22 : with-file-extensions ( paths -- paths' )
23 [ has-file-extension? ] filter ;
25 : without-git-paths ( paths -- paths' )
26 [ "/.git/" subseq-of? ] reject ;
28 : without-node-modules-paths ( paths -- paths' )
29 [ "/node_modules/" subseq-of? ] reject ;
31 : regular-directory-files ( path -- seq )
32 recursive-directory-files
33 [ link-info type>> +regular-file+ = ] filter ;
35 : codebase-paths ( path -- seq )
36 regular-directory-files
38 without-node-modules-paths ;
40 : count-by-file-extension ( paths -- assoc )
42 [ file-extension ] histogram-by
45 : collect-extensions-by-line-count ( paths -- assoc )
50 : collect-by-file-extension ( paths -- assoc )
52 [ file-extension ] collect-by ;
54 : sum-line-counts-by-extension ( paths -- assoc )
55 [ binary-file? ] reject
56 collect-by-file-extension
57 [ [ wc ] map-sum ] assoc-map
60 : sum-sizes-by-extension ( paths -- assoc )
61 collect-by-file-extension
62 [ [ file-info size>> ] map-sum ] assoc-map
65 : upper-file? ( path -- ? )
67 [ { [ file-stem length 4 > ] [ file-extension length 3 <= ] } 1&& ]
69 [ file-stem [ { [ digit? ] [ "-." member? ] } 1|| ] all? not ]
71 : upper-files ( paths -- seq ) [ upper-file? ] filter ;
73 : configure-file? ( path -- ? ) file-name >lower { [ "configure" = ] [ "configure." head? ] } 1|| ;
74 : configure-files ( paths -- paths' ) [ configure-file? ] filter ;
76 : cmake-file? ( path -- ? ) { [ "CMakeLists.txt" tail? ] [ ".cmake" tail? ] } 1|| ;
77 : cmake-files ( paths -- paths' ) [ cmake-file? ] filter ;
78 : uses-cmake? ( paths -- ? ) [ cmake-file? ] any? ;
80 : in-file? ( paths -- ? ) ".in" tail? ;
81 : in-files ( paths -- seq ) [ in-file? ] filter ;
82 : uses-in-files? ( paths -- ? ) [ in-file? ] any? ;
84 : shell-file? ( path -- ? ) >lower file-extension { "sh" "zsh" } member? ;
85 : shell-files ( paths -- paths' ) [ shell-file? ] filter ;
86 : uses-shell? ( paths -- ? ) [ shell-file? ] any? ;
88 : swift-files ( paths -- paths' ) [ ".swift" tail? ] filter ;
90 : c-file? ( path -- ? )
91 >lower file-extension { "h" "c" } member? ;
92 : c-files ( paths -- paths' ) [ c-file? ] filter ;
94 : cpp-file? ( path -- ? )
95 >lower file-extension { "h" "hh" "hpp" "cc" "cpp" } member? ;
96 : cpp-files ( paths -- paths' ) [ cpp-file? ] filter ;
98 : python-file? ( path -- ? )
99 >lower file-extension {
100 "py" "py3" "pyc" "pyo" "pyw" "pyx" "pyd"
101 "pxd" "pxi" "pyd" "pxi" "pyi" "pyz" "pwxz" "pth"
103 : python-files ( paths -- paths' ) [ python-file? ] filter ;
105 : markdown-file? ( path -- ? ) { [ ".md" tail? ] [ ".markdown" tail? ] } 1|| ;
106 : markdown-files ( paths -- paths' ) [ markdown-file? ] filter ;
108 : dot-file? ( path -- ? ) file-name "." head? ;
109 : dot-files ( paths -- paths' ) [ dot-file? ] filter ;
111 : txt-file? ( path -- ? )
113 [ { [ ".txt" tail? ] [ ".TXT" tail? ] } 1|| ]
114 [ "CMakeLists.txt" tail? not ]
116 : txt-files ( paths -- paths' ) [ txt-file? ] filter ;
118 : license-file? ( path -- ? )
119 >lower { [ file-stem "license" = ] [ "license-mit" tail? ] } 1|| ;
120 : license-files ( paths -- paths' ) [ license-file? ] filter ;
122 : readme-file? ( path -- ? )
123 >lower file-stem "readme" = ;
124 : readme-files ( paths -- paths' ) [ readme-file? ] filter ;
126 : owners-file? ( path -- ? )
127 >lower file-stem "owners" = ;
128 : owners-files ( paths -- paths' ) [ owners-file? ] filter ;
130 : version-file? ( path -- ? )
131 >lower file-stem "version" = ;
132 : version-files ( paths -- paths' ) [ version-file? ] filter ;
134 : json-file? ( path -- ? )
135 >lower file-extension
136 { "json" "jsonc" } member? ;
138 : json-files ( paths -- paths' ) [ json-file? ] filter ;
140 : yaml-file? ( path -- ? ) { [ ".yaml" tail? ] [ ".yml" tail? ] } 1|| ;
141 : yaml-files ( paths -- paths' ) [ yaml-file? ] filter ;
142 : uses-yaml? ( paths -- ? ) [ yaml-file? ] any? ;
144 : docker-file? ( path -- ? ) >lower file-name { "dockerfile" ".dockerignore" "docker-compose.yaml" } member? ;
145 : docker-files ( paths -- paths' ) [ docker-file? ] filter ;
146 : uses-docker? ( paths -- ? ) [ docker-file? ] any? ;
148 : automake-file? ( path -- ? )
151 [ "makefile.am" tail? ]
152 [ "makefile.am.inc" tail? ]
154 : automake-files ( paths -- paths' ) [ automake-file? ] filter ;
155 : uses-automake? ( paths -- ? ) [ automake-file? ] any? ;
157 : make-file? ( path -- ? )
158 >lower file-name { "gnumakefile" "makefile" } member? ;
159 : make-files ( paths -- paths' ) [ make-file? ] filter ;
160 : uses-make? ( paths -- ? ) [ make-file? ] any? ;
162 : nmake-file? ( path -- ? ) >lower file-name "nmakefile" = ;
163 : nmake-files ( paths -- paths' ) [ nmake-file? ] filter ;
164 : uses-nmake? ( paths -- ? ) [ nmake-file? ] any? ;
166 : gradle-file? ( path -- ? ) >lower { [ "gradle" head? ] [ ".gradle" tail? ] } 1|| ;
167 : gradle-files ( paths -- paths' ) [ gradle-file? ] filter ;
168 : uses-gradle? ( paths -- ? ) [ gradle-file? ] any? ;
170 : github-file? ( path -- ? ) >lower ".github" swap subseq? ;
171 : github-files ( paths -- paths' ) [ github-file? ] filter ;
172 : has-github-files? ( paths -- ? ) [ github-file? ] any? ;
174 : cargo-file? ( path -- ? ) file-name { "Cargo.toml" "Cargo.lock" } member? ;
175 : cargo-files ( paths -- paths' ) [ cargo-file? ] filter ;
176 : has-cargo-files? ( paths -- ? ) [ cargo-file? ] any? ;
178 : julia-project-file? ( path -- ? ) file-name { "Project.toml" } member? ;
179 : julia-project-files ( paths -- paths' ) [ julia-project-file? ] filter ;
180 : has-julia-project-files? ( paths -- ? ) [ julia-project-file? ] any? ;
182 : rust-project-dir? ( path -- ? ) file-name "Cargo.toml" = ;
184 : rust-source-file? ( path -- ? )
189 : rust-source-files ( path -- paths ) [ rust-source-file? ] filter ;
191 : rust-build-system-files ( path -- ? )
193 [ "Carg.toml" tail? ]
194 [ "Carg.lock" tail? ]
197 : rust-intermediate-build-files ( path -- ? )
204 : rust-output-files ( path -- ? )
212 : analyze-rust-cargo-toml ( assoc -- )
214 [ "workspace" of "members" of length [ " %d member projects" sprintf print ] unless-zero ]
215 [ "package" of "name" of [ " name: %s" sprintf print ] when* ]
216 [ "package" of "version" of [ " version: %s" sprintf print ] when* ]
217 [ "package" of "license" of [ " license: %s" sprintf print ] when* ]
218 [ "package" of "edition" of [ " rust edition: %s" sprintf print ] when* ]
221 : analyze-rust-project ( path -- )
222 [ "Analyzing rust project at %s" sprintf print ]
223 [ path>toml analyze-rust-cargo-toml ]
224 [ containing-directory recursive-directory-files ] tri
226 [ rust-source-files length " %d rust source files" sprintf print ]
229 : web-file? ( path -- ? )
230 >lower file-extension
232 "css" "scss" "js" "jsx" "ejs" "mjs" "ts" "tsx" "json" "html"
233 "less" "mustache" "snap" "wasm"
235 : web-files ( paths -- paths' ) [ web-file? ] filter ;
237 : rc-file? ( path -- ? ) >lower file-name { [ "." head? ] [ "rc" tail? ] } 1&& ;
238 : rc-files ( paths -- paths' ) [ rc-file? ] filter ;
240 : env-file? ( path -- ? ) >lower ".env" tail? ;
241 : env-files ( paths -- paths' ) [ env-file? ] filter ;
243 : image-file? ( path -- ? ) >lower file-extension { "png" "jpg" "jpeg" "ico" } member? ;
244 : image-files ( paths -- paths' ) [ image-file? ] filter ;
246 : ignore-file? ( path -- ? ) >lower file-name { [ "." head? ] [ "ignore" tail? ] } 1&& ;
247 : ignore-files ( paths -- paths' ) [ ignore-file? ] filter ;
249 : has-package-json? ( path -- ? ) "package.json" append-path file-exists? ;
250 : uses-git? ( path -- ? ) ".git" append-path file-exists? ;
252 : diff-paths ( paths quot -- paths' )
253 keep swap [ [ normalize-path ] map ] bi@ diff ; inline
255 : assoc. ( assoc -- )
256 [ commas ] map-values simple-table. ;
258 : analyze-codebase-path ( path -- )
260 [ normalize-path "project at path `%s`" sprintf print nl ]
261 [ uses-git? [ "uses git" print ] when ]
262 [ has-package-json? [ "has a package.json file" print ] when ]
265 : analyze-codebase-paths ( paths -- )
269 [ length "%d binary files" sprintf print ]
270 [ length "%d text files" sprintf print ] bi*
272 [ github-files [ "has .github files" print ... ] unless-empty ]
273 [ license-files [ [ length "has %d license files" sprintf print ] [ ... ] bi ] unless-empty ]
274 [ readme-files [ "has readme files" print ... ] unless-empty ]
275 [ owners-files [ "has owners files" print ... ] unless-empty ]
276 [ version-files [ "has version files" print ... ] unless-empty ]
278 { [ dot-files ] [ rc-files diff ] [ ignore-files diff ] } cleave
279 [ "has dot files" print ... ] unless-empty
281 [ rc-files [ [ length "has %d rc files" sprintf print ] [ ... ] bi ] unless-empty ]
282 [ configure-files [ "uses configure files" print ... ] unless-empty ]
283 [ automake-files [ "uses automake" print ... ] unless-empty ]
284 [ make-files [ "uses make" print ... ] unless-empty ]
285 [ nmake-files [ "uses nmake" print ... ] unless-empty ]
286 [ cmake-files [ "uses cmake" print ... ] unless-empty ]
287 [ gradle-files [ "uses gradle" print ... ] unless-empty ]
288 [ cargo-files [ "uses rust/cargo" print ... ] unless-empty ]
289 [ julia-project-files [ "uses julia Project.toml" print ... ] unless-empty ]
290 [ in-files [ "uses 'in' files" print ... ] unless-empty ]
291 [ ignore-files [ [ length "has %d ignore files" sprintf print ] [ ... ] bi ] unless-empty nl ]
292 [ [ rust-project-dir? ] filter [ [ "rust projects at " print . ] [ [ analyze-rust-project ] each ] bi ] unless-empty nl ]
296 [ license-files diff ]
297 [ readme-files diff ]
298 [ owners-files diff ]
299 [ version-files diff ]
301 [ [ length "has %d UPPER files (minus license,readme,owner,version)" sprintf print ] [ ... ] bi ] unless-empty nl
303 [ "Top 20 largest files" print file-sizes sort-values 20 index-or-length tail* [ normalize-path ] map-keys reverse assoc. nl ]
304 [ "Top 10 file extension sizes" print sum-sizes-by-extension 10 index-or-length tail* reverse assoc. nl ]
305 [ "Top 10 text file line counts" print sum-line-counts-by-extension 10 index-or-length tail* reverse assoc. nl ]
306 [ "Top 10 file extension counts" print count-by-file-extension 10 index-or-length tail* reverse assoc. nl ]
309 : analyze-codebase ( path -- )
310 [ analyze-codebase-path ]
311 [ codebase-paths analyze-codebase-paths ] bi ;
313 : analyze-codebases ( path -- )
314 [ directory-files ] keep [ prepend-path ] curry map
315 [ file-info directory? ] filter
316 [ analyze-codebase ] each ;