1 ! Copyright (C) 2022 Doug Coleman.
2 ! See http://factorcode.org/license.txt for BSD license.
3 USING: accessors alien.c-types assocs assocs.extras combinators
4 combinators.short-circuit formatting io io.backend
5 io.directories io.encodings.binary io.files io.files.info
6 io.files.types io.pathnames kernel math math.statistics
7 prettyprint sequences sets sorting specialized-arrays
8 tools.memory.private tools.wc unicode ;
11 : file-sizes ( paths -- assoc )
12 [ dup file-info size>> ] { } map>assoc ;
14 : binary-file? ( path -- ? )
15 binary [ 1024 read ] with-file-reader [ 0 = ] any? ;
17 : binary-files ( paths -- ? ) [ binary-file? ] filter ;
19 : partition-binary ( paths -- binary text )
20 [ binary-file? ] partition ;
22 : with-file-extensions ( paths -- paths' )
23 [ has-file-extension? ] filter ;
25 : without-git-paths ( paths -- paths' )
26 [ "/.git/" find-subseq? ] reject ;
28 : without-node-modules-paths ( paths -- paths' )
29 [ "/node_modules/" find-subseq? ] reject ;
31 : regular-directory-files ( path -- seq )
32 recursive-directory-files
33 [ link-info type>> +regular-file+ = ] filter ;
35 : codebase-paths ( path -- seq )
36 regular-directory-files
39 : count-by-file-extension ( paths -- assoc )
41 [ file-extension ] histogram-by
44 : collect-extensions-by-line-count ( paths -- assoc )
49 : collect-by-file-extension ( paths -- assoc )
51 [ file-extension ] collect-by ;
53 : sum-line-counts-by-extension ( paths -- assoc )
54 [ binary-file? ] reject
55 collect-by-file-extension
56 [ [ wc ] map-sum ] assoc-map
59 : sum-sizes-by-extension ( paths -- assoc )
60 collect-by-file-extension
61 [ [ file-info size>> ] map-sum ] assoc-map
65 : cmake-file? ( path -- ? ) { [ "CMakeLists.txt" tail? ] [ ".cmake" tail? ] } 1|| ;
66 : cmake-files ( paths -- paths' ) [ cmake-file? ] filter ;
67 : uses-cmake? ( paths -- ? ) [ cmake-file? ] any? ;
69 : shell-file? ( path -- ? ) >lower file-extension { "sh" "zsh" } member? ;
70 : shell-files ( paths -- paths' ) [ shell-file? ] filter ;
71 : uses-shell? ( paths -- ? ) [ shell-file? ] any? ;
73 : swift-files ( paths -- paths' ) [ ".swift" tail? ] filter ;
75 : c-file? ( path -- ? )
76 >lower file-extension { "h" "c" } member? ;
77 : c-files ( paths -- paths' ) [ c-file? ] filter ;
79 : cpp-file? ( path -- ? )
80 >lower file-extension { "h" "hh" "hpp" "cc" "cpp" } member? ;
81 : cpp-files ( paths -- paths' ) [ cpp-file? ] filter ;
83 : python-file? ( path -- ? )
84 >lower file-extension {
85 "py" "py3" "pyc" "pyo" "pyw" "pyx" "pyd"
86 "pxd" "pxi" "pyd" "pxi" "pyi" "pyz" "pwxz" "pth"
88 : python-files ( paths -- paths' ) [ python-file? ] filter ;
90 : markdown-file? ( path -- ? ) { [ ".md" tail? ] [ ".markdown" tail? ] } 1|| ;
91 : markdown-files ( paths -- paths' ) [ markdown-file? ] filter ;
93 : txt-file? ( path -- ? )
95 [ { [ ".txt" tail? ] [ ".TXT" tail? ] } 1|| ]
96 [ "CMakeLists.txt" tail? not ]
98 : txt-files ( paths -- paths' ) [ txt-file? ] filter ;
100 : license-file? ( path -- ? )
101 >lower { [ file-stem "license" = ] [ "license-mit" tail? ] } 1|| ;
103 : license-files ( paths -- paths' ) [ license-file? ] filter ;
105 : json-file? ( path -- ? )
106 >lower file-extension
107 { "json" "jsonc" } member? ;
109 : json-files ( paths -- paths' ) [ json-file? ] filter ;
111 : yaml-file? ( path -- ? ) { [ ".yaml" tail? ] [ ".yml" tail? ] } 1|| ;
112 : yaml-files ( paths -- paths' ) [ yaml-file? ] filter ;
113 : uses-yaml? ( paths -- ? ) [ yaml-file? ] any? ;
115 : docker-file? ( path -- ? ) >lower file-name { "dockerfile" ".dockerignore" "docker-compose.yaml" } member? ;
116 : docker-files ( paths -- paths' ) [ docker-file? ] filter ;
117 : uses-docker? ( paths -- ? ) [ docker-file? ] any? ;
119 : make-file? ( path -- ? ) >lower file-name { "gnumakefile" "makefile" "nmakefile" } member? ;
120 : make-files ( paths -- paths' ) [ make-file? ] filter ;
121 : uses-make? ( paths -- ? ) [ make-file? ] any? ;
123 : web-file? ( path -- ? )
124 >lower file-extension
126 "css" "scss" "js" "jsx" "ejs" "mjs" "ts" "tsx" "json" "html"
127 "less" "mustache" "snap" "wasm"
129 : web-files ( paths -- paths' ) [ web-file? ] filter ;
131 : rc-file? ( path -- ? ) >lower file-name { [ "." head? ] [ "rc" tail? ] } 1&& ;
132 : rc-files ( paths -- paths' ) [ rc-file? ] filter ;
134 : env-file? ( path -- ? ) >lower ".env" tail? ;
135 : env-files ( paths -- paths' ) [ env-file? ] filter ;
137 : image-file? ( path -- ? ) >lower file-extension { "png" "jpg" "jpeg" "ico" } member? ;
138 : image-files ( paths -- paths' ) [ image-file? ] filter ;
140 : ignore-file? ( path -- ? ) >lower file-name { [ "." head? ] [ "ignore" tail? ] } 1&& ;
141 : ignore-files ( paths -- paths' ) [ ignore-file? ] filter ;
143 : has-package-json? ( path -- ? ) "package.json" append-path file-exists? ;
144 : uses-git? ( path -- ? ) ".git" append-path file-exists? ;
146 : diff-paths ( paths quot -- paths' )
147 keep swap [ [ normalize-path ] map ] bi@ diff ; inline
149 : assoc. ( assoc -- )
150 [ commas ] map-values simple-table. ;
152 : analyze-codebase-path ( path -- )
154 [ normalize-path "project at path `%s`" sprintf print nl ]
155 [ uses-git? [ "uses git" print ] when ]
156 [ has-package-json? [ "has a package.json file" print ] when ]
159 : analyze-codebase-paths ( paths -- )
163 [ length "%d binary files" sprintf print ]
164 [ length "%d text files" sprintf print ] bi*
166 [ uses-cmake? [ "uses cmake" print ] when ]
167 [ uses-make? [ "uses make" print ] when ]
168 [ rc-files [ length "has %d rc files" sprintf print ] unless-empty ]
169 [ ignore-files [ length "has %d ignore files" sprintf print ] unless-empty nl ]
170 [ "Top 20 largest files" print file-sizes sort-values 20 sequences:short tail* [ normalize-path ] map-keys reverse assoc. nl ]
171 [ "Top 10 file extension sizes" print sum-sizes-by-extension 10 sequences:short tail* reverse assoc. nl ]
172 [ "Top 10 text file line counts" print sum-line-counts-by-extension 10 sequences:short tail* reverse assoc. nl ]
173 [ "Top 10 file extension counts" print count-by-file-extension 10 sequences:short tail* reverse assoc. nl ]
176 : analyze-codebase ( path -- )
177 [ analyze-codebase-path ]
178 [ codebase-paths analyze-codebase-paths ] bi ;
180 : analyze-codebases ( path -- )
181 [ directory-files ] keep [ prepend-path ] curry map
182 [ analyze-codebase ] each ;