op public repos

Blob

Date:: Tue Jan 3 19:35:55 2023 UTC
Message:: rewrite diffstat so it actually parses the diff
Actions:: History | Blame | Raw File
1 # diffstat
2 
3 Show diff statistics.
4 
5 	#!/usr/bin/awk -f
6 
7 AWK is great.  All hail AWK!
8 
9 Now, some utility functions.  parsehdr extracts the number of lines
10 (old or new) in the following hunk.
11 
12 	function parsehdr(s) {	# s is one range such as "-55,7" or "+55"; returns its line count
13 		if (index(s, ",") == 0)
14 			return 1	# count omitted: the range covers exactly one line
15 		return gensub(".*,", "", 1, s) + 0	# keep what follows the comma, as a number
16 	}
17 
18 Switches the current file to the one provided.  It's a great place to
19 accumulate the part of the summary shown at the end and to reset the
20 per-file counters.
21 
22 	function switchfile(newfile) {
23 		# record the counters of the file being left, if there was one
24 		if (file != "")
25 			summary = summary sprintf("+%d -%d\t%s\n", add, rem, file)
26 
27 		# start counting from scratch for the new file
28 		file = newfile
29 		rem = 0
30 		add = 0
31 	}
32 
33 Now, the real "parser".  It starts in the "out" state.
34 
35 	BEGIN {
36 		state = "out"	# "out": not inside a hunk body; the "@@" rule switches to "in"
37 	}
38 
39 Match the start of a diff on the "+++" line.
40 
41 	state == "out" && /^\+\+\+ / {
42 		nfile = gensub("^\\+\\+\\+ ", "", 1)	# "\\+" is a literal plus; "\\\+" made gawk warn about the escape
43 		if (nfile == "/dev/null") {
44 
45 When deleting a file, the name will be "/dev/null", but that's not a
46 great name for the stats.  Let's use the "old" name instead.
47 
48 			nfile = delfile
49 		}
50 
51 		switchfile(nfile)
52 		delfile = ""	# the saved old name only applies to this file
53 	}
54 
55 Let's save the old name in case it's needed.
56 
57 	state == "out" && /^--- / {	# every file header, not only the first one in the diff
58 		delfile = gensub("^--- ", "", 1)	# old name, used if the "+++" name is /dev/null
59 	}
60 
61 Match the start of a hunk and switch the state to "in"
62 
63 	state == "out" && /^@@ / {
64 
65 This part is a bit complicated, but all it does is extract the number
66 of "new" and "old" lines shown in the hunk.  A hunk header looks like this
67 (except for the initial '#' character)
68 
69 		# @@ -55,7 +55,19 @@ ...
70 
71 So first extract the text inside the pair of "@@"
72 
73 		# peel off the leading "@@ ", then everything from " @@" onwards
74 		s = gensub(" @@.*", "", 1, gensub("@@ ", "", 1))
75 
76 and then parse each number.
77 
78 		# whatever precedes the first space is the old range
79 		old = parsehdr(gensub(" .*", "", 1, s))
80 
81 		# whatever follows the last space is the new range
82 		new = parsehdr(gensub(".* ", "", 1, s))
83 
84 Don't forget to switch the state of the parser, now we're reading a
85 hunk.
86 
87 		state = "in"
88 	}
89 
90 Keep count of the added and removed lines.  Also, decrement the "old" and
91 "new" counters when needed, to know when we're done with the hunk.
92 
93 	state == "in" && /^( |$)/ {	# context line; a blank one may arrive as a completely empty line
94 		old--
95 		new--
96 	}
97 
98 	state == "in" && /^-/ {
99 		rem += 1	# removals in the current file
100 		totrem += 1	# removals across the whole diff
101 		old -= 1	# one fewer "old" line left in this hunk
102 	}
103 
104 	state == "in" && /^\+/ {
105 		add += 1	# additions in the current file
106 		totadd += 1	# additions across the whole diff
107 		new -= 1	# one fewer "new" line left in this hunk
108 	}
109 
110 When there are no more "new" and "old" lines to read, go back to the
111 "out" state, ready to read another hunk or another file.
112 
113 	state == "in" && !(old > 0 || new > 0) {
114 		state = "out"	# both ranges are used up: the hunk is over
115 	}
116 
117 Don't be a sink!  Continue the pipeline so we can further save or apply
118 the diff.
119 
120 	{ print }	# no pattern: echo every input record unchanged
121 
122 At the end, print the stats to standard error to avoid mangling the
123 input.  Unfortunately, there doesn't seem to be a "built-in" way of
124 printing to stderr other than using the pseudo-device.
125 
126 	END {
127 		fflush()	# flush the echoed diff before writing the stats
128 		switchfile("")	# fold the last file's counters into the summary
129 
130 		stderr = "/dev/stderr"
131 		printf("%s+%d -%d\ttotal\n", summary, totadd, totrem) > stderr
132 	}
133 
134 some example usages:
135 
136 * cvs -q di | diffstat | tee /tmp/diff | less
137 * git diff | diffstat > /tmp/diff
138 * got di | diffstat | ssh foo 'cd xyz && got patch'