op public repos

Blob

Date:: Mon Jan 9 10:46:49 2023 UTC
Message:: diffstat: various tweaks to the commentary
Actions:: History | Blame | Raw File
1 # diffstat
2 
3 Show diff statistics.
4 
5 	#!/usr/bin/awk -f
6 
7 AWK is great.  All hail AWK!
8 
9 First, some utility functions.  parsehdr extracts the number of lines
10 (old or new) in the given hunk header line.
11 
12 	function parsehdr(s) {
13 		if (s !~ /,/) return 1	# "-3" or "+3": count omitted, which means one line
14 		s = gensub(".*,", "", 1, s)	# drop "start," (sign included), keep the count
15 		return s + 0	# force a numeric result
16 	}
17 
18 filename extracts the name of the file from a "+++ path" or "--- path"
19 line.
20 
21 	function filename(s) {
22 		sub(/^... /, "", s)
23 
24 These lines may have a tab followed by extra information (the date,
25 for example) that needs to be removed too.
26 
27 		sub(/\t.*/, "", s)
28 		return s
29 	}
30 
31 Switches the current file to the one provided.  It's a good place to
32 accumulate part of the summary shown at the end and to reset the
33 per-file counters.
34 
35 	function switchfile(newfile) {
36 		# Append the tally of the file being left, if there was one.
37 		if (file != "")
38 			summary = sprintf("%s%4d+ %4d-\t%s\n", summary, add, rem, file)
39 
40 		# Start a fresh tally under the new name.
41 		file = newfile
42 		rem = 0
43 		add = 0
44 	}
45 
46 Now, the real "parser".  Initialize the state to "out" since we're
47 looking for the start of a diff.
48 
49 	BEGIN {
50 		state = "out"	# "out": outside any hunk body; becomes "in" once a hunk header is read
51 	}
52 
53 Parse the changed file.
54 
55 	state == "out" && /^\+\+\+ / {
56 		nfile = filename($0)
57 
58 A deleted file shows up with "/dev/null" as its new name, which is not a
59 useful name for the stats.  Fall back to the "old" name saved from the
60 "--- " line instead.
61 
62 		if (nfile == "/dev/null")
63 			nfile = delfile
64 
65 		switchfile(nfile)
66 		delfile = ""
67 	}
68 
69 Similarly, extract the "old" file name for when it's needed.
70 
71 	state == "out" && /^--- / {	# every file header, not only while no file is current
72 		delfile = filename($0)	# kept in case the matching "+++" line names /dev/null
73 	}
74 
75 Match the start of a hunk.
76 
77 	state == "out" && /^@@ / {
78 
79 This part is a bit complicated, but all it does is extract the number
80 of "new" and "old" lines shown in the hunk.  A hunk header looks like this
81 (except for the initial '#' character):
82 
83 		# @@ -55,7 +55,19 @@ ...
84 
85 So first extract the text inside the pair of "@@"
86 
87 		s = gensub("@@ ", "", 1)
88 		s = gensub(" @@.*", "", 1, s)
89 
90 and then parse each number.
91 
92 		old = gensub(" .*", "", 1, s)
93 		old = parsehdr(old)
94 
95 		new = gensub(".* ", "", 1, s)
96 		new = parsehdr(new)
97 
98 Don't forget to switch the state of the parser: now we're reading a
99 hunk.
100 
101 		state = "in"
102 	}
103 
104 Keep count of the added and removed lines.  Also, decrement the "old" and
105 "new" lines when needed, to know when we're done with the hunk.
106 
107 	state == "in" && substr($0, 1, 1) == " " {	# context line: on both sides
108 		--old
109 		--new
110 	}
111 
112 	state == "in" && substr($0, 1, 1) == "-" {	# removed line: old side only
113 		--old
114 		++rem
115 		++totrem
116 	}
117 
118 	state == "in" && substr($0, 1, 1) == "+" {	# added line: new side only
119 		--new
120 		++add
121 		++totadd
122 	}
123 
124 When there are no more "new" and "old" lines to read, go back to the
125 "out" state, ready to read another hunk or another file.
126 
127 	state == "in" && !(old > 0 || new > 0) {
128 		state = "out"	# both line budgets are used up: the hunk is over
129 	}
130 
131 Don't be a sink!  Continue the pipeline so we can further save or apply
132 the diff.
133 
134 	{ print }	# no pattern: echo every input line unchanged
135 
136 At the end, print the stats.
137 
138 	END {
139 
140 It's better to flush the output here, otherwise the stats (printed to
141 stderr and unbuffered) may be interleaved with the output (on stdout,
142 buffered).
143 
144 		fflush()
145 
146 Generate the stat summary for the last processed file.
147 
148 		switchfile("")
149 
150 Print the stats to the standard error, to avoid "changing" the patch.
151 
152 Unfortunately, there doesn't seem to be a "built-in" way of printing to
153 stderr other than using the pseudo-device "/dev/stderr".
154 
155 		printf("%s", summary) > "/dev/stderr"
156 		printf("%4d+ %4d-\ttotal\n", totadd, totrem) > "/dev/stderr"
157 	}
158 
159 Some example usages:
160 
161 * cvs -q diff | diffstat > /tmp/diff
162 * got diff | diffstat | ssh foo 'cd xyz && got patch'