op public repos

Blob

Date:: Mon Jan 9 09:41:17 2023 UTC
Message:: diffstat: remove optional trailing garbage when extracting filenames
Actions:: History | Blame | Raw File
1 # diffstat
2 
3 Show diff statistics.
4 
5 	#!/usr/bin/awk -f
6 
7 AWK is great.  All hail AWK!
8 
9 Now, some utility functions.  parsehdr extracts the number of lines
10 (old or new) covered by the following hunk.
11 
12 	function parsehdr(s) {			# s: one range of a hunk header, e.g. "-55,7" or "+3"
13 		if (index(s, ",") == 0) return 1	# "start" alone, without ",count", means exactly one line
14 		sub(".*,", "", s)			# otherwise drop the sign and "start,", keeping the count
15 		return s + 0				# force a numeric result
16 	}
17 
18 Extracts the name of the file from a "+++ path" or "--- path" line.
19 
20 	function filename(s) {
21 		sub("^... ", "", s)
22 
23 These lines may carry a tab followed by extra information (the date,
24 for example) that has to be removed too.
25 
26 		sub("\t.*", "", s)
27 		return s
28 	}
29 
30 Switches the current file to the one provided.  It's a good place to
31 accumulate part of the summary shown at the end and to reset the
32 per-file counters.
33 
34 	function switchfile(newfile) {
35 		# Append the counters of the file we're leaving, if there is one...
36 		if (file != "")
37 			summary = sprintf("%s+%d -%d\t%s\n", summary, add, rem, file)
38 
39 		# ...then start counting from scratch for the new one.  An empty
40 		# newfile (as passed by the END rule) leaves no current file.
41 		add = rem = 0
42 		file = newfile
43 	}
44 
45 Now, the real "parser".  It starts in the "out" state.
46 
47 	BEGIN {
48 		state = "out"	# "out": not inside a hunk; "in": reading the lines of a hunk
49 	}
50 
51 Match the start of a diff on the "+++" line.
52 
53 	state == "out" && /^\+\+\+ / {
54 		nfile = filename($0)
55 		if (nfile == "/dev/null") {
56 
57 When deleting a file, the name will be "/dev/null", but that's not a
58 great name for the stats.  Let's use the "old" name instead.
59 
60 			nfile = delfile
61 		}
62 
63 		switchfile(nfile)	# record the previous file's counters and start the new file
64 		delfile = ""		# the saved "---" name has been consumed; don't carry it over
65 	}
66 
67 Let's save the old name in case it's needed.
68 
69 	state == "out" && /^--- / {	# save it for every file header, not only while no file is current
70 		delfile = filename($0)	# used by the "+++" rule when the new name is /dev/null
71 	}
72 
73 Match the start of a hunk and switch the state to "in"
74 
75 	state == "out" && /^@@ / {
76 
77 This part looks complicated, but all it does is extract how many "old"
78 and "new" lines the hunk spans.  A hunk header looks like this (minus
79 the leading '#' character)
80 
81 		# @@ -55,7 +55,19 @@ ...
82 
83 So first isolate the text between the two "@@" markers
84 
85 		s = $0; sub("@@ ", "", s)
86 		sub(" @@.*", "", s)
87 
88 and then parse each range: the old one comes first, the new one last.
89 
90 		old = s; sub(" .*", "", old)
91 		old = parsehdr(old)
92 
93 		new = s; sub(".* ", "", new)
94 		new = parsehdr(new)
95 
96 The parser is now reading a hunk, so don't forget to switch its state
97 accordingly.
98 
99 		state = "in"
100 	}
101 
102 Keep count of the added and removed lines.  Also, decrement the "old" and
103 "new" lines when needed, to know when we're done with the hunk.
104 
105 	state == "in" && /^ / {		# unchanged line: counts against both the old and the new side
106 		old--
107 		new--
108 	}
109 
110 	state == "in" && /^-/ {		# removed line: counts against the old side only
111 		old--
112 		rem++		# per-file counter, reset by switchfile
113 		totrem++	# grand total, printed by the END rule
114 	}
115 
116 	state == "in" && /^\+/ {	# added line: counts against the new side only
117 		new--
118 		add++		# per-file counter, reset by switchfile
119 		totadd++	# grand total, printed by the END rule
120 	}
121 
122 When there are no more "new" and "old" lines to read, go back to the
123 "out" state, ready to read another hunk or another file.
124 
125 	state == "in" && old <= 0 && new <= 0 {	# both sides of the hunk have been fully read
126 		state = "out"
127 	}
128 
129 Don't be a sink!  Continue the pipeline so we can further save or apply
130 the diff.
131 
132 	{ print }	# no pattern: every input line is echoed unchanged
133 
134 At the end, print the stats to standard error to avoid mangling the
135 input.  Unfortunately, there doesn't seem to be a "built-in" way of
136 printing to stderr other than using the pseudo-device.
137 
138 	END {
139 		fflush()	# NOTE(review): presumably pushes the echoed diff out before the stats are written -- confirm
140 		switchfile("")	# record the counters of the last file in the summary
141 
142 		printf("%s", summary) > "/dev/stderr"
143 		printf("+%d -%d\ttotal\n", totadd, totrem) > "/dev/stderr"
144 	}
145 
146 Some example usages:
147 
148 * cvs -q di | diffstat | tee /tmp/diff | less
149 * git diff | diffstat > /tmp/diff
150 * got di | diffstat | ssh foo 'cd xyz && got patch'