# range.collapse: collapse per-page index entries into page ranges.
# Any file operands are handed to awk; with no operands awk reads standard input.
awk '
# range.collapse
#   Input:  lines of form: string (tab) ["b"|"e"|"a"] (tab) number
#   Output: lines of form: string (tab) num [(space) num]
#   In a sequence of lines with the same value of string:
#     a b line and the following e line are combined into a single line:
#         string (tab) num num
#     an a line disappears if it lies between a paired b and e
#     an a line otherwise becomes a single line:
#         string (tab) num
#
#   State (all awk globals, initially empty/zero):
#     term      string field of the entry currently being accumulated
#     range     1 while a b line has been seen without its matching e line
#     rlo, rhi  lower and upper bounds of the range being accumulated
#
#   NOTE(review): the rules compare each number with rhi + 1, which presumes
#   the input is sorted by string and then by number -- confirm with caller.

# error(s): report problem s, tagged with the current bounds, on standard
# error (the message is piped through "cat 1>&2").
function error(s) {
	print "range.collapse: " s " near pp " rlo "-" rhi | "cat 1>&2"
}

# printoldrange(): emit the accumulated entry for term, then start a new
# range at the number of the current record.
#   - A range that is still open is reported as a missing %end and its
#     upper bound is printed as "XXX".
#   - Nothing is printed while NR <= 1, since no entry is pending yet.
#   - Equal bounds print as a single number, otherwise as "rlo rhi".
function printoldrange() {
	if (range == 1) {
		error("no %end for " term)
		rhi = "XXX"
	}
	if (NR > 1) {
		if (rlo == rhi)
			print term, rlo
		else
			print term, (rlo " " rhi)
	}
	rlo = rhi = $3	# bounds of current range
}

BEGIN	{ FS = OFS = "\t" }

# New string: flush the previous entry and begin accumulating this one.
$1 != term	{ printoldrange(); term = $1; range = 0 }

# End marker: close the open range, or complain when none is open.
# "next" keeps the rules below from seeing this record.
$2 == "e"	{
	if (range == 1) {
		range = 0
		rhi = $3
	} else {
		printoldrange()
		error("no %begin for " term)
		rlo = "XXX"
	}
	next
}

# Number is at most one past the current upper bound: extend the range.
# After this assignment the following rule cannot match the same record.
$3 <= rhi + 1	{ rhi = $3}

# Gap in the numbers: flush and restart, unless inside an open b..e range,
# in which case the record is absorbed by that range.
$3 > rhi + 1	{ if (range == 0) printoldrange() }

# Begin marker: open a range (a second b before any e is reported first).
$2 == "b"	{
	if (range == 1)
		error("multiple %begin for " term)
	range = 1
}

# Flush the final entry. With exactly one input record the NR > 1 guard in
# printoldrange() would suppress it, so NR is bumped to let it print.
END	{
	if (NR == 1)
		NR = 2
	printoldrange()
}
' "$@"