## AWK

### Print a count of words by ending character

 Average two values in scientific notation echo "9.936E-2 1.138E-2" | awk '{printf "%.3E\n", (\$1+\$2)/2}' | sed -E 's/-0?/-/g' Average a column with awk awk '{s += \$1} END {print (s / NR) }' Average data given in scientific notation awk '{s += \$1} END {printf "%.6E\n",(s / NR) }' data.txt Calculate pi at scale 500 and print at scale 400 PI=\$(BC_LINE_LENGTH=0 bc -l <<< 'scale = 500; 4*a(1)'); echo \${PI:0:400} Calculate the Lorentz factor for fractions of c awk 'BEGIN { for (v =0; v <=99; v++) printf "%.2f %.4f\n", v/100, 1/sqrt(1-(v / 100)^2)}' Center text in a terminal window of any size ls -l | awk '{ printf("%*s\n", ('\${COLUMNS}' + length(\$1))/2, \$9); }' Count MP3 files and report disk usage find /home -name '*.mp3' -ls | awk '{s+=\$7;f+=1}END{print " MP3 files: "f"\nTotal size: "s/1000000" MB"}' Count processes running as each user: ps -ef | awk '{print \$1}' | sed '/UID/d' | sort | uniq -c Factorize the first 100 integers, sort by number of factors seq 1 100 | factor | awk '{print NF-1,\$0} ' | sort -nr Find gaps in a numerical sequence cat list.txt | awk '\$1!=p+1{print p+1"-"\$1-1}{p=\$1}' Find the longest verse in the Bible awk 'length > max { max=length;maxline=\$0 } END { print maxline; }' kjv.txt Find the verse in the Bible containing the most words: cat kjv.txt | awk '{if (NF > max) {max = NF; line = \$0}} END {print line}' Get a one-line description for the files in a directory ls -l /usr/bin | awk '{print \$9}' | xargs whatis | sed '/appropriate/d' List files referenced by library symbolic links sudo ls -lR /lib | grep ^l | awk '{print \$9" "\$10" "\$11}' | grep ^lib | sort List directories find . 
-type d -print List only non-blank lines in a file awk 'NF >0' file.txt Print all lines with exactly 40 characters awk 'length ==40' tags.tx3 Print all prime numbers between 1 and 5000 seq 1 5000 | factor | awk 'NF==2 { print \$2 }' | tr '\n' ',';echo Print a random line from a file of unknown length awk 'BEGIN{ srand() }; rand() * NR < 1 { line = \$0} END {print line}' tags.tx3 Print a count of words by ending character awk 'length > 1{++a[substr(tolower(\$0), length)]}END{for (k in a) print a[k], k}' /usr/share/dict/words | sort -n Print a short list of your most recently-used commands fc -rl | awk '{print \$1=""; print}' Print lines from a file with the values of the seventh field falling within a certain range awk '(\$7 >= .9 && \$7 <= 1.1)' nearby.txt Print lines with a unique first field awk '!x[\$1]++' ana5.txt Print lines with only four letter words (or less) awk 'length < 5' tags.tx3 Print number of processes running as each user ps -ef | awk '{print \$1}' | sort | uniq -c Print sizes of files in current directory with commas ls -lp | grep -v / | awk '{\$1=\$2=\$3=\$4=\$6=\$7=\$8="";print}' | sed -e :a -e 's/\(.*[0-9]\)\([0-9]\{3\}\)/\1,\2/;ta' Print the first sixteen binary numbers echo "obase=2;i=0;while(i<=15) {i;i+=1}" | bc | awk '{printf "%4s\n", \$0}' Print the pieces of a string that aren’t digits, and their numerical separators, in order: awk '{n=patsplit(\$0,z,/[^0-9]+/,x); for (i in z) print i,z[i],x[i]}' OFS="|" <<< "a4b33c19d9e55" Print the unique commands in your history history | awk '{\$1=""} !a[\$0]++' Print working directory file information ls -l | awk '{\$5=sprintf("%9s",\$5); printf "%s %9s", substr(\$1,1,4),\$5" "; for (i=9;i<=NF;i++) printf \$i" "; printf "\n"}' Print your command history, omitting duplicates history | awk '{\$1="";print }' | sort | uniq Print your top fifteen most frequently-used commands: history | awk '{print \$2}' | sort | uniq -c | sort -n | tail -n15 | sort -nr Reverse the order of the fields on each line awk 
'{for (i=NF;i>0;i--){printf \$i" "};printf "\n"}' gettysburg2.txt Search for “foo” and “bar” in any order awk '/foo/ && /bar/' tags.txt Search for “foo” and “bar” in that order awk '/foo.*bar/' tags.txt See what’s eating your system clocks ps aux --sort %cpu | awk '{print \$3,\$1,\$11}' | tail -30 Show all mounted media lsblk | awk 'NF >6 {print\$1,"\t",\$4,"\t",\$7}' Sort user processes by memory footprint ps aux --sort -%mem | awk '{print \$4,\$1,\$11}' | sed '/root/d' | head -50 Strip blank lines from a text file awk 'NF > 0' yeshua > yeshua2 Tally up a column of figures awk '{s+=\$1} END {print s}' figures.txt Tally up bytes of all files in a directory ls -l | sed '/^d/d' | awk '{ x += \$5 } ; END { print "Total bytes: " x }' Tally up bytes of selected files in a directory du -b *.msg | awk '{s+=\$1} END {print s}' Print prime numbers echo {1..80} | factor | awk 'NF==2 { print \$2 }' Use awk like grep awk '/sex/' tags.txt

### Show ASCII table

```awk
# Usage: awk -f showascii
# Print character codes 32-127 as a table of 16 rows: each row starts at
# 32 + row and steps by 16, so codes increase down each column.
BEGIN {
    for (row = 0; row < 16; row++) {
        line = ""
        for (code = 32 + row; code < 128; code += 16) {
            # Space and DEL have no visible glyph, so show a mnemonic instead.
            label = (code == 32) ? "SPC" : ((code == 127) ? "DEL" : sprintf("%c", code))
            line = line sprintf("%3d: %-5s", code, label)
        }
        print line
    }
}
```

### Tally up a column of figures

```sh
#! /bin/sh
# Sum one whitespace-separated column of figures.
#
# Usage: <script> colnum [files]
#   colnum  1-based field number to add up (decimal digits only, no leading 0)
#   files   input files passed on to awk; awk reads standard input when none
#           are given
#
# Prints the usage line on stderr and exits 1 when colnum is missing or is
# not a positive integer.
case "$1" in
  '' | 0* | *[!0-9]*)
    echo "Usage: $(basename "$0") colnum [files]" 1>&2
    exit 1
    ;;
esac
colnum=$1
shift

# col and OFMT are given as command-line assignments, which awk applies
# before it reads the first file. OFMT makes a non-integer sum print with
# two decimals. ${1+"$@"} expands to the remaining arguments only when at
# least one exists (protects old shells that turn a bare "$@" into "").
awk '{sum += $col}
END {print sum}' col="$colnum" OFMT='%.2f' ${1+"$@"}
```

### Use awk to list the thirty most-common words in a text file

```awk
# Usage: awk -f wordfreq textfile.txt

# Count how often each whitespace-separated word occurs (case-insensitively)
# and list the most frequent ones, followed by a summary line.
#
# Requires gawk: the ordered traversal below relies on PROCINFO["sorted_in"].
# Optional variable: show = how many words to list (default 30),
# e.g.  gawk -v show=10 -f wordfreq textfile.txt

{
    nbytes += length($0) + 2    # +2 for CR/LF
    nfields += NF
    $0 = tolower($0)            # fold case; assigning $0 re-splits the fields
    for (i = 1; i <= NF; i++) {
        arr[$i]++
    }
}
END {
    show = (show == "") ? 30 : show
    width1 = length(show)       # rank column is as wide as the largest rank
    PROCINFO["sorted_in"] = "@val_num_desc"     # visit words by descending count
    for (i in arr) {
        # The first word visited has the largest count, so its width fits all.
        if (width2 == 0) { width2 = length(arr[i]) }
        if (n++ >= show) { break }
        printf("%*d %*d %s\n", width1, n, width2, arr[i], i)
    }
    printf("input: %d records, %d bytes, %d words of which %d are unique\n", NR, nbytes, nfields, length(arr))
    exit(0)
}
```

### Print lines with a unique first field

```
$ awk '!x[$1]++' ana5.txt
abby baby
abeam ameba
abel able bale bela elba
abet bate beat beta
abets baste beast beats betas
abetter beretta
abhorred harbored
abhorring harboring
abhors hasbro
abides biased
able abel bale bela elba
abler blare
ablest bleats stable tables 