#!/bin/sh
# wf - write the n most frequent words found on standard input to
# standard output.
#
# Usage:
#   wf [n]      (n defaults to 25)
# wf [n] - word-frequency filter.
#
# Reads text on standard input and writes the n most frequent words
# (default: 25) to standard output, one "count word" line per word,
# highest count first; words with equal counts are ordered by word.
# A word is a run of letters and apostrophes, folded to lower case.
#
# Arguments: $1 - optional number of lines to print (digits only)
# Returns:   2, with a usage message on stderr, if $1 is not a number;
#            otherwise the exit status of the pipeline.
wf() {
  # Reject anything that is not purely digits so the argument can never be
  # interpreted by sed as extra commands.
  case ${1:-25} in
    *[!0-9]*)
      printf 'usage: wf [n]\n' >&2
      return 2
      ;;
  esac

  # Put each word on its own line: every run of characters other than
  # letters and apostrophes collapses into a single newline.
  tr -cs A-Za-z\' '\n' |
    # Input that starts with a non-letter leaves an empty first line; drop
    # it so the empty string is not counted as a word.
    sed '/^$/d' |
    # Fold case so "The" and "the" are the same word.
    tr A-Z a-z |
    # uniq only merges adjacent duplicates, so sort first.
    sort |
    uniq -c |
    # Descending numeric count, then ascending word.
    sort -k1,1nr -k2 |
    # Print the first n lines, then stop reading.
    sed "${1:-25}q"
}

wf "$@"
e.g.
cat fileName.txt | ./wf.sh 12
Description :
tr -cs A-Za-z\' '\n' | Replace each run of non-letters (apostrophes are kept) with a single newline
tr A-Z a-z | Map uppercase to lowercase
sort | Sort the words in ascending order
uniq -c | Eliminate duplicates, showing their counts
sort -k1,1nr -k2 | Sort by descending count, and then by ascending word
sed ${1:-25}q Print only the first n (default: 25) lines, then quit
$ wf 999999 < filename.txt | awk ' $1 >= 5' | wc -l
number of unique words occurring at least 5 times
$ wf 999999 < hamlet | tail -n 12 | pr -c4 -t -w80
some of the least frequent words
$ wf 999999 < hamlet | wc -l
number of unique words
$ wf 999999 < hamlet | grep -c '^ *1 '
or
$ wf 999999 < hamlet | egrep -c '[[:space:]]+1[[:space:]]+'
number of words occurring exactly once
Source
No comments:
Post a Comment