diff options
Diffstat (limited to 'test/longwrds.awk')
-rw-r--r-- | test/longwrds.awk | 27 |
1 files changed, 27 insertions, 0 deletions
diff --git a/test/longwrds.awk b/test/longwrds.awk
new file mode 100644
index 0000000..d4b4d92
--- /dev/null
+++ b/test/longwrds.awk
@@ -0,0 +1,27 @@
+# From Gawk Manual modified by bug fix and removal of punctuation
+# Prints each distinct word longer than 10 characters, plus a count line, through SORT.
+# Invoker can customize sort command if necessary (SORT must be set before BEGIN runs).
+BEGIN {
+	if (!SORT) SORT = "LC_ALL=C sort"  # default is used only when SORT is unset, empty or zero
+}
+
+# Record every word which is used at least once
+{
+	for (i = 1; i <= NF; i++) {
+		tmp = tolower($i)  # lower-case first, so words differing only in case share one key
+		if (0 != (pos = match(tmp, /([a-z]|-)+/)))  # match() returns 0 when the field has no letters/hyphens
+			used[substr(tmp, pos, RLENGTH)] = 1  # keep only the first matched run; RLENGTH is set by match()
+	}
+}
+
+# Count the distinct words longer than 10 characters and send each of them to SORT
+END {
+	num_long_words = 0
+	for (x in used)  # traversal order is unspecified in awk; presumably why output goes through SORT
+		if (length(x) > 10) {
+			++num_long_words
+			print x | SORT
+		}
+	print(num_long_words, "long words") | SORT  # same pipe as the words, so SORT decides where this line lands
+	close(SORT)  # close the pipe so the sort command sees end of input
+}
|