summaryrefslogtreecommitdiff
path: root/test/longwrds.awk
diff options
context:
space:
mode:
Diffstat (limited to 'test/longwrds.awk')
-rw-r--r-- test/longwrds.awk 27
1 files changed, 27 insertions, 0 deletions
diff --git a/test/longwrds.awk b/test/longwrds.awk
new file mode 100644
index 0000000..d4b4d92
--- /dev/null
+++ b/test/longwrds.awk
@@ -0,0 +1,27 @@
+# From Gawk Manual modified by bug fix and removal of punctuation
+
+# Fall back to a C-locale sort unless the invoker has already supplied SORT.
+BEGIN {
+    SORT = SORT ? SORT : "LC_ALL=C sort"
+}
+
+# For each field, remember its first run of letters/hyphens, lowercased.
+{
+    for (f = 1; f <= NF; f++) {
+        field = tolower($f)
+        if (match(field, /([a-z]|-)+/) != 0)    # match() sets RSTART and RLENGTH
+            used[substr(field, RSTART, RLENGTH)] = 1
+    }
+}
+
+# Pipe every distinct word longer than 10 characters, then their count, to SORT.
+END {
+    long_count = 0
+    for (w in used) {
+        if (length(w) <= 10) continue    # only words of 11+ characters qualify
+        long_count++
+        print w | SORT
+    }
+    print(long_count, "long words") | SORT    # summary goes down the same pipe
+    close(SORT)
+}