summaryrefslogtreecommitdiffstats
path: root/wordcount.py
diff options
context:
space:
mode:
authorKyle K <kylek389@gmail.com>2022-08-04 01:31:23 -0500
committerKyle K <kylek389@gmail.com>2022-08-04 01:31:23 -0500
commitbc48463d44bc4e113304ff030c172ad858702bbb (patch)
treebec6ac7948d450bf672e840958353d7121dffc23 /wordcount.py
parentb01d896f8699b1c96a98d8c21e748403659f92d3 (diff)
downloadPythonPractice-bc48463d44bc4e113304ff030c172ad858702bbb.tar.gz
PythonPractice-bc48463d44bc4e113304ff030c172ad858702bbb.tar.bz2
PythonPractice-bc48463d44bc4e113304ff030c172ad858702bbb.zip
wordcount example
Diffstat (limited to 'wordcount.py')
-rw-r--r--wordcount.py19
1 files changed, 19 insertions, 0 deletions
diff --git a/wordcount.py b/wordcount.py
new file mode 100644
index 0000000..11a34fd
--- /dev/null
+++ b/wordcount.py
@@ -0,0 +1,19 @@
+"""
+problem: count ocurrences of words
+solution: tr ' ' '\n' | sort | uniq -c
+e.g.: echo "When the going gets tough, the tough get going" | tr -s ' ' '\n' | sort | uniq -c | sort -bnr
+
+heuristic: use a hashtable, a dictionary in python is close enough and will suffice
+"""
+
+import sys
+
+counts = {}
+for line in sys.stdin:
+ words = line.lower().split()
+ for word in words:
+ counts[word] = counts.get(word, 0) + 1
+
+tupleList = sorted(counts.items(), key=lambda tup: tup[1], reverse=True)
+for word, count in tupleList:
+ print(word, count)