merge_word_counts.py 766 Bytes
#! /usr/bin/env python

# Copyright 2016    Vimal Manohar
# Apache 2.0.

"""This script merges pocolm word_counts read from standard input and writes
the merged word_counts to standard output. A min-count argument is required;
only words whose merged count is at least the specified minimum are written.
"""
from __future__ import print_function

import sys


def main():
    """Merge word counts read from stdin and print those meeting a minimum.

    Each non-blank input line is split on whitespace; the first field is
    taken as an integer count and the second as the word (any further
    fields are ignored).  Counts for the same word are summed, and every
    word whose total is at least <min-count> (the single command-line
    argument) is printed to stdout as "<count> <word>".

    Raises:
        SystemExit: with status 1 when the number of arguments is wrong.
        ValueError: if <min-count> or a count field is not an integer.
        IndexError: if a non-blank line has fewer than two fields.
    """
    if len(sys.argv) != 2:
        sys.stderr.write("Usage: {0} <min-count>\n".format(sys.argv[0]))
        raise SystemExit(1)

    # Parse the threshold once, before consuming any input, so that a bad
    # argument fails immediately instead of after stdin has been read.
    min_count = int(sys.argv[1])

    words = {}
    # Iterate the stream directly rather than materialising every line.
    for line in sys.stdin:
        parts = line.split()
        if not parts:
            # Blank / whitespace-only lines carry no count; skip them
            # instead of crashing on parts[1].
            continue
        words[parts[1]] = words.get(parts[1], 0) + int(parts[0])

    for word, count in words.items():
        if count >= min_count:
            print("{0} {1}".format(count, word))


# Run the merge only when executed as a script, not when imported.
if __name__ == "__main__":
    main()