#!/usr/bin/python
#copyright 2010 ben lipkowitz
#you may redistribute any changes to this code under the terms of the GNU GPL version 2 or later
#turns a pda_db.txt into a tree with labeled fields, nice!
#TODO unit tests!!

from pyparsing import *

# Input selection: exactly one of these should be active. The file is opened
# when this module is loaded and is read by main().
#log_file = open('100.txt')
#log_file = open('10000.txt')
#log_file = open('pda_db.txt')
log_file = open('test.txt')

# Interval boundaries are four-digit fields; main() slices them as HHMM
# (first two digits = hour, last two = minute).
start_time = Word(nums, exact=4)('start_time')
end_time = Word(nums, exact=4)('end_time')

# A parenthesised free-text remark.
# NOTE(review): Python applies the trailing ('comment') call to Literal(")")
# alone, so the results name labels only the closing paren rather than the
# whole expression -- probably not what was intended.
# NOTE(review): Word(printables) presumably also consumes ')' itself; verify
# that this expression can actually match a comment.
comment = Literal("(")+OneOrMore(Word(printables))+Literal(")")('comment')

# Units that may follow a number in a food measurement.
other_unit = oneOf('pk pc')('count') #pk = pack, pc = piece, TODO default=1pc
volume_unit = oneOf('c cup l ml floz')('volume') #volume units, result name 'volume'
mass_unit = oneOf('g mg ug kg oz lb')('mass') #mass units, result name 'mass'
unit = other_unit ^ volume_unit ^ mass_unit
# A number (digits with an optional fractional part, combined into one token
# named 'number') followed by one of the units above.
measurement = Group(Combine(Word(nums) + Optional(Word('.')+Word(nums)))('number') + unit)('measurement')
# The keyword 'food' (dropped from the results) followed by one or more
# entries, each a 'food_type' word with an optional measurement.
food = Suppress(Literal('food')) + OneOrMore(Group(Word(alphas+'-/')('food_type') + Optional(measurement)))

# Any alphabetic word is accepted as a verb; the closed list below is disabled.
#verb = Group(oneOf('chat sleep code fix search act net'))('verb')
verb = Word(alphas)('verb')

# Nouns may contain letters plus the characters . - + /
noun = Word(alphas+'.-+/')('noun')
#complex_noun = Group(noun + OneOrMore(Word('.-+/') + noun))
complex_noun = noun

# A verb, one explicit space, then one or more nouns.
generic_action = Group(verb + White(" ") + OneOrMore(complex_noun))
action = Group(generic_action ^ food)('action')
#actions = Group(action + Group(Literal(',') ^ lineEnd))('actions')
# NOTE(review): commaSeparatedList looks like a ready-made pyparsing expression
# rather than a factory, so passing `action` here may only set (and then
# overwrite) a results name. main() re-parses every item of
# interval['actions'] from text, which suggests `action` is not actually
# applied at this point -- confirm against the pyparsing documentation.
actions = commaSeparatedList(action)('actions')

# One log line: start time, end time, comma-separated actions, optional comment.
interval = Group(start_time + end_time + actions +Optional(comment))('interval') # why no lineEnd??

# Date header: the keyword 'date' followed by a 4-digit year, 2-digit month
# and 2-digit day, with an optional comment.
year = Word(nums, exact=4)('year')
month = Word(nums, exact=2)('month')
day = Word(nums, exact=2)('day')
date = Group(Literal('date') + year + month + day + Optional(comment))('date')
#date = Group(Literal('date') + year + month + day + Optional(comment) + lineEnd)('date')
#date = lineStart + Literal('date') + year + month + day + Suppress(lineEnd)


# A day is a date header followed by one or more intervals; main() relies on
# the date group being element 0 and the intervals being elements 1 onwards.
log_day = Group(date + OneOrMore(interval))('log_day')
# The whole log is one or more days.
log = OneOrMore(log_day)

def sort_by_value(mydict, reverse=False):
    """Return the entries of *mydict* as a sorted list of (value, key) tuples.

    Tuples are compared whole, so entries with equal values are ordered by
    key. Pass ``reverse=True`` to get the largest values first.
    """
    # keys() and values() iterate in matching order, so zipping them yields
    # each entry with its value placed first.
    swapped = zip(mydict.values(), mydict.keys())
    return sorted(swapped, reverse=reverse)

    
    
def _epoch_at(year, month, day, hhmm):
    """Return local-time epoch seconds for the 'HHMM' string *hhmm* on a date."""
    import time
    hour, minute = int(hhmm[0:2]), int(hhmm[2:4])
    return time.mktime((year, month, day, hour, minute, 0, 0, 0, 0))


def _tally(counter, key, amount=1):
    """Add *amount* to counter[key], treating a missing key as zero."""
    counter[key] = counter.get(key, 0) + amount


def main():
    """Parse the log file and print usage tallies.

    Reads the module-level ``log_file`` through the ``log`` grammar, prints
    each date, reports any interval whose end time is earlier than its start
    time, and then prints three tallies, most frequent first.

    Returns:
        A tuple ``(categories, food_types, food_words)`` of dicts mapping
        verb -> count, food name -> count, and food-name word -> count.
    """
    categories = {}
    food_types = {}
    my_log = log.parseString(log_file.read())
    for my_day in my_log:
        # Element 0 of a day is the date header; the rest are intervals.
        header = my_day[0]
        year, month, day = int(header['year']), int(header['month']), int(header['day'])
        print('date %s %s %s' % (year, month, day))
        for interval in my_day[1:]:
            start_epoch = _epoch_at(year, month, day, interval['start_time'])
            end_epoch = _epoch_at(year, month, day, interval['end_time'])
            elapsed = end_epoch - start_epoch
            # Both times are placed on the same date, so an end time that is
            # earlier in the day than the start time shows up as negative.
            if elapsed < 0:
                print("time discrepancy! %s" % (interval,))

            for action_string in interval['actions']:
                # Each action arrives as raw text and is tried against both
                # grammars; failing to match one of them is expected, so the
                # ParseException is deliberately ignored.
                try:
                    my_foods = food.parseString(action_string) #this is lame
                except ParseException:
                    pass
                else:
                    for my_food in my_foods:
                        _tally(food_types, my_food['food_type'])
                try:
                    my_action = generic_action.parseString(action_string)
                except ParseException:
                    pass
                else:
                    for parsed in my_action:
                        _tally(categories, parsed['verb'])

    print("food types:")
    for entry in sort_by_value(food_types, reverse=True):
        print(entry)
    print("action categories:")
    for entry in sort_by_value(categories, reverse=True):
        print(entry)

    #tally the number of times a particular word is used, i.e. 'ice' in 'ice-cream' and 'shaved-ice'
    food_words = {}
    for food_name, count in food_types.items():
        # Break the name on '+' and then on '-'; every resulting word is
        # credited with the full count of the food it came from.
        for chunk in food_name.split('+'):
            for word in chunk.split('-'):
                _tally(food_words, word, count)
    print("food words:")
    for entry in sort_by_value(food_words, reverse=True):
        print(entry)

    return (categories, food_types, food_words)
# Disabled debugging aid: dump the grammar's scan results as YAML.
#import yaml
#print yaml.dump( [x for x in log.scanString(log_file.read()[1000])], default_flow_style=False)
# Run the report only when this file is executed as a script.
if __name__ == '__main__': 
    main()


