summaryrefslogtreecommitdiffstats
path: root/askbot/management/commands/get_tag_stats.py
blob: c30643b3e2a8de75763d9ed4a99d2e0b4ea3dfee (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
import sys
import optparse
from django.core.management.base import BaseCommand, CommandError
from askbot import models

def get_tag_lines(tag_marks, width = 25):
    output = list()
    line = ''
    for name in tag_marks:
        if line == '':
            line = name
        elif len(line) + len(name) + 1 > width:
            output.append(line)
            line = name
        else:
            line += ' ' + name
    output.append(line)
    return output

def get_empty_lines(num_lines):
    output = list()
    for idx in xrange(num_lines):
        output.append('')
    return output

def pad_list(the_list, length):
    if len(the_list) < length:
        the_list.extend(get_empty_lines(length - len(the_list)))

def format_table_row(*cols, **kwargs):
    max_len = max(map(len, cols))
    for col in cols:
        pad_list(col, max_len)

    output = list()
    for idx in xrange(max_len):
        bits = list()
        for col in cols:
            bits.append(col[idx])
        line = kwargs['format_string'] % tuple(bits)
        output.append(line)

    return output


class Command(BaseCommand):
    help = 'Prints statistics of tag usage'

    option_list = BaseCommand.option_list + (
            optparse.make_option(
                '-t',
                '--sub-counts',
                action = 'store_true',
                default = False,
                dest = 'sub_counts',
                help = 'Print tag subscription statistics, for all tags, listed alphabetically'
            ),
            optparse.make_option(
                '-u',
                '--user-sub-counts',
                action = 'store_true',
                default = False,
                dest = 'user_sub_counts',
                help = 'Print tag subscription data per user, with users listed alphabetically'
            ),
            optparse.make_option(
                '-e',
                '--print-empty',
                action = 'store_true',
                default = False,
                dest = 'print_empty',
                help = 'Print empty records too (with zero counts)'
            ),
        )
    def handle(self, *args, **options):
        if not(options['sub_counts'] ^ options['user_sub_counts']):
            raise CommandError('Please use either -u or -t (but not both)')

        print ''
        if options['sub_counts']:
            self.print_sub_counts(options['print_empty'])

        if options['user_sub_counts']:
            self.print_user_sub_counts(options['print_empty'])
        print ''

    def print_user_sub_counts(self, print_empty):
        """prints list of users and what tags they follow/ignore
        """
        users = models.User.objects.all().order_by('username')
        item_count = 0
        for user in users:
            tag_marks = user.tag_selections

            #add names of explicitly followed tags
            followed_tags = list()
            followed_tags.extend(   
                tag_marks.filter(
                            reason='good'
                        ).values_list(
                            'tag__name', flat = True
                        )
            )

            #add wildcards to the list of interesting tags
            followed_tags.extend(user.interesting_tags.split())

            for good_tag in user.interesting_tags.split():
                followed_tags.append(good_tag)

            ignored_tags = list()
            ignored_tags.extend(
                tag_marks.filter(
                    reason='bad'
                ).values_list(
                    'tag__name', flat = True
                )
            )

            for bad_tag in user.ignored_tags.split():
                ignored_tags.append(bad_tag)

            followed_count = len(followed_tags)
            ignored_count = len(ignored_tags)
            if followed_count == 0 and ignored_count == 0 and print_empty == False:
                continue
            if item_count == 0:
                print '%-28s %25s %25s' % ('User (id)', 'Interesting tags', 'Ignored tags')
                print '%-28s %25s %25s' % ('=========', '================', '============')
            followed_lines = get_tag_lines(followed_tags, width = 25)
            ignored_lines = get_tag_lines(ignored_tags, width = 25)

            follow = '*'
            if user.email_tag_filter_strategy == const.INCLUDE_INTERESTING:
                follow = ''
            user_string = '%s (%d)%s' % (user.username, user.id, follow)
            output_lines = format_table_row(
                                [user_string,], 
                                followed_lines,
                                ignored_lines,
                                format_string = '%-28s %25s %25s'
                            )
            item_count += 1
            for line in output_lines:
                print line
            print ''

        self.print_postamble(item_count)

    def get_wildcard_tag_stats(self):
        """This method collects statistics on all tags
        that are followed or ignored via a wildcard selection

        The return value is a dictionary, where keys are tag names
        and values are two element lists with whe first value - follow count
        and the second value - ignore count
        """
        wild = dict()#the dict that is returned in the end

        users = models.User.objects.all().order_by('username')
        for user in users:
            wk = user.interesting_tags.strip().split()
            interesting_tags = models.Tag.objects.get_by_wildcards(wk)
            for tag in interesting_tags:
                if tag.name not in wild:
                    wild[tag.name] = [0, 0]
                wild[tag.name][0] += 1

            wk = user.ignored_tags.strip().split()
            ignored_tags = models.Tag.objects.get_by_wildcards(wk)
            for tag in ignored_tags:
                if tag.name not in wild:
                    wild[tag.name] = [0, 0]
                wild[tag.name][1] += 1

        return wild

    def print_sub_counts(self, print_empty):
        """prints subscription counts for
        each tag (ignored and favorite counts)
        """
        wild_tags = self.get_wildcard_tag_stats()
        tags = models.Tag.objects.all().order_by('name')
        item_count = 0
        for tag in tags:
            wild_follow = 0
            wild_ignore = 0
            if tag.name in wild_tags:
                (wild_follow, wild_ignore) = wild_tags[tag.name]

            tag_marks = tag.user_selections
            follow_count = tag_marks.filter(reason='good').count() \
                                                        + wild_follow
            ignore_count = tag_marks.filter(reason='bad').count() \
                                                        + wild_ignore
            follow_str = '%d (%d)' % (follow_count, wild_follow)
            ignore_str = '%d (%d)' % (ignore_count, wild_ignore)
            counts = (11-len(follow_str)) * ' ' + follow_str + '  ' 
            counts += (11-len(ignore_str)) * ' ' + ignore_str

            if follow_count + ignore_count == 0 and print_empty == False:
                continue
            if item_count == 0:
                print '%-32s %12s %12s' % ('', 'Interesting', 'Ignored  ')
                print '%-32s %12s %12s' % ('Tag name', 'Total(wild)', 'Total(wild)')
                print '%-32s %12s %12s' % ('========', '===========', '===========')
            print '%-32s %s' % (tag.name, counts)
            item_count += 1

        self.print_postamble(item_count)

    def print_postamble(self, item_count):
        print ''
        if item_count == 0:
            print 'Did not find anything'
        else:
            print '%d records shown' % item_count
        print 'Since -e option was not selected, empty records were hidden'