From 8781c47d0ddac22192f1c233606d3a2923abd6f9 Mon Sep 17 00:00:00 2001 From: Tim Laszlo Date: Sat, 5 Jun 2010 13:41:23 +0000 Subject: Performance improvements for bcfg2-admin reports scrub. Switched object updates to executemany statements. Added BatchFetch to retrieve Django objects in groups. git-svn-id: https://svn.mcs.anl.gov/repos/bcfg/trunk/bcfg2@5891 ce84e21b-d406-0410-9b95-82705330c041 --- src/lib/Server/Admin/Reports.py | 25 +++++++++++-------------- src/lib/Server/Reports/utils.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 14 deletions(-) create mode 100755 src/lib/Server/Reports/utils.py (limited to 'src') diff --git a/src/lib/Server/Admin/Reports.py b/src/lib/Server/Admin/Reports.py index e712ae77d..5d0595869 100755 --- a/src/lib/Server/Admin/Reports.py +++ b/src/lib/Server/Admin/Reports.py @@ -9,6 +9,7 @@ import platform import sys from Bcfg2.Server.Reports.importscript import load_stats from Bcfg2.Server.Reports.updatefix import update_database +from Bcfg2.Server.Reports.utils import * from lxml.etree import XML, XMLSyntaxError try: @@ -117,7 +118,8 @@ class Reports(Bcfg2.Server.Admin.Mode): dup_reasons = [] cmp_reasons = dict() - for reason in Reason.objects.all(): + batch_update = [] + for reason in BatchFetch(Reason.objects): ''' Loop through each reason and create a key out of the data. 
\ This lets us take advantage of a fast hash lookup for \ comparisons ''' @@ -129,24 +131,19 @@ class Reports(Bcfg2.Server.Admin.Mode): if key in cmp_reasons: self.log.debug("Update interactions from %d to %d" \ % (reason.id, cmp_reasons[key])) - try: - Entries_interactions.objects.filter(reason=reason).\ - update(reason=cmp_reasons[key]) - dup_reasons.append(reason.id) - except Exception, ex: - self.log.error("Failed to update interactions for %d : %s" \ - % (reason.id, ex)) + dup_reasons.append(reason.id) + batch_update.append([cmp_reasons[key], reason.id]) else: cmp_reasons[key] = reason.id self.log.debug("key %d" % reason.id) self.log.debug("Done with updates, deleting dupes") - for dup in dup_reasons: - self.log.debug("Deleting %d" % dup) - try: - Reason.objects.get(id=dup).delete() - except Exception, ex: - self.log.error("Failed to delete reason %d: %s" % (dup, ex)) + try: + cursor = connection.cursor() + cursor.executemany('update reports_entries_interactions set reason_id=%s where reason_id=%s', batch_update) + cursor.executemany('delete from reports_reason where id = %s', dup_reasons) + except Exception, ex: + self.log.error("Failed to delete reasons: %s" % ex) self.log.info("Found %d dupes out of %d" % (len(dup_reasons), start_count)) diff --git a/src/lib/Server/Reports/utils.py b/src/lib/Server/Reports/utils.py new file mode 100755 index 000000000..2ef21e446 --- /dev/null +++ b/src/lib/Server/Reports/utils.py @@ -0,0 +1,30 @@ +'''Helper functions for reports''' + +class BatchFetch(object): + '''Fetch Django objects in smaller batches to save memory''' + + def __init__(self, obj, step=10000): + self.count = 0 + self.block_count = 0 + self.obj = obj + self.data = None + self.step = step + self.max = obj.count() + + def __iter__(self): + return self + + def next(self): + '''Return the next object from our array and fetch from the + database when needed''' + if self.block_count + self.count - self.step == self.max: + raise StopIteration + if 
self.block_count == 0 or self.count == self.step: + # Without list() this turns into LIMIT 1 OFFSET x queries + self.data = list(self.obj.all()[self.block_count: \ + (self.block_count + self.step)]) + self.block_count += self.step + self.count = 0 + self.count += 1 + return self.data[self.count - 1] + -- cgit v1.2.3-1-g7c22