summaryrefslogtreecommitdiffstats
path: root/src/lib
diff options
context:
space:
mode:
authorTim Laszlo <tim.laszlo@gmail.com>2010-06-05 13:41:23 +0000
committerSol Jerome <solj@ices.utexas.edu>2010-06-07 08:42:52 -0500
commit8781c47d0ddac22192f1c233606d3a2923abd6f9 (patch)
tree0495b92a1ecc1a7fa2351a71b2c685952143b119 /src/lib
parent20f1d3918ecbb22a3452c705ee0139a1ebe1265e (diff)
downloadbcfg2-8781c47d0ddac22192f1c233606d3a2923abd6f9.tar.gz
bcfg2-8781c47d0ddac22192f1c233606d3a2923abd6f9.tar.bz2
bcfg2-8781c47d0ddac22192f1c233606d3a2923abd6f9.zip
Performance improvements for bcfg2-admin reports scrub.
Switched object updates to executemany statements. Added BatchFetch to retrieve Django objects in groups. git-svn-id: https://svn.mcs.anl.gov/repos/bcfg/trunk/bcfg2@5891 ce84e21b-d406-0410-9b95-82705330c041
Diffstat (limited to 'src/lib')
-rwxr-xr-xsrc/lib/Server/Admin/Reports.py25
-rwxr-xr-xsrc/lib/Server/Reports/utils.py30
2 files changed, 41 insertions, 14 deletions
diff --git a/src/lib/Server/Admin/Reports.py b/src/lib/Server/Admin/Reports.py
index e712ae77d..5d0595869 100755
--- a/src/lib/Server/Admin/Reports.py
+++ b/src/lib/Server/Admin/Reports.py
@@ -9,6 +9,7 @@ import platform
import sys
from Bcfg2.Server.Reports.importscript import load_stats
from Bcfg2.Server.Reports.updatefix import update_database
+from Bcfg2.Server.Reports.utils import *
from lxml.etree import XML, XMLSyntaxError
try:
@@ -117,7 +118,8 @@ class Reports(Bcfg2.Server.Admin.Mode):
dup_reasons = []
cmp_reasons = dict()
- for reason in Reason.objects.all():
+ batch_update = []
+ for reason in BatchFetch(Reason.objects):
''' Loop through each reason and create a key out of the data. \
This lets us take advantage of a fast hash lookup for \
comparisons '''
@@ -129,24 +131,19 @@ class Reports(Bcfg2.Server.Admin.Mode):
if key in cmp_reasons:
self.log.debug("Update interactions from %d to %d" \
% (reason.id, cmp_reasons[key]))
- try:
- Entries_interactions.objects.filter(reason=reason).\
- update(reason=cmp_reasons[key])
- dup_reasons.append(reason.id)
- except Exception, ex:
- self.log.error("Failed to update interactions for %d : %s" \
- % (reason.id, ex))
+ dup_reasons.append(reason.id)
+ batch_update.append([cmp_reasons[key], reason.id])
else:
cmp_reasons[key] = reason.id
self.log.debug("key %d" % reason.id)
self.log.debug("Done with updates, deleting dupes")
- for dup in dup_reasons:
- self.log.debug("Deleting %d" % dup)
- try:
- Reason.objects.get(id=dup).delete()
- except Exception, ex:
- self.log.error("Failed to delete reason %d: %s" % (dup, ex))
+ try:
+ cursor = connection.cursor()
+ cursor.executemany('update reports_entries_interactions set reason_id=%s where reason_id=%s', batch_update)
+ cursor.executemany('delete from reports_reason where id = %s', dup_reasons)
+ except Exception, ex:
+ self.log.error("Failed to delete reasons: %s" % ex)
self.log.info("Found %d dupes out of %d" % (len(dup_reasons), start_count))
diff --git a/src/lib/Server/Reports/utils.py b/src/lib/Server/Reports/utils.py
new file mode 100755
index 000000000..2ef21e446
--- /dev/null
+++ b/src/lib/Server/Reports/utils.py
@@ -0,0 +1,30 @@
+'''Helper functions for reports'''
+
+class BatchFetch(object):
+ '''Fetch Django objects in smaller batches to save memory'''
+
+ def __init__(self, obj, step=10000):
+ self.count = 0
+ self.block_count = 0
+ self.obj = obj
+ self.data = None
+ self.step = step
+ self.max = obj.count()
+
+ def __iter__(self):
+ return self
+
+ def next(self):
+ '''Return the next object from our array and fetch from the
+ database when needed'''
+ if self.block_count + self.count - self.step == self.max:
+ raise StopIteration
+ if self.block_count == 0 or self.count == self.step:
+ # Without list() this turns into LIMIT 1 OFFSET x queries
+ self.data = list(self.obj.all()[self.block_count: \
+ (self.block_count + self.step)])
+ self.block_count += self.step
+ self.count = 0
+ self.count += 1
+ return self.data[self.count - 1]
+