diff options
Diffstat (limited to 'trunk/etherpad/src/etherpad/statistics/statistics.js')
-rw-r--r-- | trunk/etherpad/src/etherpad/statistics/statistics.js | 1248 |
1 files changed, 1248 insertions, 0 deletions
diff --git a/trunk/etherpad/src/etherpad/statistics/statistics.js b/trunk/etherpad/src/etherpad/statistics/statistics.js new file mode 100644 index 0000000..8174405 --- /dev/null +++ b/trunk/etherpad/src/etherpad/statistics/statistics.js @@ -0,0 +1,1248 @@ +/** + * Copyright 2009 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS-IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import("dateutils.noon"); +import("execution"); +import("exceptionutils"); +import("fastJSON"); +import("fileutils.fileLineIterator"); +import("jsutils.*"); +import("sqlbase.sqlobj"); + +import("etherpad.log"); + +jimport("net.appjet.oui.GenericLoggerUtils"); +jimport("net.appjet.oui.LoggableFromJson"); +jimport("net.appjet.oui.FilterWrangler"); +jimport("java.lang.System.out.println"); +jimport("net.appjet.common.util.ExpiringMapping"); + +var millisInDay = 86400*1000; + +function _stats() { + if (! appjet.cache.statistics) { + appjet.cache.statistics = {}; + } + return appjet.cache.statistics; +} + +function onStartup() { + execution.initTaskThreadPool("statistics", 1); + _scheduleNextDailyUpdate(); + + onReset(); +} + +function _info(m) { + log.info({type: 'statistics', message: m}); +} + +function _warn(m) { + log.info({type: 'statistics', message: m}); +} + +function _statData() { + return _stats().stats; +} + +function getAllStatNames() { + return keys(_statData()); +} + +function getStatData(statName) { + return _statData()[statName]; +} + +function _setStatData(statName, data) { + _statData()[statName] = data; +} + +function liveSnapshot(stat) { + var statObject; + if (typeof(stat) == 'string') { + // "stat" is the stat name. + statObject = getStatData(stat); + } else if (typeof(stat) == 'object') { + statObject = stat; + } else { + return; + } + return _callFunction(statObject.snapshot_f, + statObject.name, statObject.options, statObject.data); +} + +// ------------------------------------------------------------------ +// stats processing +// ------------------------------------------------------------------ + +// some useful constants +var LIVE = 'live'; +var HIST = 'historical'; +var HITS = 'hits'; +var UNIQ = 'uniques'; +var VALS = 'values'; +var HGRM = 'histogram'; + +// helpers + +function _date(d) { + return new Date(d); +} + +function _saveStat(day, name, value) { + var timestamp = Math.floor(day.valueOf() / 1000); + _info({statistic: name, + timestamp: timestamp, + value: value}); + try { + sqlobj.insert('statistics', { + name: name, + timestamp: timestamp, + value: fastJSON.stringify(value) + }); + } catch (e) { + var msg; + try { + msg = e.getMessage(); + } catch (e2) { + try { + msg = e.toSource(); + } catch (e3) { + msg = "(none)"; + } + } + _warn("failed to save stat "+name+": "+msg); + } +} + +function _convertScalaTopValuesToJs(topValues) { + var totalValue = topValues._1(); + var countsMap = topValues._2(); + countsObj = {}; + countsMap.foreach(scalaF1(function(pair) { countsObj[pair._1()] = pair._2(); })); + return {total: totalValue, counts: countsObj}; +} + +function _fakeMap() { + var map = {} + return { + get: function(k) { return map[k]; }, + put: function(k, v) { map[k] = v; }, + remove: function(k) { delete map[k]; } + } +} + +function _withinSecondsOf(numSeconds, t1, t2) { + return (t1 > t2-numSeconds*1000) && (t1 < t2+numSeconds*1000); +} + +function _callFunction(functionName, arg1, arg2, etc) { + var f = this[functionName]; + var args = Array.prototype.slice.call(arguments, 1); + return f.apply(this, args); +} + +// trackers and other init functions + +function _hitTracker(trackerType, timescaleType) { + var className; + switch (trackerType) { + case HITS: className = "BucketedLastHits"; break; + case UNIQ: className = "BucketedUniques"; break; + case VALS: className = "BucketedValueCounts"; break; + case HGRM: className = "BucketedLastHitsHistogram"; break; + } + var tracker; + switch (timescaleType) { + case LIVE: + tracker = new net.appjet.oui[className](24*60*60*1000); + break; + case HIST: + // timescale just needs to be longer than a day. + tracker = new net.appjet.oui[className](365*24*60*60*1000, true); + break; + } + + var conversionData = { + total_f: "count", + history_f: "history", + latest_f: "latest", + }; + switch (trackerType) { + case HITS: case UNIQ: + conversionData.conversionFunction = + function(x) { return x; } // no conversion necessary. + break; + case VALS: + conversionData.conversionFunction = _convertScalaTopValuesToJs + break; + case HGRM: + conversionData.conversionFunction = + function(hFunc) { return function(pct) { return hFunc.apply(pct); } } + break; + } + + + return { + tracker: tracker, + conversionData: conversionData, + hit: function(d, n1, n2) { + d = _date(d); + if (n2 === undefined) { + this.tracker.hit(d, n1); + } else { + this.tracker.hit(d, n1, n2); + } + }, + get total() { + return this.conversionData.conversionFunction(this.tracker[this.conversionData.total_f]()); + }, + history: function(bucketsPerSample, numSamples) { + var scalaArray = this.tracker[this.conversionData.history_f](bucketsPerSample, numSamples); + var jsArray = []; + for (var i = 0; i < scalaArray.length(); ++i) { + jsArray.push(this.conversionData.conversionFunction(scalaArray.apply(i))); + } + return jsArray; + }, + latest: function(bucketsPerSample) { + return this.conversionData.conversionFunction(this.tracker[this.conversionData.latest_f](bucketsPerSample)); + } + } +} + +function _initCount(statName, options, timescaleType) { + return _hitTracker(HITS, timescaleType); +} +function _initUniques(statName, options, timescaleType) { + return _hitTracker(UNIQ, timescaleType); +} +function _initTopValues(statName, options, timescaleType) { + return _hitTracker(VALS, timescaleType); +} +function _initHistogram(statName, options, timescaleType) { + return _hitTracker(HGRM, timescaleType); +} + +function _initLatencies(statName, options, type) { + var hits = _initTopValues(statName, options, type); + var latencies = _initTopValues(statName, options, type); + + return { + hit: function(d, value, latency) { + hits.hit(d, value); + latencies.hit(d, value, latency); + }, + hits: hits, + latencies: latencies + } +} + +function _initDisconnectTracker(statName, options, timescaleType) { + return { + map: (timescaleType == LIVE ? new ExpiringMapping(60*1000) : _fakeMap()), + counter: _initCount(statName, options, timescaleType), + uniques: _initUniques(statName, options, timescaleType), + isLive: timescaleType == LIVE + } +} + +// update functions + +function _updateCount(statName, options, logName, data, logObject) { + // println("update count: "+statName+" on log "+logName+", with data: "+data.toSource()+" with log entry: "+logObject.toSource()); + if (options.filter == null || options.filter(logObject)) { + data.hit(logObject.date, 1); + } +} + +function _updateSum(statName, options, logName, data, logObject) { + // println("update sum: "+statName+" on log "+logName+", with data: "+data.toSource()+" with log entry: "+logObject.toSource()); + if (options.filter == null || options.filter(logObject)) { + data.hit(logObject.date, Math.round(Number(logObject[options.fieldName]))); + } +} + +function _updateUniquenessCount(statName, options, logName, data, logObject) { + // println("update uniqueness: "+statName+" on log "+logName+", with data: "+data.toSource()+" with log entry: "+logObject.toSource()); + if (options.filter == null || options.filter(logObject)) { + var value = logObject[options.fieldName]; + if (value === undefined) { return; } + data.hit(logObject.date, value); + } +} + +function _updateTopValues(statName, options, logName, data, logObject) { + // println("update topvalues: "+statName+" on log "+logName+", with data: "+data.toSource()+" with log entry: "+logObject.toSource()); + + if (options.filter == null || options.filter(logObject)) { + var value = logObject[options.fieldName]; + if (value === undefined) { return; } + if (options.canonicalizer) { + value = options.canonicalizer(value); + } + data.hit(logObject.date, value); + } +} + +function _updateLatencies(statName, options, logName, data, logObject) { + // println("update latencies: "+statName+" on log "+logName+", with data: "+data.toSource()+" with log entry: "+logObject.toSource()); + + if (options.filter == null || options.filter(logObject)) { + var value = logObject[options.fieldName]; + var latency = logObject[options.latencyFieldName]; + if (value === undefined) { return; } + data.hit(logObject.date, value, latency); + } +} + +function _updateDisconnectTracker(statName, options, logName, data, logObject) { + if (logName == "frontend/padevents" && logObject.type != "userleave") { + // we only care about userleaves from the padevents log. + return; + } + + var [evtPrefix, otherPrefix] = + (logName == "frontend/padevents" ? ["l-", "d-"] : ["d-", "l-"]); + var dateLong = logObject.date; + var userId = logObject.session; + + var lastOtherEvent = data.map.get(otherPrefix+userId); + if (lastOtherEvent != null && _withinSecondsOf(60, dateLong, lastOtherEvent.date)) { + data.counter.hit(logObject.date, 1); + data.uniques.hit(logObject.date, userId); + data.map.remove(otherPrefix+userId); + if (data.isLive) { + log.custom("avoidable_disconnects", + {userId: userId, + errorMessage: lastOtherEvent.errorMessage || logObject.errorMessage}); + } + } else { + data.map.put(evtPrefix+userId, {date: dateLong, message: logObject.errorMessage}); + } +} + +// snapshot functions + +function _lazySnapshot(snapshot) { + var total; + var history = {}; + var latest = {}; + return { + get total() { + if (total === undefined) { + total = snapshot.total; + } + return total; + }, + history: function(bucketsPerSample, numSamples) { + if (history[""+bucketsPerSample+":"+numSamples] === undefined) { + history[""+bucketsPerSample+":"+numSamples] = snapshot.history(bucketsPerSample, numSamples); + } + return history[""+bucketsPerSample+":"+numSamples]; + }, + latest: function(bucketsPerSample) { + if (latest[""+bucketsPerSample] === undefined) { + latest[""+bucketsPerSample] = snapshot.latest(bucketsPerSample); + } + return latest[""+bucketsPerSample]; + } + } +} + +function _snapshotTotal(statName, options, data) { + return _lazySnapshot(data); +} + +function _convertTopValue(topValue) { + var counts = topValue.counts; + var sortedValues = keys(counts).sort(function(x, y) { + return counts[y] - counts[x]; + }).map(function(key) { + return { value: key, count: counts[key] }; + }); + return {count: topValue.total, topValues: sortedValues.slice(0, 50) }; +} + +function _snapshotTopValues(statName, options, data) { + var convertedData = {}; + + return _lazySnapshot({ + get total() { + return _convertTopValue(data.total); + }, + history: function(bucketsPerSample, numSamples) { + return data.history(bucketsPerSample, numSamples).map(_convertTopValue); + }, + latest: function(bucketsPerSample) { + return _convertTopValue(data.latest(bucketsPerSample)); + } + }); +} + +function _snapshotLatencies(statName, options, data) { + // convert the hits + total latencies into a topValues-style data object. + var hits = data.hits; + var totalLatencies = data.latencies; + + function convertCountsObjects(latencyCounts, hitCounts) { + var mergedCounts = {} + keys(latencyCounts.counts).forEach(function(value) { + mergedCounts[value] = + Math.round(latencyCounts.counts[value] / (hitCounts.counts[value] || 1)); + }); + return {counts: mergedCounts, total: latencyCounts.total / (hitCounts.total || 1)}; + } + + // ...and then convert that object into a snapshot. + return _snapshotTopValues(statName, options, { + get total() { + return convertCountsObjects(totalLatencies.total, hits.total); + }, + history: function(bucketsPerSample, numSamples) { + return mergeArrays( + convertCountsObjects, + totalLatencies.history(bucketsPerSample, numSamples), + hits.history(bucketsPerSample, numSamples)); + }, + latest: function(bucketsPerSample) { + return convertCountsObjects(totalLatencies.latest(bucketsPerSample), hits.latest(bucketsPerSample)); + } + }); +} + +function _snapshotDisconnectTracker(statName, options, data) { + var topValues = {}; + var counts = data.counter; + var uniques = data.uniques; + function topValue(counts, uniques) { + return { + count: counts, + topValues: [{value: "total_disconnects", count: counts}, + {value: "disconnected_userids", count: uniques}] + } + } + return _lazySnapshot({ + get total() { + return topValue(counts.total, uniques.total); + }, + history: function(bucketsPerSample, numSamples) { + return mergeArrays( + topValue, + counts.history(bucketsPerSample, numSamples), + uniques.history(bucketsPerSample, numSamples)); + }, + latest: function(bucketsPerSample) { + return topValue(counts.latest(bucketsPerSample), uniques.latest(bucketsPerSample)); + } + }); +} + +function _generateLogInterestMap(statNames) { + var interests = {}; + statNames.forEach(function(statName) { + var logs = getStatData(statName).logNames; + logs.forEach(function(logName) { + if (! interests[logName]) { + interests[logName] = {}; + } + interests[logName][statName] = true; + }); + }); + return interests; +} + + +// ------------------------------------------------------------------ +// stat generators +// ------------------------------------------------------------------ + +// statSpec has these properties +// name +// dataType - line, topvalues, histogram, etc. +// logNames +// init_f +// update_f +// snapshot_f +// options - object containing any additional data, passed in to to the various functions. + +// init_f gets (statName, options, "live"|"historical") +// update_f gets (statName, options, logName, data, logObject) +// snapshot_f gets (statName, options, data) +function addStat(statSpec) { + var statName = statSpec.name; + if (! getStatData(statName)) { + var initialData = + _callFunction(statSpec.init_f, statName, statSpec.options, LIVE); + _setStatData(statName, { + data: initialData, + }); + } + + var s = getStatData(statName); + + s.options = statSpec.options; + s.name = statName; + s.logNames = statSpec.logNames; + s.dataType = statSpec.dataType; + s.historicalDays = ("historicalDays" in statSpec ? statSpec.historicalDays : 1); + + s.init_f = statSpec.init_f; + s.update_f = statSpec.update_f; + s.snapshot_f = statSpec.snapshot_f; + + function registerInterest(logName) { + if (! _stats().logNamesToInterestedStatNames[logName]) { + _stats().logNamesToInterestedStatNames[logName] = {}; + } + _stats().logNamesToInterestedStatNames[logName][statName] = true; + } + statSpec.logNames.forEach(registerInterest); +} + +function addSimpleCount(statName, historicalDays, logName, filter) { + addStat({ + name: statName, + dataType: "line", + logNames: (logName instanceof Array ? logName : [logName]), + init_f: "_initCount", + update_f: "_updateCount", + snapshot_f: "_snapshotTotal", + options: { filter: filter }, + historicalDays: historicalDays || 1 + }); +} + +function addSimpleSum(statName, historicalDays, logName, filter, fieldName) { + addStat({ + name: statName, + dataType: "line", + logNames: (logName instanceof Array ? logName : [logName]), + init_f: "_initCount", + update_f: "_updateSum", + snapshot_f: "_snapshotTotal", + options: { filter: filter, fieldName: fieldName }, + historicalDays: historicalDays || 1 + }); +} + +function addUniquenessCount(statName, historicalDays, logName, filter, fieldName) { + addStat({ + name: statName, + dataType: "line", + logNames: (logName instanceof Array ? logName : [logName]), + init_f: "_initUniques", + update_f: "_updateUniquenessCount", + snapshot_f: "_snapshotTotal", + options: { filter: filter, fieldName: fieldName }, + historicalDays: historicalDays || 1 + }) +} + +function addTopValuesStat(statName, historicalDays, logName, filter, fieldName, canonicalizer) { + addStat({ + name: statName, + dataType: "topValues", + logNames: (logName instanceof Array ? logName : [logName]), + init_f: "_initTopValues", + update_f: "_updateTopValues", + snapshot_f: "_snapshotTopValues", + options: { filter: filter, fieldName: fieldName, canonicalizer: canonicalizer }, + historicalDays: historicalDays || 1 + }); +} + +function addLatenciesStat(statName, historicalDays, logName, filter, fieldName, latencyFieldName) { + addStat({ + name: statName, + dataType: "topValues", + logNames: (logName instanceof Array ? logName : [logName]), + init_f: "_initLatencies", + update_f: "_updateLatencies", + snapshot_f: "_snapshotLatencies", + options: { filter: filter, fieldName: fieldName, latencyFieldName: latencyFieldName }, + historicalDays: historicalDays || 1 + }); +} + + +// RETURNING USERS + +function _initReturningUsers(statName, options, timescaleType) { + return { cache: {}, uniques: _initUniques(statName, options, timescaleType) }; +} + +function _returningUsersUserId(logObject) { + if (logObject.type == "userjoin") { + return logObject.userId; + } +} + +function _returningUsersUserCreationDate(userId) { + var record = sqlobj.selectSingle('pad_cookie_userids', {id: userId}); + if (record) { + return record.createdDate.getTime(); + } +} + +function _returningUsersAccountId(logObject) { + return logObject.proAccountId; +} + +function _returningUsersAccountCreationDate(accountId) { + var record = sqlobj.selectSingle('pro_accounts', {id: accountId}); + if (record) { + return record.createdDate.getTime(); + } +} + + +function _updateReturningUsers(statName, options, logName, data, logObject) { + var userId = (options.useProAccountId ? _returningUsersAccountId(logObject) : _returningUsersUserId(logObject)); + if (! userId) { return; } + var date = logObject.date; + if (! data.cache[""+userId]) { + var creationTime = (options.useProAccountId ? _returningUsersAccountCreationDate(userId) : _returningUsersUserCreationDate(userId)); + if (! creationTime) { return; } // hm. weird case. + data.cache[""+userId] = creationTime; + } + if (data.cache[""+userId] < date - options.registeredNDaysAgo*24*60*60*1000) { + data.uniques.hit(logObject.date, ""+userId); + } +} +function _snapshotReturningUsers(statName, options, data) { + return _lazySnapshot(data.uniques); +} + +function addReturningUserStat(statName, pastNDays, registeredNDaysAgo) { + addStat({ + name: statName, + dataType: "line", + logNames: ["frontend/padevents"], + init_f: "_initReturningUsers", + update_f: "_updateReturningUsers", + snapshot_f: "_snapshotReturningUsers", + options: { registeredNDaysAgo: registeredNDaysAgo }, + historicalDays: pastNDays + }); +} + +function addReturningProAccountStat(statName, pastNDays, registeredNDaysAgo) { + addStat({ + name: statName, + dataType: "line", + logNames: ["frontend/request"], + init_f: "_initReturningUsers", + update_f: "_updateReturningUsers", + snapshot_f: "_snapshotReturningUsers", + options: { registeredNDaysAgo: registeredNDaysAgo, useProAccountId: true }, + historicalDays: pastNDays + }); +} + + +function addDisconnectStat() { + addStat({ + name: "streaming_disconnects", + dataType: "topValues", + logNames: ["frontend/padevents", "frontend/reconnect", "frontend/disconnected_autopost"], + init_f: "_initDisconnectTracker", + update_f: "_updateDisconnectTracker", + snapshot_f: "_snapshotDisconnectTracker", + options: {} + }); +} + +// PAD STARTUP LATENCY +function _initPadStartupLatency(statName, options, timescaleType) { + return { + recentGets: (timescaleType == LIVE ? new ExpiringMapping(60*1000) : _fakeMap()), + latencies: _initHistogram(statName, options, timescaleType), + } +} + +function _updatePadStartupLatency(statName, options, logName, data, logObject) { + var session = logObject.session; + if (logName == "frontend/request") { + if (! ('padId' in logObject)) { return; } + var padId = logObject.padId; + if (! data.recentGets.get(session)) { + data.recentGets.put(session, {}); + } + data.recentGets.get(session)[padId] = logObject.date; + } + if (logName == "frontend/padevents") { + if (logObject.type != 'userjoin') { return; } + if (! data.recentGets.get(session)) { return; } + var padId = logObject.padId; + var getTime = data.recentGets.get(session)[padId]; + if (! getTime) { return; } + delete data.recentGets.get(session)[padId]; + var latency = logObject.date - getTime; + if (latency < 60*1000) { + // latencies longer than 60 seconds don't represent data we care about for this stat. + data.latencies.hit(logObject.date, latency); + } + } +} + +function _snapshotPadStartupLatency(statName, options, data) { + var latencies = data.latencies; + function convertHistogram(histogram_f) { + var deciles = {}; + [0, 1, 5, 10, 25, 50, 75, 90, 95, 99, 100].forEach(function(pct) { + deciles[""+pct] = histogram_f(pct); + }); + return deciles; + } + return _lazySnapshot({ + latencies: latencies, + get total() { + return convertHistogram(this.latencies.total); + }, + history: function(bucketsPerSample, numSamples) { + return this.latencies.history(bucketsPerSample, numSamples).map(convertHistogram); + }, + latest: function(bucketsPerSample) { + return convertHistogram(this.latencies.latest(bucketsPerSample)); + } + }); +} + +function addPadStartupLatencyStat() { + addStat({ + name: "pad_startup_times", + dataType: "histogram", + logNames: ["frontend/padevents", "frontend/request"], + init_f: "_initPadStartupLatency", + update_f: "_updatePadStartupLatency", + snapshot_f: "_snapshotPadStartupLatency", + options: {} + }); +} + + +function _initSampleTracker(statName, options, timescaleType) { + return { + samples: Array(1440), // 1 hour at 1 sample/minute + nextSample: 0, + numSamples: 0 + } +} + +function _updateSampleTracker(statName, options, logName, data, logObject) { + if (options.filter && ! options.filter(logObject)) { + return; + } + if (options.fieldName && ! (options.fieldName in logObject)) { + return; + } + data.samples[data.nextSample] = (options.fieldName ? logObject[fieldName] : logObject); + data.nextSample++; + data.nextSample %= data.samples.length; + data.numSamples = Math.min(data.samples.length, data.numSamples+1); +} + +function _snapshotSampleTracker(statName, options, data) { + function indexTransform(i) { + return (data.nextSample-data.numSamples+i + data.samples.length) % data.samples.length; + } + var merge_f = options.mergeFunction || function(a, b) { return a+b; } + var process_f = options.processFunction || function(a) { return a; } + function mergeValues(values) { + if (values.length <= 1) { return values[0]; } + var t = values[0]; + for (var i = 1; i < values.length; ++i) { + t = merge_f(values[i], t); + } + return t; + } + return _lazySnapshot({ + get total() { + var t = []; + for (var i = 0; i < data.numSamples; ++i) { + t.push(data.samples[indexTransform(i)]); + } + return process_f(mergeValues(t), t.length); + }, + history: function(bucketsPerSample, numSamples) { + var allSamples = []; + for (var i = data.numSamples-1; i >= Math.max(0, data.numSamples - bucketsPerSample*numSamples); --i) { + allSamples.push(data.samples[indexTransform(i)]); + } + var out = []; + for (var i = 0; i < numSamples && i*bucketsPerSample < allSamples.length; ++i) { + var subArray = []; + for (var j = 0; j < bucketsPerSample && i*bucketsPerSample+j < allSamples.length; ++j) { + subArray.push(allSamples[i*bucketsPerSample+j]); + } + out.push(process_f(mergeValues(subArray), subArray.length)); + } + return out.reverse(); + }, + latest: function(bucketsPerSample) { + var t = []; + for (var i = data.numSamples-1; i >= Math.max(0, data.numSamples-bucketsPerSample); --i) { + t.push(data.samples[indexTransform(i)]); + } + return process_f(mergeValues(t), t.length); + } + }); +} + +function addSampleTracker(statName, logName, filter, fieldName, mergeFunction, processFunction) { + addStat({ + name: statName, + dataType: "histogram", + logNames: (logName instanceof Array ? logName : [logName]), + init_f: "_initSampleTracker", + update_f: "_updateSampleTracker", + snapshot_f: "_snapshotSampleTracker", + options: { filter: filter, fieldName: fieldName, + mergeFunction: mergeFunction, processFunction: processFunction } + }); +} + +function addCometLatencySampleTracker(statName) { + addSampleTracker(statName, "backend/server-events", typeMatcher("streaming-message-latencies"), null, + function(a, b) { + var ret = {}; + ["count", "p50", "p90", "p95", "p99", "max"].forEach(function(key) { + ret[key] = (Number(a[key]) || 0) + (Number(b[key]) || 0); + }); + return ret; + }, + function(v, count) { + if (count == 0) { + return { + "50": 0, "90": 0, "95": 0, "99": 0, "100": 0 + } + } + var ret = {count: v.count}; + ["p50", "p90", "p95", "p99", "max"].forEach(function(key) { + ret[key] = (Number(v[key]) || 0)/(Number(count) || 1); + }); + return {"50": Math.round(ret.p50/1000), + "90": Math.round(ret.p90/1000), + "95": Math.round(ret.p95/1000), + "99": Math.round(ret.p99/1000), + "100": Math.round(ret.max/1000)}; + }); +} + +function addConnectionTypeSampleTracker(statName) { + var caredAboutFields = ["streaming", "longpolling", "shortpolling", "(unconnected)"]; + + addSampleTracker(statName, "backend/server-events", typeMatcher("streaming-connection-count"), null, + function(a, b) { + var ret = {}; + caredAboutFields.forEach(function(k) { + ret[k] = (Number(a[k]) || 0) + (Number(b[k]) || 0); + }); + return ret; + }, + function(v, count) { + if (count == 0) { + return _convertTopValue({total: 0, counts: {}}); + } + var values = {}; + var total = 0; + caredAboutFields.forEach(function(k) { + values[k] = Math.round((Number(v[k]) || 0)/count); + total += values[k]; + }); + values["Total"] = total; + return _convertTopValue({ + total: Math.round(total), + counts: values + }); + }); +} + +// helpers for filter functions + +function expectedHostnames() { + var hostPart = appjet.config.listenHost || "localhost"; + if (appjet.config.listenSecureHost != hostPart) { + hostPart = "("+hostPart+"|"+(appjet.config.listenSecureHost || "localhost")+")"; + } + var ports = []; + if (appjet.config.listenPort != 80) { + ports.push(""+appjet.config.listenPort); + } + if (appjet.config.listenSecurePort != 443) { + ports.push(""+appjet.config.listenSecurePort); + } + var portPart = (ports.length > 0 ? ":("+ports.join("|")+")" : ""); + return hostPart + portPart; +} + +function fieldMatcher(fieldName, fieldValue) { + if (fieldValue instanceof RegExp) { + return function(logObject) { + return fieldValue.test(logObject[fieldName]); + } + } else { + return function(logObject) { + return logObject[fieldName] == fieldValue; + } + } +} + +function typeMatcher(type) { + return fieldMatcher("type", type); +} + +function invertMatcher(f) { + return function(logObject) { + return ! f(logObject); + } +} + +function setupStatsCollector() { + var c; + + function unwatchLog(logName) { + GenericLoggerUtils.clearWrangler(logName.split('/')[1], c.wranglers[logName]); + } + function watchLog(logName) { + c.wranglers[logName] = new Packages.net.appjet.oui.LogWrangler({ + tell: function(lpb) { + c.queue.add({logName: logName, json: lpb.json()}); + } + }); + c.wranglers[logName].watch(logName.split('/')[1]); + } + + c = _stats().liveCollector; + if (c) { + c.watchedLogs.forEach(unwatchLog); + delete c.wrangler; + } else { + c = _stats().liveCollector = {}; + } + c.watchedLogs = keys(_stats().logNamesToInterestedStatNames); + c.queue = new java.util.concurrent.ConcurrentLinkedQueue(); + c.wranglers = {}; + c.watchedLogs.forEach(watchLog); + + if (! c.updateTask || c.updateTask.isDone()) { + c.updateTask = execution.scheduleTask('statistics', "statisticsLiveUpdate", 2000, []); + } +} + +serverhandlers.tasks.statisticsLiveUpdate = function() { + var c = _stats().liveCollector; + try { + while (true) { + var obj = c.queue.poll(); + if (obj != null) { + var statNames = + keys(_stats().logNamesToInterestedStatNames[obj.logName]); + var logObject = fastJSON.parse(obj.json); + statNames.forEach(function(statName) { + var statObject = getStatData(statName); + _callFunction(statObject.update_f, + statName, statObject.options, obj.logName, statObject.data, logObject); + }); + } else { + break; + } + } + } catch (e) { + println("EXCEPTION IN LIVE UPDATE: "+e+" / "+e.fileName+":"+e.lineNumber) + println(exceptionutils.getStackTracePlain(new net.appjet.bodylock.JSRuntimeException(String(e), e.javaException || e.rhinoException))); + } finally { + c.updateTask = execution.scheduleTask('statistics', "statisticsLiveUpdate", 2000, []); + } +} + +function onReset() { + // this gets refilled every reset. + _stats().logNamesToInterestedStatNames = {}; + + // we'll want to keep around the live data, though, so this is conditionally set. + if (! _stats().stats) { + _stats().stats = {}; + } + + addSimpleCount("site_pageviews", 1, "frontend/request", null); + addUniquenessCount("site_unique_ips", 1, "frontend/request", null, "clientAddr"); + + addUniquenessCount("active_user_ids", 1, "frontend/padevents", typeMatcher("userjoin"), "userId"); + addUniquenessCount("active_user_ids_7days", 7, "frontend/padevents", typeMatcher("userjoin"), "userId"); + addUniquenessCount("active_user_ids_30days", 30, "frontend/padevents", typeMatcher("userjoin"), "userId"); + + addUniquenessCount("active_pro_accounts", 1, "frontend/request", invertMatcher(fieldMatcher("proAccountId", undefined)), + "proAccountId"); + addUniquenessCount("active_pro_accounts_7days", 7, "frontend/request", invertMatcher(fieldMatcher("proAccountId", undefined)), + "proAccountId"); + addUniquenessCount("active_pro_accounts_30days", 30, "frontend/request", invertMatcher(fieldMatcher("proAccountId", undefined)), + "proAccountId"); + + + addUniquenessCount("active_pads", 1, "frontend/padevents", typeMatcher("userjoin"), "padId"); + addSimpleCount("new_pads", 1, "frontend/padevents", typeMatcher("newpad")); + + addSimpleCount("chat_messages", 1, "frontend/chat", null); + addUniquenessCount("active_chatters", 1, "frontend/chat", null, "userId"); + + addSimpleCount("exceptions", 1, "frontend/exception", null); + + addSimpleCount("eepnet_trial_downloads", 1, "frontend/eepnet_download_info", null); + + addSimpleSum("revenue", 1, "frontend/billing", typeMatcher("purchase-complete"), "dollars") + + var hostRegExp = new RegExp("^https?:\\/\\/([-a-zA-Z0-9]+.)?"+expectedHostnames()+"\\/"); + addTopValuesStat("top_referers", 1, "frontend/request", + invertMatcher(fieldMatcher( + "referer", hostRegExp)), + "referer"); + + addTopValuesStat("paths_404", 1, "frontend/request", fieldMatcher("statusCode", 404), "path"); + addTopValuesStat("paths_500", 1, "frontend/request", fieldMatcher("statusCode", 500), "path"); + addTopValuesStat("paths_exception", 1, "frontend/exception", null, "path"); + + addTopValuesStat("top_exceptions", 1, ["frontend/exception", "backend/exceptions"], + invertMatcher(fieldMatcher("trace", undefined)), + "trace", function(trace) { + var jstrace = trace.split("\n").filter(function(line) { + return /^\tat JS\$.*?\.js:\d+\)$/.test(line); + }); + if (jstrace.length > 3) { + return "JS Exception:\n"+jstrace.slice(0, 10).join("\n").replace(/\t[^\(]*/g, ""); + } + return trace.split("\n").slice(1, 10).join("\n").replace(/\t/g, ""); + }); + + addReturningUserStat("users_1day_returning_7days", 1, 7); + addReturningUserStat("users_7day_returning_7days", 7, 7); + addReturningUserStat("users_30day_returning_7days", 30, 7); + + addReturningUserStat("users_1day_returning_30days", 1, 30); + addReturningUserStat("users_7day_returning_30days", 7, 30); + addReturningUserStat("users_30day_returning_30days", 30, 30); + + addReturningProAccountStat("pro_accounts_1day_returning_7days", 1, 7); + addReturningProAccountStat("pro_accounts_7day_returning_7days", 7, 7); + addReturningProAccountStat("pro_accounts_30day_returning_7days", 30, 7); + + addReturningProAccountStat("pro_accounts_1day_returning_30days", 1, 30); + addReturningProAccountStat("pro_accounts_7day_returning_30days", 7, 30); + addReturningProAccountStat("pro_accounts_30day_returning_30days", 30, 30); + + + addDisconnectStat(); + addTopValuesStat("disconnect_causes", 1, "frontend/avoidable_disconnects", null, "errorMessage"); + + var staticFileRegExp = /^\/static\/|^\/favicon.ico/; + addLatenciesStat("execution_latencies", 1, "backend/latency", + invertMatcher(fieldMatcher('path', staticFileRegExp)), + "path", "time"); + addLatenciesStat("static_file_latencies", 1, "backend/latency", + fieldMatcher('path', staticFileRegExp), + "path", "time"); + + addUniquenessCount("disconnects_with_clientside_errors", 1, + ["frontend/reconnect", "frontend/disconnected_autopost"], + fieldMatcher("hasClientErrors", true), "uniqueId"); + + addTopValuesStat("imports_exports_counts", 1, "frontend/import-export", + typeMatcher("request"), "direction"); + + addPadStartupLatencyStat(); + + addCometLatencySampleTracker("streaming_latencies"); + addConnectionTypeSampleTracker("streaming_connections"); + // TODO: add more stats here. + + setupStatsCollector(); +} + +//---------------------------------------------------------------- +// Log processing +//---------------------------------------------------------------- + +function _whichStats(statNames) { + var whichStats = _statData(); + var logNamesToInterestedStatNames = _stats().logNamesToInterestedStatNames; + + if (statNames) { + whichStats = {}; + statNames.forEach(function(statName) { whichStats[statName] = getStatData(statName) }); + logNamesToInterestedStatNames = _generateLogInterestMap(statNames); + } + + return [whichStats, logNamesToInterestedStatNames]; +} + +function _initStatDataMap(statNames) { + var [whichStats, logNamesToInterestedStatNames] = _whichStats(statNames); + + var statDataMap = {}; + + function initStat(statName, statObject) { + statDataMap[statName] = + _callFunction(statObject.init_f, statName, statObject.options, HIST); + } + eachProperty(whichStats, initStat); + + return statDataMap; +} + +function _saveStats(day, statDataMap, statNames) { + var [whichStats, logNamesToInterestedStatNames] = _whichStats(statNames); + + function saveStat(statName, statObject) { + var value = _callFunction(statObject.snapshot_f, + statName, statObject.options, statDataMap[statName]).total; + if (typeof(value) != 'object') { + value = {value: value}; + } + _saveStat(day, statName, value); + } + eachProperty(whichStats, saveStat); +} + +function _processSingleDayLogs(day, logNamesToInterestedStatNames, statDataMap) { + var iterators = {}; + keys(logNamesToInterestedStatNames).forEach(function(logName) { + var [prefix, logId] = logName.split("/"); + var fileName = log.logFileName(prefix, logId, day); + if (! fileName) { + _info("No such file: "+logName+" on day "+day); + return; + } + iterators[logName] = fileLineIterator(fileName); + }); + + var numIterators = keys(iterators).length; + if (numIterators == 0) { + _info("No logs to process on day "+day); + return; + } + var sortedLogObjects = new java.util.PriorityQueue(numIterators, + new java.util.Comparator({ + compare: function(o1, o2) { return o1.logObject.date - o2.logObject.date } + })); + + function lineToLogObject(logName, json) { + return {logName: logName, logObject: fastJSON.parse(json)}; + } + + // begin by filling the queue with one object from each log. + eachProperty(iterators, function(logName, iterator) { + if (iterator.hasNext) { + sortedLogObjects.add(lineToLogObject(logName, iterator.next)); + } + }); + + // update with all log objects, in date order (enforced by priority queue). + while (! sortedLogObjects.isEmpty()) { + var nextObject = sortedLogObjects.poll(); + var logName = nextObject.logName; + + keys(logNamesToInterestedStatNames[logName]).forEach(function(statName) { + var statObject = getStatData(statName); + _callFunction(statObject.update_f, + statName, statObject.options, logName, statDataMap[statName], nextObject.logObject); + }); + + // get next entry from this log, if there is one. + if (iterators[logName].hasNext) { + sortedLogObjects.add(lineToLogObject(logName, iterators[logName].next)); + } + } +} + +function processStatsForDay(day, statNames, statDataMap) { + var [whichStats, logNamesToInterestedStatNames] = _whichStats(statNames); + + // process the logs, notifying the right statistics updaters. + _processSingleDayLogs(day, logNamesToInterestedStatNames, statDataMap); +} + +//---------------------------------------------------------------- +// Daily update +//---------------------------------------------------------------- +serverhandlers.tasks.statisticsDailyUpdate = function() { +// do nothing for now. + +// dailyUpdate(); +}; + +function _scheduleNextDailyUpdate() { + // Run at 1:11am every day + var now = +(new Date); + var tomorrow = new Date(now + 1000*60*60*24); + tomorrow.setHours(1); + tomorrow.setMinutes(11); + tomorrow.setMilliseconds(111); + log.info("Scheduling next daily statistics update for: "+tomorrow.toString()); + var delay = +tomorrow - (+(new Date)); + execution.scheduleTask("statistics", "statisticsDailyUpdate", delay, []); +} + +function processStatsAsOfDay(date, statNames) { + var latestDay = noon(new Date(date - 1000*60*60*24)); + + _processLogsForNeededDays(latestDay, statNames); +} + +function _processLogsForNeededDays(latestDay, statNames) { + if (! statNames) { + statNames = getAllStatNames(); + } + var statDataMap = _initStatDataMap(statNames); + + var agesToStats = []; + var atLeastOneStat = true; + for (var i = 0; atLeastOneStat; ++i) { + atLeastOneStat = false; + agesToStats[i] = []; + statNames.forEach(function(statName) { + var statData = getStatData(statName); + if (statData.historicalDays > i) { + atLeastOneStat = true; + agesToStats[i].push(statName); + } + }); + } + agesToStats.pop(); + + for (var i = agesToStats.length-1; i >= 0; --i) { + var day = new Date(+latestDay - i*24*60*60*1000); + processStatsForDay(day, agesToStats[i], statDataMap); + } + _saveStats(latestDay, statDataMap, statNames); +} + +function doDailyUpdate(date) { + var now = (date === undefined ? new Date() : date); + var yesterdayNoon = noon(new Date(+now - 1000*60*60*24)); + + _processLogsForNeededDays(yesterdayNoon); +} + +function dailyUpdate() { + try { + doDailyUpdate(); + } catch (ex) { + log.warn("statistics.dailyUpdate() failed: "+ex.toString()); + } finally { + _scheduleNextDailyUpdate(); + } +}
\ No newline at end of file |