summary refs log tree commit diff stats
path: root/vendor/github.com/mssola/user_agent/bot.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/mssola/user_agent/bot.go')
-rw-r--r-- vendor/github.com/mssola/user_agent/bot.go 14
1 files changed, 8 insertions, 6 deletions
diff --git a/vendor/github.com/mssola/user_agent/bot.go b/vendor/github.com/mssola/user_agent/bot.go
index efcab9253..a6222d17f 100644
--- a/vendor/github.com/mssola/user_agent/bot.go
+++ b/vendor/github.com/mssola/user_agent/bot.go
@@ -1,4 +1,4 @@
-// Copyright (C) 2014 Miquel Sabaté Solà <mikisabate@gmail.com>
+// Copyright (C) 2014-2017 Miquel Sabaté Solà <mikisabate@gmail.com>
// This file is licensed under the MIT license.
// See the LICENSE file.
@@ -9,6 +9,8 @@ import (
"strings"
)
+var botFromSiteRegexp = regexp.MustCompile("http://.+\\.\\w+")
+
// Get the name of the bot from the website that may be in the given comment. If
// there is no website in the comment, then an empty string is returned.
func getFromSite(comment []string) string {
@@ -23,8 +25,7 @@ func getFromSite(comment []string) string {
}
// Pick the site.
- re := regexp.MustCompile("http://.+\\.\\w+")
- results := re.FindStringSubmatch(comment[idx])
+ results := botFromSiteRegexp.FindStringSubmatch(comment[idx])
if len(results) == 1 {
// If it's a simple comment, just return the name of the site.
if idx == 0 {
@@ -74,6 +75,8 @@ func (p *UserAgent) fixOther(sections []section) {
}
}
+var botRegex = regexp.MustCompile("(?i)(bot|crawler|sp(i|y)der|search|worm|fetch|nutch)")
+
// Check if we're dealing with a bot or with some weird browser. If that is the
// case, the receiver will be modified accordingly.
func (p *UserAgent) checkBot(sections []section) {
@@ -82,9 +85,8 @@ func (p *UserAgent) checkBot(sections []section) {
if len(sections) == 1 && sections[0].name != "Mozilla" {
p.mozilla = ""
- // Check whether the name has some suspicious "bot" in his name.
- reg, _ := regexp.Compile("(?i)bot")
- if reg.Match([]byte(sections[0].name)) {
+ // Check whether the name contains a suspicious keyword such as "bot" or "crawler".
+ if botRegex.Match([]byte(sections[0].name)) {
p.setSimple(sections[0].name, "", true)
return
}