// This program reads router log lines (for example
// `2016-05-07T09:07:00.001490+00:00 heroku[router]: at=info ... host="brs.org" service=994ms`)
// from standard input and prints one CSV row per host per minute:
//
//	minute,host,request count,total ms,shortest ms,longest ms
//
// Input is expected in chronological order; a row set is flushed every time
// the minute of the incoming lines changes.
package main

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"log"
	"os"
	"sort"
	"time"
)

// LogLine is the subset of a single log line that the aggregation needs.
type LogLine struct {
	Time     time.Time     // timestamp taken from the first field of the line
	TimeTook time.Duration // value of the service= pair
	Host     string        // value of the host= pair with its surrounding quotes removed
}

// Bucket collects the log lines seen for one host within one minute.
type Bucket struct {
	Time    time.Time // only resolved to the minute
	Entries []*LogLine
}

const (
	// dateFormat is the layout of the timestamp that starts every log line.
	dateFormat = "2006-01-02T15:04:05.999999Z07:00"
	// outDateFormat is the layout used for the first CSV column.
	outDateFormat = "2006-01-02T15:04:05"
	// outDateFormatNoSeconds is outDateFormat without the seconds field.
	outDateFormatNoSeconds = "2006-01-02T15:04"
)

// ParseLogLine extracts the timestamp, the service duration and the host from
// one space-separated log line.
//
// The first field must be a timestamp in dateFormat and the second field is
// ignored; every following field is treated as a key=value pair. Only the
// "service" and "host" keys are used. Fields without an "=" are logged and
// skipped. An error is returned when the timestamp or the service duration
// cannot be parsed.
func ParseLogLine(line []byte) (*LogLine, error) {
	sl := bytes.Split(line, []byte(" "))

	// bytes.Split always yields at least one element, so sl[0] is safe.
	lTime, err := time.Parse(dateFormat, string(sl[0]))
	if err != nil {
		return nil, fmt.Errorf("parsing timestamp %q: %v", sl[0], err)
	}

	ll := &LogLine{Time: lTime}

	for i, section := range sl {
		// Fields 0 and 1 are not key->value pairs; empty fields come from
		// consecutive spaces and carry no information.
		if i < 2 || len(section) == 0 {
			continue
		}

		// Split on the first "=" only so that values which themselves
		// contain "=" (query strings, for instance) stay intact.
		set := bytes.SplitN(section, []byte("="), 2)
		if len(set) != 2 {
			log.Printf("invalid: %q", section)
			continue
		}

		switch string(set[0]) {
		case "service":
			dur, perr := time.ParseDuration(string(set[1]))
			if perr != nil {
				return nil, fmt.Errorf("parsing service time %q: %v", set[1], perr)
			}
			ll.TimeTook = dur
		case "host":
			ll.Host = Shuck(string(set[1]))
		}
	}

	return ll, nil
}

// Shuck removes the first and last character of a string, analogous to
// shucking off the husk of an ear of corn.
//
// It works on bytes, which is what the one-byte quote characters around log
// values need. Strings shorter than two bytes have no husk to remove and
// yield the empty string instead of panicking.
func Shuck(victim string) string {
	if len(victim) < 2 {
		return ""
	}
	return victim[1 : len(victim)-1]
}

func main() {
	if err := run(os.Stdin, os.Stdout); err != nil {
		log.Fatal(err)
	}
}

// run reads log lines from in, groups them per host and per minute, and writes
// the CSV rows for a minute to out as soon as a line from a different minute
// arrives (and once more at end of input). Blank lines are skipped. It returns
// the first parse, read or write error.
func run(in io.Reader, out io.Writer) error {
	scanner := bufio.NewScanner(in)

	// current is the minute that the buckets in ab belong to.
	var current time.Time

	// active buckets, short var name because it's going to be referenced a lot
	ab := map[string]*Bucket{}

	lineNo := 0
	for scanner.Scan() {
		lineNo++
		line := scanner.Bytes()
		if len(bytes.TrimSpace(line)) == 0 {
			continue
		}

		ll, err := ParseLogLine(line)
		if err != nil {
			return fmt.Errorf("line %d: %v", lineNo, err)
		}

		// Compare whole minute-resolution timestamps rather than only the
		// minute-of-hour, and move the window to the minute actually seen,
		// so gaps of any size between lines are handled.
		minute := minuteOf(ll.Time)
		if len(ab) > 0 && !minute.Equal(current) {
			if err := writeBuckets(out, ab); err != nil {
				return err
			}
			ab = map[string]*Bucket{}
		}
		current = minute

		b := ab[ll.Host]
		if b == nil {
			b = &Bucket{Time: minute}
			ab[ll.Host] = b
		}
		b.Entries = append(b.Entries, ll)
	}
	if err := scanner.Err(); err != nil {
		return fmt.Errorf("reading input: %v", err)
	}

	return writeBuckets(out, ab)
}

// minuteOf drops seconds and below from t. The wall-clock fields of t are kept
// and labelled UTC, so formatting the result shows the clock reading that
// appeared in the log line.
func minuteOf(t time.Time) time.Time {
	year, month, day := t.Date()
	hour, minute, _ := t.Clock()
	return time.Date(year, month, day, hour, minute, 0, 0, time.UTC)
}

// sees if the minute and hour field of a == the minute and hour field of b
func cmpTime(a, b time.Time) bool {
	ah, am, _ := a.Clock()
	bh, bm, _ := b.Clock()

	return ah == bh && am == bm
}

// toMS converts a duration to whole milliseconds, truncating toward zero.
func toMS(dur time.Duration) int64 {
	return int64(dur / time.Millisecond)
}

// processBuckets prints one CSV row per host in set to standard output, with
// hosts in ascending order. A write failure is logged, not returned.
func processBuckets(set map[string]*Bucket) {
	if err := writeBuckets(os.Stdout, set); err != nil {
		log.Printf("writing buckets: %v", err)
	}
}

// writeBuckets writes one CSV row per host in set to w, sorted by host name so
// that the output is deterministic despite random map iteration order.
func writeBuckets(w io.Writer, set map[string]*Bucket) error {
	hosts := make([]string, 0, len(set))
	for host := range set {
		hosts = append(hosts, host)
	}
	sort.Strings(hosts)

	for _, host := range hosts {
		bucket := set[host]
		if bucket == nil {
			continue
		}

		total, shortest, longest := bucketStats(bucket.Entries)

		_, err := fmt.Fprintf(w, "%s,%s,%d,%d,%d,%d\n",
			bucket.Time.Format(outDateFormat), host, len(bucket.Entries),
			toMS(total), toMS(shortest), toMS(longest))
		if err != nil {
			return err
		}
	}

	return nil
}

// bucketStats returns the sum, minimum and maximum of TimeTook over entries.
// All three are zero for an empty slice.
func bucketStats(entries []*LogLine) (total, shortest, longest time.Duration) {
	for i, entry := range entries {
		took := entry.TimeTook
		total += took

		// The two checks are independent: the first entry is both the
		// shortest and the longest seen so far.
		if i == 0 || took < shortest {
			shortest = took
		}
		if i == 0 || took > longest {
			longest = took
		}
	}

	return total, shortest, longest
}