2017-03-26 19:50:51 +00:00
package tun2
import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"expvar"
	"fmt"
	"html"
	"io"
	"io/ioutil"
	"math/rand"
	"net"
	"net/http"
	"os"
	"sync"
	"time"

	"github.com/Xe/ln"
	failure "github.com/dgryski/go-failure"
	"github.com/mtneug/pkg/ulid"
	cmap "github.com/streamrail/concurrent-map"
	"github.com/xtaci/smux"
)
2017-04-28 22:23:26 +00:00
// Error values
var (
	// ErrNoSuchBackend is logged by RoundTrip when no usable backend is
	// registered for the requested host.
	ErrNoSuchBackend = errors.New("tun2: there is no such backend")

	// ErrAuthMismatch is returned by backend authentication when the token
	// lacks the "connect" scope or the token's user differs from the route's
	// user.
	ErrAuthMismatch = errors.New("tun2: authentication doesn't match database records")

	// ErrCantRemoveWhatDoesntExist is logged by removeConn when the pool entry
	// for a connection's domain cannot be used for removal.
	ErrCantRemoveWhatDoesntExist = errors.New("tun2: this connection does not exist, cannot remove it")
)
2017-10-03 20:20:23 +00:00
// gen502Page creates the page that is shown when a backend is not connected to
// a given route.
//
// The returned response has status 502, an HTML body naming the requested host
// and the request's X-Request-Id, and Close set to true. The response gets its
// own header map: only Content-Type and (when present) X-Request-Id are set, so
// the incoming request's headers are neither modified nor echoed back.
func gen502Page(req *http.Request) *http.Response {
	const template = `<html><head><title>no backends connected</title></head><body><h1>no backends connected</h1><p>Please ensure a backend is running for ${HOST}. This is request ID ${REQ_ID}.</p></body></html>`

	reqID := req.Header.Get("X-Request-Id")

	// Host and X-Request-Id are client-controlled, so they are HTML-escaped
	// before being placed into the page.
	resbody := []byte(os.Expand(template, func(in string) string {
		switch in {
		case "HOST":
			return html.EscapeString(req.Host)
		case "REQ_ID":
			return html.EscapeString(reqID)
		}

		return "<unknown>"
	}))

	// Build a fresh header map instead of aliasing req.Header; reusing the
	// request's map would mutate the request and leak its headers (cookies,
	// authorization, ...) into the response.
	reshdr := http.Header{}
	reshdr.Set("Content-Type", "text/html; charset=utf-8")
	if reqID != "" {
		reshdr.Set("X-Request-Id", reqID)
	}

	resp := &http.Response{
		Status:        fmt.Sprintf("%d Bad Gateway", http.StatusBadGateway),
		StatusCode:    http.StatusBadGateway,
		Body:          ioutil.NopCloser(bytes.NewBuffer(resbody)),
		Proto:         req.Proto,
		ProtoMajor:    req.ProtoMajor,
		ProtoMinor:    req.ProtoMinor,
		Header:        reshdr,
		ContentLength: int64(len(resbody)),
		Close:         true,
		Request:       req,
	}

	return resp
}
2017-04-28 22:23:26 +00:00
// ServerConfig ...
2017-03-26 19:50:51 +00:00
type ServerConfig struct {
SmuxConf * smux . Config
Storage Storage
}
2017-04-28 22:23:26 +00:00
// Storage is the small slice of a persistence layer that tun2's Server relies
// on when authenticating backends.
type Storage interface {
	// HasToken looks up token and reports the user it belongs to together
	// with the scopes granted to it.
	HasToken(token string) (user string, scopes []string, err error)

	// HasRoute looks up domain and reports the user that owns the route.
	HasRoute(domain string) (user string, err error)
}
2017-04-28 22:23:26 +00:00
// Server routes frontend HTTP traffic to backend TCP traffic.
2017-03-26 19:50:51 +00:00
type Server struct {
2017-10-04 06:43:31 +00:00
cfg * ServerConfig
ctx context . Context
cancel context . CancelFunc
2017-03-26 19:50:51 +00:00
connlock sync . Mutex
conns map [ net . Conn ] * Connection
2017-03-27 04:56:54 +00:00
domains cmap . ConcurrentMap
2017-03-26 19:50:51 +00:00
}
2017-04-28 22:23:26 +00:00
// NewServer creates a new Server instance with a given config, acquiring all
// relevant resources.
2017-03-26 19:50:51 +00:00
func NewServer ( cfg * ServerConfig ) ( * Server , error ) {
if cfg == nil {
return nil , errors . New ( "tun2: config must be specified" )
}
if cfg . SmuxConf == nil {
cfg . SmuxConf = smux . DefaultConfig ( )
2017-10-04 06:43:31 +00:00
cfg . SmuxConf . KeepAliveInterval = time . Second
cfg . SmuxConf . KeepAliveTimeout = 15 * time . Second
2017-03-26 19:50:51 +00:00
}
2017-10-04 06:43:31 +00:00
ctx , cancel := context . WithCancel ( context . Background ( ) )
2017-04-05 22:54:58 +00:00
2017-03-26 19:50:51 +00:00
server := & Server {
cfg : cfg ,
conns : map [ net . Conn ] * Connection { } ,
2017-03-27 04:56:54 +00:00
domains : cmap . New ( ) ,
2017-10-04 06:43:31 +00:00
ctx : ctx ,
cancel : cancel ,
2017-03-26 19:50:51 +00:00
}
2017-10-04 06:43:31 +00:00
go server . phiDetectionLoop ( ctx )
2017-03-26 19:50:51 +00:00
return server , nil
}
2017-10-04 06:43:31 +00:00
// Close cancels the Server's context, which stops its background tasks and
// releases any caller blocked in Wait.
func (s *Server) Close() {
	s.cancel()
}
// Wait blocks until the server context is cancelled.
func (s *Server) Wait() {
	// A plain receive is all that is needed: the Done channel is closed on
	// cancellation, so there is nothing to loop or select over.
	<-s.ctx.Done()
}
2017-10-03 20:20:23 +00:00
// Listen passes this Server a given net.Listener to accept backend connections.
func ( s * Server ) Listen ( l net . Listener , isKCP bool ) {
2017-10-04 17:17:07 +00:00
ctx := s . ctx
2017-09-30 16:47:47 +00:00
2017-10-04 06:43:31 +00:00
f := ln . F {
"listener_addr" : l . Addr ( ) ,
"listener_network" : l . Addr ( ) . Network ( ) ,
}
2017-10-03 20:20:23 +00:00
for {
2017-10-04 17:17:07 +00:00
select {
case <- ctx . Done ( ) :
return
default :
}
2017-10-03 20:20:23 +00:00
conn , err := l . Accept ( )
if err != nil {
2017-10-04 06:43:31 +00:00
ln . Error ( ctx , err , f , ln . Action ( "accept connection" ) )
2017-09-30 16:47:47 +00:00
continue
}
2017-10-04 06:43:31 +00:00
ln . Log ( ctx , f , ln . Action ( "new backend client connected" ) , ln . F {
"conn_addr" : conn . RemoteAddr ( ) ,
"conn_network" : conn . RemoteAddr ( ) . Network ( ) ,
2017-09-30 16:47:47 +00:00
} )
2017-10-03 20:20:23 +00:00
go s . HandleConn ( conn , isKCP )
2017-09-30 16:47:47 +00:00
}
}
2017-10-04 06:43:31 +00:00
// phiDetectionLoop is an infinite loop that will run the [phi accrual failure detector]
// for each of the backends connected to the Server. This is fairly experimental and
// may be removed.
//
// [phi accrual failure detector]: https://dspace.jaist.ac.jp/dspace/handle/10119/4784
func ( s * Server ) phiDetectionLoop ( ctx context . Context ) {
t := time . NewTicker ( 5 * time . Second )
defer t . Stop ( )
2017-03-26 20:15:42 +00:00
2017-10-04 06:43:31 +00:00
for {
select {
case <- ctx . Done ( ) :
return
case <- t . C :
now := time . Now ( )
2017-03-26 19:50:51 +00:00
2017-10-04 06:43:31 +00:00
s . connlock . Lock ( )
for _ , c := range s . conns {
failureChance := c . detector . Phi ( now )
const thresh = 0.9 // the threshold for phi failure detection causing logs
2017-03-26 20:14:05 +00:00
2017-10-04 06:43:31 +00:00
if failureChance > thresh {
ln . Log ( ctx , c , ln . Action ( "phi failure detection" ) , ln . F {
"value" : failureChance ,
"threshold" : thresh ,
} )
2017-03-26 19:50:51 +00:00
}
}
2017-10-04 06:43:31 +00:00
s . connlock . Unlock ( )
}
2017-03-26 19:50:51 +00:00
}
2017-10-04 06:43:31 +00:00
}
2017-03-26 19:50:51 +00:00
2017-10-04 06:43:31 +00:00
// backendAuthv1 runs a simple backend authentication check. It expects the
// client to write a json-encoded instance of Auth. This is then checked
// for token validity and domain matching.
//
// This returns the user that was authenticated and the domain they identified
// with.
func ( s * Server ) backendAuthv1 ( ctx context . Context , st io . Reader ) ( string , * Auth , error ) {
f := ln . F {
"action" : "backend authentication" ,
"backend_auth_version" : 1 ,
}
2017-03-26 20:14:05 +00:00
2017-10-04 06:43:31 +00:00
f [ "stage" ] = "json decoding"
2017-03-26 19:50:51 +00:00
2017-10-04 06:43:31 +00:00
d := json . NewDecoder ( st )
var auth Auth
err := d . Decode ( & auth )
if err != nil {
ln . Error ( ctx , err , f )
return "" , nil , err
}
2017-03-26 20:23:52 +00:00
2017-10-04 07:26:43 +00:00
f [ "auth_domain" ] = auth . Domain
2017-10-04 06:43:31 +00:00
f [ "stage" ] = "checking domain"
2017-03-26 19:50:51 +00:00
2017-10-04 06:43:31 +00:00
routeUser , err := s . cfg . Storage . HasRoute ( auth . Domain )
if err != nil {
ln . Error ( ctx , err , f )
return "" , nil , err
2017-03-26 19:50:51 +00:00
}
2017-10-04 06:43:31 +00:00
f [ "route_user" ] = routeUser
f [ "stage" ] = "checking token"
2017-04-05 21:31:15 +00:00
2017-10-04 06:43:31 +00:00
tokenUser , scopes , err := s . cfg . Storage . HasToken ( auth . Token )
if err != nil {
ln . Error ( ctx , err , f )
return "" , nil , err
}
2017-04-05 21:31:15 +00:00
2017-10-04 06:43:31 +00:00
f [ "token_user" ] = tokenUser
f [ "stage" ] = "checking token scopes"
2017-04-05 21:31:15 +00:00
2017-10-04 06:43:31 +00:00
ok := false
for _ , sc := range scopes {
if sc == "connect" {
ok = true
break
2017-03-27 05:19:43 +00:00
}
2017-10-04 06:43:31 +00:00
}
if ! ok {
ln . Error ( ctx , ErrAuthMismatch , f )
return "" , nil , ErrAuthMismatch
}
f [ "stage" ] = "user verification"
if routeUser != tokenUser {
ln . Error ( ctx , ErrAuthMismatch , f )
return "" , nil , ErrAuthMismatch
}
2017-03-26 22:14:13 +00:00
2017-10-04 06:43:31 +00:00
return routeUser , & auth , nil
2017-03-27 05:19:43 +00:00
}
2017-03-26 23:16:39 +00:00
2017-04-28 22:23:26 +00:00
// HandleConn starts up the needed mechanisms to relay HTTP traffic to/from
// the currently connected backend.
2017-03-26 19:50:51 +00:00
func ( s * Server ) HandleConn ( c net . Conn , isKCP bool ) {
2017-04-28 22:23:26 +00:00
defer c . Close ( )
2017-03-26 20:38:05 +00:00
ctx , cancel := context . WithCancel ( context . Background ( ) )
defer cancel ( )
2017-10-03 20:20:23 +00:00
f := ln . F {
"local" : c . LocalAddr ( ) . String ( ) ,
"remote" : c . RemoteAddr ( ) . String ( ) ,
}
2017-03-26 19:50:51 +00:00
session , err := smux . Server ( c , s . cfg . SmuxConf )
if err != nil {
2017-10-03 20:20:23 +00:00
ln . Error ( ctx , err , f , ln . Action ( "establish server side of smux" ) )
2017-03-26 19:50:51 +00:00
return
}
2017-04-28 22:23:26 +00:00
defer session . Close ( )
2017-03-26 19:50:51 +00:00
controlStream , err := session . OpenStream ( )
if err != nil {
2017-10-03 20:20:23 +00:00
ln . Error ( ctx , err , f , ln . Action ( "opening control stream" ) )
2017-03-26 19:50:51 +00:00
return
}
2017-04-28 22:23:26 +00:00
defer controlStream . Close ( )
2017-03-26 19:50:51 +00:00
2017-10-04 06:43:31 +00:00
user , auth , err := s . backendAuthv1 ( ctx , controlStream )
2017-03-26 19:50:51 +00:00
if err != nil {
return
}
connection := & Connection {
2017-04-05 21:31:15 +00:00
id : ulid . New ( ) . String ( ) ,
conn : c ,
isKCP : isKCP ,
session : session ,
2017-10-04 06:43:31 +00:00
user : user ,
2017-04-05 21:31:15 +00:00
domain : auth . Domain ,
2017-09-30 17:33:19 +00:00
cf : cancel ,
2017-04-05 21:40:37 +00:00
detector : failure . New ( 15 , 1 ) ,
2017-04-06 04:44:12 +00:00
Auth : auth ,
2017-03-26 19:50:51 +00:00
}
2017-10-07 16:41:48 +00:00
connection . counter = expvar . NewInt ( "http.backend." + connection . id + ".hits" )
2017-03-26 19:50:51 +00:00
2017-09-30 17:42:09 +00:00
defer func ( ) {
if r := recover ( ) ; r != nil {
2017-10-01 13:28:13 +00:00
ln . Log ( ctx , connection , ln . F { "action" : "connection handler panic" , "err" : r } )
2017-09-30 17:42:09 +00:00
}
} ( )
2017-10-03 20:20:23 +00:00
ln . Log ( ctx , connection , ln . Action ( "backend successfully connected" ) )
2017-03-26 20:18:58 +00:00
2017-10-04 06:43:31 +00:00
s . addConn ( ctx , connection )
2017-03-27 04:56:54 +00:00
2017-10-03 20:20:23 +00:00
connection . usable = true // XXX set this to true once health checks pass?
2017-03-26 20:38:05 +00:00
2017-04-05 21:31:15 +00:00
ticker := time . NewTicker ( 5 * time . Second )
defer ticker . Stop ( )
2017-04-05 21:48:08 +00:00
for {
select {
case <- ticker . C :
err := connection . Ping ( )
if err != nil {
2017-09-30 17:33:19 +00:00
connection . cancel ( )
2017-03-27 04:56:54 +00:00
}
2017-10-04 17:17:07 +00:00
// case <-s.ctx.Done():
// ln.Log(ctx, connection, ln.Action("server context finished"))
// s.removeConn(ctx, connection)
// connection.Close()
2017-10-04 06:43:31 +00:00
2017-10-04 17:17:07 +00:00
// return
2017-04-05 21:48:08 +00:00
case <- ctx . Done ( ) :
2017-10-04 17:17:07 +00:00
ln . Log ( ctx , connection , ln . Action ( "client context finished" ) )
2017-10-04 06:43:31 +00:00
s . removeConn ( ctx , connection )
2017-04-05 22:04:02 +00:00
connection . Close ( )
2017-03-26 20:38:05 +00:00
2017-04-05 22:04:02 +00:00
return
}
}
}
2017-03-26 20:38:05 +00:00
2017-10-04 06:43:31 +00:00
// addConn adds a connection to the pool of backend connections.
//
// The connection is recorded in s.conns under its net.Conn and appended to the
// list stored in s.domains for connection.domain. The whole update runs under
// connlock so that handlers registering backends concurrently cannot overwrite
// each other's changes to the same domain's list.
//
// ctx is currently unused.
func (s *Server) addConn(ctx context.Context, connection *Connection) {
	s.connlock.Lock()
	defer s.connlock.Unlock()

	s.conns[connection.conn] = connection

	var existing []*Connection
	if val, ok := s.domains.Get(connection.domain); ok {
		// A stored value of an unexpected type is dropped; it is replaced by
		// the Set below.
		existing, _ = val.([]*Connection)
	}

	// Store a fresh slice rather than appending in place: the previous slice
	// may still be held by a reader, and an in-place append could write into
	// a backing array that reader shares.
	updated := make([]*Connection, 0, len(existing)+1)
	updated = append(updated, existing...)
	updated = append(updated, connection)

	s.domains.Set(connection.domain, updated)
}
// removeConn removes a connection from pool of backend connections.
func ( s * Server ) removeConn ( ctx context . Context , connection * Connection ) {
2017-04-05 22:04:02 +00:00
s . connlock . Lock ( )
delete ( s . conns , connection . conn )
s . connlock . Unlock ( )
2017-03-26 20:38:05 +00:00
2017-04-06 04:44:12 +00:00
auth := connection . Auth
2017-04-05 22:04:02 +00:00
var conns [ ] * Connection
2017-03-26 20:38:05 +00:00
2017-04-05 22:04:02 +00:00
val , ok := s . domains . Get ( auth . Domain )
if ok {
conns , ok = val . ( [ ] * Connection )
if ! ok {
2017-10-03 20:20:23 +00:00
ln . Error ( ctx , ErrCantRemoveWhatDoesntExist , connection , ln . Action ( "looking up for disconnect removal" ) )
2017-04-05 21:48:08 +00:00
return
}
2017-03-26 20:38:05 +00:00
}
2017-04-05 22:04:02 +00:00
for i , cntn := range conns {
if cntn . id == connection . id {
conns [ i ] = conns [ len ( conns ) - 1 ]
conns = conns [ : len ( conns ) - 1 ]
}
}
if len ( conns ) != 0 {
s . domains . Set ( auth . Domain , conns )
} else {
s . domains . Remove ( auth . Domain )
}
2017-09-30 18:08:41 +00:00
}
2017-04-28 22:23:26 +00:00
// RoundTrip sends a HTTP request to a backend and then returns its response.
2017-03-26 19:50:51 +00:00
func ( s * Server ) RoundTrip ( req * http . Request ) ( * http . Response , error ) {
2017-03-27 04:56:54 +00:00
var conns [ ] * Connection
2017-10-01 13:28:13 +00:00
ctx := req . Context ( )
2017-03-27 04:56:54 +00:00
2017-12-15 19:14:45 +00:00
f := ln . F {
2018-01-17 05:50:32 +00:00
"req_remote" : req . RemoteAddr ,
"req_host" : req . Host ,
"req_uri" : req . RequestURI ,
"req_method" : req . Method ,
"req_content_length" : req . ContentLength ,
2017-12-15 19:14:45 +00:00
}
2017-03-27 04:56:54 +00:00
val , ok := s . domains . Get ( req . Host )
if ok {
conns , ok = val . ( [ ] * Connection )
2017-03-27 05:03:06 +00:00
if ! ok {
2017-12-15 19:14:45 +00:00
ln . Error ( ctx , ErrNoSuchBackend , f , ln . Action ( "no backend available" ) )
2017-03-27 04:56:54 +00:00
2017-09-30 18:08:41 +00:00
return gen502Page ( req ) , nil
2017-03-27 04:56:54 +00:00
}
2017-03-26 19:50:51 +00:00
}
2017-09-30 17:33:19 +00:00
var goodConns [ ] * Connection
for _ , conn := range conns {
if conn . usable {
goodConns = append ( goodConns , conn )
}
}
if len ( goodConns ) == 0 {
2017-12-15 19:14:45 +00:00
ln . Error ( ctx , ErrNoSuchBackend , f , ln . Action ( "no good backends available" ) )
2017-03-27 05:50:45 +00:00
2017-09-30 18:08:41 +00:00
return gen502Page ( req ) , nil
2017-03-27 05:03:06 +00:00
}
2017-09-30 17:33:19 +00:00
c := goodConns [ rand . Intn ( len ( goodConns ) ) ]
2017-03-26 19:50:51 +00:00
2017-04-06 04:44:12 +00:00
resp , err := c . RoundTrip ( req )
2017-03-26 19:50:51 +00:00
if err != nil {
2017-12-15 19:14:45 +00:00
ln . Error ( ctx , err , c , f , ln . Action ( "connection roundtrip" ) )
2017-03-26 20:47:42 +00:00
2017-04-06 04:44:12 +00:00
defer c . cancel ( )
2017-03-26 19:50:51 +00:00
return nil , err
}
2017-12-15 19:14:45 +00:00
ln . Log ( ctx , c , ln . Action ( "http traffic" ) , f , ln . F {
2018-01-17 05:50:32 +00:00
"resp_status_code" : resp . StatusCode ,
"resp_content_length" : resp . ContentLength ,
2017-03-26 22:36:58 +00:00
} )
2017-03-26 19:50:51 +00:00
return resp , nil
}
2017-04-28 22:23:26 +00:00
// Auth is the authentication info the client passes to the server. It is
// exchanged as JSON at the start of a backend connection.
type Auth struct {
	// Token is the credential that is looked up in Storage.
	Token string `json:"token"`
	// Domain is the route the backend wants to serve.
	Domain string `json:"domain"`
}