Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

############################################################################# 

# 

# Surrogate Manager. 

# 

# Copyright (C) 2015-2017 Menlo Security, Inc. 

# 

############################################################################# 

 

import glob 

import httplib 

import json 

import os 

import os.path 

import psutil 

import stat 

import resource 

import shutil 

import subprocess 

import sys 

 

import tornado.gen 

import tornado.ioloop 

import tornado.web 

 

import net 

import pool 

import update_manager 

from safly.app import SafelyTornadoApp, SafelyApplication 

from safly.config import config 

from safly.container.pool import ContainerNamespacePoolManager 

from safly.logger import set_global_service_name 

 

# Runs at import time, before any handler or application object is created.
# Presumably this tags log records emitted through safly.logger with the
# given service name -- TODO confirm against safly.logger.
set_global_service_name('safeview.sm')

 

def die(*pargs, **kwargs):
    """Write a fatal error message to stderr and terminate the process.

    Each positional argument is converted with ``str``; the pieces are
    joined with single spaces and prefixed with ``FATAL: ``.  Keyword
    arguments are accepted for call-site compatibility but are ignored.

    Raises:
        SystemExit: always, with exit code -1.
    """
    message = "FATAL: " + ' '.join(str(arg) for arg in pargs)
    # sys.stderr.write() produces the same bytes as the Python 2 statement
    # ``print >> sys.stderr, message`` but also works on Python 3, where the
    # chevron form parses as an expression and raises TypeError.
    sys.stderr.write(message + '\n')
    sys.exit(-1)

 

class SMRequestHandler(tornado.web.RequestHandler):
    """Common base class for the request handlers defined in this file."""

    def __init__(self, *args, **kwargs):
        """Forward every argument to tornado, then reset ``custom_error``."""
        super(SMRequestHandler, self).__init__(*args, **kwargs)
        # Not read anywhere in this file; presumably consumed by shared
        # error-rendering code elsewhere -- TODO confirm.
        self.custom_error = False

 

class FreezeRequest(SMRequestHandler):
    """Handler routed at ``/freeze``: asks the pool producer to freeze."""

    def post(self):
        """Call ``app.producer.freeze()``; reply 400 if it returns falsy."""
        frozen = app.producer.freeze()
        if frozen:
            return
        raise tornado.web.HTTPError(httplib.BAD_REQUEST, 'Cannot freeze')

 

class StatusRequest(SMRequestHandler):
    """Handler routed at ``/status``: reports the producer's status."""

    def get(self):
        """Write ``app.producer.get_status()`` to the response as JSON text."""
        status = app.producer.get_status()
        payload = json.dumps(status)
        self.write(payload)

 

class SurrogateKillRequest(SMRequestHandler):
    """Handler routed at ``/kill/<safe_rid>``: kills one local surrogate."""

    # pylint: disable=arguments-differ
    def post(self, safe_rid):
        """Kill the surrogate registered under ``safe_rid``.

        Replies 404 when ``safe_rid`` is not in the producer's local pool,
        and 500 (carrying the exception text) when the kill itself raises.
        """
        try:
            target = app.producer._local_pool[safe_rid]
        except KeyError:
            self.set_status(httplib.NOT_FOUND)
            self.finish('<html><body>Unknown surrogate</body></html>')
            return

        try:
            target.kill('rest-api')
        except Exception as err:
            raise tornado.web.HTTPError(httplib.INTERNAL_SERVER_ERROR,
                                        str(err))

 

class TabDetachRequest(SMRequestHandler):
    """Handler routed at ``/detach/<safe_rid>/<tab_id>``."""

    # pylint: disable=arguments-differ
    @tornado.gen.coroutine
    def post(self, safe_rid, tab_id):
        """Detach ``tab_id`` from the surrogate registered as ``safe_rid``.

        Replies 404 for an unknown surrogate, 404 when the detach raises
        ``KeyError`` (reported as an unknown tab), and 500 carrying the
        exception text for any other failure.
        """
        try:
            target = app.producer._local_pool[safe_rid]
        except KeyError:
            self.set_status(httplib.NOT_FOUND)
            self.finish('<html><body>Unknown surrogate</body></html>')
            return

        try:
            yield target.detach_tab(tab_id)
        except KeyError:
            self.set_status(httplib.NOT_FOUND)
            self.finish('<html><body>Unknown tab</body></html>')
            return
        except Exception as err:
            raise tornado.web.HTTPError(httplib.INTERNAL_SERVER_ERROR,
                                        str(err))

 

class SurrogateStatusRequest(SMRequestHandler):
    """Handler routed at ``/status/<safe_rid>``: details of one surrogate."""

    # pylint: disable=arguments-differ
    @tornado.gen.coroutine
    def get(self, safe_rid):
        """Write the surrogate's details to the response as JSON text.

        Replies 404 when ``safe_rid`` is not in the producer's local pool
        and 500 when fetching the details raises ``IOError``.
        """
        try:
            target = app.producer._local_pool[safe_rid]
        except KeyError:
            self.set_status(httplib.NOT_FOUND)
            self.finish('<html><body>Unknown surrogate</body></html>')
            return

        try:
            info = yield target.get_details()
        except IOError:
            self.set_status(httplib.INTERNAL_SERVER_ERROR)
            failure_page = (
                '<html><body>Communication with surrogate failed</body></html>')
            self.finish(failure_page)
            return

        payload = json.dumps(info)
        self.write(payload)

 

class UnfreezeRequest(SMRequestHandler):
    """Handler routed at ``/unfreeze``: asks the pool producer to unfreeze."""

    def post(self):
        """Call ``app.producer.unfreeze()``; reply 400 if it returns falsy."""
        thawed = app.producer.unfreeze()
        if thawed:
            return
        raise tornado.web.HTTPError(httplib.BAD_REQUEST, 'Cannot unfreeze')

 

class SurrogatePoolApp(SafelyTornadoApp):
    """Application object for the surrogate manager service.

    Registers the REST handlers, prepares the NSS certificate database and
    builds the container-namespace pool plus the surrogate pool producer.
    """

    NAME = 'surrogate_manager'
    URL_PATH = '/'

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base app; real setup is deferred."""
        super(SurrogatePoolApp, self).__init__(*args, **kwargs)
        # All three stay None until pre_fork_init() creates them.
        self.producer = None
        self._ns_pool_man = None
        self._update_manager = None

    def create_tornado_app(self):
        """Return the SafelyApplication carrying this service's routes."""
        required_handlers = [
            (r'/detach/([^/]+)/(.+)', TabDetachRequest),
            (r'/freeze', FreezeRequest),
            (r'/kill/(.+)', SurrogateKillRequest),
            (r'/status', StatusRequest),
            (r'/status/(.+)', SurrogateStatusRequest),
            (r'/unfreeze', UnfreezeRequest),
        ]
        return SafelyApplication(required_handlers, debug=False, gzip=True)

    def initialize_nss_db(self):
        """Initialize NSS certificate db for use by surrogates.

        These are user-installed CAs (presumably configured by an MSIP
        admin) as well as built-in CAs that are in addition to the CAs that
        ship with NSS (libnssckbi.so).

        The db lives in ``<user_installed_cas_path>/nssdb`` and is rebuilt
        from scratch on every call.  Nothing is done when neither CA
        directory exists.  Failing to delete the old db or to create the
        new one terminates the process with status 1; failing to import an
        individual certificate is only logged.
        """
        cas_path = config.get('general', 'user_installed_cas_path')
        builtin_cas_path = config.get('general', 'builtin_cas_path')
        db_path = os.path.join(cas_path, 'nssdb')

        if not os.path.exists(cas_path) and not os.path.exists(builtin_cas_path):
            return

        cas = glob.glob(os.path.join(cas_path, '*.crt'))
        builtin_cas = glob.glob(os.path.join(builtin_cas_path, '*.crt'))
        self.log.info({'cas': cas, 'builtin_cas': builtin_cas},
                      event='cas-load')

        # Always rebuild the db.
        if os.path.exists(db_path):
            try:
                shutil.rmtree(db_path)
            except Exception as e:
                self.log.error({'error': e, 'details': 'ca-delete-failed'},
                               event='unexpected-error')
                # sys.exit rather than the bare ``exit`` helper, which is
                # installed by the site module and is not always available.
                sys.exit(1)

        os.makedirs(db_path)
        try:
            # NOTE(review): this call resolves certutil through PATH while
            # the import below uses /usr/bin/certutil -- confirm intended.
            subprocess.check_call(['certutil', '-N', '--empty-password', '-d',
                                   'sql:%s' % db_path])
        except Exception as e:
            self.log.error({'error': e, 'details': 'ca-db-create-failed'},
                           event='unexpected-error')
            sys.exit(1)

        # Built once so the per-certificate membership test is not a list scan.
        builtin_paths = set(builtin_cas)
        for ca_cert in cas + builtin_cas:
            ca_name = os.path.basename(ca_cert).rsplit('.', 1)[0]
            # Add magic prefix to the name for built-in CAs. The surrogate
            # checks for this when deciding whether this is a known root.
            if ca_cert in builtin_paths:
                ca_name = 'builtin_' + ca_name
            try:
                subprocess.check_call(
                    ['/usr/bin/certutil', '-A', '-n', ca_name, '-t', 'TC',
                     '-i', ca_cert, '-d', 'sql:%s' % db_path])
            except Exception as e:
                # Best effort: one bad certificate must not stop the rest.
                self.log.error({'error': e}, event='ca-import-failed')

    def pre_fork_init(self):
        """Create the update manager, namespace pool and surrogate producer.

        Reads memory limits and pool sizing from the ``surrogate_manager``
        config section, rebuilds the NSS db, starts the update manager,
        initialises container networking and finally starts the producer.
        A producer that fails to start triggers ``_clean_exit(1)``.
        """
        def interp_limit(v):
            # Config values are in KB; negative values pass through unscaled
            # (presumably meaning "no limit" -- TODO confirm).
            return (v * 1024) if v >= 0 else v

        sandboxed_namespaces = set(
            config.get('surrogate_manager', 'sandboxed_namespaces').split(','))
        self.log.info('Sandboxed namespaces are %s.', sandboxed_namespaces)
        # We maintain pools of pre-initialized resources to speed up container
        # launch.

        mem_soft_limit = interp_limit(config.getint(
            'surrogate_manager', 'max_ram_per_surrogate_soft_limit_kb'))
        mem_limit = interp_limit(config.getint(
            'surrogate_manager', 'max_ram_per_surrogate_kb'))
        memsw_limit = interp_limit(config.getint(
            'surrogate_manager', 'max_ram_plus_swap_per_surrogate_kb'))

        # Sample once so the logged stats are exactly the ones compared.
        swap_stats = psutil.swap_memory()
        if swap_stats.total < memsw_limit:
            # Lack of swap is not necessarily an error, since it may be
            # created concurrently with service startup.
            self.log.warning({'stats': swap_stats,
                              'cause': 'swap created concurrently?'},
                             event='insufficient-swap-space')

        self.initialize_nss_db()

        self._update_manager = update_manager.UpdateManager()
        self._update_manager.start()

        net.init(config.get('surrogate_manager', 'container_bridge'))
        ns_init_pool_size = config.getint('surrogate_manager',
                                          'ns_init_pool_size')

        # If calculating the initial size of the namespace pool,
        # estimate the max number of surrogates based on RAM and
        # upsize by 1.2 times
        ns_init_pool_size = ns_init_pool_size or int(
            1.2 * psutil.virtual_memory().total /
            (config.getint('surrogate_manager', 'avg_ram_per_surrogate_kb')
             * 1024))
        max_pool_size = config.getint('surrogate_manager', 'max_pool_size')
        ns_init_pool_size = min(max_pool_size, ns_init_pool_size)
        self._ns_pool_man = ContainerNamespacePoolManager(
            sandboxed_namespaces,
            mem_soft_limit_in_bytes=mem_soft_limit,
            mem_limit_in_bytes=mem_limit,
            memsw_limit_in_bytes=memsw_limit,
            swappiness=config.getint('surrogate_manager', 'swappiness'),
            enable_cgroups=config.getboolean('surrogate_manager',
                                             'enable_cgroups'),
            init_pool_size=ns_init_pool_size)
        self.producer = pool.GlobalSurrogatePoolProducer(self._ns_pool_man)
        try:
            self.producer.start()
        except Exception:
            self.log.exception('pool start failed')
            self._clean_exit(1)
        self._update_manager.producer = self.producer

    def _clean_exit(self, err):
        """Destroy the pools, then hand ``err`` to the base class's exit."""
        if self._ns_pool_man:
            self._ns_pool_man.destroy()
        if self.producer:
            self.producer.destroy()
        # _update_manager is None until pre_fork_init() creates it, so an
        # exit requested before that point must not dereference it.
        if self._update_manager is not None:
            self._update_manager.producer = None
        # FIXME: we really should wait until all Surrogates have entered DEAD
        # state before exiting. Or else we may leave behind stale entries in
        # the global pool (in Redis).
        super(SurrogatePoolApp, self)._clean_exit(err)

 

def _verify_chrome_sandbox():
    """Die unless the setuid ``chrome-sandbox`` helper is in place.

    The check is skipped entirely when the ``surrogate`` config option
    ``enable_setuid_sandbox`` is off.  Otherwise the helper is expected
    next to the file named by ``service.sv_cr_path`` and must carry the
    setuid bit.
    """
    if config.getboolean('surrogate', 'enable_setuid_sandbox'):
        chrome_dir = os.path.dirname(config.get('service', 'sv_cr_path'))
        sandbox_bin = os.path.join(chrome_dir, 'chrome-sandbox')
        if not os.path.exists(sandbox_bin):
            die('missing chrome sandbox at %s' % sandbox_bin)
        mode_bits = os.stat(sandbox_bin).st_mode
        if (mode_bits & stat.S_ISUID) == 0:
            die('chrome-sandbox binary is missing the setuid bit: %s' %
                sandbox_bin)

 

def check_setup():
    """Terminate via ``die`` unless the process environment is acceptable.

    Checks, in order: the process uid is 0, the Chrome sandbox passes
    ``_verify_chrome_sandbox``, and the soft limits for file size and core
    file size do not exceed their allowed maxima.  An unlimited soft limit
    is treated as exceeding any maximum.
    """
    # Only root can create/destroy containers.
    # TODO: Relax this to those with CAP_SYS_ADMIN.
    if os.getuid() != 0:
        die("must be run as root.")
    _verify_chrome_sandbox()
    # Check acceptable limits.  A tuple keeps the check order deterministic.
    max_limits = (
        ('file size', resource.RLIMIT_FSIZE, 1024 * 1024 * 1024),
        ('core file size', resource.RLIMIT_CORE, 0),
    )
    for name, resid, max_limit in max_limits:
        soft_limit = resource.getrlimit(resid)[0]
        # RLIM_INFINITY may be reported as -1, which a plain ``>`` comparison
        # would accept as being below the maximum; test for it explicitly.
        unlimited = soft_limit == resource.RLIM_INFINITY
        if unlimited or soft_limit > max_limit:
            current = 'unlimited' if unlimited else soft_limit
            die('%s limit is too high (max is %d, current is %s)---did you '
                'set the right ulimits?' % (name, max_limit, current))

 

if __name__ == '__main__':
    # Validate the environment first; check_setup() reports problems through
    # die(), which exits the process.
    check_setup()
    # `app` must be a module-level name: the request handlers in this file
    # reach the producer through the global `app`.
    app = SurrogatePoolApp()
    app.run()