Coverage for net.py : 82%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
############################################################################# # # A collection of software-defined routers for Surrogate networking needs. # # Copyright (C) 2015-2016 Menlo Security, Inc. # All rights reserved. # #############################################################################
# Name of container bridge device (e.g., 'br0').
"""Returns the system-wide default-route gateway if there is one."""
"""Returns the IP address assigned to the device @dev.""" return netifaces.ifaddresses(dev)[netifaces.AF_INET][0]['addr']
"""Returns the gateway IP for device @dev, or None if device has no gateway IP.""" netifaces.gateways()[netifaces.AF_INET]}
return _get_device_addr(con_bridge_device)
"""Watches for link changes and notifies interested parties.
To keep this simple, we delegate responsibility of determining which links changed status, if any, to the class's client.
This class does not provide any guarantees about notification speed (e.g., under memory pressure) because the underlying kernel link notification mechanism (NETLINK) does not. However, eventual notification is guaranteed by periodic callbacks.
Per netlink (7):
"Only processes with an effective UID of 0 or the CAP_NET_ADMIN capability may send or listen to a netlink multicast group." """ # Format of NETLINK messages coming from the kernel. 'link_observer_sync_interval_ms')
auto_prefix=True) self.SYNC_INTERVAL_MS)
# "NETLINK is a datagram oriented # service." socket.SOCK_DGRAM, socket.NETLINK_ROUTE) # Tornado's IOStream cannot recover from ENOBUFS returned by # NETLINK when the socket receive buffer is full. So disable it # as we do not need it: we need to know that link status changed # but not any further details. self.RTMGRP_IPV6_IFADDR)) # FIXME: IOStream assumes a TCP-like socket, but that's not what # a netlink socket is. Despite the differences, this mostly works # because we do not use any TCP like features (e.g., connect). except Exception: self.log.exception({}, event='netlink-connect-failed') else: # In case we missed events while disconnected.
try: observer(reason) except Exception: self.log.exception({}, event='callback-exception')
# This shouldn't happen, especially since we've disabled ENOBUFS. self.log.error({}, event='netlink-sock-closed') # Attempt to recover by reconnecting. self._netlink_stream = None self._connect()
msg_len, msg_type, msg_flags = struct.unpack(self.HEADER_FMT, hdr)[0:3] self.log.debug({'nr_observers': len(self._observers), 'msg_len': msg_len, 'msg_type': msg_type, 'msg_flags': '%x' % msg_flags}, event='link-event') self._invoke_callbacks('link-event') self._netlink_stream.read_bytes(msg_len - self._hdr_len, self._on_payload)
# We don't care about the payload (and decoding it is more work). self._netlink_stream.read_bytes(self._hdr_len, self._on_header)
if not self._netlink_stream: # Prior connection attempt failed; try again. self._connect() self._invoke_callbacks('periodic')
"""A logical packet router bound to a given Container."""
auto_prefix=True)
"""Parent override."""
def _update_rules(self, add): # Override me---this is where you work the IPTables magic. pass
except Exception: self.log.exception({}, event='start-failed')
except Exception: self.log.exception({}, event='stop-failed')
"""Forwards outbound container traffic through a designated gateway device.
Forwards all packets coming from the container and heading out of the host (i.e., outbound packets) through a designated network device (e.g., a bounce tunnel). This is done solely via Linux's source IP policy routing capabilities. """
# Our self-designated namespace for routing table ids.
proxy_config=None, io_loop=None): # ID Scheme: use the container process pid # # FIXME: despite the use of a namespace id, there is still a risk of # collision with other applications; would be best if the kernel just # gave us a unique id; keyspace is only 32-bits so random id # generation may not work well either. assert(self._con.pid < 2**16) else dev for dev in device_pref_list] self._proxy_host, self._proxy_port = proxy_config.split(':') self._proxy_port = int(self._proxy_port) else:
"""Parent override."""
# FIXME: given the risk of these check_calls blocking (can we truly # guarantee that they do not block indefinitely?), we should really do this # in an Executor thread. #### Bounce gateway setup # If no bounce device is given, then we do not install any bounce # rules, thus causing traffic to be routed via the default route. # # Set up the custom routing table: route packets via the # designated bounce gateway for this container. # Safe to use 'default' since this rule # only applies to forwarded traffic. # # 'default' is equivalent to 0.0.0.0/0 . 'default'] else: # The other end of a tunnel is usually a gateway that knows # how to forward packets, so this is not necessarily fatal. self.log.warning({'detail': 'No gateway IP, is it a tunnel or bridge?', 'dev': gateway_device}, event='device-has-no-gateway') # 'static' ==> ensure that routing daemons do not # remove this entry: see ip-route(8). 'proto', 'static', 'table', self._table_id] # Send container for processing to the custom routing table # @self._table_id (as opposed to the main routing table). # Priority must be higher (kernel gives priority to # lower values) than the main and default tables (32766 # and 32767 respectively). 'priority', '1000', # 'from' is not necessary given that fwmark is unique to # the container, but having this helps with debugging. 'from', self._con_ip, # Route only container-initiated traffic (i.e., packets # with the mark set) through the gateway device (which # is possibly a bounce tunnel). All other packets # should use the default routing table. This is # especially important for resource requests---the # replies must be routed out via the default interface # or they will be lost thus resulting in resource load # failure. 'fwmark', self._table_id, 'table', self._table_id]) # Flush kernel cache to ensure that new rules take effect. # # FIXME: this is a system-wide flush; any way to do this more # selectively?
# Enable source-NAT on the device so that reply packets can find # their way back. # # NOTE on MASQUERADE vs. SNAT: they both do source-address NAT'ing, # but if the former is used, the kernel will take care of IP address # changes on the device, thus making it naturally more link/tunnel # failure tolerant. '-t', 'nat', ipt_action, 'SM-nat-postrouting-bf', '-s', self._con_ip, '-o', gateway_device, '-j', 'MASQUERADE'])
#### Forwarding setup. # # Goal: allow container to initiate connections (e.g., for HTTP) and # send packets through its gateway device (AKA, bounce tunnel). # # There is no need to allow outbound packets on the default gateway # since the Surrogate should never make connections through it (it # will only receive connections from SurrogateRouters). Outgoing # packets related to SurrogateRouter-established connections are # allowed by the RELATED,ESTABLISHED rule we've installed. '-s', self._con_ip, '-p', proto, '-o', gateway_device, '--dport', str(port), '-j', 'ACCEPT'])
# Mark all container-initiated traffic so that we can very precisely # route that traffic and only that traffic through the gateway # device. # # ASSUMPTION: We do not expect Surrogate-initiated connections to # create RELATED connections. Therefore, we needn't leverage # connection tracking state---it suffices to mark based on # destination port. ipt_action, 'SM-mangle-prerouting', '-s', self._con_ip, '-p', proto, '--dport', str(port), '-j', 'MARK', '--set-mark', self._table_id]) except subprocess.CalledProcessError as e: self.log.exception({'cmd': e.cmd, 'errcode': e.returncode}, event='rule-update-error')
"""Returns interface stats. Works with interface aliases too.
Interface aliases share the stats of the parent interface.""" # psutil.net_if_stats() does not include device aliases. except Exception: return None
except Exception: return None
"""Returns True iff @dev is up.
Works with device aliases as well (e.g., eth0:0).""" except Exception: return False
# Note that this could be a transient situation. self.log.error({'usable_devices': usable_devices, 'pref_list': self._pref_list}, event='no-gateway-device') return # No change. 'dns_hosts_by_device') 'new_gateway': new_dev, 'new_gateway_info': (get_if_addrs(new_dev) if new_dev else None), 'new_gateway_stats': (get_if_stats(new_dev) if new_dev else None), 'usable_devices': usable_devices, 'dns_hosts': dns_hosts, 'proxy_host': self._proxy_host, 'proxy_port': self._proxy_port, 'reason': reason}, event='gateway-changed') self._update_rules(self._gateway_device, add=False) (dns_hosts or os_helpers.get_nameservers()), self._con.SURROGATE_DNS_HOSTS, {('udp', 53), ('tcp', 53)}, io_loop=self._io_loop) self._helper_routers['proxy'] = VirtualIPForwarder(self._con, [self._proxy_host], [self._con.SURROGATE_PROXY_IP], {('tcp', self._proxy_port)}, io_loop=self._io_loop)
'container_ip': self._con_ip, 'table_id': self._table_id, 'forwarded_ports': self._forwarded_ports}, event='started') # Preferred gateway may not be available on start, so watch for link # events and check gateways again.
"""DNATs packets headed for virtual IPs to a set of real hosts.
The number of real hosts may be smaller than the number of virtual IPs, so we use round-robin assignment of virtual-to-real IPs to ensure that all virtual IPs are mapped to a real host.
Real hosts may be given as DNS names or IPs (or a mix of both). DNS names are periodically resolved to IP addresses and translation mappings are updated if there are any changes.
To ensure packets get through our forwarding firewall, we make forwarding exceptions for destination ports. """
# Interval at which to check for real host IP address changes. 'virt_ip_check_interval_secs')
"""Initializer.
|real_hosts| can be a collection of FQDNs or IP addresses (or a mix). |virtual_ips| must be a collection of IP addresses. |fw_ports| must be a set of destination ports to which we will allow packets to be forwarded. """
"""Parent override."""
ipt_action, 'SM-nat-prerouting-virtip', '-s', con_ip, '-d', orig_dst_ip, '-j', 'DNAT', '--to-destination', new_dst_ip]) # Ensure that requests are forwarded even if the target IP is # in the statically initialized blocked set. ipt_action, 'SM-filter-fw-block-exemptdyn', '-s', con_ip, '-d', new_dst_ip, '-p', proto, '--dport', str(port), '-j', 'ACCEPT']) except subprocess.CalledProcessError as e: self.log.exception({'cmd': e.cmd, 'errcode': e.returncode}, event='rule-update-error')
# Can't do much but retry with the hope that at least one host # eventually resolves. self.log.error({'hosts': self._real_hosts, 'ips': new_real_ips}, event='total-resolve-failure') return self.log.warning({'ips': new_real_ips, 'hosts': self._real_hosts}, event='partial-resolve-failure') # No change, do nothing. return for i in xrange(len(self._virtual_ips))} 'real_ips': self._real_ips, 'ip_map': self._ip_map, 'check_interval': self.IP_CHECK_INTERVAL_SECS, 'fw_ports': self._fw_ports}, event='mapping-updated')
def _on_check_for_ip_changes(self): """Reconfigure in response to domain-to-IP mapping changes.
This periodically monitors DNS hostnames and updates our DNAT tables if the corresponding IP addresses have changed.""" # Don't perform dns resolution if host is a valid ipv4 address else: # host needs to be resolved dns_start = monotonic() response = yield self._resolver.resolve(host, 0, socket.AF_INET) if (monotonic() - dns_start) > 1: self.log.error({'host':host, 'time': monotonic() - dns_start}, event='slow-resolution') # ThreadedResolver returns a list of (family, address) tuples # e.g. [(socket.AF_INET, ('1.2.3.4', 80))] new_real_ips.add(response[0][1][0]) except Exception as e: # Host resolution most likely failed. self.log.exception({'error': e}, event='change-check-failed') except Exception as e: self.log.exception({'error': e}, event='change-check-failed') except Exception as e: self.log.exception({'error': e}, event='change-check-failed') self._on_check_for_ip_changes)
# Create the initial set of IP mappings, and keep checking for # host-to-IP mapping changes.
except Exception as e: self.log.exception({'error': e}, event='resolver-close-failed')
"""Firewall for container packets arriving at the host."""
"""Parent override."""
'-s', self._con_ip, '-p', proto, '--dport', str(port), '-j', 'ACCEPT']) except subprocess.CalledProcessError as e: self.log.exception({'cmd': e.cmd, 'errcode': e.returncode}, event='rule-update-error')
'input_ports': self._input_ports}, event='started')
"""Removes all chains after clearing and removing references to them.""" # First clear all chains and remove all refs to them. # Must remove all entries before removing the chain. stderr=subprocess.STDOUT) # No such chain: expected to happen on clean environments. # A custom chain cannot be removed/reset while there are references # to it. chain], stderr=subprocess.STDOUT) # The chains are now unref'd and thus safe to delete. # Now remove the chain. stderr=subprocess.STDOUT) # No such chain: expected to happen on clean environments.
"""Creates all chains and then initializes them.
Invokes the chain-specific init function for each chain."""
# Allow inbound and outbound packets for related and established connections. '-m', 'state', '--state', 'RELATED,ESTABLISHED', direction, con_bridge_device, '-j', 'ACCEPT'])
# By default, drop everything else coming from the container, # regardless of the root chain default INPUT policy action (drop or # accept). '-i', con_bridge_device, '-j', 'SM-input-drop'])
# Allow related/established connections to and from anywhere: this is needed # to enable communication with SurrogateContainer clients (SurrogateRouters) # on other nodes in the cluster. '-m', 'state', '--state', 'RELATED,ESTABLISHED', direction, con_bridge_device, '-j', 'ACCEPT'])
"""Set up the static forwarding firewall rules.
Dynamic rules (i.e., those that are Surrogate-specific) are added in a separate table.
There are two tables for static rules: blocks go in the block chain (@is_exception_chain == False) and block exceptions go in the block-exceptions chain. """
'never_forward').strip().split(',') event=('init-forwarding-static-%s' % ('block-exceptions' if is_exception_chain else 'block'))) '-m', 'state', '--state', 'NEW'] cmd.extend(['-p', proto]) cmd.extend(['--dport', str(port)]) # Continue processing in the per-Surrogate forward chain. # # Goto semantics: processing will resume in the parent chain upon # return from the target chain. cmd.extend(['-g', 'SM-filter-forward']) else:
# By default, do not forward anything coming from the container # network. Per container forwarding rules are expected to override this # in whitelist fashion. '-i', con_bridge_device, '-j', 'SM-forward-drop'])
'-j', 'DROP']) '-j', 'LOG', '--log-uid', '--log-prefix', chain.title() + ':'])
# Rather than install rules in the root IPTables chains (e.g., INPUT, # FORWARD, etc.) we install our rules into SM-owned custom chains for better # isolation and clarity. These custom chains are referenced by their # corresponding root chain and get created upon SM startup and removed on SM # shutdown. # # The format here is: (table name, root chain, custom chain, init function or None). # # These are inserted at the top of the table in list order: i.e., later # entries take precedence in matching. ('filter', None, 'SM-input-drop', _init_drop_chain), ('filter', None, 'SM-forward-drop', _init_drop_chain), ('filter', 'INPUT', 'SM-filter-input', _init_filter_input), ('filter', 'INPUT', 'SM-filter-input-pre', _init_filter_input_pre), ('filter', 'FORWARD', 'SM-filter-forward', _init_filter_forward), # Static block chain. ('filter', 'FORWARD', 'SM-filter-fw-block', functools.partial(_init_filter_forward_firewall, False)), # Static exceptions to the static block chain. ('filter', 'FORWARD', 'SM-filter-fw-block-exempt', functools.partial(_init_filter_forward_firewall, True)), # Dynamic exceptions to the static block chain (e.g., for Surrogate DNS). ('filter', 'FORWARD', 'SM-filter-fw-block-exemptdyn', None), ('filter', 'FORWARD', 'SM-filter-forward-pre', _init_filter_forward_pre), ('mangle', 'PREROUTING', 'SM-mangle-prerouting', None), # Virtual IP to real IP mappings. ('nat', 'PREROUTING', 'SM-nat-prerouting-virtip', None), ('nat', 'POSTROUTING', 'SM-nat-postrouting-bf', None)]
# Enable IP forwarding: the host needs to route container packets. ('net.ipv4.ip_forward', '1'), # Optimization: disable ARP netfilter on the bridge; improves throughput # up to 4x (2Gb/s) according to iperf experiments. ('net.bridge.bridge-nf-call-arptables', '0'), ('net.bridge.bridge-nf-call-ip6tables', '0'), # Layer 3 netfilter on the bridge is still needed to block # inter-container communication, so we must not disable that. ('net.bridge.bridge-nf-call-iptables', '1'), # We don't need IPv6, and it may cause delays (e.g., during DNS lookup). ('net.ipv6.conf.all.disable_ipv6', '1'), ('net.ipv6.conf.default.disable_ipv6', '1'), ('net.ipv6.conf.lo.disable_ipv6', '1') ] # We should fail-stop on errors here, but some sysctls are not # available within our Docker test environment, thus we keep going. # This should not happen in production! stderr=subprocess.STDOUT) except Exception as e: log.error({'key': key, 'value': value, 'error': e}, event='init-sysctl-failure')
"""Create the container bridge with device name @dev.
Bridge will be assigned MAC address @hw_addr.""" except Exception: # No such bridge: expected in clean environments. pass except Exception: # No such bridge: expected in clean environments. pass # Only one bridge in the network, so STP is not needed. # Tried this when debugging container networking problems. Doesn't # appear to be needed. #brctl setageing br0 3600 #brctl setmaxage br0 3600 'broadcast', '10.3.255.255', 'promisc', 'hw', 'ether', hw_addr, 'up'])
global con_bridge_device
def fini(): _destroy_custom_chains() |