summary refs log tree commit diff
path: root/roms/ipxe/src/net/ipv4.c
diff options
context:
space:
mode:
Diffstat (limited to 'roms/ipxe/src/net/ipv4.c')
-rw-r--r-- roms/ipxe/src/net/ipv4.c 636
1 files changed, 636 insertions, 0 deletions
diff --git a/roms/ipxe/src/net/ipv4.c b/roms/ipxe/src/net/ipv4.c
new file mode 100644
index 000000000..b2d51ada4
--- /dev/null
+++ b/roms/ipxe/src/net/ipv4.c
@@ -0,0 +1,636 @@
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <ipxe/list.h>
+#include <ipxe/in.h>
+#include <ipxe/arp.h>
+#include <ipxe/if_ether.h>
+#include <ipxe/iobuf.h>
+#include <ipxe/netdevice.h>
+#include <ipxe/ip.h>
+#include <ipxe/tcpip.h>
+#include <ipxe/dhcp.h>
+#include <ipxe/settings.h>
+
+/** @file
+ *
+ * IPv4 protocol
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+/* Unique IP datagram identification number
+ *
+ * Pre-incremented by ipv4_tx() for every datagram it builds; being a
+ * 16-bit counter it wraps around after 65536 datagrams.
+ */
+static uint16_t next_ident = 0;
+
+/** List of IPv4 miniroutes
+ *
+ * Searched front-to-back by ipv4_route().  add_ipv4_miniroute() puts
+ * gateway-less routes at the head and routes with a gateway at the
+ * tail.
+ */
+struct list_head ipv4_miniroutes = LIST_HEAD_INIT ( ipv4_miniroutes );
+
+/** List of fragment reassembly buffers
+ *
+ * One entry per fragment series currently being reassembled by
+ * ipv4_reassemble().
+ */
+static LIST_HEAD ( frag_buffers );
+
+/**
+ * Create an IPv4 minirouting table entry and insert it into the table
+ *
+ * @v netdev		Network device
+ * @v address		IPv4 address
+ * @v netmask		Subnet mask
+ * @v gateway		Gateway address (zero if there is none)
+ * @ret miniroute	Routing table entry, or NULL if allocation failed
+ *
+ * A route that has a gateway is appended to the table, while a route
+ * without one is inserted at the head of the table.
+ */
+static struct ipv4_miniroute * __malloc
+add_ipv4_miniroute ( struct net_device *netdev, struct in_addr address,
+		     struct in_addr netmask, struct in_addr gateway ) {
+	struct ipv4_miniroute *route;
+	int via_gateway = ( gateway.s_addr != 0 );
+
+	/* Each address gets its own DBG() call because inet_ntoa()
+	 * formats into a single shared static buffer.
+	 */
+	DBG ( "IPv4 add %s", inet_ntoa ( address ) );
+	DBG ( "/%s ", inet_ntoa ( netmask ) );
+	if ( via_gateway )
+		DBG ( "gw %s ", inet_ntoa ( gateway ) );
+	DBG ( "via %s\n", netdev->name );
+
+	/* Allocate the table entry */
+	route = malloc ( sizeof ( *route ) );
+	if ( route == NULL ) {
+		DBG ( "IPv4 could not add miniroute\n" );
+		return NULL;
+	}
+
+	/* Fill in the routing information; netdev_get() is paired
+	 * with the netdev_put() in del_ipv4_miniroute().
+	 */
+	route->netdev = netdev_get ( netdev );
+	route->address = address;
+	route->netmask = netmask;
+	route->gateway = gateway;
+
+	/* Gateway routes go last, directly-connected routes first */
+	if ( via_gateway ) {
+		list_add_tail ( &route->list, &ipv4_miniroutes );
+	} else {
+		list_add ( &route->list, &ipv4_miniroutes );
+	}
+
+	return route;
+}
+
+/**
+ * Remove an IPv4 minirouting table entry and release it
+ *
+ * @v miniroute		Routing table entry
+ *
+ * Hands the network device back via netdev_put() (the counterpart of
+ * the netdev_get() in add_ipv4_miniroute()), unlinks the entry from
+ * the routing table and frees it.
+ */
+static void del_ipv4_miniroute ( struct ipv4_miniroute *miniroute ) {
+	struct net_device *netdev = miniroute->netdev;
+
+	/* Separate DBG() calls: inet_ntoa() reuses one static buffer */
+	DBG ( "IPv4 del %s", inet_ntoa ( miniroute->address ) );
+	DBG ( "/%s ", inet_ntoa ( miniroute->netmask ) );
+	if ( miniroute->gateway.s_addr != 0 )
+		DBG ( "gw %s ", inet_ntoa ( miniroute->gateway ) );
+	DBG ( "via %s\n", netdev->name );
+
+	netdev_put ( netdev );
+	list_del ( &miniroute->list );
+	free ( miniroute );
+}
+
+/**
+ * Select a route for an IPv4 destination
+ *
+ * @v dest		Final destination address
+ * @ret dest		Next hop destination address
+ * @ret miniroute	Routing table entry to use, or NULL if no route
+ *
+ * The table is searched in order and the first usable entry wins.  An
+ * entry is usable if its network device is open and either the
+ * destination lies within the entry's subnet or the entry has a
+ * gateway.  When the chosen entry reaches the destination only via
+ * its gateway, @c dest is overwritten with the gateway address.
+ */
+static struct ipv4_miniroute * ipv4_route ( struct in_addr *dest ) {
+	struct ipv4_miniroute *route;
+
+	/* The broadcast address is never routed */
+	if ( dest->s_addr == INADDR_BROADCAST )
+		return NULL;
+
+	list_for_each_entry ( route, &ipv4_miniroutes, list ) {
+
+		/* Routes through closed devices are unusable */
+		if ( ! netdev_is_open ( route->netdev ) )
+			continue;
+
+		/* Destination within this route's subnet: deliver
+		 * directly, leaving the next hop untouched.
+		 */
+		if ( ( ( dest->s_addr ^ route->address.s_addr ) &
+		       route->netmask.s_addr ) == 0 )
+			return route;
+
+		/* Destination elsewhere: usable only via a gateway,
+		 * which then becomes the next hop.
+		 */
+		if ( route->gateway.s_addr ) {
+			*dest = route->gateway;
+			return route;
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * Free fragment buffer
+ *
+ * @v fragbuf		Fragment buffer
+ *
+ * Stops the reassembly timer, unlinks the buffer from the list of
+ * fragment buffers and frees the tracking structure, so that neither
+ * the timer nor the list is left pointing at freed memory.
+ *
+ * The reassembly I/O buffer (fragbuf->frag_iob) is deliberately not
+ * freed: on successful reassembly it is handed on to the caller.  Code
+ * that abandons a series must free it separately.
+ *
+ * The buffer must already be on the list of fragment buffers and have
+ * an initialised timer.
+ */
+static void free_fragbuf ( struct frag_buffer *fragbuf ) {
+	stop_timer ( &fragbuf->frag_timer );
+	list_del ( &fragbuf->list );
+	free ( fragbuf );
+}
+
+/**
+ * Fragment reassembly timeout
+ *
+ * @v timer		Retry timer
+ * @v over		If asserted, the timer is greater than @c MAX_TIMEOUT
+ *
+ * The reassembly timer is started once with a fixed timeout and is
+ * never restarted, so any expiry means that the series did not
+ * complete in time.  The partially reassembled data and the fragment
+ * buffer are therefore released regardless of @c over.
+ */
+static void ipv4_frag_expired ( struct retry_timer *timer,
+				int over __unused ) {
+	struct frag_buffer *fragbuf =
+		container_of ( timer, struct frag_buffer, frag_timer );
+
+	DBG ( "Fragment reassembly timeout\n" );
+
+	/* Nobody will collect the partial datagram any more */
+	free_iob ( fragbuf->frag_iob );
+	free_fragbuf ( fragbuf );
+}
+
+/**
+ * Fragment reassembler
+ *
+ * @v iobuf		I/O buffer, fragment of the datagram
+ * @ret frag_iob	Reassembled packet, or NULL
+ *
+ * The fragment's IPv4 header must be at the start of @c iobuf.  It is
+ * parsed and stripped here, so only payload is accumulated and a
+ * reassembled packet is returned without an IPv4 header.
+ *
+ * Ownership of @c iobuf always passes to this function: the fragment
+ * is freed whether it was accepted or rejected, so the caller must
+ * not touch it (or pointers into it) afterwards.
+ *
+ * Only in-order arrival is supported.  A fragment that matches a
+ * series in progress but does not start exactly where the accumulated
+ * data ends, or that would overflow the reassembly buffer, causes the
+ * whole series to be discarded.
+ */
+static struct io_buffer * ipv4_reassemble ( struct io_buffer * iobuf ) {
+	struct iphdr *iphdr = iobuf->data;
+	struct frag_buffer *fragbuf;
+	struct io_buffer *frag_iob;
+	struct in_addr src;
+	uint16_t ident;
+	uint16_t frags;
+	size_t hdrlen;
+	size_t offset;
+	size_t len;
+	int more_frags;
+
+	/* Refuse to parse a header that is not completely present */
+	if ( iob_len ( iobuf ) < sizeof ( *iphdr ) )
+		goto discard;
+	hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
+	if ( ( hdrlen < sizeof ( *iphdr ) ) ||
+	     ( hdrlen > iob_len ( iobuf ) ) )
+		goto discard;
+
+	/* Take a copy of every header field needed below, since the
+	 * fragment (and with it the header) is freed before the last
+	 * of them is used.  The flags/offset word is in network byte
+	 * order and the offset counts units of 8 bytes.
+	 */
+	src = iphdr->src;
+	ident = iphdr->ident;
+	frags = ntohs ( iphdr->frags );
+	offset = ( ( frags & IP_MASK_OFFSET ) * 8 );
+	more_frags = ( ( frags & IP_MASK_MOREFRAGS ) != 0 );
+
+	/* Strip the header (including any options) */
+	iob_pull ( iobuf, hdrlen );
+	len = iob_len ( iobuf );
+
+	/* Check if the fragment belongs to a series in progress */
+	list_for_each_entry ( fragbuf, &frag_buffers, list ) {
+		if ( ( fragbuf->ident != ident ) ||
+		     ( fragbuf->src.s_addr != src.s_addr ) )
+			continue;
+
+		frag_iob = fragbuf->frag_iob;
+
+		/* The fragment must continue exactly where the data
+		 * accumulated so far ends, and must fit in the space
+		 * that is left.  Otherwise give up on the series.
+		 */
+		if ( ( iob_len ( frag_iob ) != offset ) ||
+		     ( len > iob_tailroom ( frag_iob ) ) ) {
+			free_iob ( frag_iob );
+			free_fragbuf ( fragbuf );
+			goto discard;
+		}
+
+		/* Append the payload to the reassembled data */
+		memcpy ( iob_put ( frag_iob, len ), iobuf->data, len );
+		free_iob ( iobuf );
+
+		/* More to come: keep waiting */
+		if ( more_frags )
+			return NULL;
+
+		/* Series complete: hand over the reassembled data and
+		 * release the bookkeeping.
+		 */
+		free_fragbuf ( fragbuf );
+		return frag_iob;
+	}
+
+	/* No series matches.  Only the first fragment of a datagram
+	 * (offset zero, more fragments to follow) may start one.
+	 */
+	if ( ( ! more_frags ) || ( offset != 0 ) )
+		goto discard;
+
+	/* Create a new fragment buffer, zeroed so that any state not
+	 * explicitly set up below starts out well-defined.
+	 */
+	fragbuf = malloc ( sizeof ( *fragbuf ) );
+	if ( ! fragbuf )
+		goto discard;
+	memset ( fragbuf, 0, sizeof ( *fragbuf ) );
+
+	/* Set up the reassembly I/O buffer */
+	frag_iob = alloc_iob ( IP_FRAG_IOB_SIZE );
+	if ( ! frag_iob )
+		goto err_alloc_iob;
+	if ( len > iob_tailroom ( frag_iob ) )
+		goto err_too_long;
+	memcpy ( iob_put ( frag_iob, len ), iobuf->data, len );
+	free_iob ( iobuf );
+
+	/* Record which series this buffer collects */
+	fragbuf->ident = ident;
+	fragbuf->src = src;
+	fragbuf->frag_iob = frag_iob;
+
+	/* Set the reassembly timer */
+	timer_init ( &fragbuf->frag_timer, ipv4_frag_expired, NULL );
+	start_timer_fixed ( &fragbuf->frag_timer, IP_FRAG_TIMEOUT );
+
+	/* Add the fragment buffer to the list of fragment buffers */
+	list_add ( &fragbuf->list, &frag_buffers );
+
+	return NULL;
+
+ err_too_long:
+	free_iob ( frag_iob );
+ err_alloc_iob:
+	/* Not yet on the list and no timer set up: plain free */
+	free ( fragbuf );
+ discard:
+	free_iob ( iobuf );
+	return NULL;
+}
+
+/**
+ * Fold the IPv4 pseudo-header into a running checksum
+ *
+ * @v iobuf		I/O buffer
+ * @v csum		Existing checksum
+ * @ret csum		Updated checksum
+ *
+ * The I/O buffer must start with the IPv4 header.  The pseudo-header
+ * is built from that header's addresses and protocol, together with
+ * the length of whatever follows the header in the buffer.
+ */
+static uint16_t ipv4_pshdr_chksum ( struct io_buffer *iobuf, uint16_t csum ) {
+	struct iphdr *ip = iobuf->data;
+	size_t ip_hdr_len = ( ( ip->verhdrlen & IP_MASK_HLEN ) * 4 );
+	struct ipv4_pseudo_header pseudo;
+
+	/* Assemble the pseudo-header */
+	pseudo.src = ip->src;
+	pseudo.dest = ip->dest;
+	pseudo.zero_padding = 0x00;
+	pseudo.protocol = ip->protocol;
+	pseudo.len = htons ( iob_len ( iobuf ) - ip_hdr_len );
+
+	/* Continue the caller's checksum across it */
+	return tcpip_continue_chksum ( csum, &pseudo, sizeof ( pseudo ) );
+}
+
+/**
+ * Work out the link-layer address for an IPv4 next hop
+ *
+ * @v dest		IPv4 destination address
+ * @v src		IPv4 source address
+ * @v netdev		Network device
+ * @v ll_dest		Link-layer destination address buffer
+ * @ret rc		Return status code
+ *
+ * The broadcast address maps to the device's link-layer broadcast
+ * address, multicast addresses are mapped by the link-layer protocol's
+ * mc_hash() method, and everything else is resolved via ARP.
+ */
+static int ipv4_ll_addr ( struct in_addr dest, struct in_addr src,
+			  struct net_device *netdev, uint8_t *ll_dest ) {
+	struct ll_protocol *ll = netdev->ll_protocol;
+
+	/* Broadcast address */
+	if ( dest.s_addr == INADDR_BROADCAST ) {
+		memcpy ( ll_dest, netdev->ll_broadcast, ll->ll_addr_len );
+		return 0;
+	}
+
+	/* Multicast address */
+	if ( IN_MULTICAST ( ntohl ( dest.s_addr ) ) )
+		return ll->mc_hash ( AF_INET, &dest, ll_dest );
+
+	/* Unicast address: resolve via ARP */
+	return arp_resolve ( netdev, &ipv4_protocol, &dest, &src, ll_dest );
+}
+
+/**
+ * Transmit IP packet
+ *
+ * @v iobuf		I/O buffer
+ * @v tcpip_protocol	Transport-layer protocol
+ * @v st_src		Source network-layer address (may be NULL)
+ * @v st_dest		Destination network-layer address
+ * @v netdev		Network device to use if no route found, or NULL
+ * @v trans_csum	Transport-layer checksum to complete, or NULL
+ * @ret rc		Status
+ *
+ * This function expects a transport-layer segment and prepends the IP
+ * header.  If the routing table yields a route, the route's address
+ * and network device override both the supplied source address and
+ * @c netdev.  Failures detected here free the I/O buffer; from the
+ * hand-off onwards the buffer is passed to net_tx() and not touched
+ * again.
+ */
+static int ipv4_tx ( struct io_buffer *iobuf,
+		     struct tcpip_protocol *tcpip_protocol,
+		     struct sockaddr_tcpip *st_src,
+		     struct sockaddr_tcpip *st_dest,
+		     struct net_device *netdev,
+		     uint16_t *trans_csum ) {
+	struct iphdr *ip = iob_push ( iobuf, sizeof ( *ip ) );
+	struct sockaddr_in *from = ( ( struct sockaddr_in * ) st_src );
+	struct sockaddr_in *to = ( ( struct sockaddr_in * ) st_dest );
+	struct ipv4_miniroute *route;
+	struct in_addr hop;
+	uint8_t ll_dest[MAX_LL_ADDR_LEN];
+	int rc;
+
+	/* Build the header; the checksum field stays zero for now */
+	memset ( ip, 0, sizeof ( *ip ) );
+	ip->verhdrlen = ( IP_VER | ( sizeof ( *ip ) / 4 ) );
+	ip->service = IP_TOS;
+	ip->len = htons ( iob_len ( iobuf ) );
+	ip->ident = htons ( ++next_ident );
+	ip->ttl = IP_TTL;
+	ip->protocol = tcpip_protocol->tcpip_proto;
+	ip->dest = to->sin_addr;
+	if ( from )
+		ip->src = from->sin_addr;
+
+	/* Consult the routing table, except for broadcast and
+	 * multicast destinations.  A matching route dictates both the
+	 * source address and the transmitting device, and may replace
+	 * the next hop with its gateway.
+	 */
+	hop = ip->dest;
+	if ( ( hop.s_addr != INADDR_BROADCAST ) &&
+	     ( ! IN_MULTICAST ( ntohl ( hop.s_addr ) ) ) ) {
+		route = ipv4_route ( &hop );
+		if ( route != NULL ) {
+			ip->src = route->address;
+			netdev = route->netdev;
+		}
+	}
+	if ( netdev == NULL ) {
+		DBG ( "IPv4 has no route to %s\n", inet_ntoa ( ip->dest ) );
+		free_iob ( iobuf );
+		return -ENETUNREACH;
+	}
+
+	/* Determine link-layer destination address */
+	rc = ipv4_ll_addr ( hop, ip->src, netdev, ll_dest );
+	if ( rc != 0 ) {
+		DBG ( "IPv4 has no link-layer address for %s: %s\n",
+		      inet_ntoa ( hop ), strerror ( rc ) );
+		free_iob ( iobuf );
+		return rc;
+	}
+
+	/* Complete the transport-layer checksum first (it needs the
+	 * final source address), then checksum the IP header itself.
+	 */
+	if ( trans_csum )
+		*trans_csum = ipv4_pshdr_chksum ( iobuf, *trans_csum );
+	ip->chksum = tcpip_chksum ( ip, sizeof ( *ip ) );
+
+	/* Dump the header; two DBG() calls because inet_ntoa() uses a
+	 * single static buffer.
+	 */
+	DBG ( "IPv4 TX %s->", inet_ntoa ( ip->src ) );
+	DBG ( "%s len %d proto %d id %04x csum %04x\n",
+	      inet_ntoa ( ip->dest ), ntohs ( ip->len ), ip->protocol,
+	      ntohs ( ip->ident ), ntohs ( ip->chksum ) );
+
+	/* Hand off to link layer */
+	rc = net_tx ( iobuf, netdev, &ipv4_protocol, ll_dest,
+		      netdev->ll_addr );
+	if ( rc != 0 ) {
+		DBG ( "IPv4 could not transmit packet via %s: %s\n",
+		      netdev->name, strerror ( rc ) );
+	}
+	return rc;
+}
+
+/**
+ * Process incoming packets
+ *
+ * @v iobuf		I/O buffer
+ * @v netdev		Network device
+ * @v ll_dest		Link-layer destination address
+ * @v ll_source		Link-layer source address
+ * @ret rc		Return status code
+ *
+ * This function expects an IP4 network datagram.  It validates the
+ * header, reassembles fragmented datagrams and passes the payload to
+ * the transport layer.  A fragment that does not (yet) complete a
+ * datagram is absorbed and reported as success.
+ */
+static int ipv4_rx ( struct io_buffer *iobuf,
+		     struct net_device *netdev __unused,
+		     const void *ll_dest __unused,
+		     const void *ll_source __unused ) {
+	struct iphdr *iphdr = iobuf->data;
+	struct ipv4_pseudo_header pshdr;
+	size_t hdrlen;
+	size_t len;
+	union {
+		struct sockaddr_in sin;
+		struct sockaddr_tcpip st;
+	} src, dest;
+	uint8_t protocol;
+	uint16_t csum;
+	uint16_t pshdr_csum;
+	int rc;
+
+	/* Sanity check the IPv4 header */
+	if ( iob_len ( iobuf ) < sizeof ( *iphdr ) ) {
+		DBG ( "IPv4 packet too short at %zd bytes (min %zd bytes)\n",
+		      iob_len ( iobuf ), sizeof ( *iphdr ) );
+		goto err;
+	}
+	if ( ( iphdr->verhdrlen & IP_MASK_VER ) != IP_VER ) {
+		DBG ( "IPv4 version %#02x not supported\n", iphdr->verhdrlen );
+		goto err;
+	}
+	hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
+	if ( hdrlen < sizeof ( *iphdr ) ) {
+		DBG ( "IPv4 header too short at %zd bytes (min %zd bytes)\n",
+		      hdrlen, sizeof ( *iphdr ) );
+		goto err;
+	}
+	if ( hdrlen > iob_len ( iobuf ) ) {
+		DBG ( "IPv4 header too long at %zd bytes "
+		      "(packet is %zd bytes)\n", hdrlen, iob_len ( iobuf ) );
+		goto err;
+	}
+	if ( ( csum = tcpip_chksum ( iphdr, hdrlen ) ) != 0 ) {
+		DBG ( "IPv4 checksum incorrect (is %04x including checksum "
+		      "field, should be 0000)\n", csum );
+		goto err;
+	}
+	len = ntohs ( iphdr->len );
+	if ( len < hdrlen ) {
+		DBG ( "IPv4 length too short at %zd bytes "
+		      "(header is %zd bytes)\n", len, hdrlen );
+		goto err;
+	}
+	if ( len > iob_len ( iobuf ) ) {
+		DBG ( "IPv4 length too long at %zd bytes "
+		      "(packet is %zd bytes)\n", len, iob_len ( iobuf ) );
+		goto err;
+	}
+
+	/* Print IPv4 header for debugging */
+	DBG ( "IPv4 RX %s<-", inet_ntoa ( iphdr->dest ) );
+	DBG ( "%s len %d proto %d id %04x csum %04x\n",
+	      inet_ntoa ( iphdr->src ), ntohs ( iphdr->len ), iphdr->protocol,
+	      ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
+
+	/* Truncate packet to correct length */
+	iob_unput ( iobuf, ( iob_len ( iobuf ) - len ) );
+
+	/* Extract everything that is needed from the header now.  If
+	 * the packet is a fragment, the buffer holding the header is
+	 * consumed by the reassembler and must not be referenced
+	 * afterwards.
+	 */
+	memset ( &src, 0, sizeof ( src ) );
+	src.sin.sin_family = AF_INET;
+	src.sin.sin_addr = iphdr->src;
+	memset ( &dest, 0, sizeof ( dest ) );
+	dest.sin.sin_family = AF_INET;
+	dest.sin.sin_addr = iphdr->dest;
+	protocol = iphdr->protocol;
+	pshdr_csum = ipv4_pshdr_chksum ( iobuf, TCPIP_EMPTY_CSUM );
+
+	if ( ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ) ) ||
+	     ( ( iphdr->frags & htons ( IP_MASK_OFFSET ) ) != 0 ) ) {
+		/* Fragment reassembly.  The fragment is passed on with
+		 * its header still in place, because ipv4_reassemble()
+		 * parses and strips the header itself.  It returns
+		 * either a fully reassembled (header-less) I/O buffer
+		 * or NULL.
+		 */
+		iobuf = ipv4_reassemble ( iobuf );
+		if ( ! iobuf )
+			return 0;
+
+		/* The pseudo-header must describe the reassembled
+		 * datagram as a whole, not just the final fragment.
+		 */
+		pshdr.src = src.sin.sin_addr;
+		pshdr.dest = dest.sin.sin_addr;
+		pshdr.zero_padding = 0x00;
+		pshdr.protocol = protocol;
+		pshdr.len = htons ( iob_len ( iobuf ) );
+		pshdr_csum = tcpip_continue_chksum ( TCPIP_EMPTY_CSUM, &pshdr,
+						     sizeof ( pshdr ) );
+	} else {
+		/* Unfragmented datagram: strip off the IPv4 header */
+		iob_pull ( iobuf, hdrlen );
+	}
+
+	/* Hand off to transport layer */
+	if ( ( rc = tcpip_rx ( iobuf, protocol, &src.st,
+			       &dest.st, pshdr_csum ) ) != 0 ) {
+		DBG ( "IPv4 received packet rejected by stack: %s\n",
+		      strerror ( rc ) );
+		return rc;
+	}
+
+	return 0;
+
+ err:
+	free_iob ( iobuf );
+	return -EINVAL;
+}
+
+/**
+ * Tell ARP whether an IPv4 address is configured on a network device
+ *
+ * @v netdev		Network device
+ * @v net_addr		Network-layer address
+ * @ret rc		Return status code
+ *
+ * Returns zero if some routing table entry has both this network
+ * device and this address, and -ENOENT otherwise.
+ */
+static int ipv4_arp_check ( struct net_device *netdev, const void *net_addr ) {
+	const struct in_addr *wanted = net_addr;
+	struct ipv4_miniroute *route;
+
+	list_for_each_entry ( route, &ipv4_miniroutes, list ) {
+		if ( route->netdev != netdev )
+			continue;
+		if ( route->address.s_addr != wanted->s_addr )
+			continue;
+
+		/* Both device and address match */
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+/**
+ * Format an IPv4 address as a dotted-quad string
+ *
+ * @v in		IP address
+ * @ret string		IP address in dotted-quad notation
+ *
+ * The result lives in a single static buffer, so each call overwrites
+ * the string returned by the previous one.
+ */
+char * inet_ntoa ( struct in_addr in ) {
+	static char dotted[16]; /* "xxx.xxx.xxx.xxx" plus NUL */
+	const uint8_t *octet = ( ( const uint8_t * ) &in );
+
+	/* Bytes are printed in the order in which they are stored */
+	sprintf ( dotted, "%d.%d.%d.%d",
+		  octet[0], octet[1], octet[2], octet[3] );
+
+	return dotted;
+}
+
+/**
+ * Transcribe IP address
+ *
+ * @v net_addr		IP address
+ * @ret string		IP address in dotted-quad notation
+ *
+ * Thin adapter around inet_ntoa() taking an untyped pointer to the
+ * address; the returned string is inet_ntoa()'s static buffer.
+ */
+static const char * ipv4_ntoa ( const void *net_addr ) {
+	const struct in_addr *in = net_addr;
+
+	return inet_ntoa ( *in );
+}
+
+/** IPv4 protocol
+ *
+ * Network-layer protocol descriptor: received packets go to ipv4_rx()
+ * and addresses are transcribed by ipv4_ntoa().  The protocol number
+ * is stored in network byte order.  This same object is what
+ * ipv4_tx() passes to net_tx() and ipv4_ll_addr() passes to
+ * arp_resolve().
+ */
+struct net_protocol ipv4_protocol __net_protocol = {
+ .name = "IP",
+ .net_proto = htons ( ETH_P_IP ),
+ .net_addr_len = sizeof ( struct in_addr ),
+ .rx = ipv4_rx,
+ .ntoa = ipv4_ntoa,
+};
+
+/** IPv4 TCPIP net protocol
+ *
+ * Associates the AF_INET socket address family with ipv4_tx() as the
+ * transmit routine.
+ */
+struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol = {
+ .name = "IPv4",
+ .sa_family = AF_INET,
+ .tx = ipv4_tx,
+};
+
+/** IPv4 ARP protocol
+ *
+ * Ties ARP to the IPv4 protocol and names ipv4_arp_check() as the
+ * routine that decides whether an address belongs to a given network
+ * device.
+ */
+struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol = {
+ .net_protocol = &ipv4_protocol,
+ .check = ipv4_arp_check,
+};
+
+/******************************************************************************
+ *
+ * Settings
+ *
+ ******************************************************************************
+ */
+
+/** IPv4 address setting
+ *
+ * Fetched per network device by ipv4_create_routes(); a device whose
+ * address is absent (zero) gets no route.
+ */
+struct setting ip_setting __setting ( SETTING_IPv4 ) = {
+ .name = "ip",
+ .description = "IP address",
+ .tag = DHCP_EB_YIADDR,
+ .type = &setting_type_ipv4,
+};
+
+/** IPv4 subnet mask setting
+ *
+ * Fetched per network device by ipv4_create_routes(); when the mask
+ * is zero, a classful default is derived from the address instead.
+ */
+struct setting netmask_setting __setting ( SETTING_IPv4 ) = {
+ .name = "netmask",
+ .description = "Subnet mask",
+ .tag = DHCP_SUBNET_MASK,
+ .type = &setting_type_ipv4,
+};
+
+/** Default gateway setting
+ *
+ * Fetched per network device by ipv4_create_routes().  A non-zero
+ * gateway lets ipv4_route() use the resulting route for destinations
+ * outside the route's own subnet.
+ */
+struct setting gateway_setting __setting ( SETTING_IPv4 ) = {
+ .name = "gateway",
+ .description = "Default gateway",
+ .tag = DHCP_ROUTERS,
+ .type = &setting_type_ipv4,
+};
+
+/**
+ * Create IPv4 routing table based on configured settings
+ *
+ * @ret rc		Return status code
+ *
+ * The existing routing table is thrown away and rebuilt from scratch:
+ * every network device that has an IPv4 address configured gets one
+ * route.  Returns -ENOMEM if a route cannot be allocated, in which
+ * case the routes created up to that point remain in the table.
+ */
+static int ipv4_create_routes ( void ) {
+	struct ipv4_miniroute *miniroute;
+	struct ipv4_miniroute *tmp;
+	struct net_device *netdev;
+	struct settings *settings;
+	struct in_addr address = { 0 };
+	struct in_addr netmask = { 0 };
+	struct in_addr gateway = { 0 };
+
+	/* Delete all existing routes */
+	list_for_each_entry_safe ( miniroute, tmp, &ipv4_miniroutes, list )
+		del_ipv4_miniroute ( miniroute );
+
+	/* Create a route for each configured network device */
+	for_each_netdev ( netdev ) {
+		settings = netdev_settings ( netdev );
+
+		/* Start every device from a clean slate, so that a
+		 * setting this device lacks can never be filled in
+		 * with the value fetched for a previous device.
+		 */
+		address.s_addr = 0;
+		netmask.s_addr = 0;
+		gateway.s_addr = 0;
+
+		/* Get IPv4 address; no address means no route */
+		fetch_ipv4_setting ( settings, &ip_setting, &address );
+		if ( ! address.s_addr )
+			continue;
+
+		/* Get subnet mask */
+		fetch_ipv4_setting ( settings, &netmask_setting, &netmask );
+
+		/* Calculate default (classful) netmask, if necessary */
+		if ( ! netmask.s_addr ) {
+			if ( IN_CLASSA ( ntohl ( address.s_addr ) ) ) {
+				netmask.s_addr = htonl ( IN_CLASSA_NET );
+			} else if ( IN_CLASSB ( ntohl ( address.s_addr ) ) ) {
+				netmask.s_addr = htonl ( IN_CLASSB_NET );
+			} else if ( IN_CLASSC ( ntohl ( address.s_addr ) ) ) {
+				netmask.s_addr = htonl ( IN_CLASSC_NET );
+			}
+		}
+
+		/* Get default gateway, if present */
+		fetch_ipv4_setting ( settings, &gateway_setting, &gateway );
+
+		/* Configure route */
+		miniroute = add_ipv4_miniroute ( netdev, address,
+						 netmask, gateway );
+		if ( ! miniroute )
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/** IPv4 settings applicator
+ *
+ * Names ipv4_create_routes() as the apply handler, so that the
+ * routing table is rebuilt from the current settings.
+ * NOTE(review): presumably invoked by the settings core whenever
+ * settings change - confirm against the settings applicator
+ * mechanism.
+ */
+struct settings_applicator ipv4_settings_applicator __settings_applicator = {
+ .apply = ipv4_create_routes,
+};
+
+/* Drag in ICMP */
+REQUIRE_OBJECT ( icmp );