/* * This is a module which is used for queueing IPv6 packets and * communicating with userspace via netlink. * * (C) 2001 Fernando Anton, this code is GPL. * IPv64 Project - Work based in IPv64 draft by Arturo Azcorra. * Universidad Carlos III de Madrid - Leganes (Madrid) - Spain * Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain * email: fanton@it.uc3m.es * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * * 2001-11-06: First try. Working with ip_queue.c for IPv4 and trying * to adapt it to IPv6 * HEAVILY based in ipqueue.c by James Morris. It's just * a little modified version of it, so he's nearly the * real coder of this. * Few changes needed, mainly the hard_routing code and * the netlink socket protocol (we're NETLINK_IP6_FW). * 2002-06-25: Code cleanup. [JM: ported cleanup over from ip_queue.c] * 2005-02-04: Added /proc counter for dropped packets; fixed so * packets aren't delivered to user space if they're going * to be dropped. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define IPQ_QMAX_DEFAULT 1024 #define IPQ_PROC_FS_NAME "ip6_queue" #define NET_IPQ_QMAX 2088 #define NET_IPQ_QMAX_NAME "ip6_queue_maxlen" struct ipq_rt_info { struct in6_addr daddr; struct in6_addr saddr; }; struct ipq_queue_entry { struct list_head list; struct nf_info *info; struct sk_buff *skb; struct ipq_rt_info rt_info; }; typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); static unsigned char copy_mode = IPQ_COPY_NONE; static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT; static DEFINE_RWLOCK(queue_lock); static int peer_pid; static unsigned int copy_range; static unsigned int queue_total; static unsigned int queue_dropped = 0; static unsigned int queue_user_dropped = 0; static struct sock *ipqnl; static LIST_HEAD(queue_list); static DECLARE_MUTEX(ipqnl_sem); static void ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict) { local_bh_disable(); nf_reinject(entry->skb, entry->info, verdict); local_bh_enable(); kfree(entry); } static inline void __ipq_enqueue_entry(struct ipq_queue_entry *entry) { list_add(&entry->list, &queue_list); queue_total++; } /* * Find and return a queued entry matched by cmpfn, or return the last * entry if cmpfn is NULL. */ static inline struct ipq_queue_entry * __ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data) { struct list_head *p; list_for_each_prev(p, &queue_list) { struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p; if (!cmpfn || cmpfn(entry, data)) return entry; } return NULL; } static inline void __ipq_dequeue_entry(struct ipq_queue_entry *entry) { list_del(&entry->list); queue_total--; } static inline struct ipq_queue_entry * __ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data) { struct ipq_queue_entry *entry; entry = __ipq_find_entry(cmpfn, data); if (entry == NULL) return NULL; __ipq_dequeue_entry(entry); return entry; } static inline void __ipq_flush(int verdict) { struct ipq_queue_entry *entry; while ((entry = __ipq_find_dequeue_entry(NULL, 0))) ipq_issue_verdict(entry, verdict); } static inline int __ipq_set_mode(unsigned char mode, unsigned int range) { int status = 0; switch(mode) { case IPQ_COPY_NONE: case IPQ_COPY_META: copy_mode = mode; copy_range = 0; break; case IPQ_COPY_PACKET: copy_mode = mode; copy_range = range; if (copy_range > 0xFFFF) copy_range = 0xFFFF; break; default: status = -EINVAL; } return status; } static inline void __ipq_reset(void) { peer_pid = 0; net_disable_timestamp(); __ipq_set_mode(IPQ_COPY_NONE, 0); __ipq_flush(NF_DROP); } static struct ipq_queue_entry * ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data) { struct ipq_queue_entry *entry; write_lock_bh(&queue_lock); entry = __ipq_find_dequeue_entry(cmpfn, data); write_unlock_bh(&queue_lock); return entry; } static void ipq_flush(int verdict) { write_lock_bh(&queue_lock); __ipq_flush(verdict); write_unlock_bh(&queue_lock); } static struct sk_buff * ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) { unsigned char *old_tail; size_t size = 0; size_t data_len = 0; struct sk_buff *skb; struct ipq_packet_msg *pmsg; struct nlmsghdr *nlh; read_lock_bh(&queue_lock); switch (copy_mode) { case IPQ_COPY_META: case IPQ_COPY_NONE: size = NLMSG_SPACE(sizeof(*pmsg)); data_len = 0; break; case IPQ_COPY_PACKET: if (entry->skb->ip_summed == CHECKSUM_HW && (*errp = skb_checksum_help(entry->skb, entry->info->outdev == NULL))) { read_unlock_bh(&queue_lock); return NULL; } if (copy_range == 0 || copy_range > entry->skb->len) data_len = entry->skb->len; else data_len = copy_range; size = NLMSG_SPACE(sizeof(*pmsg) + data_len); break; default: *errp = -EINVAL; read_unlock_bh(&queue_lock); return NULL; } read_unlock_bh(&queue_lock); skb = alloc_skb(size, GFP_ATOMIC); if (!skb) goto nlmsg_failure; old_tail= skb->tail; nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); pmsg = NLMSG_DATA(nlh); memset(pmsg, 0, sizeof(*pmsg)); pmsg->packet_id = (unsigned long )entry; pmsg->data_len = data_len; pmsg->timestamp_sec = entry->skb->stamp.tv_sec; pmsg->timestamp_usec = entry->skb->stamp.tv_usec; pmsg->mark = entry->skb->nfmark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; if (entry->info->indev) strcpy(pmsg->indev_name, entry->info->indev->name); else pmsg->indev_name[0] = '\0'; if (entry->info->outdev) strcpy(pmsg->outdev_name, entry->info->outdev->name); else pmsg->outdev_name[0] = '\0'; if (entry->info->indev && entry->skb->dev) { pmsg->hw_type = entry->skb->dev->type; if (entry->skb->dev->hard_header_parse) pmsg->hw_addrlen = entry->skb->dev->hard_header_parse(entry->skb, pmsg->hw_addr); } if (data_len) if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len)) BUG(); nlh->nlmsg_len = skb->tail - old_tail; return skb; nlmsg_failure: if (skb) kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip6_queue: error creating packet message\n"); return NULL; } static int ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) { int status = -EINVAL; struct sk_buff *nskb; struct ipq_queue_entry *entry; if (copy_mode == IPQ_COPY_NONE) return -EAGAIN; entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) { printk(KERN_ERR "ip6_queue: OOM in ipq_enqueue_packet()\n"); return -ENOMEM; } entry->info = info; entry->skb = skb; if (entry->info->hook == NF_IP_LOCAL_OUT) { struct ipv6hdr *iph = skb->nh.ipv6h; entry->rt_info.daddr = iph->daddr; entry->rt_info.saddr = iph->saddr; } nskb = ipq_build_packet_message(entry, &status); if (nskb == NULL) goto err_out_free; write_lock_bh(&queue_lock); if (!peer_pid) goto err_out_free_nskb; if (queue_total >= queue_maxlen) { queue_dropped++; status = -ENOSPC; if (net_ratelimit()) printk (KERN_WARNING "ip6_queue: fill at %d entries, " "dropping packet(s). Dropped: %d\n", queue_total, queue_dropped); goto err_out_free_nskb; } /* netlink_unicast will either free the nskb or attach it to a socket */ status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT); if (status < 0) { queue_user_dropped++; goto err_out_unlock; } __ipq_enqueue_entry(entry); write_unlock_bh(&queue_lock); return status; err_out_free_nskb: kfree_skb(nskb); err_out_unlock: write_unlock_bh(&queue_lock); err_out_free: kfree(entry); return status; } static int ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) { int diff; struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload; if (v->data_len < sizeof(*user_iph)) return 0; diff = v->data_len - e->skb->len; if (diff < 0) skb_trim(e->skb, v->data_len); else if (diff > 0) { if (v->data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { struct sk_buff *newskb; newskb = skb_copy_expand(e->skb, skb_headroom(e->skb), diff, GFP_ATOMIC); if (newskb == NULL) { printk(KERN_WARNING "ip6_queue: OOM " "in mangle, dropping packet\n"); return -ENOMEM; } if (e->skb->sk) skb_set_owner_w(newskb, e->skb->sk); kfree_skb(e->skb); e->skb = newskb; } skb_put(e->skb, diff); } if (!skb_ip_make_writable(&e->skb, v->data_len)) return -ENOMEM; memcpy(e->skb->data, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; e->skb->nfcache |= NFC_ALTERED; /* * Extra routing may needed on local out, as the QUEUE target never * returns control to the table. * Not a nice way to cmp, but works */ if (e->info->hook == NF_IP_LOCAL_OUT) { struct ipv6hdr *iph = e->skb->nh.ipv6h; if (!ipv6_addr_equal(&iph->daddr, &e->rt_info.daddr) || !ipv6_addr_equal(&iph->saddr, &e->rt_info.saddr)) return ip6_route_me_harder(e->skb); } return 0; } static inline int id_cmp(struct ipq_queue_entry *e, unsigned long id) { return (id == (unsigned long )e); } static int ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) { struct ipq_queue_entry *entry; if (vmsg->value > NF_MAX_VERDICT) return -EINVAL; entry = ipq_find_dequeue_entry(id_cmp, vmsg->id); if (entry == NULL) return -ENOENT; else { int verdict = vmsg->value; if (vmsg->data_len && vmsg->data_len == len) if (ipq_mangle_ipv6(vmsg, entry) < 0) verdict = NF_DROP; ipq_issue_verdict(entry, verdict); return 0; } } static int ipq_set_mode(unsigned char mode, unsigned int range) { int status; write_lock_bh(&queue_lock); status = __ipq_set_mode(mode, range); write_unlock_bh(&queue_lock); return status; } static int ipq_receive_peer(struct ipq_peer_msg *pmsg, unsigned char type, unsigned int len) { int status = 0; if (len < sizeof(*pmsg)) return -EINVAL; switch (type) { case IPQM_MODE: status = ipq_set_mode(pmsg->msg.mode.value, pmsg->msg.mode.range); break; case IPQM_VERDICT: if (pmsg->msg.verdict.value > NF_MAX_VERDICT) status = -EINVAL; else status = ipq_set_verdict(&pmsg->msg.verdict, len - sizeof(*pmsg)); break; default: status = -EINVAL; } return status; } static int dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex) { if (entry->info->indev) if (entry->info->indev->ifindex == ifindex) return 1; if (entry->info->outdev) if (entry->info->outdev->ifindex == ifindex) return 1; return 0; } static void ipq_dev_drop(int ifindex) { struct ipq_queue_entry *entry; while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL) ipq_issue_verdict(entry, NF_DROP); } #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) static inline void ipq_rcv_skb(struct sk_buff *skb) { int status, type, pid, flags, nlmsglen, skblen; struct nlmsghdr *nlh; skblen = skb->len; if (skblen < sizeof(*nlh)) return; nlh = (struct nlmsghdr *)skb->data; nlmsglen = nlh->nlmsg_len; if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) return; pid = nlh->nlmsg_pid; flags = nlh->nlmsg_flags; if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI) RCV_SKB_FAIL(-EINVAL); if (flags & MSG_TRUNC) RCV_SKB_FAIL(-ECOMM); type = nlh->nlmsg_type; if (type < NLMSG_NOOP || type >= IPQM_MAX) RCV_SKB_FAIL(-EINVAL); if (type <= IPQM_BASE) return; if (security_netlink_recv(skb)) RCV_SKB_FAIL(-EPERM); write_lock_bh(&queue_lock); if (peer_pid) { if (peer_pid != pid) { write_unlock_bh(&queue_lock); RCV_SKB_FAIL(-EBUSY); } } else { net_enable_timestamp(); peer_pid = pid; } write_unlock_bh(&queue_lock); status = ipq_receive_peer(NLMSG_DATA(nlh), type, skblen - NLMSG_LENGTH(0)); if (status < 0) RCV_SKB_FAIL(status); if (flags & NLM_F_ACK) netlink_ack(skb, nlh, 0); return; } static void ipq_rcv_sk(struct sock *sk, int len) { struct sk_buff *skb; unsigned int qlen; down(&ipqnl_sem); for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { skb = skb_dequeue(&sk->sk_receive_queue); ipq_rcv_skb(skb); kfree_skb(skb); } up(&ipqnl_sem); } static int ipq_rcv_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) ipq_dev_drop(dev->ifindex); return NOTIFY_DONE; } static struct notifier_block ipq_dev_notifier = { .notifier_call = ipq_rcv_dev_event, }; static int ipq_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr) { struct netlink_notify *n = ptr; if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW && n->pid) { write_lock_bh(&queue_lock); if (n->pid == peer_pid) __ipq_reset(); write_unlock_bh(&queue_lock); } return NOTIFY_DONE; } static struct notifier_block ipq_nl_notifier = { .notifier_call = ipq_rcv_nl_event, }; static struct ctl_table_header *ipq_sysctl_header; static ctl_table ipq_table[] = { { .ctl_name = NET_IPQ_QMAX, .procname = NET_IPQ_QMAX_NAME, .data = &queue_maxlen, .maxlen = sizeof(queue_maxlen), .mode = 0644, .proc_handler = proc_dointvec }, { .ctl_name = 0 } }; static ctl_table ipq_dir_table[] = { { .ctl_name = NET_IPV6, .procname = "ipv6", .mode = 0555, .child = ipq_table }, { .ctl_name = 0 } }; static ctl_table ipq_root_table[] = { { .ctl_name = CTL_NET, .procname = "net", .mode = 0555, .child = ipq_dir_table }, { .ctl_name = 0 } }; static int ipq_get_info(char *buffer, char **start, off_t offset, int length) { int len; read_lock_bh(&queue_lock); len = sprintf(buffer, "Peer PID : %d\n" "Copy mode : %hu\n" "Copy range : %u\n" "Queue length : %u\n" "Queue max. length : %u\n" "Queue dropped : %u\n" "Netfilter dropped : %u\n", peer_pid, copy_mode, copy_range, queue_total, queue_maxlen, queue_dropped, queue_user_dropped); read_unlock_bh(&queue_lock); *start = buffer + offset; len -= offset; if (len > length) len = length; else if (len < 0) len = 0; return len; } static int init_or_cleanup(int init) { int status = -ENOMEM; struct proc_dir_entry *proc; if (!init) goto cleanup; netlink_register_notifier(&ipq_nl_notifier); ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk); if (ipqnl == NULL) { printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; } proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info); if (proc) proc->owner = THIS_MODULE; else { printk(KERN_ERR "ip6_queue: failed to create proc entry\n"); goto cleanup_ipqnl; } register_netdevice_notifier(&ipq_dev_notifier); ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0); status = nf_register_queue_handler(PF_INET6, ipq_enqueue_packet, NULL); if (status < 0) { printk(KERN_ERR "ip6_queue: failed to register queue handler\n"); goto cleanup_sysctl; } return status; cleanup: nf_unregister_queue_handler(PF_INET6); synchronize_net(); ipq_flush(NF_DROP); cleanup_sysctl: unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); proc_net_remove(IPQ_PROC_FS_NAME); cleanup_ipqnl: sock_release(ipqnl->sk_socket); down(&ipqnl_sem); up(&ipqnl_sem); cleanup_netlink_notifier: netlink_unregister_notifier(&ipq_nl_notifier); return status; } static int __init init(void) { return init_or_cleanup(1); } static void __exit fini(void) { init_or_cleanup(0); } MODULE_DESCRIPTION("IPv6 packet queue handler"); MODULE_LICENSE("GPL"); module_init(init); module_exit(fini);