summaryrefslogtreecommitdiff
path: root/boost/graph/distributed/mpi_process_group.hpp
blob: e0ee5790298794f3c71301dc3513ca8c8e1697d2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
// Copyright (C) 2004-2008 The Trustees of Indiana University.
// Copyright (C) 2007   Douglas Gregor
// Copyright (C) 2007  Matthias Troyer  <troyer@boost-consulting.com>

// Use, modification and distribution is subject to the Boost Software
// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

//  Authors: Douglas Gregor
//           Matthias Troyer
//           Andrew Lumsdaine
#ifndef BOOST_GRAPH_DISTRIBUTED_MPI_PROCESS_GROUP
#define BOOST_GRAPH_DISTRIBUTED_MPI_PROCESS_GROUP

#ifndef BOOST_GRAPH_USE_MPI
#error "Parallel BGL files should not be included unless <boost/graph/use_mpi.hpp> has been included"
#endif

//#define NO_SPLIT_BATCHES
#define SEND_OOB_BSEND

#include <boost/optional.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/weak_ptr.hpp>
#include <utility>
#include <memory>
#include <boost/function/function1.hpp>
#include <boost/function/function2.hpp>
#include <boost/function/function0.hpp>
#include <boost/mpi.hpp>
#include <boost/graph/parallel/process_group.hpp>
#include <boost/utility/enable_if.hpp>

namespace boost { namespace graph { namespace distributed {

// Process group tags

/// Tag identifying MPI-based process groups; virtually derives from the
/// linear process group tag.
struct mpi_process_group_tag
  : virtual parallel::linear_process_group_tag
{
};

class mpi_process_group
{
  struct impl;

 public:
  /// Number of tags available to each data structure.
  static const int max_tags = 256;

  /**
   * The type of a "receive" handler, that will be provided with
   * (source, tag) pairs when a message is received. Users can provide a
   * receive handler for a distributed data structure, for example, to
   * automatically pick up and respond to messages as needed.  
   */
  typedef function<void(int source, int tag)> receiver_type;

  /**
   * The type of a handler for the on-synchronize event, which will be
   * executed at the beginning of synchronize().
   */
  typedef function0<void>      on_synchronize_event_type;

  /// Tag type that selects the constructor building an "empty" process
  /// group.
  struct create_empty
  {
  };

  /// The type used to buffer message data
  typedef boost::mpi::packed_oprimitive::buffer_type buffer_type;

  /// The type used to identify a process
  typedef int process_id_type;

  /// The type used to count the number of processes
  typedef int process_size_type;

  /// The type of communicator used to transmit data via MPI
  typedef boost::mpi::communicator communicator_type;

  /// Capability classification of this process group: virtually derives
  /// from both the BSP process group tag and the MPI process group tag.
  struct communication_category
    : virtual parallel::bsp_process_group_tag,
      virtual mpi_process_group_tag
  {
  };

  /**
   * Describes one message stored in a batch buffer: who sent it, which
   * tag it carries, and where its bytes live inside the buffer.
   *
   * TBD: We can eliminate the "source" field and possibly the
   * "offset" field.
   */
  struct message_header
  {
    /// The process that sent the message
    process_id_type source;

    /// The message tag
    int tag;

    /// The offset of the message into the buffer
    std::size_t offset;

    /// The length of the message in the buffer, in bytes
    std::size_t bytes;

    /// Feeds the four fields to @p ar in declaration order. The
    /// version argument is unused.
    template <class Archive>
    void serialize(Archive& ar, int)
    {
      ar & source;
      ar & tag;
      ar & offset;
      ar & bytes;
    }
  };

  /**
   * Stores the outgoing messages for a particular processor: a list
   * of message headers together with the buffer holding the message
   * data.
   *
   * @todo Evaluate whether we should use a deque instance, which
   * could reduce the cost of "sending" messages but increases the
   * time spent in the synchronization step.
   */
  struct outgoing_messages
  {
    outgoing_messages() {}
    ~outgoing_messages() {}

    /// Headers describing the messages stored in @c buffer.
    std::vector<message_header> headers;

    /// Storage for the message data.
    buffer_type                 buffer;

    /// Exchanges both members with those of @p x.
    void swap(outgoing_messages& x)
    {
      buffer.swap(x.buffer);
      headers.swap(x.headers);
    }

    /// Feeds the headers, then the buffer, to @p ar. The version
    /// argument is unused.
    template <class Archive>
    void serialize(Archive& ar, int)
    {
      ar & headers;
      ar & buffer;
    }
  };

private:
  /**
   * Abstract, non-copyable base class from which every trigger will be
   * launched. It records the tag the trigger is bound to and declares
   * the @c receive() hook that concrete launchers implement. See @c
   * trigger_launcher for more information.
   */
  class trigger_base : boost::noncopyable
  {
  public:
    /// Bind this trigger to the message tag @p tag.
    explicit trigger_base(int tag)
      : tag_(tag)
    { }

    virtual ~trigger_base() { }

    /// Retrieve the tag associated with this trigger
    int tag() const
    {
      return tag_;
    }

    /**
     * Invoked to receive a message that matches a particular trigger.
     *
     * @param pg          the process group handed in by the caller
     *                    (presumably the group the message arrived on;
     *                    confirm against the implementation file)
     * @param source      the source of the message
     * @param tag         the (local) tag of the message
     * @param context     the context under which the trigger is being
     *                    invoked
     * @param block       block number; defaults to -1 (the meaning of
     *                    -1 is not visible in this header)
     */
    virtual void
    receive(mpi_process_group const& pg, int source, int tag,
            trigger_receive_context context, int block=-1) const = 0;

  protected:
    /// The message tag associated with this trigger
    int tag_;
  };

  /**
   * Launches a specific handler in response to a trigger. The launcher
   * keeps a reference to a process group and its own copy of the
   * handler function object; @c receive() is only declared here and
   * defined elsewhere.
   *
   * @tparam Type     presumably the type of the message data passed to
   *                  the handler (see @c trigger()); confirm against
   *                  the implementation file
   * @tparam Handler  the type of the handler function object
   */
  template<typename Type, typename Handler>
  class trigger_launcher : public trigger_base
  {
  public:
    /// Store a reference to @p self and a copy of @p handler, and bind
    /// the trigger to @p tag.
    explicit trigger_launcher(mpi_process_group& self, int tag,
                              const Handler& handler)
      : trigger_base(tag),
        self(self),
        handler(handler)
    { }

    /// Implementation of @c trigger_base::receive().
    void
    receive(mpi_process_group const& pg, int source, int tag,
            trigger_receive_context context, int block=-1) const;

  private:
    /// Process group supplied at construction (held by reference).
    mpi_process_group& self;

    /// Handler copy; mutable so it can be used from the const
    /// @c receive().
    mutable Handler handler;
  };

  /**
   * Launches a specific handler with a message reply in response to a
   * trigger. Like @c trigger_launcher, it keeps a reference to a
   * process group and its own copy of the handler function object;
   * @c receive() is only declared here and defined elsewhere.
   *
   * @tparam Type     presumably the type of the message data passed to
   *                  the handler (see @c trigger_with_reply()); confirm
   *                  against the implementation file
   * @tparam Handler  the type of the handler function object
   */
  template<typename Type, typename Handler>
  class reply_trigger_launcher : public trigger_base
  {
  public:
    /// Store a reference to @p self and a copy of @p handler, and bind
    /// the trigger to @p tag.
    explicit reply_trigger_launcher(mpi_process_group& self, int tag,
                                    const Handler& handler)
      : trigger_base(tag),
        self(self),
        handler(handler)
    { }

    /// Implementation of @c trigger_base::receive().
    void
    receive(mpi_process_group const& pg, int source, int tag,
            trigger_receive_context context, int block=-1) const;

  private:
    /// Process group supplied at construction (held by reference).
    mpi_process_group& self;

    /// Handler copy; mutable so it can be used from the const
    /// @c receive().
    mutable Handler handler;
  };

  /**
   * Trigger launcher used for "global" triggers (see @c
   * global_trigger()). It stores only a copy of the handler; the
   * process group passed to the constructor is not retained.
   *
   * @tparam Type     presumably the type of the message data passed to
   *                  the handler; confirm against the implementation
   *                  file
   * @tparam Handler  the type of the handler function object
   */
  template<typename Type, typename Handler>
  class global_trigger_launcher : public trigger_base
  {
  public:
    /// Bind the trigger to @p tag and copy @p handler. The @p self
    /// argument is accepted but unused.
    explicit global_trigger_launcher(mpi_process_group& self, int tag,
                              const Handler& handler)
      : trigger_base(tag),
        handler(handler)
    { }

    /// Implementation of @c trigger_base::receive().
    void
    receive(mpi_process_group const& pg, int source, int tag,
            trigger_receive_context context, int block=-1) const;

  private:
    /// Handler copy; mutable so it can be used from the const
    /// @c receive().
    mutable Handler handler;
    // TBD: do not forget to cancel any outstanding Irecv when deleted,
    // if we decide to use Irecv
  };

  /**
   * Variant of the global trigger launcher that is constructed with a
   * buffer size and calls @c prepare_receive() as soon as it is built.
   * Both @c receive() and @c prepare_receive() are defined elsewhere.
   *
   * @tparam Type     presumably the type of the message data passed to
   *                  the handler; confirm against the implementation
   *                  file
   * @tparam Handler  the type of the handler function object
   */
  template<typename Type, typename Handler>
  class global_irecv_trigger_launcher : public trigger_base
  {
  public:
    /// Bind the trigger to @p tag, copy @p handler, remember @p sz as
    /// the buffer size, then call @c prepare_receive(self, tag).
    explicit global_irecv_trigger_launcher(mpi_process_group& self, int tag,
                              const Handler& handler, int sz)
      : trigger_base(tag),
        handler(handler),
        buffer_size(sz)
    {
      prepare_receive(self,tag);
    }

    /// Implementation of @c trigger_base::receive().
    void
    receive(mpi_process_group const& pg, int source, int tag,
            trigger_receive_context context, int block=-1) const;

  private:
    /// Declared here, defined elsewhere; called once from the
    /// constructor with the default @p force of false.
    void prepare_receive(mpi_process_group const& pg, int tag, bool force=false) const;

    /// Copy of the handler function object.
    Handler handler;

    /// Buffer size given at construction.
    int buffer_size;
    // TBD: do not forget to cancel any outstanding Irecv when deleted,
    // if we decide to use Irecv
  };

public:
  /** 
   * Construct a new BSP process group from an MPI communicator. The
   * MPI communicator will be duplicated to create a new communicator
   * for this process group to use.
   */
  mpi_process_group(communicator_type parent_comm = communicator_type());

  /** 
   * Construct a new BSP process group from an MPI communicator. The
   * MPI communicator will be duplicated to create a new communicator
   * for this process group to use. This constructor allows tuning the
   * size of message batches.
   *    
   *   @param num_headers The maximum number of headers in a message batch
   *
   *   @param buffer_size The maximum size of the message buffer in a batch.
   *
   */
  mpi_process_group( std::size_t num_headers, std::size_t buffer_size, 
                     communicator_type parent_comm = communicator_type());

  /**
   * Construct a copy of the BSP process group for a new distributed
   * data structure. This data structure will synchronize with all
   * other members of the process group's equivalence class (including
   * @p other), but will have its own set of tags. 
   *
   *   @param other The process group that this new process group will
   *   be based on, using a different set of tags within the same
   *   communication and synchronization space.
   *
   *   @param handler A message handler that will be passed (source,
   *   tag) pairs for each message received by this data
   *   structure. The handler is expected to receive the messages
   *   immediately. The handler can be changed after-the-fact by
   *   calling @c replace_handler.
   *
   *   @param out_of_band_receive An anachronism. TODO: remove this.
   */
  mpi_process_group(const mpi_process_group& other,
                    const receiver_type& handler,
                    bool out_of_band_receive = false);

  /**
   * Construct a copy of the BSP process group for a new distributed
   * data structure. This data structure will synchronize with all
   * other members of the process group's equivalence class (including
   * @p other), but will have its own set of tags. 
   */
  mpi_process_group(const mpi_process_group& other, 
                    attach_distributed_object,
                    bool out_of_band_receive = false);

  /**
   * Create an "empty" process group, with no information. This is an
   * internal routine that users should never need.
   */
  explicit mpi_process_group(create_empty) {}

  /**
   * Destroys this copy of the process group.
   */
  ~mpi_process_group();

  /**
   * Replace the current message handler with a new message handler.
   *
   * @param handler The new message handler.
   * @param out_of_band_receive An anachronism: remove this
   */
  void replace_handler(const receiver_type& handler,
                       bool out_of_band_receive = false);

  /**
   * Turns this process group into the process group for a new
   * distributed data structure or object, allocating its own tag
   * block.
   */
  void make_distributed_object();

  /**
   * Replace the handler to be invoked at the beginning of synchronize.
   */
  void
  replace_on_synchronize_handler(const on_synchronize_event_type& handler = 0);

  /**
   * Return the block number of the current data structure. A value of
   * 0 indicates that this particular instance of the process group is
   * not associated with any distributed data structure, i.e. @c
   * block_num is null.
   */
  int my_block_number() const
  {
    if (block_num)
      return *block_num;
    return 0;
  }

  /**
   * Encode a block number/tag pair into a single encoded tag for
   * transmission. The encoded value is
   * <tt>block_num * max_tags + tag</tt>; @c decode_tag() performs the
   * inverse split.
   *
   * @param block_num  the block number (this parameter hides the data
   *                   member of the same name)
   * @param tag        the tag within that block
   */
  int encode_tag(int block_num, int tag) const
  {
    return tag + block_num * max_tags;
  }

  /**
   * Decode an encoded tag into a block number/tag pair.
   *
   * @param encoded_tag  a value in the form produced by @c encode_tag()
   * @returns a pair whose first member is <tt>encoded_tag / max_tags</tt>
   * (the block number) and whose second member is
   * <tt>encoded_tag % max_tags</tt> (the tag within the block).
   */
  std::pair<int, int> decode_tag(int encoded_tag) const
  {
    const int block = encoded_tag / max_tags;
    const int local_tag = encoded_tag % max_tags;
    return std::pair<int, int>(block, local_tag);
  }

  // @todo Actually write up the friend declarations so these could be
  // private.

  // private:

  /** Allocate a block of tags for this instance. The block should not
   * have been allocated already, e.g., my_block_number() ==
   * 0. Returns the newly-allocated block number.
   */
  int allocate_block(bool out_of_band_receive = false);

  /** Potentially emit a receive event out of band. Returns true if an event 
   *  was actually sent, false otherwise. 
   */
  bool maybe_emit_receive(int process, int encoded_tag) const;

  /** Emit a receive event. Returns true if an event was actually
   * sent, false otherwise. 
   */
  bool emit_receive(int process, int encoded_tag) const;

  /** Emit an on-synchronize event to all block handlers. */
  void emit_on_synchronize() const;

  /** Retrieve a reference to the stored receiver in this block.  */
  template<typename Receiver>
  Receiver* get_receiver();

  template<typename T>
  void
  send_impl(int dest, int tag, const T& value,
            mpl::true_ /*is_mpi_datatype*/) const;

  template<typename T>
  void
  send_impl(int dest, int tag, const T& value,
            mpl::false_ /*is_mpi_datatype*/) const;

  template<typename T>
  typename disable_if<boost::mpi::is_mpi_datatype<T>, void>::type
  array_send_impl(int dest, int tag, const T values[], std::size_t n) const;

  template<typename T>
  bool
  receive_impl(int source, int tag, T& value,
               mpl::true_ /*is_mpi_datatype*/) const;

  template<typename T>
  bool
  receive_impl(int source, int tag, T& value,
               mpl::false_ /*is_mpi_datatype*/) const;

  // Receive an array of values
  template<typename T>
  typename disable_if<boost::mpi::is_mpi_datatype<T>, bool>::type
  array_receive_impl(int source, int tag, T* values, std::size_t& n) const;

  optional<std::pair<mpi_process_group::process_id_type, int> > probe() const;

  void synchronize() const;

  /**
   * Tests whether this process group has implementation data attached.
   *
   * @returns @c true when @c impl_ is non-null, @c false otherwise
   * (for example, for a group built with the @c create_empty
   * constructor, which leaves @c impl_ default-constructed).
   */
  // The conversion is spelled out explicitly: shared_ptr's boolean
  // conversion is explicit in C++11 builds, so a plain "return impl_;"
  // does not compile there.
  operator bool() { return static_cast<bool>(impl_); }

  mpi_process_group base() const;

  /**
   * Create a new trigger for a specific message tag. Triggers handle
   * out-of-band messaging, and the handler itself will be called
   * whenever a message is available. The handler itself accepts four
   * arguments: the source of the message, the message tag (which will
   * be the same as @p tag), the message data (of type @c Type), and a
   * boolean flag that states whether the message was received
   * out-of-band. The last will be @c true for out-of-band receives,
   * or @c false for receives at the end of a synchronization step.
   */
  template<typename Type, typename Handler>
  void trigger(int tag, const Handler& handler);

  /**
   * Create a new trigger for a specific message tag, along with a way
   * to send a reply with data back to the sender. Triggers handle
   * out-of-band messaging, and the handler itself will be called
   * whenever a message is available. The handler itself accepts four
   * arguments: the source of the message, the message tag (which will
   * be the same as @p tag), the message data (of type @c Type), and a
   * boolean flag that states whether the message was received
   * out-of-band. The last will be @c true for out-of-band receives,
   * or @c false for receives at the end of a synchronization
   * step. The handler also returns a value, which will be routed back
   * to the sender.
   */
  template<typename Type, typename Handler>
  void trigger_with_reply(int tag, const Handler& handler);

  template<typename Type, typename Handler>
  void global_trigger(int tag, const Handler& handler, std::size_t buffer_size=0); 



  /**
   * Poll for any out-of-band messages. This routine will check if any
   * out-of-band messages are available. Those that are available will
   * be handled immediately, if possible.
   *
   * @returns if an out-of-band message has been received, but we are
   * unable to actually receive the message, a (source, tag) pair will
   * be returned. Otherwise, returns an empty optional.
   *
   * @param wait When true, we should block until a message comes in.
   *
   * @param synchronizing whether we are currently synchronizing the
   *                      process group
   */
  optional<std::pair<int, int> > 
  poll(bool wait = false, int block = -1, bool synchronizing = false) const;

  /**
   * Determines the context of the trigger currently executing. If
   * multiple triggers are executing (recursively), then the context
   * for the most deeply nested trigger will be returned. If no
   * triggers are executing, returns @c trc_none. This might be used,
   * for example, to determine whether a reply to a message should
   * itself be sent out-of-band or whether it can go via the normal,
   * slower communication route.
   */
  trigger_receive_context trigger_context() const;

  /// INTERNAL ONLY
  void receive_batch(process_id_type source, outgoing_messages& batch) const;

  /// INTERNAL ONLY
  ///
  /// Determine the actual communicator and tag that will be used for a
  /// transmission with the given tag.
  std::pair<boost::mpi::communicator, int> 
  actual_communicator_and_tag(int tag, int block) const;

  /// set the size of the message buffer used for buffered oob sends
  
  static void set_message_buffer_size(std::size_t s);

  /// get the size of the message buffer used for buffered oob sends

  static std::size_t message_buffer_size();
  static int old_buffer_size;
  static void* old_buffer;
private:

  void install_trigger(int tag, int block, 
      shared_ptr<trigger_base> const& launcher); 

  void poll_requests(int block=-1) const;

  
  // send a batch if the buffer is full now or would get full
  void maybe_send_batch(process_id_type dest) const;

  // actually send a batch
  void send_batch(process_id_type dest, outgoing_messages& batch) const;
  void send_batch(process_id_type dest) const;

  void pack_headers() const;

  /**
   * Process a batch of incoming messages immediately.
   *
   * @param source         the source of these messages
   */
  void process_batch(process_id_type source) const;
  void receive_batch(boost::mpi::status& status) const;

  //void free_finished_sends() const;
          
  /// Status messages used internally by the process group. The
  /// reserved tags occupy the closed range
  /// [msg_reserved_first, msg_reserved_last] = [126, 128].
  enum status_messages {
    /// the first of the reserved message tags
    msg_reserved_first = 126,
    /// Sent from a processor when sending batched messages
    msg_batch = msg_reserved_first,
    /// Sent from a processor when sending large batched messages, larger than
    /// the maximum buffer size for messages to be received by MPI_Irecv
    msg_large_batch = 127,
    /// Sent from a source processor to everyone else when that
    /// processor has entered the synchronize() function.
    msg_synchronizing = 128,
    /// the last of the reserved message tags
    msg_reserved_last = msg_synchronizing
  };

  /**
   * Description of a block of tags associated to a particular
   * distributed data structure. This structure will live as long as
   * the distributed data structure is around, and will be used to
   * help send messages to the data structure. It bundles the block's
   * receive handler, its on-synchronize handler, and its per-tag
   * triggers.
   */
  struct block_type
  {
    block_type()
    {
    }

    /// Handler for receive events
    receiver_type     on_receive;

    /// Handler executed at the start of synchronization
    on_synchronize_event_type  on_synchronize;

    /// Individual message triggers. Note: at present, this vector is
    /// indexed by the (local) tag of the trigger.  Any tags that
    /// don't have triggers will have NULL pointers in that spot.
    std::vector<shared_ptr<trigger_base> > triggers;
  };

  /**
   * Data structure containing all of the blocks for the distributed
   * data structures attached to a process group.
   */
  typedef std::vector<block_type*> blocks_type;

  /// Iterator into @c blocks_type.
  typedef blocks_type::iterator block_iterator;

  /**
   * Deleter used to deallocate a block when its distributed data
   * structure is destroyed. This type will be used as the deleter for
   * @c block_num.
   */
  struct deallocate_block;
  
  static std::vector<char> message_buffer;

public:
  /**
   * Data associated with the process group and all of its attached
   * distributed data structures.
   */
  shared_ptr<impl> impl_;

  /**
   * When non-null, indicates that this copy of the process group is
   * associated with a particular distributed data structure. The
   * integer value contains the block number (a value > 0) associated
   * with that data structure. The deleter for this @c shared_ptr is a
   * @c deallocate_block object that will deallocate the associated
   * block in @c impl_->blocks.
   */
  shared_ptr<int>  block_num;

  /**
   * Rank of this process, to avoid having to call rank() repeatedly.
   */
  int rank;

  /**
   * Number of processes in this process group, to avoid having to
   * call communicator::size() repeatedly.
   */
  int size;
};



/// Returns the rank of the calling process as cached in @p pg
/// (the @c rank data member).
inline mpi_process_group::process_id_type
process_id(const mpi_process_group& pg)
{
  return pg.rank;
}

/// Returns the number of processes in @p pg as cached in the group
/// (the @c size data member).
inline mpi_process_group::process_size_type
num_processes(const mpi_process_group& pg)
{
  return pg.size;
}

mpi_process_group::communicator_type communicator(const mpi_process_group& pg);

template<typename T>
void
send(const mpi_process_group& pg, mpi_process_group::process_id_type dest,
     int tag, const T& value);

template<typename InputIterator>
void
send(const mpi_process_group& pg, mpi_process_group::process_id_type dest,
     int tag, InputIterator first, InputIterator last);

/// Sends the pointer range [first, last) to @p dest by forwarding to
/// the @c send overload that takes a start pointer and an element
/// count (<tt>last - first</tt>). That overload is not declared in
/// this header; it is presumably provided by the implementation file.
template<typename T>
inline void
send(const mpi_process_group& pg, mpi_process_group::process_id_type dest,
     int tag, T* first, T* last)
{
  send(pg, dest, tag, first, last - first);
}

/// Sends the pointer-to-const range [first, last) to @p dest by
/// forwarding to the @c send overload that takes a start pointer and
/// an element count (<tt>last - first</tt>). That overload is not
/// declared in this header; it is presumably provided by the
/// implementation file.
template<typename T>
inline void
send(const mpi_process_group& pg, mpi_process_group::process_id_type dest,
     int tag, const T* first, const T* last)
{
  send(pg, dest, tag, first, last - first);
}

template<typename T>
mpi_process_group::process_id_type
receive(const mpi_process_group& pg, int tag, T& value);

template<typename T>
mpi_process_group::process_id_type
receive(const mpi_process_group& pg,
        mpi_process_group::process_id_type source, int tag, T& value);

optional<std::pair<mpi_process_group::process_id_type, int> >
probe(const mpi_process_group& pg);

void synchronize(const mpi_process_group& pg);

template<typename T, typename BinaryOperation>
T*
all_reduce(const mpi_process_group& pg, T* first, T* last, T* out,
           BinaryOperation bin_op);

template<typename T, typename BinaryOperation>
T*
scan(const mpi_process_group& pg, T* first, T* last, T* out,
           BinaryOperation bin_op);

template<typename InputIterator, typename T>
void
all_gather(const mpi_process_group& pg,
           InputIterator first, InputIterator last, std::vector<T>& out);

template<typename InputIterator>
mpi_process_group
process_subgroup(const mpi_process_group& pg,
                 InputIterator first, InputIterator last);

template<typename T>
void
broadcast(const mpi_process_group& pg, T& val, 
          mpi_process_group::process_id_type root);


/*******************************************************************
 * Out-of-band communication                                       *
 *******************************************************************/

/**
 * Send a single value out-of-band to @p dest. This overload is
 * selected when @c T is an MPI datatype, so the value is handed to
 * MPI directly.
 *
 * When @c SEND_OOB_BSEND is defined and the process group's message
 * buffer size is non-zero, the value is sent with @c MPI_Bsend and
 * the function returns at once. Otherwise it is sent with
 * @c MPI_Isend, and the function keeps calling @c pg.poll() until
 * @c MPI_Test reports the send complete. MPI return codes are not
 * inspected.
 *
 * @param pg     the process group to send through
 * @param dest   the destination process
 * @param tag    the message tag, translated through
 *               @c actual_communicator_and_tag()
 * @param value  the value to send
 * @param block  block number forwarded to
 *               @c actual_communicator_and_tag(); defaults to -1
 */
template<typename T>
typename enable_if<boost::mpi::is_mpi_datatype<T> >::type
send_oob(const mpi_process_group& pg, mpi_process_group::process_id_type dest,
         int tag, const T& value, int block=-1)
{
  using boost::mpi::get_mpi_datatype;

  // Resolve the communicator (first) and tag (second) to transmit on.
  std::pair<boost::mpi::communicator, int> route
    = pg.actual_communicator_and_tag(tag, block);

#ifdef SEND_OOB_BSEND
  if (mpi_process_group::message_buffer_size()) {
    MPI_Bsend(const_cast<T*>(&value), 1, get_mpi_datatype<T>(value), dest,
              route.second, route.first);
    return;
  }
#endif

  // Non-buffered path: start the send, then keep polling the process
  // group until MPI reports the request complete.
  MPI_Request pending;
  MPI_Isend(const_cast<T*>(&value), 1, get_mpi_datatype<T>(value), dest,
            route.second, route.first, &pending);

  for (int finished = 0; !finished; ) {
    pg.poll();
    MPI_Test(&pending, &finished, MPI_STATUS_IGNORE);
  }
}

/**
 * Send a single value out-of-band to @p dest. This overload is
 * selected when @c T is not an MPI datatype: the value is first
 * serialized into a @c packed_oarchive and the packed bytes are sent
 * as @c MPI_PACKED.
 *
 * When @c SEND_OOB_BSEND is defined and the process group's message
 * buffer size is non-zero, the bytes are sent with @c MPI_Bsend and
 * the function returns at once. Otherwise they are sent with
 * @c MPI_Isend, and the function keeps calling @c pg.poll() until
 * @c MPI_Test reports the send complete. MPI return codes are not
 * inspected.
 *
 * @param pg     the process group to send through
 * @param dest   the destination process
 * @param tag    the message tag, translated through
 *               @c actual_communicator_and_tag()
 * @param value  the value to serialize and send
 * @param block  block number forwarded to
 *               @c actual_communicator_and_tag(); defaults to -1
 */
template<typename T>
typename disable_if<boost::mpi::is_mpi_datatype<T> >::type
send_oob(const mpi_process_group& pg, mpi_process_group::process_id_type dest,
         int tag, const T& value, int block=-1)
{
  using boost::mpi::packed_oarchive;

  // Resolve the communicator (first) and tag (second) to transmit on.
  std::pair<boost::mpi::communicator, int> route
    = pg.actual_communicator_and_tag(tag, block);

  // Pack the value into an archive bound to that communicator.
  packed_oarchive packed(route.first);
  packed << value;
  std::size_t packed_bytes = packed.size();

#ifdef SEND_OOB_BSEND
  if (mpi_process_group::message_buffer_size()) {
    MPI_Bsend(const_cast<void*>(packed.address()), packed_bytes, MPI_PACKED,
              dest, route.second, route.first);
    return;
  }
#endif

  // Non-buffered path: start the send, then keep polling the process
  // group until MPI reports the request complete.
  MPI_Request pending;
  MPI_Isend(const_cast<void*>(packed.address()), packed_bytes, MPI_PACKED,
            dest, route.second, route.first, &pending);

  for (int finished = 0; !finished; ) {
    pg.poll();
    MPI_Test(&pending, &finished, MPI_STATUS_IGNORE);
  }
}

template<typename T>
typename enable_if<boost::mpi::is_mpi_datatype<T> >::type
receive_oob(const mpi_process_group& pg, 
            mpi_process_group::process_id_type source, int tag, T& value, int block=-1);

template<typename T>
typename disable_if<boost::mpi::is_mpi_datatype<T> >::type
receive_oob(const mpi_process_group& pg, 
            mpi_process_group::process_id_type source, int tag, T& value, int block=-1);

template<typename SendT, typename ReplyT>
typename enable_if<boost::mpi::is_mpi_datatype<ReplyT> >::type
send_oob_with_reply(const mpi_process_group& pg, 
                    mpi_process_group::process_id_type dest,
                    int tag, const SendT& send_value, ReplyT& reply_value,
                    int block = -1);

template<typename SendT, typename ReplyT>
typename disable_if<boost::mpi::is_mpi_datatype<ReplyT> >::type
send_oob_with_reply(const mpi_process_group& pg, 
                    mpi_process_group::process_id_type dest,
                    int tag, const SendT& send_value, ReplyT& reply_value,
                    int block = -1);

} } } // end namespace boost::graph::distributed

BOOST_IS_BITWISE_SERIALIZABLE(boost::graph::distributed::mpi_process_group::message_header)
namespace boost { namespace mpi {
    /// Marks mpi_process_group::message_header as an MPI datatype by
    /// deriving the trait from mpl::true_.
    template<>
    struct is_mpi_datatype<boost::graph::distributed::mpi_process_group::message_header>
      : mpl::true_
    {
    };
} } // end namespace boost::mpi

namespace std {

/// Optimized swap for outgoing messages: delegates to the member
/// swap() of @c outgoing_messages.
// NOTE(review): this adds a non-template overload to namespace std,
// which the C++ standard does not formally permit; consider an
// ADL-visible swap next to the class instead.
inline void
swap(boost::graph::distributed::mpi_process_group::outgoing_messages& x,
     boost::graph::distributed::mpi_process_group::outgoing_messages& y)
{ x.swap(y); }

} // end namespace std

BOOST_CLASS_IMPLEMENTATION(boost::graph::distributed::mpi_process_group::outgoing_messages,object_serializable)
BOOST_CLASS_TRACKING(boost::graph::distributed::mpi_process_group::outgoing_messages,track_never)

#include <boost/graph/distributed/detail/mpi_process_group.ipp>

#endif // BOOST_GRAPH_DISTRIBUTED_MPI_PROCESS_GROUP