summaryrefslogtreecommitdiff
path: root/gcc/tree-vect-generic.c
diff options
context:
space:
mode:
authorjakub <jakub@138bc75d-0d04-0410-961f-82ee72b054a4>2012-06-28 11:38:01 +0000
committerjakub <jakub@138bc75d-0d04-0410-961f-82ee72b054a4>2012-06-28 11:38:01 +0000
commit60420e1c32ba4bdd23188ae46879b387f769690a (patch)
tree97380392fe18b29c5f40a143b8e216c0fc48ab9c /gcc/tree-vect-generic.c
parent7f28687d51daef2dd0a2a00781e0fa5a17254a3c (diff)
downloadlinaro-gcc-60420e1c32ba4bdd23188ae46879b387f769690a.tar.gz
linaro-gcc-60420e1c32ba4bdd23188ae46879b387f769690a.tar.bz2
linaro-gcc-60420e1c32ba4bdd23188ae46879b387f769690a.zip
PR tree-optimization/53645
* tree-vect-generic.c (add_rshift): New function. (expand_vector_divmod): New function. (expand_vector_operation): Use it for vector integer TRUNC_{DIV,MOD}_EXPR by VECTOR_CST. * tree-vect-patterns.c (vect_recog_divmod_pattern): Replace unused lguup variable with dummy_int. * gcc.c-torture/execute/pr53645.c: New test. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@189043 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/tree-vect-generic.c')
-rw-r--r--gcc/tree-vect-generic.c528
1 files changed, 528 insertions, 0 deletions
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index 3b9f561bbfb..1b3ff274fd0 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -391,6 +391,515 @@ expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
return t;
}
+/* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
+ of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
+ the result if successful, otherwise return NULL_TREE. */
+static tree
+add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
+{
+ optab op;
+ unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
+ bool scalar_shift = true;
+
+ for (i = 1; i < nunits; i++)
+ {
+ if (shiftcnts[i] != shiftcnts[0])
+ scalar_shift = false;
+ }
+
+ if (scalar_shift && shiftcnts[0] == 0)
+ return op0;
+
+ if (scalar_shift)
+ {
+ op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
+ if (op != NULL
+ && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
+ return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
+ build_int_cst (NULL_TREE, shiftcnts[0]));
+ }
+
+ op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
+ if (op != NULL
+ && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
+ {
+ tree *vec = XALLOCAVEC (tree, nunits);
+ for (i = 0; i < nunits; i++)
+ vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
+ return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
+ build_vector (type, vec));
+ }
+
+ return NULL_TREE;
+}
+
+/* Try to expand integer vector division by constant using
+ widening multiply, shifts and additions. */
+static tree
+expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
+ tree op1, enum tree_code code)
+{
+ bool use_pow2 = true;
+ bool has_vector_shift = true;
+ int mode = -1, this_mode;
+ int pre_shift = -1, post_shift;
+ unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
+ int *shifts = XALLOCAVEC (int, nunits * 4);
+ int *pre_shifts = shifts + nunits;
+ int *post_shifts = pre_shifts + nunits;
+ int *shift_temps = post_shifts + nunits;
+ unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
+ int prec = TYPE_PRECISION (TREE_TYPE (type));
+ int dummy_int;
+ unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
+ unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
+ optab op;
+ tree *vec;
+ unsigned char *sel;
+ tree cur_op, mhi, mlo, mulcst, perm_mask, wider_type, tem;
+
+ if (prec > HOST_BITS_PER_WIDE_INT)
+ return NULL_TREE;
+
+ op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
+ if (op == NULL
+ || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
+ has_vector_shift = false;
+
+ /* Analysis phase. Determine if all op1 elements are either power
+ of two and it is possible to expand it using shifts (or for remainder
+ using masking). Additionally compute the multiplicative constants
+ and pre and post shifts if the division is to be expanded using
+ widening or high part multiplication plus shifts. */
+ for (i = 0; i < nunits; i++)
+ {
+ tree cst = VECTOR_CST_ELT (op1, i);
+ unsigned HOST_WIDE_INT ml;
+
+ if (!host_integerp (cst, unsignedp) || integer_zerop (cst))
+ return NULL_TREE;
+ pre_shifts[i] = 0;
+ post_shifts[i] = 0;
+ mulc[i] = 0;
+ if (use_pow2
+ && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
+ use_pow2 = false;
+ if (use_pow2)
+ {
+ shifts[i] = tree_log2 (cst);
+ if (shifts[i] != shifts[0]
+ && code == TRUNC_DIV_EXPR
+ && !has_vector_shift)
+ use_pow2 = false;
+ }
+ if (mode == -2)
+ continue;
+ if (unsignedp)
+ {
+ unsigned HOST_WIDE_INT mh;
+ unsigned HOST_WIDE_INT d = tree_low_cst (cst, 1) & mask;
+
+ if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
+ /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
+ return NULL_TREE;
+
+ if (d <= 1)
+ {
+ mode = -2;
+ continue;
+ }
+
+ /* Find a suitable multiplier and right shift count
+ instead of multiplying with D. */
+ mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
+
+ /* If the suggested multiplier is more than SIZE bits, we can
+ do better for even divisors, using an initial right shift. */
+ if ((mh != 0 && (d & 1) == 0)
+ || (!has_vector_shift && pre_shift != -1))
+ {
+ if (has_vector_shift)
+ pre_shift = floor_log2 (d & -d);
+ else if (pre_shift == -1)
+ {
+ unsigned int j;
+ for (j = 0; j < nunits; j++)
+ {
+ tree cst2 = VECTOR_CST_ELT (op1, j);
+ unsigned HOST_WIDE_INT d2;
+ int this_pre_shift;
+
+ if (!host_integerp (cst2, 1))
+ return NULL_TREE;
+ d2 = tree_low_cst (cst2, 1) & mask;
+ if (d2 == 0)
+ return NULL_TREE;
+ this_pre_shift = floor_log2 (d2 & -d2);
+ if (pre_shift == -1 || this_pre_shift < pre_shift)
+ pre_shift = this_pre_shift;
+ }
+ if (i != 0 && pre_shift != 0)
+ {
+ /* Restart. */
+ i = -1U;
+ mode = -1;
+ continue;
+ }
+ }
+ if (pre_shift != 0)
+ {
+ if ((d >> pre_shift) <= 1)
+ {
+ mode = -2;
+ continue;
+ }
+ mh = choose_multiplier (d >> pre_shift, prec,
+ prec - pre_shift,
+ &ml, &post_shift, &dummy_int);
+ gcc_assert (!mh);
+ pre_shifts[i] = pre_shift;
+ }
+ }
+ if (!mh)
+ this_mode = 0;
+ else
+ this_mode = 1;
+ }
+ else
+ {
+ HOST_WIDE_INT d = tree_low_cst (cst, 0);
+ unsigned HOST_WIDE_INT abs_d;
+
+ if (d == -1)
+ return NULL_TREE;
+
+ /* Since d might be INT_MIN, we have to cast to
+ unsigned HOST_WIDE_INT before negating to avoid
+ undefined signed overflow. */
+ abs_d = (d >= 0
+ ? (unsigned HOST_WIDE_INT) d
+ : - (unsigned HOST_WIDE_INT) d);
+
+ /* n rem d = n rem -d */
+ if (code == TRUNC_MOD_EXPR && d < 0)
+ d = abs_d;
+ else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
+ {
+ /* This case is not handled correctly below. */
+ mode = -2;
+ continue;
+ }
+ if (abs_d <= 1)
+ {
+ mode = -2;
+ continue;
+ }
+
+ choose_multiplier (abs_d, prec, prec - 1, &ml,
+ &post_shift, &dummy_int);
+ if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
+ {
+ this_mode = 4 + (d < 0);
+ ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
+ }
+ else
+ this_mode = 2 + (d < 0);
+ }
+ mulc[i] = ml;
+ post_shifts[i] = post_shift;
+ if ((i && !has_vector_shift && post_shifts[0] != post_shift)
+ || post_shift >= prec
+ || pre_shifts[i] >= prec)
+ this_mode = -2;
+
+ if (i == 0)
+ mode = this_mode;
+ else if (mode != this_mode)
+ mode = -2;
+ }
+
+ vec = XALLOCAVEC (tree, nunits);
+
+ if (use_pow2)
+ {
+ tree addend = NULL_TREE;
+ if (!unsignedp)
+ {
+ tree uns_type;
+
+ /* Both division and remainder sequences need
+ op0 < 0 ? mask : 0 computed. It can be either computed as
+ (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
+ if none of the shifts is 0, or as the conditional. */
+ for (i = 0; i < nunits; i++)
+ if (shifts[i] == 0)
+ break;
+ uns_type
+ = build_vector_type (build_nonstandard_integer_type (prec, 1),
+ nunits);
+ if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
+ {
+ for (i = 0; i < nunits; i++)
+ shift_temps[i] = prec - 1;
+ cur_op = add_rshift (gsi, type, op0, shift_temps);
+ if (cur_op != NULL_TREE)
+ {
+ cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
+ uns_type, cur_op);
+ for (i = 0; i < nunits; i++)
+ shift_temps[i] = prec - shifts[i];
+ cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
+ if (cur_op != NULL_TREE)
+ addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
+ type, cur_op);
+ }
+ }
+ if (addend == NULL_TREE
+ && expand_vec_cond_expr_p (type, type))
+ {
+ tree zero, cst, cond;
+ gimple stmt;
+
+ zero = build_zero_cst (type);
+ cond = build2 (LT_EXPR, type, op0, zero);
+ for (i = 0; i < nunits; i++)
+ vec[i] = build_int_cst (TREE_TYPE (type),
+ ((unsigned HOST_WIDE_INT) 1
+ << shifts[i]) - 1);
+ cst = build_vector (type, vec);
+ addend = create_tmp_reg (type, NULL);
+ add_referenced_var (addend);
+ addend = make_ssa_name (addend, NULL);
+ stmt = gimple_build_assign_with_ops3 (VEC_COND_EXPR, addend,
+ cond, cst, zero);
+ gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+ }
+ }
+ if (code == TRUNC_DIV_EXPR)
+ {
+ if (unsignedp)
+ {
+ /* q = op0 >> shift; */
+ cur_op = add_rshift (gsi, type, op0, shifts);
+ if (cur_op != NULL_TREE)
+ return cur_op;
+ }
+ else if (addend != NULL_TREE)
+ {
+ /* t1 = op0 + addend;
+ q = t1 >> shift; */
+ op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
+ if (op != NULL
+ && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
+ {
+ cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
+ cur_op = add_rshift (gsi, type, cur_op, shifts);
+ if (cur_op != NULL_TREE)
+ return cur_op;
+ }
+ }
+ }
+ else
+ {
+ tree mask;
+ for (i = 0; i < nunits; i++)
+ vec[i] = build_int_cst (TREE_TYPE (type),
+ ((unsigned HOST_WIDE_INT) 1
+ << shifts[i]) - 1);
+ mask = build_vector (type, vec);
+ op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
+ if (op != NULL
+ && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
+ {
+ if (unsignedp)
+ /* r = op0 & mask; */
+ return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
+ else if (addend != NULL_TREE)
+ {
+ /* t1 = op0 + addend;
+ t2 = t1 & mask;
+ r = t2 - addend; */
+ op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
+ if (op != NULL
+ && optab_handler (op, TYPE_MODE (type))
+ != CODE_FOR_nothing)
+ {
+ cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
+ addend);
+ cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
+ cur_op, mask);
+ op = optab_for_tree_code (MINUS_EXPR, type,
+ optab_default);
+ if (op != NULL
+ && optab_handler (op, TYPE_MODE (type))
+ != CODE_FOR_nothing)
+ return gimplify_build2 (gsi, MINUS_EXPR, type,
+ cur_op, addend);
+ }
+ }
+ }
+ }
+ }
+
+ if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
+ return NULL_TREE;
+
+ op = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR, type, optab_default);
+ if (op == NULL
+ || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
+ return NULL_TREE;
+ op = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR, type, optab_default);
+ if (op == NULL
+ || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
+ return NULL_TREE;
+ sel = XALLOCAVEC (unsigned char, nunits);
+ for (i = 0; i < nunits; i++)
+ sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1);
+ if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
+ return NULL_TREE;
+ wider_type
+ = build_vector_type (build_nonstandard_integer_type (prec * 2, unsignedp),
+ nunits / 2);
+ if (GET_MODE_CLASS (TYPE_MODE (wider_type)) != MODE_VECTOR_INT
+ || GET_MODE_BITSIZE (TYPE_MODE (wider_type))
+ != GET_MODE_BITSIZE (TYPE_MODE (type)))
+ return NULL_TREE;
+
+ cur_op = op0;
+
+ switch (mode)
+ {
+ case 0:
+ gcc_assert (unsignedp);
+ /* t1 = oprnd0 >> pre_shift;
+ t2 = (type) (t1 w* ml >> prec);
+ q = t2 >> post_shift; */
+ cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
+ if (cur_op == NULL_TREE)
+ return NULL_TREE;
+ break;
+ case 1:
+ gcc_assert (unsignedp);
+ for (i = 0; i < nunits; i++)
+ {
+ shift_temps[i] = 1;
+ post_shifts[i]--;
+ }
+ break;
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ gcc_assert (!unsignedp);
+ for (i = 0; i < nunits; i++)
+ shift_temps[i] = prec - 1;
+ break;
+ default:
+ return NULL_TREE;
+ }
+
+ for (i = 0; i < nunits; i++)
+ vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
+ mulcst = build_vector (type, vec);
+ for (i = 0; i < nunits; i++)
+ vec[i] = build_int_cst (TREE_TYPE (type), sel[i]);
+ perm_mask = build_vector (type, vec);
+ mhi = gimplify_build2 (gsi, VEC_WIDEN_MULT_HI_EXPR, wider_type,
+ cur_op, mulcst);
+ mhi = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, mhi);
+ mlo = gimplify_build2 (gsi, VEC_WIDEN_MULT_LO_EXPR, wider_type,
+ cur_op, mulcst);
+ mlo = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, mlo);
+ if (BYTES_BIG_ENDIAN)
+ cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, mhi, mlo, perm_mask);
+ else
+ cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, mlo, mhi, perm_mask);
+
+ switch (mode)
+ {
+ case 0:
+ /* t1 = oprnd0 >> pre_shift;
+ t2 = (type) (t1 w* ml >> prec);
+ q = t2 >> post_shift; */
+ cur_op = add_rshift (gsi, type, cur_op, post_shifts);
+ break;
+ case 1:
+ /* t1 = (type) (oprnd0 w* ml >> prec);
+ t2 = oprnd0 - t1;
+ t3 = t2 >> 1;
+ t4 = t1 + t3;
+ q = t4 >> (post_shift - 1); */
+ op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
+ if (op == NULL
+ || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
+ return NULL_TREE;
+ tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
+ tem = add_rshift (gsi, type, tem, shift_temps);
+ op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
+ if (op == NULL
+ || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
+ return NULL_TREE;
+ tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
+ cur_op = add_rshift (gsi, type, tem, post_shifts);
+ if (cur_op == NULL_TREE)
+ return NULL_TREE;
+ break;
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ /* t1 = (type) (oprnd0 w* ml >> prec);
+ t2 = t1; [ iff (mode & 2) != 0 ]
+ t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
+ t3 = t2 >> post_shift;
+ t4 = oprnd0 >> (prec - 1);
+ q = t3 - t4; [ iff (mode & 1) == 0 ]
+ q = t4 - t3; [ iff (mode & 1) != 0 ] */
+ if ((mode & 2) == 0)
+ {
+ op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
+ if (op == NULL
+ || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
+ return NULL_TREE;
+ cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
+ }
+ cur_op = add_rshift (gsi, type, cur_op, post_shifts);
+ if (cur_op == NULL_TREE)
+ return NULL_TREE;
+ tem = add_rshift (gsi, type, op0, shift_temps);
+ if (tem == NULL_TREE)
+ return NULL_TREE;
+ op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
+ if (op == NULL
+ || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
+ return NULL_TREE;
+ if ((mode & 1) == 0)
+ cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
+ else
+ cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (code == TRUNC_DIV_EXPR)
+ return cur_op;
+
+ /* We divided. Now finish by:
+ t1 = q * oprnd1;
+ r = oprnd0 - t1; */
+ op = optab_for_tree_code (MULT_EXPR, type, optab_default);
+ if (op == NULL
+ || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
+ return NULL_TREE;
+ tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
+ op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
+ if (op == NULL
+ || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
+ return NULL_TREE;
+ return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
+}
+
static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
gimple assign, enum tree_code code)
@@ -454,6 +963,25 @@ expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type
return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
}
+
+ case TRUNC_DIV_EXPR:
+ case TRUNC_MOD_EXPR:
+ {
+ tree rhs1 = gimple_assign_rhs1 (assign);
+ tree rhs2 = gimple_assign_rhs2 (assign);
+ tree ret;
+
+ if (!optimize
+ || !VECTOR_INTEGER_TYPE_P (type)
+ || TREE_CODE (rhs2) != VECTOR_CST)
+ break;
+
+ ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
+ if (ret != NULL_TREE)
+ return ret;
+ break;
+ }
+
default:
break;
}