/*
   Source listing: pqc/external/flint-2.4.3/qsieve/ll_compute_poly_data.c
   (453 lines, 14 KiB, C)
*/

/*=============================================================================
This file is part of FLINT.
FLINT is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
FLINT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with FLINT; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2006, 2011 William Hart
******************************************************************************/
#define ulong ulongxx /* interferes with system includes */
#include <stdlib.h>
#include <stdio.h>
#undef ulong
#define ulong mp_limb_t
#include <gmp.h>
#include "flint.h"
#include "ulong_extras.h"
#include "qsieve.h"
#include "fmpz.h"
/*
   Fill in A_ind[2] and A_ind[3] for a four-factor A, given that A_ind[0]
   and A_ind[1] are already set.

   A pair of positions (lower, upper = lower + 1 onwards) is scanned,
   starting at lower = fact, within the window that ends at min + span.
   For each lower position the upper position advances until the product
   of all four primes reaches target/P_GOODNESS or the window ends.

   The break only leaves the inner scan; the outer loop then still moves
   on to the next lower position.  Consequently the loops finish only when
   lower + 1 reaches min + span, so whenever fact + 1 < min + span the
   values stored are (min + span - 1, min + span); otherwise they are
   (fact, fact + 1).

   NOTE(review): the stored pair therefore does not depend on where the
   threshold was met -- confirm whether stopping at the first pair that
   reaches it was intended.

   qs_inf is not used by this function.
*/
void balance4(qs_t qs_inf, mp_limb_t * A_ind,
              prime_t * factor_base, slong min, slong fact, slong span, mp_limb_t target)
{
    const slong limit = min + span;
    const mp_limb_t base = factor_base[A_ind[0]].p * factor_base[A_ind[1]].p;
    slong lower = fact;
    slong upper = lower + 1;

    for ( ; upper < limit; lower++, upper = lower + 1)
    {
        /* advance upper until the four-prime product is large enough */
        for ( ; upper < limit; upper++)
        {
            if (base*factor_base[lower].p*factor_base[upper].p >= target/P_GOODNESS)
                break;
        }
    }

    A_ind[2] = lower;
    A_ind[3] = upper;
}
/*
   Fill in A_ind[3] and A_ind[4] for a five-factor A, given that A_ind[0],
   A_ind[1] and A_ind[2] are already set.

   A pair of positions (lower, upper = lower + 1 onwards) is scanned,
   starting at lower = A_ind[2] + 1, within the window that ends at
   min + span.  For each lower position the upper position advances until
   the product of all five primes reaches target/P_GOODNESS or the window
   ends.

   The break only leaves the inner scan; the outer loop then still moves
   on to the next lower position.  Consequently the loops finish only when
   lower + 1 reaches min + span, so whenever A_ind[2] + 2 < min + span the
   values stored are (min + span - 1, min + span); otherwise they are
   (A_ind[2] + 1, A_ind[2] + 2).

   NOTE(review): the stored pair therefore does not depend on where the
   threshold was met -- confirm whether stopping at the first pair that
   reaches it was intended.

   qs_inf and high are not used by this function.
*/
void balance5(qs_t qs_inf, mp_limb_t * A_ind,
              prime_t * factor_base, slong min, slong high, slong span, mp_limb_t target)
{
    const slong limit = min + span;
    const mp_limb_t base = factor_base[A_ind[0]].p * factor_base[A_ind[1]].p
                         * factor_base[A_ind[2]].p;
    slong lower = A_ind[2] + 1;
    slong upper = lower + 1;

    for ( ; upper < limit; lower++, upper = lower + 1)
    {
        /* advance upper until the five-prime product is large enough */
        for ( ; upper < limit; upper++)
        {
            if (base*factor_base[lower].p*factor_base[upper].p >= target/P_GOODNESS)
                break;
        }
    }

    A_ind[3] = lower;
    A_ind[4] = upper;
}
/*
   Choose the factor-base indices A_ind[0..s-1] for the next candidate A and
   store the product of the corresponding primes in qs_inf->A.

   If qs_inf->A is 0 on entry, an initial index tuple is built starting from
   A_ind[0] = min.  Otherwise the existing tuple is advanced: the last index
   is incremented if that keeps the product below the upper goodness bound,
   and when a position is exhausted the carry moves to the preceding index
   and the later indices are re-derived.

   The candidate is not checked against the goodness bounds here; that is
   done by qsieve_ll_compute_A, which calls this function repeatedly.

   When the leading indices are exhausted a message is printed and abort()
   is called.

   NOTE(review): neither switch has a default case, so for s outside 1..5
   no index is updated and A is formed from whatever A_ind already holds.
*/
void try_compute_A(qs_t qs_inf)
{
slong min = qs_inf->min;
slong span = qs_inf->span;
slong fact = qs_inf->fact;
slong mid = qs_inf->mid;
slong high = qs_inf->high;
slong s = qs_inf->s;
mp_limb_t * A_ind = qs_inf->A_ind;
mp_limb_t target = qs_inf->target_A;
prime_t * factor_base = qs_inf->factor_base;
slong i, j;
mp_limb_t prod;
if (qs_inf->A == 0) /* this is our first poly */
{
A_ind[0] = min;
/* try to pick prime factors of A whose product is not much smaller than target_A */
switch (s) /* we can only have up to 5 factors in A for small factorisations */
{
case 1:
/* single factor: A_ind[0] = min is all that is needed */
break;
case 2:
prod = factor_base[A_ind[0]].p;
i = A_ind[0] + 1;
/* scan upward until the product reaches target/P_GOODNESS2 or the last index of the window */
while (prod*factor_base[i].p < target/P_GOODNESS2 && i + 1 < min + span) i++;
A_ind[1] = i;
break;
case 3:
A_ind[1] = mid;
prod = factor_base[A_ind[0]].p * factor_base[A_ind[1]].p;
i = A_ind[1] + 1;
/* scan upward until the product reaches target/P_GOODNESS or the last index of the window */
while (prod*factor_base[i].p < target/P_GOODNESS && i + 1 < min + span) i++;
A_ind[2] = i;
break;
case 4:
A_ind[1] = A_ind[0] + 1;
/* balance4 sets A_ind[2] and A_ind[3] */
balance4(qs_inf, A_ind, factor_base, min, fact, span, target);
break;
case 5:
A_ind[1] = A_ind[0] + 1;
A_ind[2] = mid;
/* balance5 sets A_ind[3] and A_ind[4] */
balance5(qs_inf, A_ind, factor_base, min, high, span, target);
break;
}
} else /* update to the next poly */
{
switch (s)
{
case 1:
/* step to the next prime while it stays inside the window */
if (A_ind[0] + 1 < min + span)
A_ind[0]++;
else
goto out_of_polys;
break;
case 2:
i = A_ind[0];
j = A_ind[1] + 1;
if (j < min + span && factor_base[i].p * factor_base[j].p < P_GOODNESS2*target)
A_ind[1] = j; /* we can just increment second index */
else /* must increment first index */
{
i++;
if (i < fact) /* find first appropriate second index */
{
A_ind[0] = i;
prod = factor_base[A_ind[0]].p;
i = A_ind[0] + 1;
/* NOTE(review): this scan uses P_GOODNESS, while the other s == 2 bounds in this
   function and in qsieve_ll_compute_A use P_GOODNESS2 -- confirm which is intended */
while (prod*factor_base[i].p < target/P_GOODNESS && i + 1 < min + span) i++;
A_ind[1] = i;
} else
goto out_of_polys;
}
break;
case 3:
prod = factor_base[A_ind[0]].p * factor_base[A_ind[1]].p;
j = A_ind[2] + 1;
if (j < min + span && prod * factor_base[j].p < P_GOODNESS*target)
A_ind[2] = j; /* increment third index */
else
{
A_ind[1]++; /* increment second index */
i = A_ind[1] + 1;
prod = factor_base[A_ind[0]].p * factor_base[A_ind[1]].p;
if (i < min + span && prod * factor_base[i].p < P_GOODNESS*target)
{
/* re-derive the third index for the new second index */
while (prod*factor_base[i].p < target/P_GOODNESS && i + 1 < min + span) i++;
A_ind[2] = i;
} else /* must increment first index */
{
A_ind[0]++;
if (A_ind[0] < mid)
{
/* restart the second index at mid and re-derive the third */
A_ind[1] = mid;
prod = factor_base[A_ind[0]].p * factor_base[A_ind[1]].p;
i = A_ind[1] + 1;
while (prod*factor_base[i].p < target/P_GOODNESS && i + 1 < min + span) i++;
A_ind[2] = i;
} else
goto out_of_polys;
}
}
break;
case 4:
prod = factor_base[A_ind[0]].p * factor_base[A_ind[1]].p * factor_base[A_ind[2]].p;
j = A_ind[3] + 1;
if (j < min + span && prod * factor_base[j].p < P_GOODNESS*target)
A_ind[3] = j; /* increment fourth index */
else
{
A_ind[2]++; /* increment third index */
i = A_ind[2] + 1;
prod = factor_base[A_ind[0]].p * factor_base[A_ind[1]].p * factor_base[A_ind[2]].p;
if (i < min + span && prod * factor_base[i].p < P_GOODNESS*target)
{
/* re-derive the fourth index for the new third index */
while (prod*factor_base[i].p < target/P_GOODNESS && i + 1 < min + span) i++;
A_ind[3] = i;
} else
{
A_ind[1]++; /* increment second index */
if (A_ind[1] < fact)
{
/* recompute the third and fourth indices for the new second index */
balance4(qs_inf, A_ind, factor_base, min, fact, span, target);
} else
{
A_ind[0]++; /* increment first factor */
A_ind[1] = A_ind[0] + 1;
if (A_ind[1] < fact)
{
balance4(qs_inf, A_ind, factor_base, min, fact, span, target);
} else
goto out_of_polys;
}
}
}
break;
case 5:
prod = factor_base[A_ind[0]].p * factor_base[A_ind[1]].p * factor_base[A_ind[2]].p * factor_base[A_ind[3]].p;
j = A_ind[4] + 1;
if (j < min + span && prod * factor_base[j].p < P_GOODNESS*target)
A_ind[4] = j; /* increment fifth index */
else
{
A_ind[3]++; /* increment fourth index */
i = A_ind[3] + 1;
prod = factor_base[A_ind[0]].p * factor_base[A_ind[1]].p * factor_base[A_ind[2]].p * factor_base[A_ind[3]].p;
if (i < min + span && prod * factor_base[i].p < P_GOODNESS*target)
{
/* re-derive the fifth index for the new fourth index */
while (prod*factor_base[i].p < target/P_GOODNESS && i + 1 < min + span) i++;
A_ind[4] = i;
} else
{
A_ind[2]++; /* increment third index */
if (A_ind[2] < high)
{
/* recompute the fourth and fifth indices for the new third index */
balance5(qs_inf, A_ind, factor_base, min, high, span, target);
} else
{
A_ind[1]++; /* increment second index */
if (A_ind[1] < high)
{
/* restart the third index at mid */
A_ind[2] = mid;
balance5(qs_inf, A_ind, factor_base, min, high, span, target);
} else
{
A_ind[0]++; /* increment first factor */
A_ind[1] = A_ind[0] + 1;
if (A_ind[1] < mid)
{
A_ind[2] = mid;
balance5(qs_inf, A_ind, factor_base, min, high, span, target);
} else
goto out_of_polys;
}
}
}
}
break;
}
}
/* A is the product of the s selected factor-base primes */
qs_inf->A = 1;
for (i = 0; i < s; i++)
qs_inf->A *= factor_base[A_ind[i]].p;
return;
out_of_polys:
/* no further index tuple is available: report and terminate the process */
flint_printf("Out of polynomials, s = %wd\n", qs_inf->s);
abort();
}
/*
   Select the next acceptable value of A and precompute an inverse of p^2
   for each of its prime factors.

   try_compute_A is called repeatedly until qs_inf->A is acceptable:
     - for s > 2, A must lie in [target_A / P_GOODNESS, P_GOODNESS * target_A];
     - for s == 2, the same with P_GOODNESS2;
     - for any other s the first candidate is accepted unchecked.

   Afterwards inv_p2[i] is set to n_preinvert_limb(p*p) for the i-th prime
   factor p of A.
*/
void qsieve_ll_compute_A(qs_t qs_inf)
{
    slong k;
    int accepted;

    for (;;)
    {
        try_compute_A(qs_inf);

        if (qs_inf->s > 2)
        {
            accepted = !(qs_inf->A > P_GOODNESS * qs_inf->target_A
                      || qs_inf->A < qs_inf->target_A / P_GOODNESS);
        }
        else if (qs_inf->s == 2)
        {
            accepted = !(qs_inf->A > P_GOODNESS2 * qs_inf->target_A
                      || qs_inf->A < qs_inf->target_A / P_GOODNESS2);
        }
        else
        {
            accepted = 1;
        }

        if (accepted)
            break;
    }

#if QS_DEBUG > 1
    flint_printf("A = %wd, target A = %wd\n", qs_inf->A, qs_inf->target_A);
#endif

    /* one precomputed inverse of p^2 per prime factor of A */
    for (k = 0; k < qs_inf->s; k++)
    {
        mp_limb_t prime = qs_inf->factor_base[qs_inf->A_ind[k]].p;

        qs_inf->inv_p2[k] = n_preinvert_limb(prime*prime);
    }
}
/*
   Compute the per-factor B terms and their sum B for the current A.

   For the i-th prime factor p of A:
     - A_modp[i]  = (A/p) mod p;
     - gamma      = sqrts[A_ind[i]] * (A/p)^(-1) mod p, replaced by
                    p - gamma when it exceeds p/2;
     - B_terms[i] = (A/p) * gamma.

   qs_inf->B is set to B_terms[0] + ... + B_terms[s-1].
*/
void qsieve_ll_compute_B_terms(qs_t qs_inf)
{
    slong s = qs_inf->s;
    mp_limb_t * A_ind = qs_inf->A_ind;
    mp_limb_t * A_modp = qs_inf->A_modp;
    mp_limb_t * B_terms = qs_inf->B_terms;
    prime_t * factor_base = qs_inf->factor_base;
    mp_limb_t A = qs_inf->A;
    mp_limb_t sum;
    slong k;

    for (k = 0; k < s; k++)
    {
        mp_limb_t idx = A_ind[k];
        mp_limb_t p = factor_base[idx].p;
        mp_limb_t pinv = factor_base[idx].pinv;
        mp_limb_t cofactor = A/p; /* TODO: possibly use precomputed inverse here */
        mp_limb_t residue = n_mod2_preinv(cofactor, p, pinv);
        mp_limb_t gamma;

        A_modp[k] = residue;

        gamma = n_invmod(residue, p);
        gamma = n_mulmod2_preinv(gamma, qs_inf->sqrts[idx], p, pinv);

        /* keep the smaller of gamma and p - gamma */
        if (gamma > p/2)
            gamma = p - gamma;

        B_terms[k] = cofactor*gamma;
    }

    sum = B_terms[0];
    for (k = 1; k < s; k++)
        sum += B_terms[k];

    qs_inf->B = sum;
}
/*
   For every factor-base prime p from index 2 upwards, compute the data
   that depends on the current A and B:

     A_inv[i]      = A^(-1) mod p
     A_inv2B[j][i] = 2 * B_terms[j] * A^(-1) mod p,  for j = 0..s-1
     soln1[i]      = ((sqrts[i] - B) * A^(-1) + sieve_size/2) mod p
     soln2[i]      = (soln1[i] - 2 * sqrts[i] * A^(-1)) mod p

   Indices 0 and 1 are skipped (per the original comment: "k and 2").
*/
void qsieve_ll_compute_off_adj(qs_t qs_inf)
{
    slong num_primes = qs_inf->num_primes;
    slong s = qs_inf->s;
    mp_limb_t A = qs_inf->A;
    mp_limb_t B = qs_inf->B;
    mp_limb_t * A_inv = qs_inf->A_inv;
    mp_limb_t ** A_inv2B = qs_inf->A_inv2B;
    mp_limb_t * B_terms = qs_inf->B_terms;
    mp_limb_t * soln1 = qs_inf->soln1;
    mp_limb_t * soln2 = qs_inf->soln2;
    int * sqrts = qs_inf->sqrts;
    prime_t * factor_base = qs_inf->factor_base;
    slong k, t;

    for (k = 2; k < num_primes; k++) /* skip k and 2 */
    {
        mp_limb_t p = factor_base[k].p;
        mp_limb_t pinv = factor_base[k].pinv;
        mp_limb_t ainv = n_invmod(n_mod2_preinv(A, p, pinv), p);
        mp_limb_t acc, second;

        A_inv[k] = ainv;

        /* 2 * B_terms[t] / A mod p for each term */
        for (t = 0; t < s; t++)
        {
            acc = n_mod2_preinv(B_terms[t], p, pinv);
            acc = n_mulmod2_preinv(acc, ainv, p, pinv);
            acc += acc;
            if (acc >= p)
                acc -= p;
            A_inv2B[t][k] = acc;
        }

        /* first root: (sqrt - B) / A, shifted by half the sieve size */
        acc = n_mod2_preinv(B, p, pinv);
        acc = sqrts[k] + p - acc;
        acc *= ainv;
        acc += qs_inf->sieve_size/2;
        soln1[k] = n_mod2_preinv(acc, p, pinv);

        /* second root: first root plus 2 * (-sqrt) / A */
        acc = p - sqrts[k];
        if (acc == p)
            acc = 0;
        acc = n_mulmod2_preinv(acc, ainv, p, pinv);
        acc += acc;
        if (acc >= p)
            acc -= p;

        second = acc + soln1[k];
        if (second >= p)
            second -= p;
        soln2[k] = second;
    }
}
/*
   Recompute soln1/soln2 for the factor-base primes that divide A.

   For the j-th prime factor p of A (factor-base index A_ind[j]):
     - the two-limb value (hi, lo) and B are reduced modulo p^2, with B
       interpreted as a signed limb;
     - diff = ((hi, lo) mod p^2) - (B mod p^2)^2 is divided by p, treating
       diff as a signed limb;
     - the quotient is multiplied by the inverse of (B * A_modp[j]) mod p,
       sieve_size/2 is added, and the result is reduced into [0, p), again
       treating it as signed;
     - that value is stored in soln1[A_ind[j]], and soln2[A_ind[j]] is set
       to -1.

   NOTE(review): (hi, lo) is presumably the number being sieved and the -1
   in soln2 presumably marks "no second root" -- confirm against the code
   that fills hi/lo and the code that reads soln2.
*/
void qsieve_ll_compute_A_factor_offsets(qs_t qs_inf)
{
    slong s = qs_inf->s;
    mp_limb_t * A_ind = qs_inf->A_ind;
    mp_limb_t * A_modp = qs_inf->A_modp;
    mp_limb_t * soln1 = qs_inf->soln1;
    mp_limb_t * soln2 = qs_inf->soln2;
    mp_limb_t * inv_p2 = qs_inf->inv_p2;
    prime_t * factor_base = qs_inf->factor_base;
    mp_limb_t hi = qs_inf->hi;
    mp_limb_t lo = qs_inf->lo;
    mp_limb_t B = qs_inf->B;
    slong k;

    for (k = 0; k < s; k++)
    {
        mp_limb_t idx = A_ind[k];
        mp_limb_t p = factor_base[idx].p;
        mp_limb_t pinv = factor_base[idx].pinv;
        mp_limb_t p_sq = p*p;
        mp_limb_t p_sq_inv = inv_p2[k];
        mp_limb_t diff, b_red, scale, offset;

        diff = n_ll_mod_preinv(hi, lo, p_sq, p_sq_inv);

        /* B mod p^2, with B read as a signed value */
        if ((mp_limb_signed_t) B < 0)
        {
            b_red = n_mod2_preinv(-B, p_sq, p_sq_inv);
            b_red = (b_red == 0) ? 0 : p_sq - b_red;
        }
        else
        {
            b_red = n_mod2_preinv(B, p_sq, p_sq_inv);
        }

        /* inverse of B * (A/p) modulo p */
        scale = n_invmod(n_mod2_preinv(b_red*A_modp[k], p, pinv), p);

        /* signed division of the difference by p */
        diff -= b_red*b_red;
        if ((mp_limb_signed_t) diff < 0)
            offset = -(-diff/p); /* TODO consider using precomputed inverse */
        else
            offset = diff/p; /* TODO consider using precomputed inverse */

        offset *= scale;
        offset += qs_inf->sieve_size/2;

        /* reduce into [0, p), treating offset as signed */
        if ((mp_limb_signed_t) offset < 0)
        {
            offset = n_mod2_preinv(-offset, p, pinv);
            offset = (offset == 0) ? 0 : p - offset;
        }
        else
        {
            offset = n_mod2_preinv(offset, p, pinv);
        }

        soln1[idx] = offset;
        soln2[idx] = -1;
    }
}
/*
   Set qs_inf->C = (B^2 - kn) / A.

   B is read as a signed limb and its absolute value is squared.  The
   final step uses fmpz_divexact_ui, so the division by A is presumably
   expected to be exact.
*/
void qsieve_ll_compute_C(qs_t qs_inf)
{
    mp_limb_t A = qs_inf->A;
    mp_limb_t abs_B = qs_inf->B;

    if ((mp_limb_signed_t) abs_B < WORD(0))
        abs_B = -abs_B;

    /* C = |B|^2 */
    fmpz_set_ui(qs_inf->C, abs_B);
    fmpz_mul_ui(qs_inf->C, qs_inf->C, abs_B);

    /* C = (|B|^2 - kn) / A */
    fmpz_sub(qs_inf->C, qs_inf->C, qs_inf->kn);
    fmpz_divexact_ui(qs_inf->C, qs_inf->C, A);
}
/*
   Compute all data for the next polynomial by running the individual
   stages in order.  Each stage reads fields of qs_inf written by the
   earlier ones, so the order of the calls matters.
*/
void qsieve_ll_compute_poly_data(qs_t qs_inf)
{
/* choose A (sets A, A_ind and inv_p2) */
qsieve_ll_compute_A(qs_inf);
/* sets A_modp, B_terms and B from A */
qsieve_ll_compute_B_terms(qs_inf);
/* sets A_inv, A_inv2B, soln1 and soln2 for factor-base indices >= 2 */
qsieve_ll_compute_off_adj(qs_inf);
/* overwrites soln1/soln2 at the indices of the prime factors of A */
qsieve_ll_compute_A_factor_offsets(qs_inf);
/* sets C = (B^2 - kn) / A */
qsieve_ll_compute_C(qs_inf);
}