362 lines
9.7 KiB
C
362 lines
9.7 KiB
C
/*=============================================================================

    This file is part of FLINT.

    FLINT is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    FLINT is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with FLINT; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA

=============================================================================*/
|
|
/******************************************************************************

    Copyright (C) 2012 Sebastian Pancratz

******************************************************************************/
|
|
|
|
#include "fmpz_mod_poly.h"
|
|
#include "ulong_extras.h"
|
|
#include "qadic.h"
|
|
|
|
/*
|
|
Assumes that \code{len1} and \code{len2} are positive but at
|
|
most~$d$, and also that \code{len1} is at least $6$.
|
|
|
|
The latter assumption guarantees that $\ceil{n/B} \geq 2$,
|
|
i.e.\ $n \geq 2B$ so $n \geq 2 \ceil{\sqrt{n}}$.
|
|
*/
|
|
|
|
static void
|
|
_fmpz_mod_poly_compose_smod_rectangular(fmpz *rop,
|
|
const fmpz *op1, slong len1,
|
|
const fmpz *op2, slong len2,
|
|
const fmpz *a, const slong *j, slong lena,
|
|
const fmpz_t p)
|
|
{
|
|
const slong d = j[lena - 1];
|
|
|
|
if (len2 == 1)
|
|
{
|
|
_fmpz_mod_poly_evaluate_fmpz(rop, op1, len1, op2, p);
|
|
_fmpz_vec_zero(rop + 1, d - 1);
|
|
}
|
|
else
|
|
{
|
|
const slong B = n_sqrt(len1);
|
|
slong i, k;
|
|
fmpz *pows, *t;
|
|
|
|
pows = _fmpz_vec_init((B + 2) * d);
|
|
t = _fmpz_vec_init(2 * d - 1);
|
|
|
|
fmpz_one(pows + 0 * d + 0);
|
|
_fmpz_vec_set(pows + 1 * d, op2, len2);
|
|
for (i = 2; i <= B; i++)
|
|
{
|
|
_fmpz_poly_mul(pows + i * d, pows + (i - 1) * d, d, op2, len2);
|
|
_fmpz_poly_reduce(pows + i * d, d + len2 - 1, a, j, lena);
|
|
_fmpz_vec_scalar_mod_fmpz(pows + i * d, pows + i * d, d, p);
|
|
}
|
|
|
|
_fmpz_vec_zero(rop, d);
|
|
|
|
for (i = (len1 + B - 1) / B - 1; i >= 0; i--)
|
|
{
|
|
_fmpz_poly_mul(t, rop, d, pows + B * d, d);
|
|
_fmpz_poly_reduce(t, 2 * d - 1, a, j, lena);
|
|
|
|
_fmpz_vec_set(rop, t, d);
|
|
fmpz_add(rop + 0, rop + 0, op1 + i*B);
|
|
for (k = FLINT_MIN(B, len1 - i*B) - 1; k > 0; k--)
|
|
{
|
|
_fmpz_vec_scalar_addmul_fmpz(rop, pows + k * d, d, op1 + (i*B + k));
|
|
}
|
|
|
|
_fmpz_vec_scalar_mod_fmpz(rop, rop, d, p);
|
|
}
|
|
|
|
_fmpz_vec_clear(pows, (B + 2) * d);
|
|
_fmpz_vec_clear(t, 2 * d - 1);
|
|
}
|
|
}
|
|
|
|
static void
|
|
_fmpz_mod_poly_compose_smod_horner(fmpz *rop,
|
|
const fmpz *op1, slong len1,
|
|
const fmpz *op2, slong len2,
|
|
const fmpz *a, const slong *j, slong lena,
|
|
const fmpz_t p)
|
|
{
|
|
const slong d = j[lena - 1];
|
|
|
|
if (len1 == 1)
|
|
{
|
|
fmpz_set(rop, op1);
|
|
_fmpz_vec_zero(rop + 1, d - 1);
|
|
}
|
|
else if (len2 == 1)
|
|
{
|
|
_fmpz_mod_poly_evaluate_fmpz(rop, op1, len1, op2, p);
|
|
_fmpz_vec_zero(rop + 1, d - 1);
|
|
}
|
|
else
|
|
{
|
|
slong i;
|
|
fmpz *t;
|
|
|
|
t = _fmpz_vec_init(2*d - 1);
|
|
|
|
_fmpz_vec_zero(rop, d);
|
|
|
|
for (i = len1 - 1; i >= 0; i--)
|
|
{
|
|
_fmpz_poly_mul(t, rop, d, op2, len2);
|
|
_fmpz_poly_reduce(t, d + len2 - 1, a, j, lena);
|
|
_fmpz_poly_add(rop, t, d, op1 + i, 1);
|
|
_fmpz_vec_scalar_mod_fmpz(rop, rop, d, p);
|
|
}
|
|
|
|
_fmpz_vec_clear(t, 2*d - 1);
|
|
}
|
|
}
|
|
|
|
/*
|
|
Computes the composition $f(g(X))$ modulo the sparse polynomial
|
|
given by the data \code{(a, j, lena)}, which is assumed to be
|
|
of degree~$d \geq 2$.
|
|
|
|
Sets the vector \code{(rop, d)}.
|
|
|
|
Assumes that \code{len1} and \code{len2} are positive but at
|
|
most~$d$.
|
|
|
|
Does not support aliasing.
|
|
*/
|
|
|
|
void
|
|
_fmpz_mod_poly_compose_smod(fmpz *rop,
|
|
const fmpz *op1, slong len1,
|
|
const fmpz *op2, slong len2,
|
|
const fmpz *a, const slong *j, slong lena,
|
|
const fmpz_t p)
|
|
{
|
|
if (len1 < 6)
|
|
{
|
|
_fmpz_mod_poly_compose_smod_horner(rop, op1, len1, op2, len2, a, j, lena, p);
|
|
}
|
|
else
|
|
{
|
|
_fmpz_mod_poly_compose_smod_rectangular(rop, op1, len1, op2, len2, a, j, lena, p);
|
|
}
|
|
}
|
|
|
|
void _qadic_frobenius_a(fmpz *rop, slong exp,
|
|
const fmpz *a, const slong *j, slong lena,
|
|
const fmpz_t p, slong N)
|
|
{
|
|
const slong d = j[lena - 1];
|
|
|
|
slong *e, i, n;
|
|
fmpz *pow, *f1, *f2, *inv, *s, *t;
|
|
|
|
n = FLINT_CLOG2(N) + 1;
|
|
|
|
e = flint_malloc(n * sizeof(slong));
|
|
for (e[i = 0] = N; e[i] > 1; i++)
|
|
e[i + 1] = (e[i] + 1) / 2;
|
|
|
|
pow = _fmpz_vec_init(n);
|
|
f1 = _fmpz_vec_init(d + 1);
|
|
f2 = _fmpz_vec_init(d);
|
|
inv = _fmpz_vec_init(2*d - 1);
|
|
s = _fmpz_vec_init(2*d - 1);
|
|
t = _fmpz_vec_init(2*d - 1);
|
|
|
|
/* Compute powers of p */
|
|
{
|
|
fmpz_one(t);
|
|
fmpz_set(pow + i, p);
|
|
}
|
|
for (i--; i >= 1; i--)
|
|
{
|
|
if (e[i] & WORD(1))
|
|
{
|
|
fmpz_mul(pow + i, t, pow + (i + 1));
|
|
fmpz_mul(t, t, t);
|
|
}
|
|
else
|
|
{
|
|
fmpz_mul(t, t, pow + (i + 1));
|
|
fmpz_mul(pow + i, pow + (i + 1), pow + (i + 1));
|
|
}
|
|
}
|
|
{
|
|
if (e[i] & WORD(1))
|
|
fmpz_mul(pow + i, t, pow + (i + 1));
|
|
else
|
|
fmpz_mul(pow + i, pow + (i + 1), pow + (i + 1));
|
|
}
|
|
|
|
/* Dense representation of f and f' */
|
|
{
|
|
slong k;
|
|
|
|
for (k = 0; k < lena; k++)
|
|
fmpz_set(f1 + j[k], a + k);
|
|
for (k = 1; k < lena; k++)
|
|
fmpz_mul_ui(f2 + (j[k] - 1), a + k, j[k]);
|
|
}
|
|
|
|
/* Run Newton iteration */
|
|
i = n - 1;
|
|
{
|
|
fmpz op[2] = {WORD(0), WORD(1)};
|
|
|
|
fmpz_pow_ui(t, p, exp);
|
|
_qadic_pow(rop, op, 2, t, a, j, lena, pow + i);
|
|
_fmpz_mod_poly_compose_smod(t, f2, d, rop, d, a, j, lena, pow + i);
|
|
_qadic_inv(inv, t, d, a, j, lena, p, 1);
|
|
}
|
|
for (i--; i >= 0; i--)
|
|
{
|
|
_fmpz_mod_poly_compose_smod(s, f1, d + 1, rop, d, a, j, lena, pow + i);
|
|
_fmpz_mod_poly_mul(t, s, d, inv, d, pow + i);
|
|
_fmpz_mod_poly_reduce(t, 2*d - 1, a, j, lena, pow + i);
|
|
_fmpz_mod_poly_sub(rop, rop, d, t, d, pow + i);
|
|
|
|
if (i > 0)
|
|
{
|
|
_fmpz_mod_poly_compose_smod(s, f2, d, rop, d, a, j, lena, pow + i);
|
|
_fmpz_mod_poly_mul(t, inv, d, s, d, pow + i);
|
|
_fmpz_mod_poly_reduce(t, 2*d - 1, a, j, lena, pow + i);
|
|
fmpz_sub_ui(t, t, 2);
|
|
if (fmpz_sgn(t) < 0)
|
|
fmpz_add(t, t, pow + i);
|
|
_fmpz_mod_poly_neg(t, t, d, pow + i);
|
|
_fmpz_mod_poly_mul(s, inv, d, t, d, pow + i);
|
|
_fmpz_mod_poly_reduce(s, 2*d - 1, a, j, lena, pow + i);
|
|
|
|
/* SWAP(inv, s). Requires the arrays to be of the same size. */
|
|
{
|
|
fmpz *__t;
|
|
|
|
__t = inv;
|
|
inv = s;
|
|
s = __t;
|
|
}
|
|
}
|
|
}
|
|
|
|
_fmpz_vec_clear(pow, n);
|
|
_fmpz_vec_clear(f1, d + 1);
|
|
_fmpz_vec_clear(f2, d);
|
|
_fmpz_vec_clear(inv, 2*d - 1);
|
|
_fmpz_vec_clear(s, 2*d - 1);
|
|
_fmpz_vec_clear(t, 2*d - 1);
|
|
flint_free(e);
|
|
}
|
|
|
|
/*
|
|
Sets (rop, 2d-1) to the image of (op, len) under the Frobenius operator
|
|
raised to the e-th power.
|
|
*/
|
|
|
|
void _qadic_frobenius(fmpz *rop, const fmpz *op, slong len, slong e,
|
|
const fmpz *a, const slong *j, slong lena,
|
|
const fmpz_t p, slong N)
|
|
{
|
|
const slong d = j[lena - 1];
|
|
|
|
if (len == 1) /* op is in Zp, not just Zq */
|
|
{
|
|
_fmpz_vec_set(rop, op, len);
|
|
_fmpz_vec_zero(rop + len, (2*d - 1) - len);
|
|
}
|
|
else if (N == 1)
|
|
{
|
|
fmpz_t t;
|
|
|
|
fmpz_init(t);
|
|
fmpz_pow_ui(t, p, e);
|
|
_qadic_pow(rop, op, len, t, a, j, lena, p);
|
|
fmpz_clear(t);
|
|
}
|
|
else
|
|
{
|
|
fmpz *t;
|
|
fmpz_t pow;
|
|
|
|
t = _fmpz_vec_init(2*d - 1);
|
|
fmpz_init(pow);
|
|
fmpz_pow_ui(pow, p, N);
|
|
|
|
_qadic_frobenius_a(t, e, a, j, lena, p, N);
|
|
|
|
_fmpz_mod_poly_compose_smod(rop, op, len, t, d, a, j, lena, pow);
|
|
_fmpz_vec_zero(rop + d, d - 1);
|
|
|
|
_fmpz_vec_clear(t, 2*d - 1);
|
|
fmpz_clear(pow);
|
|
}
|
|
}
|
|
|
|
/*
    Sets rop to the image of op under the e-th power of the Frobenius
    operator, to the precision of rop.

    The exponent e is first reduced modulo the degree d of the context
    to a value in [0, d).  The result is zero whenever op is zero or has
    valuation at least the precision of rop, and a copy of op when the
    reduced exponent is zero.

    Aliasing of rop and op is handled by computing into a temporary
    vector, which then replaces the coefficient array of rop.
 */
void qadic_frobenius(qadic_t rop, const qadic_t op, slong e, const qadic_ctx_t ctx)
{
    const slong N   = qadic_prec(rop);
    const slong d   = qadic_ctx_degree(ctx);
    const slong cap = 2 * d - 1;

    e %= d;
    if (e < 0)
        e += d;

    if (qadic_is_zero(op) || op->val >= N)
    {
        qadic_zero(rop);
        return;
    }

    if (e == 0)
    {
        padic_poly_set(rop, op, &ctx->pctx);
        return;
    }

    if (rop == op)
    {
        /* Compute into fresh storage, then swap it in for the old array */
        fmpz *t = _fmpz_vec_init(cap);

        _qadic_frobenius(t, op->coeffs, op->length, e,
                         ctx->a, ctx->j, ctx->len, (&ctx->pctx)->p, N - op->val);

        _fmpz_vec_clear(rop->coeffs, rop->alloc);
        rop->coeffs = t;
        rop->alloc  = cap;
        rop->length = d;
    }
    else
    {
        /* Write straight into rop, which needs room for 2d - 1 entries */
        padic_poly_fit_length(rop, cap);

        _qadic_frobenius(rop->coeffs, op->coeffs, op->length, e,
                         ctx->a, ctx->j, ctx->len, (&ctx->pctx)->p, N - op->val);

        rop->val = op->val;
        _padic_poly_set_length(rop, d);
    }

    _padic_poly_normalise(rop);
}
|
|
|