/*============================================================================= This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA =============================================================================*/ /****************************************************************************** Copyright (C) 2010 William Hart ******************************************************************************/ #include #include #include "flint.h" #include "fmpz.h" #include "fmpz_vec.h" #include "fmpz_poly.h" void _fmpz_poly_mullow_kara_recursive(fmpz * out, const fmpz * pol1, const fmpz * pol2, fmpz * temp, slong len); /* Multiplication using truncated karatsuba. Below length 7, classical truncated multiplication is always theoretically faster, so we switch to that as the basecase. Above that we use the ordinary (left/right) karatsuba identity and recursively do one full karatsuba multiplication and two truncated karatsuba multiplications. */ void _fmpz_poly_mullow_kara_recursive(fmpz * out, const fmpz * pol1, const fmpz * pol2, fmpz * temp, slong len) { slong m1 = len / 2; slong m2 = len - m1; int odd = (len & 1); if (len <= 6) { _fmpz_poly_mullow_classical(out, pol1, len, pol2, len, len); return; } _fmpz_vec_add(temp + m2, pol1, pol1 + m1, m1); if (odd) fmpz_set(temp + m2 + m1, pol1 + 2 * m1); _fmpz_vec_add(temp + 2 * m2, pol2, pol2 + m1, m1); if (odd) fmpz_set(temp + 2 * m2 + m1, pol2 + 2 * m1); _fmpz_poly_mul_karatsuba(out, pol1, m1, pol2, m1); fmpz_zero(out + 2 * m1 - 1); _fmpz_poly_mullow_kara_recursive(temp, temp + m2, temp + 2 * m2, temp + 3 * m2, m2); _fmpz_poly_mullow_kara_recursive(temp + m2, pol1 + m1, pol2 + m1, temp + 2 * m2, m2); _fmpz_vec_sub(temp, temp, out, m2); _fmpz_vec_sub(temp, temp, temp + m2, m2); if (odd) fmpz_set(out + 2 * m1, temp + m2); _fmpz_vec_add(out + m1, out + m1, temp, m2); } /* Assumes poly1 and poly2 are not length 0. */ void _fmpz_poly_mullow_karatsuba_n(fmpz * res, const fmpz * poly1, const fmpz * poly2, slong n) { fmpz *temp; slong len, loglen = 0; if (n == 1) { fmpz_mul(res, poly1, poly2); return; } while ((WORD(1) << loglen) < n) loglen++; len = (WORD(1) << loglen); temp = _fmpz_vec_init(3 * len); _fmpz_poly_mullow_kara_recursive(res, poly1, poly2, temp, n); _fmpz_vec_clear(temp, 3 * len); } void fmpz_poly_mullow_karatsuba_n(fmpz_poly_t res, const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong n) { const slong len1 = FLINT_MIN(poly1->length, n); const slong len2 = FLINT_MIN(poly2->length, n); slong i, lenr; int clear = 0; fmpz *copy1, *copy2; if (len1 == 0 || len2 == 0) { fmpz_poly_zero(res); return; } lenr = len1 + len2 - 1; if (n > lenr) n = lenr; if (len1 >= n) copy1 = poly1->coeffs; else { copy1 = (fmpz *) flint_malloc(n * sizeof(fmpz)); for (i = 0; i < len1; i++) copy1[i] = poly1->coeffs[i]; flint_mpn_zero((mp_ptr) copy1 + len1, n - len1); clear |= 1; } if (len2 >= n) copy2 = poly2->coeffs; else { copy2 = (fmpz *) flint_malloc(n * sizeof(fmpz)); for (i = 0; i < len2; i++) copy2[i] = poly2->coeffs[i]; flint_mpn_zero((mp_ptr) copy2 + len2, n - len2); clear |= 2; } if (res != poly1 && res != poly2) { fmpz_poly_fit_length(res, n); _fmpz_poly_mullow_karatsuba_n(res->coeffs, copy1, copy2, n); } else { fmpz_poly_t t; fmpz_poly_init2(t, n); _fmpz_poly_mullow_karatsuba_n(t->coeffs, copy1, copy2, n); fmpz_poly_swap(res, t); fmpz_poly_clear(t); } _fmpz_poly_set_length(res, n); _fmpz_poly_normalise(res); if (clear & 1) flint_free(copy1); if (clear & 2) flint_free(copy2); }