aboutsummaryrefslogtreecommitdiffstats
path: root/moon-abe/pbc-0.5.14/arith/montfp.c
blob: c79bb72b36b589098c663d971f506125db118570 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40

@media only all and (prefers-color-scheme: dark) {
.highlight .hll { background-color: #49483e }
.highlight .c { color: #75715e } /* Comment */
.highlight .err { color: #960050; background-color: #1e0010 } /* Error */
.highlight .k { color: #66d9ef } /* Keyword */
.highlight .l { color: #ae81ff } /* Literal */
.highlight .n { color: #f8f8f2 } /* Name */
.highlight .o { color: #f92672 } /* Operator */
.highlight .p { color: #f8f8f2 } /* Punctuation */
.highlight .ch { color: #75715e } /* Comment.Hashbang */
.highlight .cm { color: #75715e } /* Comment.Multiline */
.highlight .cp { color: #75715e } /* Comment.Preproc */
.highlight .cpf { color: #75715e } /* Comment.PreprocFile */
.highlight .c1 { color: #75715e } /* Comment.Single */
.highlight .cs { color: #75715e } /* Comment.Special */
.highlight .gd { color: #f92672 } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gi { color: #a6e22e } /* Generic.Inserted */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #75715e } /* Generic.Subheading */
.highlight .kc { color: #66d9ef } /* Keyword.Constant */
.highlight .kd { color: #66d9ef } /* Keyword.Declaration */
.highlight .kn { color: #f92672 } /* Keyword.Namespace */
.highlight .kp { color: #66d9ef } /* Keyword.Pseudo */
.highlight .kr { color: #66d9ef } /* Keyword.Reserved */
.highlight .kt { color: #66d9ef } /* Keyword.Type */
.highlight .ld { color: #e6db74 } /* Literal.Date */
.highlight .m { color: #ae81ff } /* Literal.Number */
.highlight .s { color: #e6db74 } /* Literal.String */
.highlight .na { color: #a6e22e } /* Name.Attribute */
.highlight .nb { color: #f8f8f2 } /* Name.Builtin */
.highlight .nc { color: #a6e22e } /* Name.Class */
.highlight .no { color: #66d9ef } /* Name.Constant */
.highlight .nd { color: #a6e22e } /* Name.Decorator */
.highlight .ni { color: #f8f8f2 } /* Name.Entity */
.highlight .ne { color: #a6e22e } /* Name.Exception */
.highlight .nf { color: #a6e22e } /* Name.Function */
.highlight .nl { color: #f8f8f2 } /* Name.Label */
.highlight .nn { color: #f8f8f2 } /* Name.Namespace */
.highlight .nx { color: #a6e22e } /* Name.Other */
.highlight .py { color: #f8f8f2 } /* Name.Property */
.highlight .nt { color: #f92672 } /* Name.Tag */
.highlight .nv { color: #f8f8f2 } /* Name.Variable */
.highlight .ow { color: #f92672 } /* Operator.Word */
.highlight .w { color: #f8f8f2 } /* Text.Whitespace */
.highlight .mb { color: #ae81ff } /* Literal.Number.Bin */
.highlight .mf { color: #ae81ff } /* Literal.Number.Float */
.highlight .mh { color: #ae81ff } /* Literal.Number.Hex */
.highlight .mi { color: #ae81ff } /* Literal.Number.Integer */
.highlight .mo { color: #ae81ff } /* Literal.Number.Oct */
.highlight .sa { color: #e6db74 } /* Literal.String.Affix */
.highlight .sb { color: #e6db74 } /* Literal.String.Backtick */
.highlight .sc { color: #e6db74 } /* Literal.String.Char */
.highlight .dl { color: #e6db74 } /* Literal.String.Delimiter */
.highlight .sd { color: #e6db74 } /* Literal.String.Doc */
.highlight .s2 { color: #e6db74 } /* Literal.String.Double */
.highlight .se { color: #ae81ff } /* Literal.String.Escape */
.highlight .sh { color: #e6db74 } /* Literal.String.Heredoc */
.highlight .si { color: #e6db74 } /* Literal.String.Interpol */
.highlight .sx { color: #e6db74 } /* Literal.String.Other */
.highlight .sr { color: #e6db74 } /* Literal.String.Regex */
.highlight .s1 { color: #e6db74 } /* Literal.String.Single */
.highlight .ss { color: #e6db74 } /* Literal.String.Symbol */
.highlight .bp { color: #f8f8f2 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #a6e22e } /* Name.Function.Magic */
.highlight .vc { color: #f8f8f2 } /* Name.Variable.Class */
.highlight .vg { color: #f8f8f2 } /* Name.Variable.Global */
.highlight .vi { color: #f8f8f2 } /* Name.Variable.Instance */
.highlight .vm { color: #f8f8f2 } /* Name.Variable.Magic */
.highlight .il { color: #ae81ff } /* Literal.Number.Integer.Long */
}
@media (prefers-color-scheme: light) {
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
}
##############################################################################
# Copyright (c) 2015 Ericsson AB and others.
# stefan.k.berg@ericsson.com
# jonas.bjurel@ericsson.com
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0
# which accompanies this distribution, and is available at
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################

FUEL_MAIN_REPO := https://github.com/openstack/fuel-main
FUEL_MAIN_TAG := 9.0.1
MOS_VERSION = 9.0
OPENSTACK_VERSION = mitaka-9.0

# Pinning down exact Fuel repo versions for Fuel 9.0.1
export FUELLIB_COMMIT?=e283b62750d9e26355981b3ad3be7c880944ae0f
export NAILGUN_COMMIT?=e2b85bafb68c348f25cb7cceda81edc668ba2e64
export PYTHON_FUELCLIENT_COMMIT?=67d8c693a670d27c239d5d175f3ea2a0512c498c
export FUEL_AGENT_COMMIT?=7ffbf39caf5845bd82b8ce20a7766cf24aa803fb
export FUEL_NAILGUN_AGENT_COMMIT?=46fa0db0f8944f9e67699d281d462678aaf4db26
export ASTUTE_COMMIT?=390b257240d49cc5e94ed5c4fcd940b5f2f6ec64
export OSTF_COMMIT?=f09c98ff7cc71ee612b2450f68a19f2f9c64345a
export FUEL_MIRROR_COMMIT?=d1ef06b530ce2149230953bb3810a88ecaff870c
export FUELMENU_COMMIT?=0ed9e206ed1c6271121d3acf52a6bf757411286b
export SHOTGUN_COMMIT?=781a8cfa0b6eb290e730429fe2792f2b6f5e0c11
export NETWORKCHECKER_COMMIT?=fcb47dd095a76288aacf924de574e39709e1f3ca
export FUELUPGRADE_COMMIT?=c1c4bac6a467145ac4fac73e4a7dd2b00380ecfb
export FUEL_UI_COMMIT?=90de7ef4477230cb7335453ed26ed4306ca6f04f

DOCKER_REPO := http://get.docker.com/builds/Linux/x86_64
DOCKER_TAG := docker-latest

.PHONY: get-fuel-repo
get-fuel-repo:
	@echo $(FUEL_MAIN_REPO) $(FUEL_MAIN_TAG)
445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596
// F_p using Montgomery representation.
//
// Let b = 256^sizeof(mp_limb_t).
// Let R = b^t be the smallest power of b greater than the modulus p.
// Then x is stored as xR (mod p).
// Addition: same as naive implementation.
// Multipication: Montgomery reduction.
// Code assumes the modulus p is odd.
//
// TODO: mul_2exp(x, p->bytes * 8) could be replaced with
// faster code that messes with GMP internals

#include <stdarg.h>
#include <stdio.h>
#include <stdint.h> // for intptr_t
#include <stdlib.h>
#include <string.h>
#include <gmp.h>
#include "pbc_utils.h"
#include "pbc_field.h"
#include "pbc_random.h"
#include "pbc_fp.h"
#include "pbc_memory.h"

// Per-field data.
typedef struct {
  size_t limbs;           // Number of limbs per element.
  size_t bytes;           // Number of bytes per element.
  mp_limb_t *primelimbs;  // Points to an array of limbs holding the modulus.
  mp_limb_t negpinv;      // -p^-1 mod b
  mp_limb_t *R;           // R mod p
  mp_limb_t *R3;          // R^3 mod p
} *fptr;

// Per-element data.
typedef struct {
  char flag;     // flag == 0 means the element is zero.
  mp_limb_t *d;  // Otherwise d points to an array holding the element.
} *eptr;

// Copies limbs of z into dst and zeroes any leading limbs, where n is the
// total number of limbs.
// Requires z to have at most n limbs.
static inline void set_limbs(mp_limb_t *dst, mpz_t z, size_t n) {
  size_t count;
  mpz_export(dst, &count, -1, sizeof(mp_limb_t), 0, 0, z);
  memset((void *) (((unsigned char *) dst) + count * sizeof(mp_limb_t)),
         0, (n - count) * sizeof(mp_limb_t));
}

static void fp_init(element_ptr e) {
  fptr p = e->field->data;
  eptr ep = e->data = pbc_malloc(sizeof(*ep));
  ep->flag = 0;
  ep->d = pbc_malloc(p->bytes);
}

static void fp_clear(element_ptr e) {
  eptr ep = e->data;
  pbc_free(ep->d);
  pbc_free(e->data);
}

static void fp_set_mpz(element_ptr e, mpz_ptr z) {
  fptr p = e->field->data;
  eptr ep = e->data;
  if (!mpz_sgn(z)) ep->flag = 0;
  else {
    mpz_t tmp;
    mpz_init(tmp);
    mpz_mul_2exp(tmp, z, p->bytes * 8);
    mpz_mod(tmp, tmp, e->field->order);
    if (!mpz_sgn(tmp)) ep->flag = 0;
    else {
      set_limbs(ep->d, tmp, p->limbs);
      ep->flag = 2;
    }
    mpz_clear(tmp);
  }
}

static void fp_set_si(element_ptr e, signed long int op) {
  fptr p = e->field->data;
  eptr ep = e->data;
  if (!op) ep->flag = 0;
  else {
    mpz_t tmp;
    mpz_init(tmp);
    // TODO: Could be optimized.
    mpz_set_si(tmp, op);
    mpz_mul_2exp(tmp, tmp, p->bytes * 8);
    mpz_mod(tmp, tmp, e->field->order);
    if (!mpz_sgn(tmp)) ep->flag = 0;
    else {
      set_limbs(ep->d, tmp, p->limbs);
      ep->flag = 2;
    }
    mpz_clear(tmp);
  }
}

// Montgomery reduction.
// Algorithm II.4 from Blake, Seroussi and Smart.
static void mont_reduce(mp_limb_t *x, mp_limb_t *y, fptr p) {
  size_t t = p->limbs;
  size_t i;
  mp_limb_t flag = 0;
  for (i = 0; i < t; i++) {
    mp_limb_t u = y[i] * p->negpinv;
    mp_limb_t carry = mpn_addmul_1(&y[i], p->primelimbs, t, u);
    //mpn_add_1(&y[i+t], &y[i+t], t - i + 1, carry);
    flag += mpn_add_1(&y[i + t], &y[i + t], t - i, carry);
  }
  if (flag || mpn_cmp(&y[t], p->primelimbs, t) >= 0) {
    mpn_sub_n(x, &y[t], p->primelimbs, t);
  } else {
    // TODO: GMP set might be faster.
    memcpy(x, &y[t], t * sizeof(mp_limb_t));
  }
}

static void fp_to_mpz(mpz_ptr z, element_ptr e) {
  eptr ep = e->data;
  if (!ep->flag) mpz_set_ui(z, 0);
  else {
    // x is stored as xR.
    // We must divide out R to convert to standard representation.
    fptr p = e->field->data;
    mp_limb_t tmp[2 * p->limbs];

    memcpy(tmp, ep->d, p->limbs * sizeof(mp_limb_t));
    memset(&tmp[p->limbs], 0, p->limbs * sizeof(mp_limb_t));
    _mpz_realloc(z, p->limbs);
    mont_reduce(z->_mp_d, tmp, p);
    // Remove leading zero limbs.
    for (z->_mp_size = p->limbs; !z->_mp_d[z->_mp_size - 1]; z->_mp_size--);
  }
}

static void fp_set0(element_ptr e) {
  eptr ep = e->data;
  ep->flag = 0;
}

static void fp_set1(element_ptr e) {
  fptr p = e->field->data;
  eptr ep = e->data;
  ep->flag = 2;
  memcpy(ep->d, p->R, p->bytes);
}

static int fp_is1(element_ptr e) {
  eptr ep = e->data;
  if (!ep->flag) return 0;
  else {
    fptr p = e->field->data;
    return !mpn_cmp(ep->d, p->R, p->limbs);
  }
}

static int fp_is0(element_ptr e) {
  eptr ep = e->data;
  return !ep->flag;
}

static size_t fp_out_str(FILE * stream, int base, element_ptr e) {
  size_t result;
  mpz_t z;
  mpz_init(z);
  fp_to_mpz(z, e);
  result = mpz_out_str(stream, base, z);
  mpz_clear(z);
  return result;
}

static int fp_snprint(char *s, size_t n, element_ptr e) {
  int result;
  mpz_t z;
  mpz_init(z);
  fp_to_mpz(z, e);
  result = gmp_snprintf(s, n, "%Zd", z);
  mpz_clear(z);
  return result;
}

static int fp_set_str(element_ptr e, const char *s, int base) {
  mpz_t z;
  mpz_init(z);
  int result = pbc_mpz_set_str(z, s, base);
  mpz_mod(z, z, e->field->order);
  fp_set_mpz(e, z);
  mpz_clear(z);
  return result;
}

static void fp_set(element_ptr c, element_ptr a) {
  eptr ad = a->data;
  eptr cd = c->data;
  if (a == c) return;
  if (!ad->flag) cd->flag = 0;
  else {
    fptr p = a->field->data;

    // Assembly is faster, but I don't want to stoop to that level.
    // Instead of memcpy(), we rewrite so GMP assembly ends up being invoked.
    /*
       memcpy(cd->d, ad->d, p->bytes);
     */
    mpz_t z1, z2;
    z1->_mp_d = cd->d;
    z2->_mp_d = ad->d;
    z1->_mp_size = z1->_mp_alloc = z2->_mp_size = z2->_mp_alloc = p->limbs;
    mpz_set(z1, z2);

    cd->flag = 2;
  }
}

static void fp_add(element_ptr c, element_ptr a, element_ptr b) {
  eptr ad = a->data, bd = b->data;

  if (!ad->flag) {
    fp_set(c, b);
  } else if (!bd->flag) {
    fp_set(c, a);
  } else {
    eptr cd = c->data;
    fptr p = a->field->data;
    const size_t t = p->limbs;
    mp_limb_t carry;
    carry = mpn_add_n(cd->d, ad->d, bd->d, t);

    if (carry) {
      // Assumes result of following sub is not zero,
      // i.e. modulus cannot be 2^(n * bits_per_limb).
      mpn_sub_n(cd->d, cd->d, p->primelimbs, t);
      cd->flag = 2;
    } else {
      int i = mpn_cmp(cd->d, p->primelimbs, t);
      if (!i) {
        cd->flag = 0;
      } else {
        cd->flag = 2;
        if (i > 0) {
          mpn_sub_n(cd->d, cd->d, p->primelimbs, t);
        }
      }
    }
  }
}

static void fp_double(element_ptr c, element_ptr a) {
  eptr ad = a->data, cd = c->data;
  if (!ad->flag) {
    cd->flag = 0;
  } else {
    fptr p = c->field->data;
    const size_t t = p->limbs;
    if (mpn_lshift(cd->d, ad->d, t, 1)) {
      cd->flag = 2;
      // Again, assumes result is not zero.
      mpn_sub_n(cd->d, cd->d, p->primelimbs, t);
    } else {
      int i = mpn_cmp(cd->d, p->primelimbs, t);
      if (!i) {
        cd->flag = 0;
      } else {
        cd->flag = 2;
        if (i > 0) {
          mpn_sub_n(cd->d, cd->d, p->primelimbs, t);
        }
      }
    }
  }
}

static void fp_halve(element_ptr c, element_ptr a) {
  eptr ad = a->data, cd = c->data;
  if (!ad->flag) {
    cd->flag = 0;
  } else {
    fptr p = c->field->data;
    const size_t t = p->limbs;
    int carry = 0;
    mp_limb_t *alimb = ad->d;
    mp_limb_t *climb = cd->d;
    if (alimb[0] & 1) {
      carry = mpn_add_n(climb, alimb, p->primelimbs, t);
    } else fp_set(c, a);

    mpn_rshift(climb, climb, t, 1);
    if (carry) climb[t - 1] |= ((mp_limb_t) 1) << (sizeof(mp_limb_t) * 8 - 1);
  }
}

static void fp_neg(element_ptr c, element_ptr a) {
  eptr ad = a->data, cd = c->data;
  if (!ad->flag) cd->flag = 0;
  else {
    fptr p = a->field->data;
    mpn_sub_n(cd->d, p->primelimbs, ad->d, p->limbs);
    cd->flag = 2;
  }
}

static void fp_sub(element_ptr c, element_ptr a, element_ptr b) {
  eptr ad = a->data, bd = b->data;

  if (!ad->flag) {
    fp_neg(c, b);
  } else if (!bd->flag) {
    fp_set(c, a);
  } else {
    fptr p = c->field->data;
    size_t t = p->limbs;
    eptr cd = c->data;
    int i = mpn_cmp(ad->d, bd->d, t);

    if (i == 0) {
      cd->flag = 0;
    } else {
      cd->flag = 2;
      mpn_sub_n(cd->d, ad->d, bd->d, t);
      if (i < 0) {
        mpn_add_n(cd->d, cd->d, p->primelimbs, t);
      }
    }
  }
}

// Montgomery multiplication.
// See Blake, Seroussi and Smart.
static inline void mont_mul(mp_limb_t *c, mp_limb_t *a, mp_limb_t *b,
                            fptr p) {
  // Instead of right shifting every iteration
  // I allocate more room for the z array.
  size_t i, t = p->limbs;
  mp_limb_t z[2 * t + 1];
  mp_limb_t u = (a[0] * b[0]) * p->negpinv;
  mp_limb_t v = z[t] = mpn_mul_1(z, b, t, a[0]);
  z[t] += mpn_addmul_1(z, p->primelimbs, t, u);
  z[t + 1] = z[t] < v;  // Handle overflow.
  for (i = 1; i < t; i++) {
    u = (z[i] + a[i] * b[0]) * p->negpinv;
    v = z[t + i] += mpn_addmul_1(z + i, b, t, a[i]);
    z[t + i] += mpn_addmul_1(z + i, p->primelimbs, t, u);
    z[t + i + 1] = z[t + i] < v;
  }
  if (z[t * 2] || mpn_cmp(z + t, p->primelimbs, t) >= 0) {
    mpn_sub_n(c, z + t, p->primelimbs, t);
  } else {
    memcpy(c, z + t, t * sizeof(mp_limb_t));
    // Doesn't seem to make a difference:
    /*
       mpz_t z1, z2;
       z1->_mp_d = c;
       z2->_mp_d = z + t;
       z1->_mp_size = z1->_mp_alloc = z2->_mp_size = z2->_mp_alloc = t;
       mpz_set(z1, z2);
     */
  }
}

static void fp_mul(element_ptr c, element_ptr a, element_ptr b) {
  eptr ad = a->data, bd = b->data;
  eptr cd = c->data;

  if (!ad->flag || !bd->flag) {
    cd->flag = 0;
  } else {
    fptr p = c->field->data;
    mont_mul(cd->d, ad->d, bd->d, p);
    cd->flag = 2;
  }
}

static void fp_pow_mpz(element_ptr c, element_ptr a, mpz_ptr op) {
  // Alternative: rewrite GMP mpz_powm().
  fptr p = a->field->data;
  eptr ad = a->data;
  eptr cd = c->data;
  if (!ad->flag) cd->flag = 0;
  else {
    mpz_t z;
    mpz_init(z);
    fp_to_mpz(z, a);
    mpz_powm(z, z, op, a->field->order);
    mpz_mul_2exp(z, z, p->bytes * 8);
    mpz_mod(z, z, a->field->order);
    set_limbs(cd->d, z, p->limbs);
    mpz_clear(z);
    cd->flag = 2;
  }
}

// Inversion is slower than in a naive Fp implementation because of an extra
// multiplication.
// Requires nonzero a.
static void fp_invert(element_ptr c, element_ptr a) {
  eptr ad = a->data;
  eptr cd = c->data;
  fptr p = a->field->data;
  mp_limb_t tmp[p->limbs];
  mpz_t z;

  mpz_init(z);

  // Copy the limbs into a regular mpz_t so we can invert using the standard
  // mpz_invert().
  mpz_import(z, p->limbs, -1, sizeof(mp_limb_t), 0, 0, ad->d);
  mpz_invert(z, z, a->field->order);
  set_limbs(tmp, z, p->limbs);

  // Normalize.
  mont_mul(cd->d, tmp, p->R3, p);
  cd->flag = 2;
  mpz_clear(z);
}

static void fp_random(element_ptr a) {
  fptr p = a->field->data;
  eptr ad = a->data;
  mpz_t z;
  mpz_init(z);
  pbc_mpz_random(z, a->field->order);
  if (mpz_sgn(z)) {
    mpz_mul_2exp(z, z, p->bytes * 8);
    mpz_mod(z, z, a->field->order);
    set_limbs(ad->d, z, p->limbs);
    ad->flag = 2;
  } else {
    ad->flag = 0;
  }
  mpz_clear(z);
}

static void fp_from_hash(element_ptr a, void *data, int len) {
  mpz_t z;

  mpz_init(z);
  pbc_mpz_from_hash(z, a->field->order, data, len);
  fp_set_mpz(a, z);
  mpz_clear(z);
}

static int fp_cmp(element_ptr a, element_ptr b) {
  eptr ad = a->data, bd = b->data;
  if (!ad->flag) return bd->flag;
  else {
    fptr p = a->field->data;
    return mpn_cmp(ad->d, bd->d, p->limbs);
    //return memcmp(ad->d, bd->d, p->limbs);
  }
}

static int fp_sgn_odd(element_ptr a) {
  eptr ad = a->data;
  if (!ad->flag) return 0;
  else {
    mpz_t z;
    mpz_init(z);
    int res;
    fp_to_mpz(z, a);
    res = mpz_odd_p(z) ? 1 : -1;
    mpz_clear(z);
    return res;
  }
}

static int fp_is_sqr(element_ptr a) {
  eptr ad = a->data;
  int res;
  mpz_t z;
  mpz_init(z);
  // 0 is a square.
  if (!ad->flag) return 1;
  fp_to_mpz(z, a);
  res = mpz_legendre(z, a->field->order) == 1;
  mpz_clear(z);
  return res;
}

static int fp_to_bytes(unsigned char *data, element_t a) {
  mpz_t z;
  int n = a->field->fixed_length_in_bytes;

  mpz_init(z);
  fp_to_mpz(z, a);
  pbc_mpz_out_raw_n(data, n, z);
  mpz_clear(z);
  return n;
}

static int fp_from_bytes(element_t a, unsigned char *data) {
  fptr p = a->field->data;
  eptr ad = a->data;
  int n;
  mpz_t z;

  mpz_init(z);

  n = a->field->fixed_length_in_bytes;
  mpz_import(z, n, 1, 1, 1, 0, data);
  if (!mpz_sgn(z)) ad->flag = 0;
  else {
    ad->flag = 2;
    mpz_mul_2exp(z, z, p->bytes * 8);
    mpz_mod(z, z, a->field->order);
    set_limbs(ad->d, z, p->limbs);
  }
  mpz_clear(z);
  return n;
}

static void fp_field_clear(field_t f) {
  fptr p = f->data;
  pbc_free(p->primelimbs);
  pbc_free(p->R);
  pbc_free(p->R3);
  pbc_free(p);
}

// The only public functions. All the above should be static.

static void fp_out_info(FILE * out, field_ptr f) {
  element_fprintf(out, "GF(%Zd): Montgomery representation", f->order);
}

void field_init_mont_fp(field_ptr f, mpz_t prime) {
  PBC_ASSERT(!mpz_fits_ulong_p(prime), "modulus too small");
  fptr p;
  field_init(f);
  f->init = fp_init;
  f->clear = fp_clear;
  f->set_si = fp_set_si;
  f->set_mpz = fp_set_mpz;
  f->out_str = fp_out_str;
  f->snprint = fp_snprint;
  f->set_str = fp_set_str;
  f->add = fp_add;
  f->sub = fp_sub;
  f->set = fp_set;
  f->mul = fp_mul;
  f->doub = fp_double;
  f->halve = fp_halve;
  f->pow_mpz = fp_pow_mpz;
  f->neg = fp_neg;
  f->sign = fp_sgn_odd;
  f->cmp = fp_cmp;
  f->invert = fp_invert;
  f->random = fp_random;
  f->from_hash = fp_from_hash;
  f->is1 = fp_is1;
  f->is0 = fp_is0;
  f->set0 = fp_set0;
  f->set1 = fp_set1;
  f->is_sqr = fp_is_sqr;
  f->sqrt = element_tonelli;
  f->field_clear = fp_field_clear;
  f->to_bytes = fp_to_bytes;
  f->from_bytes = fp_from_bytes;
  f->to_mpz = fp_to_mpz;
  f->out_info = fp_out_info;

  // Initialize per-field data specific to this implementation.
  p = f->data = pbc_malloc(sizeof(*p));
  p->limbs = mpz_size(prime);
  p->bytes = p->limbs * sizeof(mp_limb_t);
  p->primelimbs = pbc_malloc(p->bytes);
  mpz_export(p->primelimbs, &p->limbs, -1, sizeof(mp_limb_t), 0, 0, prime);

  mpz_set(f->order, prime);
  f->fixed_length_in_bytes = (mpz_sizeinbase(prime, 2) + 7) / 8;

  // Compute R, R3 and negpinv.
  mpz_t z;
  mpz_init(z);

  p->R = pbc_malloc(p->bytes);
  p->R3 = pbc_malloc(p->bytes);
  mpz_setbit(z, p->bytes * 8);
  mpz_mod(z, z, prime);
  set_limbs(p->R, z, p->limbs);

  mpz_powm_ui(z, z, 3, prime);
  set_limbs(p->R3, z, p->limbs);

  mpz_set_ui(z, 0);

  // Algorithm II.5 in Blake, Seroussi and Smart is better but this suffices
  // since we're only doing it once.
  mpz_setbit(z, p->bytes * 8);
  mpz_invert(z, prime, z);
  p->negpinv = -mpz_get_ui(z);
  mpz_clear(z);
}