/*One level up
***************************************************************************************
Subject: Multiplicative group of units modulo small N:
	 representing U(N) as a direct product of
	 canonical least-generator cyclic subgroups.
Ref.   : D. Shanks, Solved and unsolved problems in number theory,
         Chelsea, New York, 1978, pp. 83-98:
        'Questions & answers concerning cycle graphs.'
Author : Sjoerd.J.Schaper - vspickelen
Date   : 07-03-2007
Code   : ANSI C, C89
Links to a Basic implementation good for N < 2^15,
and a quite different version for large integer-Basic.
**********************************************************************************
This program is copyright (c) 2007 by the author. It is made available as is,
and no warranty - about the program, its performance, or its conformity to any
specification - is given or implied. It may be used, modified, and distributed
freely, as long as the original author is credited as part of the final work.
********************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

typedef unsigned short int uint;
typedef unsigned long int ulong;
const U = 0, G = 1, H = 2;                  /* groups                       */
const LB = 5, PB = 32, Pm1 = 31;            /* Dword size 2^(2^5)           */
ulong bi[32], *bv;                          /* bitmasks, 4 bitvectors       */
uint pt[4], ge[7], cy[7];                   /* generators, invariant orders */
struct Pls { uint f, e; } pr[6];            /* primelist                    */
uint cx, sx, fx, N;                         /* bounds, modulus              */

void Swap (uint *a, uint *b)
{  /* exchange a and b */

   uint c = *a; *a = *b; *b = c;
}

uint gcd (uint a, uint b)
{  /* highest common divisor */

   while (b) {
      uint c = a % b;
      a = b; b = c;
   }
return a;
}

void Pwr (uint *a, uint k)
{  /* let a:= a ^ k */
register uint p = 1, b = *a;

   for (; k; k >>=1)
   {
      if (k & 1) p *= b;
      b *= b;
   }
*a = p;
}

uint modpwr (uint a, uint k)
{  /* return a ^ k (mod N) */
uint p = 1; register ulong b = a;

   for (; k; k >>=1)
   {
      if (k & 1) p = b * p % N;
      b = b * b % N;
   }
return p;
}

uint order (uint a)
{  /* return ord_N(a) */
uint i, j, e = cy[0];                       /* Carmichael lambda */

   for (i = 0; i < fx; i++)
      for (j = 0; j < pr[i].e; j++)
      {
         uint k = e / pr[i].f;
	 if (modpwr(a, k)==1) e = k;        /* reduce exponent */
         else break;
      }
return e;
}

uint triald (uint a, uint sw)
{  /* primefactors(a), set sw = 1 to print */
uint q, e, f = 2, df = 1, fl = 0;
int t = -1; if (a < 2) return 0;

   while (1) {
      for (e = 0;; e++)
      {
	 q = a / f;                         /* trial division */
	 if (a - q * f) break;
	 a = q;
      }

      if (e) {
	 pr[++t].f = f; pr[t].e = e;        /* store prime and exponent */
      } else if (f > q) {                   /* f > sqrt(a) */
	 if (a > 1) {
	    pr[++t].f = a; pr[t].e = 1;     /* store coprime */
         }
	 break;
      }

      if (fl) { df ^= 6; f += df; }         /* sieve multiples of 2 and 3 */
      else { f +=df; df <<=1; fl=f==5; }
   }

   if (sw && (t > 0 || pr[0].e > 1)) {
      int i; printf("     ");
      for (i = 0; i <= t;)
      {
    	 if (pr[i].e==1) printf("%u", pr[i].f);
	 else printf("%u^%u", pr[i].f, pr[i].e);
	 if (i++ < t) printf(",");
	 else printf("\n");
      }
   }
return ++t;
}

uint invars (uint a)
{  /* torsion invariants(a) */
uint q, e; int i, j = 0, t = -1;

   fx = triald(a, 1);                       /* factorize a */
   if (fx==0) return 0;

   if (pr[0].f==2) {
      j = 1;
      if (pr[0].e > 1) {                    /* multiplicity of 2 */
         e = pr[0].e; cy[++t] = 2;
         if (e > 2) cy[++t] = bi[e - 2];
      }
   }

   for (i = j; i < fx; i++)
   {
      q = 1; cy[++t] = pr[i].f;             /* odd prime power decomposition */
      if (pr[i].e > 1) {
	 e = pr[i].e; q = cy[t];
	 if (e > 2) Pwr(&q, e - 1);
	 cy[t] *= q;
      }
      cy[t] -= q;                           /* Eulerphi(p^ e) */
   }

   for (i = t; i > 0; i--)                  /* Smith canonical form */
      for (j = i - 1; j >= 0; j--)
	 if (cy[j] % cy[i]) {
	    q = gcd(cy[i], cy[j]);
	    cy[j] *= cy[i] / q; cy[i] = q;
	 }

   for (i = ++t; i < 7; i++) cy[i] = 0;
return t;
}

void Sieve (uint r, uint N)
{  /* sieve residues prime to N */
uint s, i, j = 0, sw = pr[0].f==2;
ulong W = 0xFFFFFFFF;

   if (sw) { j = 1; W = 0xAAAAAAAA; }
   memset(&bv[r], W, sx * sizeof(ulong));   /* initialize bitvector U */
   bv[r] &= ~3;

   for (i = j; i < fx; i++)                 /* sieve units */
   {
      uint p = pr[i].f, dp = p;
      if (sw) dp += p;
      while (1) {
         bv[p>>LB] &= ~bi[p&Pm1];
         if (p > N - dp) break;
         p += dp;
      }
   }
}

void Balc (uint p)
{  /* print balanced residues, initialize by printing N */
static uint d, w; int q;

   if (p < N) {
      if (p > d) q = (int) p - N;
      else q = p;
      printf("%*d ", w, q);
   } else {
      d = N >> 1;
      for (w = 1, q = d; q; q /=10, w++);
      printf("\n N = %u\n", N);
   }
}

void Cycle (uint r, uint q, uint sw)
{  /* H:= cycle generated by q, set sw = 1 to print */
register uint p = 1;
uint k = 0, Hr = pt[r];
   memset(&bv[Hr], 0, sx * sizeof(ulong));

   if (sw) printf(" {");
   do {
      p = (ulong) q * p % N;
      bv[Hr + (p>>LB)] |= bi[p&Pm1];	    /* store cycle */
      if (sw) { k++; Balc(p); }
   } while (p != 1);
   if (sw) printf("}  %u\n", k);
}

void Smult (uint i, uint j, uint q)
{  /* recursive set multiplication */
uint s, r, t, Gr = pt[j];

   for (s = 0; s < sx; s++)
      if (bv[Gr + s])                       /* unpack set elements */
	 for (r = s<<LB, t = 0; t < PB; t++)
	    if (bv[Gr + s] & bi[t])
	       if (j > 1)
		  Smult(i, j - 1, r + t);
	       else {                       /* orbit of q under G */
		  uint p = (ulong) (r + t) * q % N;
		  bv[pt[i] + (p>>LB)] |= bi[p&Pm1];
	       }
}

uint dprod (uint i, uint j)
{  /* direct product G:= G x H, return 1 if G is enlarged */
uint s, Gr = pt[i], Hr = pt[j], Kr = pt[j + 1];

   if (bv[Gr]==0) {
      Swap(&pt[i], &pt[j]); return 1;       /* 1st largest subgroup H */
   }

   bv[Hr] &= ~bi[1];                        /* clear {1} */
   for (s = 0; s < sx; s++)
   {
      if (bv[Gr + s] & bv[Hr + s])          /* non-trivial intersection */
         return 0;
      bv[Kr + s] = bv[Gr + s] | bv[Hr + s]; /* join 1G with 1H */
   }
   bv[Gr] &= ~bi[1];

   Smult(j + 1, j, 0);                      /* product G x H/1G, /1H */
   Swap(&pt[i], &pt[j + 1]);
return 1;
}

void Remove (uint i, uint j)
{  /* mask out subgroup H in G */
uint s, Gr = pt[i], Hr = pt[j];

   for (s = 0; s < sx; s++)
      bv[Gr + s] &= ~bv[Hr + s];
}

void Build (uint *v)
{  /* build generator, and cycle */
uint c, p, q = 1;

   for (c = 0; c < cx; c++)
   {
      p = modpwr(ge[c], v[c]);
      q = (ulong) p * q % N;
   }

   p = pt[G] + (q>>LB);                     /* assuming G holds U(N) */
   if (bv[p] & bi[q&Pm1]) {
      printf(" (");
      for (c = 0; c < cx; c++)
	 printf(" %u", v[c]);
      printf(" )");
      Cycle(H, q, 1);
      Remove(G, H);                         /* clear < q > candidates */
   }
}

void Combi (uint *v, uint c)
{  /* recursive index combinations */
uint i;

   for (i = 0; i < cy[c]; i++)
   {
      v[c] = i + 1;
      if (c + 1 < cx) {
	 Combi(v, c + 1);
      } else {
         Build(v);
      }
   }
}

uint popchk (uint r)
{  /* return population count (G) */
uint s, t, k = 0, Gr = pt[r];

   for (s = 0; s < sx; s++)
      if (bv[Gr + s]) {
	 register ulong W = bv[Gr + s];
	 for (; W; k++) W &= W - 1;
      }
return k;
}

void main (void)
{
uint c, s, r, t;
ulong W; uint Verb, fi, iv[7];              /* index vector */
const char Ni[8]; clock_t tim;

   for (t = 0; t < PB; t++)
      bi[t] = (ulong) 1 << t;               /* store bitmasks */

   for (tim = clock();;)
   {
      printf("\n N? ");
      do scanf("%8s", Ni);
      while (strpbrk(Ni, "'"));
      W = atol(Ni); if (W > 65535) continue;
      N = W; if (N < 2) break;
      Verb = strpbrk(Ni, "c") != 0;         /* suffix 'c' to print cycles */

      Balc(N); cx = invars(N); fi = 1;      /* factorize N, find invariants */
      if (cx==0) {
	 printf(" phi = 1\n"); continue; }
      for (c = 0; c < cx; c++) fi *= cy[c];
      printf(" phi = %u\n", fi);            /* order(U(N)) */

      sx = ((N - 1) >> LB) + 1;
      bv = malloc(4 * sx * sizeof(ulong));
      for (r = 0; r < 4; r++)               /* initialize row pointers */
         pt[r] = r * sx;

      bv[pt[G]] = 0; Sieve(U, N);           /* prepare G and U(N) */
      fx = triald(cy[0], 0);                /* factorize group exponent */

      for (c = 0; c < cx; c++)              /* find generators */
      {
         for (s = 0; s < sx; s++)
            if (bv[s])                      /* unpack U(N) elements */
               for (r = s<<LB, t = 0; t < PB; t++)
                  if (bv[s] & bi[t]) {
                     uint q = r + t;
                     if (order(q)==cy[c]) { /* invariant order */
		        Cycle(H, q, 0);

		        if (dprod(G, H)) {  /* valid factor H */
			   Remove(U, G);
                           ge[c] = q; goto jump;
                        }
		     }
		  }
      jump:;
      }

      if (popchk(G)==fi) {                  /* check if G holds the full  */
         printf("<generator> order\n");     /* unit group of order phi(N) */
	 for (c = 0; c < cx; c++)           /* minimal generating set */
	    printf(" < %u > %u\n", ge[c], cy[c]);

         if (Verb) {
	    printf("(indices) {cycle}  order\n");
	    Combi(iv, 0);                   /* covering cycles */
	    if (popchk(G))
	       printf(" fail: units left\n");
	 }
      } else printf(" fail: order < phi\n");

      free(bv);
   }
   printf("\nTimer: %f s\n", (double) (clock() - tim) / CLOCKS_PER_SEC);
}
/* */