1 Generate m numbers from a sequence 1:n

  • Assume that the generation is with replacement.
  • Assume each generation is indenpendent from others.
  • Question: How many unique elements in the random numbers.

  • The probability of the event that an element is generated in one time
    • The probability is \(1/n\).
  • The probability of the event that an element is not generated in one time
    • The probability is \(1-1/n\).
  • The probability of the event that an element is not generated in m times
    • The probability is \((1-1/n)^m\).
    • The limit is \(e^{-1}\) as n goes infinity if m=n
    • The limit is \(e^{-2}\) as n goes infinity if m=2n
  • The probability of the event that an element is generated at least one time among m
    • The probability is \(1-(1-1/n)^m\).
    • The limit is \(1-e^{-1}\sim0.6321\) as n goes infinity if m=n
    • The limit is \(1-e^{-2}\sim0.8708\) as n goes infinity if m=2n
  • The number of elements in which each is generated at least one time among m.
    • It is \(m(1-(1-1/n)^m)\)
    • It approximates \(n(1-e^{-1})\) if m=n
  • R codes
    • Approximately 100(1 - 1/e) = 63.21
length(unique(sample(100, 100, replace = TRUE)))
  • C codes
#include <stdio.h>
#include<stdlib.h>
#define N 1000

void QSort_int(int *s, int l, int r){
    int i, j,x,step=0;
    if (l < r){
        i = l; j = r; x = s[i];
        while (i < j){
            while(i < j && s[j] > x) j--; 
            if(i < j) s[i++] = s[j];
            while(i < j && s[i] < x) i++; 
            if(i < j) s[j--] = s[i];
        }
        s[i] = x;
        QSort_int(s, l, i-1); 
        QSort_int(s, i+1, r);
    }
}

int unique_int(int *pb, int *a, int n){
    int i,count=0;
    QSort_int(a,0,n-1);
    *pb=*a++;
    for(i=1;i<n;i++,a++)
        if(*a!=*pb){
            *++pb=*a;
            count++;
        }
    return count;
}

int main(){
    int i;
    int a[N],b[N],count;
    srand(1);
    for (i = 0; i < N; i++) a[i] = rand() % N;
    count = unique_int(b,a,N);
    printf("length = %d\t probability = %f\n",count,1.0*count/N);
    //for (i = 0; i < count; i++) printf("%d\t",b[i]);printf("\n");

    getchar();
    return 0;
}