One can do this easily in O(arr.size) both time and extra space using a Bloom filter. However, there is a non-zero probability of mistaking numbers that are not seen yet as having been seen already—false positives. To be approximate, (1 - exp[-kn/m])^k, where k is the number of hash functions (I used 3), n is the number of unique numbers already in the bit-vector, strictly bounded by arr size, and m is the size of the bit-vector (I used 64). Which is a maximum of 4%, in this case.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
/* https://stackoverflow.com/a/12996028/2472827 */
static uint32_t mueller(uint32_t x) {
x = ((x >> 16) ^ x) * 0x45d9f3b;
x = ((x >> 16) ^ x) * 0x45d9f3b;
x = (x >> 16) ^ x;
return x;
}
/* https://gist.github.com/badboy/6267743 */
static uint32_t jenkins(uint32_t a) {
a = (a+0x7ed55d16) + (a<<12);
a = (a^0xc761c23c) ^ (a>>19);
a = (a+0x165667b1) + (a<<5);
a = (a+0xd3a2646c) ^ (a<<9);
a = (a+0xfd7046c5) + (a<<3);
a = (a^0xb55a4f09) ^ (a>>16);
return a;
}
/* https://gist.github.com/zeux/25b490b07b4873efc08bd37c843777a4 */
static uint32_t murmur3(uint32_t h) {
h ^= h >> 16;
h *= 0x85ebca6bu;
h ^= h >> 13;
h *= 0xc2b2ae35u;
h ^= h >> 16;
return h;
}
static bool maybe_duplicate_add(const int n) {
static uint64_t bloom;
bool maybe = true;
uint64_t mask;
uint32_t u = (uint32_t)n; /* Usually 32-bits? */
mask = 1ul << (mueller(u) & 63);
if(!(bloom & mask)) maybe = false, bloom |= mask;
mask = 1ul << (jenkins(u) & 63);
if(!(bloom & mask)) maybe = false, bloom |= mask;
mask = 1ul << (murmur3(u) & 63);
if(!(bloom & mask)) maybe = false, bloom |= mask;
return maybe;
}
int main(void) {
int numbers[] = { 6, 2, 3, 6, 9, 2, 7, 8, 1 };
size_t numbers_size = sizeof numbers / sizeof *numbers;
struct { size_t size; int a[sizeof numbers / sizeof *numbers]; } temp;
temp.size = 0;
size_t behind = 0;
for(size_t i = 0; i < numbers_size; i++) {
int n = numbers[i];
if(maybe_duplicate_add(n)) temp.a[temp.size++] = n;
else numbers[behind++] = n;
}
assert(behind + temp.size == numbers_size);
memcpy(&numbers[behind], temp.a, temp.size * sizeof *numbers);
printf("Number of duplicates: %zu\n"
"{ ", temp.size);
for(size_t i = 0; i < numbers_size; i++)
printf("%s%d", i ? ", " : "", numbers[i]);
printf(" }\n");
return 0;
}
On the other hand, it's probably not the direction you want to go in your assignment because it doesn't really fit the "O(n log n)" hint you were given. Although, they never said what the extra array holds; are you allowed to sort pointers to the array?