There is a "killer" optimization possible yet.
If you make the program to print what prime candidate is sieving, you will find it spends a lot of time in sieving the small primes as 3,5,7 because of the high number of bits to set.
As the bit pattern is periodical for a few primes sieved, you can sieve for those small integers in a multiple of the period that fills compeletely a number of integers, then memcopy that small part to the rest of the array.
Another optimization I have found is to separate the count from the sieve.
This way you have to sieve only up to sqr(n). The primes above it do not have multiples in the table (remember we start sieving at p*p), so no check is needed, the separate count finds them.
The count can be very fast if you use a bit counting trick as the one I posted at The Code Post.
EDITED:
I have a source ...and it's fast
Code:
'n-sieve-bits in FreeBASIC by Antoni Gual
#include "crt.bi"
option explicit
option escape
dim shared p as uinteger ptr, m as integer
#define subset 3*5*7*11
#define precount 5
#define lastpre 11
#define firstsieve 13
'
'-----------------------------------------------
sub mymemcopy
dim as uinteger ptr p1,p2
dim as integer i,j
p1=p+subset
for i=subset to m\64+1-subset step subset
p2=p1-subset
for j=1 to subset
*p1=*p2
p1+=1
p2+=1
next
next
p2=p1-subset
while p1<m\64+1
*p1=*p2
p1+=1
p2+=1
wend
end sub
'
'----------------------------------------------------
sub presieve
'sieve a small part of the sieve for 3,5,7,11, marking
'p*1 as composites
dim as integer i,nn,cs
i=3
for nn=i to subset*64 step 2*i
p[nn\64] or= 1u shl((nn and 63)\2)
next
i=5
cs=2
while i<=lastpre
for nn=i to (subset*64) step 2*i
p[nn\64] or= 1u shl((nn and 63)\2)
next
i=i+cs
cs=6-cs
wend
end sub
'
'---------------------------------------------------
sub dosieve
'does not sieve even nrs or multiples of 3
'starts at 11, previous sieving has been made by preload and mymemcopy
'starts checking multiples at p*p
'stops trying at sqrt(maxprime), as p*p multiples of bigger
' candidates would fall past the end of the sieve.
dim as integer sm,i,nn,cs,ccs,ccs1
sm=int(sqr(m))
i=firstsieve
cs=iif((i mod 3)=2,2,4)
'sieve
do
if (p[i\64] and (1u shl((i and 63)\2)))=0u then
nn=i*i
ccs=I* iif ((I mod 3)=2,2,4)
ccs1=6*I
do
p[nn\64] or= 1u shl((nn and 63)\2)
nn+=ccs
ccs=ccs1-ccs
loop until nn>m
end if
i+=cs
cs=6-cs
loop until i>sm
end sub
'
'------------------------------------------------------
sub docount
'count unset bytes
'
dim as integer cnt,cnt1,i,i1,i2,n=1000
dim as unsigned integer u,uc
cnt=precount
i=0
do
'count unset bytes by 4 bytes groups
do
u=p[i]
'
'bit count snippet (See it at the Code post)
uc= u-((u shr 1) and &o33333333333)-((u shr 2) and &o11111111111)
cnt+=32-( ((uc+(uc shr 3)) and &o30707070707) mod 63 )
i+=1
loop until cnt>=n
'if count too high, count down single unset bytes in the last
'sieve 4 bytes group counted
if cnt>n then
cnt1=cnt
i2=32
do
i2-=1
if (u and (1u shl i2))=0 then cnt1-=1
loop until cnt1=n
printf("the %7dth prime is %9d\n",cnt1,i*64-(31-i2)*2-1)
else
printf("the %7dth prime is %9d\n", cnt, i*64+1)
endif
n*=10
loop until n>1000000
end sub
'
'-------------------------------------------------------
dim as integer m1
m=16e6
m1=(m\64+1) *sizeof(uinteger)
p =callocate(m1)
presieve
mymemcopy
dosieve
docount
deallocate p