;  File: PI386.ASM     System: PI Calc       Version: 1.00  Date: 10-01-94  ;

;-----------------------------------------------------------------------------
; PI386.COM computes and displays number pi using spigot algorithm (due to S.
; Rabinowitz).  About 4932*n digits are output, where n=1-1000 is specified
; command-line input.  80386 CPU, 128K RAM, and 66*nK disk space are required.
; Both time and total I/O increase as n*(n+1)/2.  Beep flags error, e.g., I/O
; error or pre-386 CPU.  The 1000-limit is artificial, but it would take a few
; months to compute five million digits anyway.  One million digits (n=203)
; takes about 4.5 days on 486/66MHz machine.  Overhead due to file I/O is
; typically one to two percent of total time.
;
; Syntax: PI386 [n | RESUME | DISPLAY] where n=1-1000
;
;   E.g., PI386 4            Computes 19,720+ decimal digits of pi
;         PI386 R            Resumes excution after previous keypress halt
;         PI386 D > PI.TXT   Displays final/interim output as ASCII digits
;
; Program state is maintained in fixed-length PI386.WRK file.  The file
; contains the spigot array of 16*nK dwords (64*nK bytes) plus header info.
; The array is processed in 64K-byte blocks.  Block read/writes take place
; whenever displayed WRK counter changes (every 19 seconds on 486/66MHz).
; A total of n*(n+1)/2 of these blocks are processed during run.  That is, n
; blocks are processed on first pass through array, n-1 blocks on second pass,
; etc., and one block on last pass.  The passes become shorter, since trailing
; insignificant array blocks are ignored when no longer needed.
;
; Output is appended to PI386.OUT file at the end of each pass, when the OUT
; counter changes.  Output chunks are 549 dwords, with each dword representing
; 9 digits.  The last chunk is shorter.  Any normal keypress halts execution,
; but response is delayed until WRK counter changes, in synch with updates to
; WRK file.  The DISPLAY option may be used to expand the interim or final
; OUT file to console (or redirected to file) as ASCII digits.
;
; Whenever an n value (new or same) is specified, existing OUT/WRK files are
; overwritten.  Use the RESUME option, without n specified, to continue a run
; with existing files.  If there is a power loss during a run or a Ctrl-C
; break (instead of delayed break by other keypress), the files will probably
; be ok.  Header info is saved to the work file frequently (with each 64K
; block write) so that you can try restart from a power loss with little lost
; computation time.  However, if snapshot to disk of computaton state was
; incomplete, try will be unsuccessful.  File copies should be saved before
; restarts for this reason.  The WRK file may be deleted manually when
; calculation is done--when all display counters are zeroed.  A restart when
; done causes no harm.  You may retain the compact OUT file results or the
; equivalent ASCII text from the DISPLAY option.
;
; This program is designed for one machine.  The algorithm, however, is ideal
; for pipelining with multiple machines that share Q buffer I/O.  This would
; allow using the same basic method to compute pi to higher precision than is
; feasable here in reasonable real time on DOS machines.
;
; There are faster algorithms for computing pi, but this may be the simplest,
; since messy fast Fourier transforms are not needed.  See PICALC.COM for
; a tiny version of the spigot algorithm (85 bytes), limited to 9860 digits.
; The limit there is due to use of word, rather than dword, array elements.
;
; PI386.COM also differs since the array is handled in 64K blocks (requiring
; file I/O), interim q values are buffered (reducing the file I/O), and the
; effective length of the array is scaled down linearly during a run (roughly
; halving execution time).  Output display is a separate command-line option,
; since buffering of q values prevents immediate output anyway and since a
; specific format was desired.  The simpler PICALC.COM displays output
; immediately as an unformatted stream of digits.  PI386.COM is 1K in size,
; with 720 bytes of code and the rest trailing data/text.
;
; Output was verified to 1.25 million digits against the Gutenberg Project
; PIMIL10.TXT (same size) downloaded from the Internet.  The display format
; here matches that file.  For reference, pi is:
;
; 3.
; 1415926535 8979323846 2643383279 5028841971 6939937510
; 5820974944 5923078164 0628620899 8628034825 3421170679 <-- 100th digit
; ...
; 5982534904 2875546873 1159562863 8823537875 9375195778
; 1857780532 1712268066 1300192787 6611195909 2164201989 <-- 1000th digit
; ...
; 2645600162 3742880210 9276457931 0657922955 2498872758
; 4610126483 6999892256 9596881592 0560010165 5256375678 <-- 10000th digit
; ...
; 8575016363 4113146275 3049901913 5646823804 3299706957
; 7015078933 7728658035 7127909137 6742080565 5493624646 <-- 100000th digit
; ...
; 0315614033 3212728491 9441843715 0696552087 5424505989
; 5678796130 3311646283 9963464604 2209010610 5779458151 <-- 1000000th digit
; ...
;
; C. Hessel/ER Support/DOE/Germantown MD
;-----------------------------------------------------------------------------
Code_Seg  SEGMENT USE16            ; Want DOS-based 16-bit segments
          ASSUME cs:Code_Seg,ds:Code_Seg,es:Code_Seg

          .486                     ; Will only use 386 instructions, but
                                   ;   want 486 timings in listing
MUV       EQU   <xchg>             ; Saves byte on some AX moves
BLKPWR    =     14                 ; Determines high buffer size--14 maximum
BLKELTS   =     1 SHL BLKPWR       ; Dword elements per buffer--16K maximum
LOGTWO    =     4D104D42h          ; 65536*65536*log(2) base 10, truncated
NMAX      =     1000               ; Cap on input n
DGTS      =     9                  ; Digits per dword--9 maximum
RADIX     =     1000000000         ; Ten raised to power DGTS
FACTOR    =     LOGTWO/DGTS        ; Multiplier used to get output dword count
TEMP      =     32-BLKPWR          ; Construct QELTS value that forces IterCnt
TEMP      =     FACTOR SHR TEMP    ;   equal to or just under MaxBlk (n)
QELTS     =     TEMP+1             ; Quadwords in Q buffer, dwords in P buffer
PARMSIZE  =     16                 ; Parm space at header start--see EOF
HDRSIZE   =     PARMSIZE+8*QELTS   ; Work file header size--includes Q buffer
TEMP      =     HDRSIZE MOD 16     ; Keep work file blocks that follow header
TEMP      =     (16-TEMP) MOD 16   ;   16-aligned for debugging convenience,
HDRSIZE   =     HDRSIZE+TEMP       ;   but need at least 4-aligned
OUTSIZE   =     4000h              ; Read buffer size for DISPLAY option--
                                   ;   insure 256-multiple
;-----------------------------------------------------------------------------
; See EOF for data.  First order of business is to check command-line and
; handle four possibilities: D, R, no parm, or digits.  Other input is
; interpreted by digit handler as zero value for n, which causes abort.
;-----------------------------------------------------------------------------
          ORG   100h               ; COM file start
Begin:    cld                      ; Default direction (could assume from DOS)
          mov   si,5Dh             ; PSP first command arg--uppercase
          mov   al,[si]            ; Fetch first byte, leaving SI as is
          cmp   al,"D"             ;   for MiscInit
          je    Display            ; Display of output file wanted?  Ahead

          cmp   al,"R"
          je    Resume             ; Resume wanted?  Ahead

          cmp   al," "
          jne   Start              ; Not space?  Expect command-line n (digits)

DispSyn:  mov   dx,OFFSET Syntax   ; Else show syntax info and exit via PSP
Disp:     mov   ah,9               ; DOS display $-terminated string
toint:    int   21h
          ret

          EVEN                     ; Force even offset for BP flag in Dump
OutChr:   MUV   dx,ax              ; Output character AL--called by Dump/Abort
          mov   ah,2               ; DOS display character DL
          jmp   toint

OutCrLf3: call  OutCrLf            ; Output 3 CrLfs--called only by Dump
OutCrLf2: call  OutCrLf            ; Output 2 CrLfs
OutCrLf:  mov   dx,OFFSET CrLf     ; Output CrLf, positioned here for
          jmp   Disp               ;   short jump

Prep:     call  Check              ; RAM/386 checks, returning AX/CX/EBX/BP
          sub   ax,1000h           ; Back up 64K, still above stack
          mov   es,ax              ; Save as buffer segment for 64K spigot
          call  DispSyn            ; Display syntax, setting DX to offset
          mov   dl,LOW (OFFSET MsgHdr-OFFSET Begin)
          jmp   Disp               ; Need MsgHdr and Syntax in same page

Display:  call  Check              ; RAM/386 checks, returning AX/CX/EBX/BP
          call  OpenOne            ; Open output file, setting handle BX
          jmp   Dump               ; Dump digits to console, with BX handle
                                   ;   and CX/high EBX zero and BP odd
Resume:   call  Prep               ; See above
          call  OpenTwo            ; Open work and output files read/write
          mov   ax,4202h           ; DOS seek from EOF
          cwd                      ; Zero DX (CX zero from Prep)
          call  Int21h             ; Seek to EOF (handle BX from OpenTwo)
          mov   bp,3F00h           ; DOS read on reentry
          jmp   Reenter            ; Near bottom of main loop below--this is
                                   ;   leap of faith in integrity of files
Start:    call  Prep               ; See above
          call  MiscInit           ; Miscellaneous, CX/EBX/SI input, SI output
          mov   bp,4000h           ; DOS write for next and ApndBlk
          call  SvLdHdr            ; Write work file header, setting handle BX
initloop: call  ApndBlk            ; Append SI initial blocks of dword 2's to
          dec   si                 ;   work file--last dword in last block
          jne   initloop           ;   could be set to 4 (see MiscInit)
;-----------------------------------------------------------------------------
; Main loop, ending with scaled reduction in MaxBlk to cut run time roughly in
; half.  QELTS was set carefully so that reduction is just decrement.  Initial
; MaxBlk and IterCnt are same for n under 554.  For larger n (or for smaller
; BLKPWR or if DGTS/RADIX reduced), IterCnt is less than MaxBlk.  In such a
; case, last iteration operates on more than one array block.
;-----------------------------------------------------------------------------
iterloop: mov   ax,MaxBlk          ; Start from (reduced) MaxBlk
blkloop:  mov   CurBlk,ax          ; Save current block number
          push  ax                 ; To stack for later
          dec   ax                 ; Make block number zero-based for next
          mov   bp,3F00h           ; DOS read
          call  SvLdBlk            ; Load CurBlk from disk
          mov   di,OFFSET QBuf     ; Reset pointers into Q buffer
          mov   si,OFFSET PBuf     ;   and P buffer
          push  si                 ; Save as possible write offset after loop
          mov   ax,IterCnt
          dec   ax                 ; Test if in last outer iteration
          mov   ax,QELTS           ; In case not, set usual Q buffer count
          jne   qloop              ; Not last?  Ahead

          mov   ax,QLast           ; Else reduce Q (and P) buffer count
qloop:    push  ax                 ; Save Q buffer count to stack
          dec   ax                 ; Make zero-based for display
          test  al,0Fh             ; Display every 16 qloop iterations
          jne   skipdisp           ; Not 16-multiple?  Skip

          call  PrepStat           ; Prepare message--AX input/DX output
          call  Disp               ; Display status--DX input
skipdisp: mov   ebx,[di]           ; Get next ECX:EBX from Q buffer without
          mov   ecx,[di+4]         ;   advancing pointer
          call  BufPass            ; Returns ECX:EBX and EDX
          mov   [di],ebx           ; Save ECX:EBX to Q buffer (was zeroed for
          mov   [di+4],ecx         ;   next outer loop if this is digit pass)
          add   di,8
          mov   [si],edx           ; Save possible digits EDX to P Buffer
          add   si,4               ;   (later ignored if not digit pass)
          pop   ax                 ; Restore Q buffer count
          dec   ax                 ;   and decrement
          jne   qloop              ; Not done?  Loop

          pop   dx                 ; Restore P buffer offset (used if digits)
          pop   di                 ; Restore block number
          mov   bp,4000h           ; DOS write for File21h/SvLdBlk/SvLdHdr
          dec   di                 ; Decrement, testing if lowest block
          jne   nodigits           ; No?  Then no digits this pass else save
                                   ;   P buffer data from offset DX
          mov   cx,si              ; Offset after last EDX store above
          sub   cx,dx              ; Bytes of P buffer data (usually 4*QELTS)
OutHandl  =     $+1                ; EBX is zero from BufPass, so only a low
          mov   bl,0               ;   byte load is needed to fetch handle
          call  File21h            ; Append P buffer data to output file
nodigits: MUV   ax,di              ; Decremented block number to AX
          call  SvLdBlk            ; Save block back to work file
Reenter:  call  SvLdHdr            ; Save parms and Q buffer to work file
                                   ;   (or load on reentry with 3Fh)
          mov   ah,1               ; BIOS check for keypress made in synch
          int   16h                ;   with file updates, so response delayed
          jne   done               ; Yes?  Early exit

          mov   ax,CurBlk          ; Current block number--because of loop
          dec   ax                 ;   reentry, this cannot be on stack
          jne   blkloop            ; Not lowest block?  Loop

          dec   MaxBlk             ; Reduce maximum work file block referenced
          dec   IterCnt            ; Parallel reduction in iteration count
          jne   iterloop           ; Not last iteration?  Loop

done:     ret                      ; Exit via int 20h in PSP, closing files
;-----------------------------------------------------------------------------
; Innermost buffer pass, using only registers in loop, except for load/store.
; Input is high buffer contents and q value ECX:EBX.  Buffer contents and q
; value are updated on output, and, if this is digit pass (CurBlk=1), output
; dword is returned in EDX.  On digit pass, output q value ECX:EBX is zeroed
; for start of next outer iteration.
;
; Unrolling loop to reduce jumps would improve performance slightly.  Over 98%
; of program time is spent here on typical machine, with file I/O and status
; display accounting for nearly all the rest.
;-----------------------------------------------------------------------------
BufPass:  push  di
          push  si
          mov   esi,RADIX          ; Fixed radix for multiply/divide
          mov   di,4*BLKELTS-4     ; Point to top dword of buffer
          mov   ebp,CurBlkD        ; Fetch CurBlk (high word zero)
          shl   ebp,BLKPWR+1       ; Multiply by 2*BLKELTS to get 2*k+2
          push  es
          pop   ds                 ; Synch DS with ES to eliminate overrides
          EVEN
bufloop:  dec   ebp                ; 2*k+1
          mov   eax,[di]           ; Fetch next a[k]
          mul   esi                ; a[k]*radix
          add   eax,ebx            ; a[k]*radix + q
          adc   edx,ecx
          div   ebp                ; q <- a[k]/(2*k+1)
          mov   [di],edx           ; Store remainder to a[k]
          shr   ebp,1              ; k
          mul   ebp                ; k*q
          add   ebp,ebp            ; 2*k
          mov   ebx,eax            ; q <- k*q
          mov   ecx,edx
          sub   di,4               ; Point to next a[k]
          jne   bufloop            ; Not last in block?  Loop

          dec   ebp                ; 2*k+1
          mov   eax,[di]           ; Fetch last a[k]
          mul   esi                ; a[k]*radix
          add   eax,ebx            ; a[k]*radix + q
          adc   edx,ecx
          dec   ebp                ; 2*k
          je    gotdgts            ; k = 0?  Then can spit out 9 digits,
                                   ;   else handle as if still in loop
          inc   ebp                ; 2*k+1
          div   ebp                ; q <- a[k]/(2*k+1)
          mov   ebx,edx            ; a[k] <- remainder
          shr   ebp,1              ; k
          mul   ebp                ; k*q
          xchg  ebx,eax            ; q <- k*q
          mov   ecx,edx
stoback:  stosd                    ; Store a[k] (or a[0])
          pop   si
          pop   di
setds:    push  cs                 ; Restore DS
          pop   ds
          ret                      ; Return ECX:EBX (and EDX if digits)

gotdgts:  div   esi                ; Digits <- a[0]/radix
          xor   ebx,ebx            ; Zero q for next blkloop (ECX zero)
          xchg  eax,edx            ; Ready remainder a[0] for store
          jmp   stoback            ; Store a[0], return ECX:EBX and EDX
;-----------------------------------------------------------------------------
; Save/load/append current block to/from work file.  Input BP high is 3Fh/40h
; to flag read/write and AX is zero-based block number.  For append entry
; point, BX must be work file handle (AX irrelevant--no seek done).
;-----------------------------------------------------------------------------
SvLdBlk:
IF BLKELTS EQ 4000h                ; Maximum value allowed
          mov   dx,HDRSIZE         ; Low seek offset, AX is high offset
ELSE                               ; Small test values
          mov   cx,4*BLKELTS
          mul   cx
          add   ax,HDRSIZE         ; Adjust for header
          xchg  dx,ax              ; Low seek offset to DX, high to AX
          adc   ax,0               ; Carry can be set if small BLKELTS/DGTS
ENDIF
          call  WrkSeek            ; Seek from BOF--also sets handle BX
ApndBlk:  push  es                 ; Need high BP 40h and BX handle for append
          pop   ds                 ; Set DS to high buffer segment
          xor   dx,dx              ; Start offset of high buffer
IF BLKELTS EQ 4000h                ; Maximum value allowed
          mov   cx,2*BLKELTS
          call  File21h            ; Half block
          MUV   dx,ax              ; Bytes read or written to DX--next offset
ELSE                               ; Small test values
          mov   cx,4*BLKELTS
ENDIF
          call  File21h            ; Half block (small full block if testing)
          jmp   setds              ; Restore DS and exit
;-----------------------------------------------------------------------------
; Display OUT file.  Format matches Gutenberg Project pi file, with output in
; 1000-digit blocks, 20 lines each, and with 3 blank lines separating blocks
; (so that Vern Buerg's LIST utility can page through output without drift).
; Input is handle BX, zero CX/high EBX and BP odd (OpenFile offset).
;
; Note that output dword may on rare occasions exactly equal Radix.  This
; should be handled by expanding the dword to DGTS zeroes and bumping previous
; dword by one (carry).  Instead, a colon (ASCII 58) followed by DGTS-1 zeroes
; is output.  The colon should be manually replaced with 0 and a carry of 1
; added to the digits left of the colon.
;
; This event is easily noticeable for DGTS < 4, but becomes statistically
; rarer as DGTS increases.  For maximum DGTS = 9, the statistical probablilty
; of such an occurrence in first 5 million digits is under 0.05% and it was
; not, in fact, observed to occur in first 1.25 million digits.  The low
; likelihood is "justification" for sloppy handling here (which adds one-byte
; push ax instruction to routine below).  See SPIGOT.PRG for small radix
; demo of these overflow events.
;-----------------------------------------------------------------------------
dumploop: push  bx                 ; Save so no OutHandl reference needed
          mov   di,OFFSET DgtBuf   ; Expand dwords to digits
          push  di                 ; Save for digit source later
          mov   si,dx              ; OutBuf from file read
          mov   bl,10              ; Fixed divisor--BH zero from handle and
                                   ;   high EBX zeroed earlier
          MUV   cx,ax              ; Number of source dwords
          shr   bp,1               ; BP odd before Dump call
          mov   bp,OFFSET OutChr   ; Function pointer (even) for duration
          jnc   exploop            ; Not first pass?  Ahead

          dec   cx                 ; Should still be non-zero
          lodsd                    ; First dword should be binary 3
          add   al,"0"
          call  bp                 ; Output expected 3 then dot
          mov   al,"."
          call  bp
          call  OutCrLf2           ; Pair of CrLfs
exploop:  push  cx
          lodsd                    ; Load dword from buffer then extract 9
          mov   cx,DGTS-1          ;   decimal digits--last one specially
IF DGTS GT 1
pushlp:   cdq                      ; Zeroes EDX since EAX less equal Radix,
          div   ebx                ;   but divide overflow possible if bad file
          push  dx                 ; Save remainder, since must reverse order
          loop  pushlp
ENDIF
          push  ax                 ; Final quotient--can be 10 in rare cases
          mov   cl,DGTS            ; CH zero
poplp:    pop   ax                 ; Pop digits in reverse order
          add   al,"0"             ; Make ASCII digit (or rare colon)
          stosb                    ; Store to digit buffer
          loop  poplp

          pop   cx                 ; Dwords left
          loop  exploop            ; Not done expanding?  Loop

          pop   si                 ; DgtBuf start for output pass
          pop   bx                 ; Restore handle
          mov   cx,di
          sub   cx,si              ; Total digits to handle this pass
chrloop:  lodsb                    ; Fetch next digit--CX ok through loop
          call  bp                 ; Display
          dec   Cnt10
          jne   nextchr            ; Not end of 10-digit block?  Ahead

          mov   Cnt10,10           ; Reset 10-digit counter
          dec   Cnt5
          je    chkblk             ; Last digit in line?  Ahead--no space

          mov   al," "
          call  bp                 ; Space to separate 10-digit blocks
          jmp   nextchr            ; To next digit

chkblk:   mov   Cnt5,5             ; Reset 5-block counter
          call  OutCrLf            ; Output end-of-line CrLf
          dec   Cnt20
          jne   nextchr            ; Not last line in 20-line group?  Ahead

          mov   Cnt20,20           ; Reset 20-line counter
          call  OutCrLf3           ; Output 3 more CrLfs
nextchr:  loop  chrloop            ; Not end of buffer?  Loop

Dump:     mov   dx,OFFSET OutBuf   ; Handle EBX, zero CX, and odd BP input
          mov   ch,OUTSIZE/256     ; Full buffer (fewer if near EOF)
          mov   ah,3Fh             ; DOS read--use Int21h to allow short read
          call  Int21h             ; Returns bytes-read AX, if no abort
          shr   ax,2               ; Bytes-read to dword count
          jne   dumploop           ; Some dwords?  Loop, else fall harmlessly
                                   ;   to return and PSP exit...
;-----------------------------------------------------------------------------
; Store three nested loop counters to message buffer.  Each is decremented to
; make zero-based.  AX input (decremented counter).  DX message offset output.
;
; On fall through from Dump, uninitialized CurBlk/IterCnt have clear high bits
; from ASCII syntax text, so cwd instruction below is ok.
;-----------------------------------------------------------------------------
PrepStat: mov   bx,OFFSET Message+11
          call  StoDgts+1          ; Store Q counter (already decremented)
          mov   ax,CurBlk
          call  StoDgts            ; Decrement and store blkloop counter
          mov   ax,IterCnt
StoDgts:  dec   ax                 ; Make display counter zero-based
          mov   bp,10              ; Divisor
          mov   cx,3
sdlp:     cwd                      ; Zeroes DX, since counters small
          div   bp
          xchg  ax,dx              ; Remainder to AL, save quotient
          add   al,"0"
          mov   [bx],al            ; Store ASCII digit
          dec   bx                 ; Move leftward
          MUV   ax,dx              ; Quotient back to AX
          loop  sdlp

          mov   dx,bx              ; On last call, sets DX to message start
          dec   bx                 ; Skip colon on first two calls
toret:    ret
;-----------------------------------------------------------------------------
; Save/load parm bytes and Q buffer to/from workfile.  BP high is 3Fh/40h.
;-----------------------------------------------------------------------------
SvLdHdr:  xor   ax,ax
          cwd                      ; Zero DX
          call  WrkSeek            ; Seek from BOF--also sets BX
          mov   dx,OFFSET HdrBuf
          mov   cx,HDRSIZE         ; Fall...
;-----------------------------------------------------------------------------
; Int 21h with error check for read/write calls.  BP high byte is 3Fh/40h.
;-----------------------------------------------------------------------------
          EVEN                     ; Insure OpenFile offset odd for Dump
File21h:  mov   ax,bp              ; Set AH, preserving BP
          call  Int21h             ; Read/write, aborting on set carry
          cmp   ax,cx              ; Also test for short read/write
          jmp   jcabort            ; Short?  Abort else return
;-----------------------------------------------------------------------------
; Open file, name at DX, returning handle AX.
;-----------------------------------------------------------------------------
OpenFile: mov   ax,3D02h           ; DOS read/write open
          jmp   Int21h
;-----------------------------------------------------------------------------
; Create file, name at DX, returning handle AX.  Need input CX zero.
;-----------------------------------------------------------------------------
MakeFile: mov   ah,3Ch             ; DOS create, assuming CX zero
Int21h:   int   21h
jcabort:  jnc   toret              ; Ok?  Out, else fall...
;-----------------------------------------------------------------------------
; Error exit.  Just beep and quit.
;-----------------------------------------------------------------------------
Abort:    mov   al,7               ; Bell character
          call  OutChr             ; Output character to console
          int   20h                ; Closes files too
;-----------------------------------------------------------------------------
; Seek AX:DX bytes from BOF in work file.  BX is handle on exit.
;-----------------------------------------------------------------------------
WrkHandl  =     $+1                ; Handle is stored in instruction
WrkSeek:  mov   bx,0               ; Handle to BX
          MUV   cx,ax              ; High seek offset to CX, DX ok
          mov   ax,4200h           ; DOS seek from BOF
          jmp   Int21h             ; Int 21h with set carry abort
;-----------------------------------------------------------------------------
; Check 386 and RAM, setting AX to segment 128K forward, possibly aborting.
; Routine also zeroes EBX/CX and sets pointer BP for Display/Resume for use
; later.  The 386 check follows Intel-recommended method (see CPUID.ZIP) and,
; as side effect, virtually insures DOS 2.0+ for file I/O.  I doubt if DOS
; 1.x is running on any 386 machines.
;-----------------------------------------------------------------------------
Check:    mov   dx,0F000h          ; Constant for use a few times below
          pushf                    ; Try to clear bits 12-15 in CPU flags,
          pop   bx                 ;   which are always set on 8086/8088
          mov   ax,0FFFh
          and   ax,bx              ; Clear bits 12-15
          push  ax
          popf                     ; Back to flags
          pushf
          pop   ax                 ; Back to AX
          and   ax,dx              ; If bits 12-15 are set, then
          cmp   ax,dx              ;   CPU is an 8086/8088
          je    Abort              ; Yes?  Abort

          or    bx,dx              ; Try to set bits 12-15 in CPU flags,
          push  bx                 ;   which are always clear on 80286
          popf
          pushf
          pop   ax                 ; If bits 12-15 are cleared, then
          and   ax,dx              ;   CPU is an 80286
          je    Abort              ; Yes?  Abort

          mov   bp,OFFSET OpenFile ; Function pointer for Display/Resume
          xor   cx,cx              ; Used later at various points
          xor   ebx,ebx            ; Used later--386 instructions ok now
          mov   ax,cs              ; Point to segment 128K forward past 64K
          add   ax,2000h           ;   spigot buffer and compare to segment
          cmp   [bx+2],ax          ;   after COM allocation (PSP)
          jmp   jcabort            ; No room?  Abort, else return
;-----------------------------------------------------------------------------
; From CX/EBX zero and SI pointing to left digit, get command-line n.  Abort
; if not 1-1000.  Limit is artificially set for 3-digit display (but higher
; values ok).  From n, set parameters MaxBlk/IterCnt/Qlast and zero CurBlkD.
; Zero Q buffer, which holds quadwords for intermediate q values.  Store dword
; 2's to entire 64K high segment.  Finally, create files.  SI holds n on exit.
;
; Total dwords output is set to n*16384*log(2)/9 + 1, truncated to an integer.
; Total decimal digit dwords output (excluding ordinal 3 dword) is one less,
; or roughly 4932*n decimal digits.  This is closest to accuracy limit of
; about n*16384*log(2) + 0.5*log(n) + 1.5576 decimal places when truncated
; fraction is small.  Since less than full accuracy is output, the result will
; be, at worst, off by one unit in the last decimal place.  The accuracy slack
; makes even this error unlikely.  See SPIGOT.PRG for FoxPro demo of algorithm
; error bound.

; The fudging below in FACTOR over allowed n range adds at worst 0.3 decimal
; digits to the dword output setting.  This is well under the slack term
; 0.5*log(n) + 1.5576.  The fudge works since the high and low words of
; LOGTWO are approximately equal.  Part of the fun in writing this program
; was minimizing code size without hurting performance or functionality.
;
; Making last array dword 4 initially (or, equivalently, setting first Q
; buffer quadword below to 2*RADIX) would improve the limit slightly, but
; would not affect the first order term used in setting dword total.
;-----------------------------------------------------------------------------
TEMP      =     HDRSIZE/2-3        ; CX value used below to clear Q buffer--
LOFAC     =     FACTOR MOD 65536   ;   very close to LOFAC at maximum BLKPWR
HIFAC     =     FACTOR/65536       ;   for all DGTS but 5 and 7
TINY      =     FACTOR SHR 24      ; Limit adjustment to 1 part in 16 million

FUDGING   =     TEMP-LOFAC LE TINY AND LOFAC-TEMP LE TINY
IF FUDGING                         ; Adjust FACTOR for dual CX/ECX setting
FACTOR    =     HIFAC*65536+TEMP
ENDIF
getloop:  cbw                      ; Zero AH
          xchg  ax,cx              ; Accumulation to AX, binary digit to CX
          mov   bl,10              ; EBX zeroed earlier
          mul   bx                 ; Accumulation times 10
          add   cx,ax              ; Sum to CX for new accumulation
MiscInit: lodsb                    ; Get next digit
          sub   al,"0"             ; ASCII to binary
          jb    gotnum             ; Exit at first non-digit

          cmp   al,9
          jbe   getloop            ; Loop if binary 0-9

gotnum:   MUV   ax,cx              ; Set AX to n
          dec   ax
          cmp   ax,NMAX            ; Insure n is 1-1000
          jae   Abort              ; No?  Abort (also occurs if no digits)

          inc   ax                 ; Back to n
          mov   si,ax              ; Save as exit value for initial write loop
          push  es                 ; Synch ES/DS for stosw's that follow
          push  ds
          pop   es
          mov   di,OFFSET MaxBlk   ; Point to first header parm
          stosw                    ; MaxBlk--n initially
          cwde                     ; Zero high EAX word (n small)
          shl   eax,BLKPWR         ; BLKELTS times n yields total array dwords
          mov   ecx,FACTOR         ; LOGTWO/DGTS, possibly fudged small amount
          mul   ecx                ; Sets EDX to number of decimal dwords to
          push  edx                ;   output, bumped below to account for
          pop   ax                 ;   initial ordinal 3 dword
          pop   dx                 ; DX:AX (plus one) now dwords to output
          mov   bx,QELTS           ; Dwords output per iteration
          div   bx                 ; Divide to get iteration count
          inc   ax                 ; Bump for extra remainder iteration
          stosw                    ; IterCnt--MaxBlk or just under by design
          MUV   ax,dx              ; Remainder to AX and bump once off possible
          inc   ax                 ;   zero (and accounting for ordinal 3)
          stosw                    ; Qlast--dwords output on last interation
IF NOT FUDGING                     ; Else CX already HDRSIZE/2-3 from FACTOR
          mov   cx,HDRSIZE/2-3
ENDIF
          xor   eax,eax            ; Zero rest of parm space and Q buffer,
          rep   stosw              ;   leaving DI at dword offset afterward
          pop   es                 ; Restore high buffer segment
          dec   cx                 ; CX was zero--repetition overkill, but ok
          mov   al,2               ; Store dword 2's to high buffer, starting
          rep   stosd              ;   somewhere in middle--CX zero afterward
          mov   bp,OFFSET MakeFile ; Fall, creating files...
;-----------------------------------------------------------------------------
; Open work file and output file, setting handles.  Input BP must point to
; either MakeFile or OpenFile above.  Second entry point used to open just
; output file for DISPLAY option.  Output handle returned in BX.
;
; Handles are stored in instruction operands.  This is ok (for current Intel
; CPUs), since any jump/call/ret flushes prefetch queue.  If this is not
; to your taste, you may store the handles, e.g., at the end of the PSP and
; adjust the two target instructions from immediate loads to memory loads.
; This will add a few bytes to COM file size.
;-----------------------------------------------------------------------------
OpenTwo:  mov   dx,OFFSET WrkFile
          call  bp
          mov   BYTE PTR WrkHandl,al
OpenOne:  mov   dx,OFFSET OutFile
          call  bp
          mov   BYTE PTR OutHandl,al
          MUV   bx,ax              ; Output handle to BX
          ret
;-----------------------------------------------------------------------------
; Local data and small buffers next.  Main 64K buffer has own segment.
;-----------------------------------------------------------------------------
MsgHdr    DB    "OUT WRK RAM",10   ; Header for display counters
Message   DB    13," 01-Oct-94 $"  ; Status buffer with program date initially
Syntax    DB    13,10,"Syntax: PI386 [n | RESUME | DISPLAY] where n=1-1000"
          DB    13,10,10,"Finds 4932*n digits of pi by spigot method.  "
WrkFile   DB    "PI386.WRK",0,"and "
OutFile   DB    "PI386.OUT",0
Free      DB    "created.",13,10,"Need 66*nK disk/128K RAM/386 CPU.  "
          DB    "Time grows as n*(n+1)/2.  Keypress halts",13,10,"run for "
          DB    "later restart.  Details in ASM file--Craig R. Hessel."
Cnt10     DB    10                 ; Digit count within block--also LF
CrLf      DB    13,10,"$"          ; CrLf and counts used in DISPLAY option
Cnt20     DB    20                 ; Line count for 1000 digits
Cnt5      DB    5                  ; Block count within line

LOC       =     OFFSET $-OFFSET Begin+100h
TEMP      =     LOC MOD 16
TEMP      =     (16-TEMP) MOD 16   ; Paragraph align OutBuf after Syntax

OutBuf    =     LOC+TEMP           ; Read buffer for DISPLAY option only
DgtBuf    =     OutBuf+OUTSIZE     ; Digit buffer for DISPLAY option only
;-----------------------------------------------------------------------------
; Following parm data/buffers overwrite tail of syntax message above.  These
; parms, along with Q buffer array and spigot array, preserve program state
; between restarts.  All are saved in work file.  Insure PARMSIZE equate at
; BOF is at least as large as parms here (10 bytes).  Order of parms is
; specific for initialization in MiscInit.  Without overwrite of syntax, ORG
; statements below would extend COM file size.
;-----------------------------------------------------------------------------
LOC       =     OFFSET Free-OFFSET Begin+100h
TEMP      =     LOC MOD 16
TEMP      =     (16-TEMP) MOD 16   ; Paragraph align HdrBuf inside Syntax

HdrBuf    =     LOC+TEMP           ; Up to PARMSIZE bytes of header data
QBuf      =     HdrBuf+PARMSIZE    ; Q values, I/O for each main buffer pass
PBuf      =     HdrBuf+HDRSIZE     ; Pi digits, output as dwords

          ORG   HdrBuf
MaxBlk    LABEL WORD               ; Maximum block referenced--decreases
          ORG   HdrBuf+2
IterCnt   LABEL WORD               ; Iteration count decremented to zero
          ORG   HdrBuf+4
QLast     LABEL WORD               ; Q (and P) buffer elements in last
          ORG   HdrBuf+6           ;   iteration (less equal QELTS)
CurBlk    LABEL WORD               ; Current file block
CurBlkD   LABEL DWORD              ; Referenced once as dword (high word zero)

Code_Seg  ENDS
          END  Begin
