Posts Tagged ‘x86’

Linux System Call

November 8th, 2013 No comments

The HelloWorld application is much simpler than the Windows one. Just put parameters into registers from %eax to %edx, and trigger a 0x80 interrupt.

# gcc -nostdlib syscall_linux.s -o syscall_linux
# NOTE(review): this uses the 32-bit int $0x80 ABI; on an x86-64 host you
# will likely need -m32 (and possibly -static or -no-pie) -- confirm locally.
#
# Syntax: GAS / AT&T.  ABI: Linux i386 system calls
#   %eax = system call number, %ebx/%ecx/%edx = arguments 1..3.
.global _start

    .text
_start:
    # write(1, message, message_len)
    mov     $4, %eax            # system call 4 is write
    mov     $1, %ebx            # file handle 1 is stdout
    mov     $message, %ecx      # address of string to output
    mov     $message_len, %edx  # number of bytes to write
    int     $0x80               # invoke system call
    # exit(0)
    mov     $1, %eax            # system call 1 is exit
    xor     %ebx, %ebx          # return 0
    int     $0x80               # invoke system call

    .section .rodata
message:
    .ascii  "Hello World!\n"
    .equ    message_len, . - message    # 13 bytes, computed by the assembler
Categories: OS Tags: , ,

Windows System Call Sequence and Simulation

November 7th, 2013 No comments

There are hundreds of documents explaining how Windows implements its system calls, using int 2e or sysenter. But I could find no runnable code that shows exactly how it works, so I wrote it on my own.

The C code requires only the SDK to compile, since I have copied all the DDK definitions inline. It opens the file C:\test.txt and writes Hello World! to it. Quite simple. I first tried a HelloWorld console application, but its call sequence turned out to be far more complex than I had expected, as I found after doing some reverse engineering and reading some code from the ReactOS project (Wine does not help, since it does not implement a Win32-compatible call sequence in the console case). The code is the basis of our further investigation. It invokes NtCreateFile(), NtWriteFile() and NtClose() in ntdll.dll with dynamic loading:

#include <windows.h>
#include <stdio.h>
#include <wchar.h>

#define FILE_OVERWRITE_IF               0x00000005
#define FILE_SYNCHRONOUS_IO_NONALERT    0x00000020
#define OBJ_KERNEL_HANDLE               0x00000200L
#define NT_SUCCESS(Status)      ((NTSTATUS)(Status) >= 0)

/*
 * DDK definitions copied inline so that only the SDK is needed to build.
 * NOTE(review): newer SDKs may already declare NTSTATUS via <windows.h>;
 * the identical typedef below is then redundant.
 */
typedef LONG NTSTATUS;

typedef struct _UNICODE_STRING {
    USHORT Length;                   /* length of Buffer in bytes, without terminator */
    USHORT MaximumLength;            /* capacity of Buffer in bytes */
    PWSTR  Buffer;
} UNICODE_STRING, *PUNICODE_STRING;

typedef struct _OBJECT_ATTRIBUTES {
    ULONG Length;
    HANDLE RootDirectory;
    PUNICODE_STRING ObjectName;
    ULONG Attributes;
    PVOID SecurityDescriptor;        // Points to type SECURITY_DESCRIPTOR
    PVOID SecurityQualityOfService;  // Points to type SECURITY_QUALITY_OF_SERVICE
} OBJECT_ATTRIBUTES, *POBJECT_ATTRIBUTES;

typedef struct _IO_STATUS_BLOCK {
    union {
        NTSTATUS Status;
        PVOID Pointer;
    };
    ULONG_PTR Information;
} IO_STATUS_BLOCK, *PIO_STATUS_BLOCK;

typedef VOID (NTAPI *PIO_APC_ROUTINE)(
    IN PVOID ApcContext,
    IN PIO_STATUS_BLOCK IoStatusBlock,
    IN ULONG Reserved
    );

typedef NTSTATUS (WINAPI *FnNtCreateFile)(
    PHANDLE FileHandle,
    ACCESS_MASK DesiredAccess,
    POBJECT_ATTRIBUTES ObjectAttributes,
    PIO_STATUS_BLOCK IoStatusBlock,
    PLARGE_INTEGER AllocationSize,
    ULONG FileAttributes,
    ULONG ShareAccess,
    ULONG CreateDisposition,
    ULONG CreateOptions,
    PVOID EaBuffer,
    ULONG EaLength
    );

typedef NTSTATUS (WINAPI *FnNtWriteFile)(
    HANDLE FileHandle,
    HANDLE Event,
    PIO_APC_ROUTINE ApcRoutine,
    PVOID ApcContext,
    PIO_STATUS_BLOCK IoStatusBlock,
    PVOID Buffer,
    ULONG Length,
    PLARGE_INTEGER ByteOffset,
    PULONG Key
    );

typedef NTSTATUS (WINAPI *FnNtClose)(
    HANDLE Handle
    );

/*
 * Creates (or overwrites) C:\test.txt and writes "Hello World!" to it by
 * calling the native NtCreateFile/NtWriteFile/NtClose exports of ntdll.dll,
 * resolved at run time with GetProcAddress.
 *
 * Returns 0 on success and -1 on any failure.
 */
int main()
{
    HMODULE hModule;
    FnNtCreateFile pfnNtCreateFile;
    FnNtWriteFile pfnNtWriteFile;
    FnNtClose pfnNtClose;

    hModule = LoadLibraryA("ntdll.dll");  /* always 0x7c900000 on XP */
    if (hModule == NULL) {
        return -1;
    }

    pfnNtCreateFile = (FnNtCreateFile)GetProcAddress(hModule, "NtCreateFile");  /* 0x7c90d090 */
    pfnNtWriteFile = (FnNtWriteFile)GetProcAddress(hModule, "NtWriteFile");  /* 0x7c90df60 */
    pfnNtClose = (FnNtClose)GetProcAddress(hModule, "NtClose");  /* 0x7c90cfd0 */
    if (pfnNtCreateFile == NULL || pfnNtWriteFile == NULL || pfnNtClose == NULL) {
        return -1;
    } else {
        NTSTATUS ntStatus;
        UNICODE_STRING us;
        OBJECT_ATTRIBUTES oa;
        IO_STATUS_BLOCK ioStatusBlock;
        HANDLE hFile;
        char szHello[] = "Hello World!";

        /* Native APIs take NT object-manager paths, hence the \??\ prefix. */
        us.Buffer = L"\\??\\C:\\test.txt";
        us.Length = (USHORT)(wcslen(us.Buffer) * sizeof(WCHAR));
        us.MaximumLength = us.Length + sizeof(WCHAR);

        oa.Length = sizeof(oa);
        oa.RootDirectory = NULL;
        oa.ObjectName = &us;
        oa.Attributes = OBJ_KERNEL_HANDLE;
        oa.SecurityDescriptor = NULL;
        oa.SecurityQualityOfService = NULL;

        /* Argument values mirror the push sequence of the assembly version. */
        ntStatus = pfnNtCreateFile(&hFile,
                                   GENERIC_ALL | SYNCHRONIZE,      /* 0x10100000 */
                                   &oa,
                                   &ioStatusBlock,
                                   NULL,                           /* AllocationSize */
                                   FILE_ATTRIBUTE_NORMAL,          /* 0x80 */
                                   0,                              /* ShareAccess */
                                   FILE_OVERWRITE_IF,
                                   FILE_SYNCHRONOUS_IO_NONALERT,
                                   NULL,                           /* EaBuffer */
                                   0);                             /* EaLength */
        if (!NT_SUCCESS(ntStatus)) {
            fprintf(stderr, "Failed to create file, error = 0x%x\n", (unsigned int)ntStatus);
            return -1;
        }

        ntStatus = pfnNtWriteFile(hFile,
                                  NULL,                            /* Event */
                                  NULL,                            /* ApcRoutine */
                                  NULL,                            /* ApcContext */
                                  &ioStatusBlock,
                                  szHello,
                                  (ULONG)(sizeof(szHello) - 1),    /* 12 bytes, no terminator */
                                  NULL,                            /* ByteOffset */
                                  NULL);                           /* Key */
        if (!NT_SUCCESS(ntStatus)) {
            fprintf(stderr, "Failed to write file, error = 0x%x\n", (unsigned int)ntStatus);
            pfnNtClose(hFile);  /* do not leak the handle on the error path */
            return -1;
        }

        pfnNtClose(hFile);
    }

    FreeLibrary(hModule);
    return 0;
}

I found the handle value and all three function pointers are fixed, at least on my Windows XP(SP3). It may be caused by the preferred base address of ntdll.dll. The code should work on all Windows platforms, since it has no hardcoded values.

Now, translate the C code into assembly. Error handling is omitted:

.386
.model flat,stdcall

; Syntax: MASM (Intel).  Target: 32-bit Windows, stdcall.

;            SDK prototypes            ;
NULL            EQU 0

; Field is called Len because LENGTH is a reserved MASM operator.
UNICODE_STRING STRUCT
    Len             WORD ?
    MaximumLength   WORD ?
    Buffer          DWORD ?
UNICODE_STRING ENDS

OBJECT_ATTRIBUTES STRUCT
    Len                         DWORD ?
    RootDirectory               DWORD ?
    ObjectName                  DWORD ?
    Attributes                  DWORD ?
    SecurityDescriptor          DWORD ?
    SecurityQualityOfService    DWORD ?
OBJECT_ATTRIBUTES ENDS

IO_STATUS_BLOCK STRUCT
    UNION
        Status  DWORD ?
        Pointer DWORD ?
    ENDS
    Information DWORD ?
IO_STATUS_BLOCK ENDS

ExitProcess PROTO :DWORD

;         Program declarations         ;
.data
; IMPORTANT: The padding is required!!
; "Hello World!" is 12 bytes; the four zero bytes pad it to 16 so that the
; data that follows keeps its alignment.
STR_HELLO           DB      "Hello World!",0,0,0,0
; 15 UTF-16 characters + terminator = 32 bytes (us.Len = 30, us.MaximumLength = 32)
STR_FILE            WORD    "\","?","?","\","C",":","\","t","e","s","t",".","t","x","t",0

; Variables referenced by main.
; NOTE(review): placement after the strings is assumed -- confirm.
us                  UNICODE_STRING <>
oa                  OBJECT_ATTRIBUTES <>
ioStatusBlock       IO_STATUS_BLOCK <>
hFile               DWORD   ?

.code

; NtCreateFile: system-call stub for the NtCreateFile service.
; In:  the 11 DWORD arguments are already on the stack, pushed by the caller.
; Out: eax is whatever the system call returns (the C version treats it as an NTSTATUS).
NtCreateFile PROC
    ; System service number: 25h(XP) or 42h(Win7)
    mov eax, 25h
    ; 7ffe0300h holds a pointer to the system-call entry code
    mov edx, 7ffe0300h
    call DWORD PTR [edx]
    ; Callee cleans the stack: 2ch = 44 bytes = 11 DWORD arguments
    retn 2ch
NtCreateFile ENDP

; NtWriteFile: system-call stub for the NtWriteFile service.
; In:  the 9 DWORD arguments are already on the stack, pushed by the caller.
; Out: eax is whatever the system call returns (the C version treats it as an NTSTATUS).
NtWriteFile PROC
    ; System service number: 112h(XP) or 18ch(Win7)
    mov eax, 112h
    ; 7ffe0300h holds a pointer to the system-call entry code
    mov edx, 7ffe0300h
    call DWORD PTR [edx]
    ; Callee cleans the stack: 24h = 36 bytes = 9 DWORD arguments
    retn 24h
NtWriteFile ENDP

; NtClose: system-call stub for the NtClose service.
; In:  one DWORD argument (the handle) is already on the stack.
; Out: eax is whatever the system call returns (the C version treats it as an NTSTATUS).
NtClose PROC
    ; System service number: 19h(XP) or 32h(Win7)
    mov eax, 19h
    ; 7ffe0300h holds a pointer to the system-call entry code
    mov edx, 7ffe0300h
    call DWORD PTR [edx]
    ; Callee cleans the stack: 4 bytes = 1 DWORD argument
    retn 4h
NtClose ENDP

; main: program entry point.
; Creates \??\C:\test.txt, writes the 12 bytes of STR_HELLO to it and closes
; the handle, using the local NtCreateFile/NtWriteFile/NtClose stubs.
; Arguments are pushed right to left (stdcall); the stubs pop them on return.
; Error handling is omitted: the returned status codes are not checked.
main PROC
    ;LOCAL ntStatus:DWORD
    ; 1. initialization
    mov us.Buffer, OFFSET STR_FILE
    mov us.Len, 30                      ; 15 UTF-16 characters * 2 bytes
    mov us.MaximumLength, 32            ; Len + terminating zero WORD
    mov oa.Len, SIZEOF oa               ; oa.Length = sizeof(oa), as in the C version
    mov oa.RootDirectory, NULL
    lea eax, [us]
    mov oa.ObjectName, eax
    mov oa.Attributes, 200h             ; OBJ_KERNEL_HANDLE
    mov oa.SecurityDescriptor, NULL
    mov oa.SecurityQualityOfService, NULL
    ; 2. parameters of NtCreateFile (11 arguments, last one first)
    push 0                              ; EaLength
    push NULL                           ; EaBuffer
    push 20h                            ; CreateOptions = FILE_SYNCHRONOUS_IO_NONALERT
    push 5h                             ; CreateDisposition = FILE_OVERWRITE_IF
    push 0                              ; ShareAccess
    push 80h                            ; FileAttributes = FILE_ATTRIBUTE_NORMAL
    push NULL                           ; AllocationSize
    lea eax, [ioStatusBlock]
    push eax                            ; IoStatusBlock
    lea eax, [oa]
    push eax                            ; ObjectAttributes
    push 10100000h                      ; DesiredAccess = GENERIC_ALL | SYNCHRONIZE
    lea eax, [hFile]
    push eax                            ; FileHandle (out)
    ; 3. call NtCreateFile
    call NtCreateFile
    ; 4. parameters of NtWriteFile (9 arguments, last one first)
    push NULL                           ; Key
    push NULL                           ; ByteOffset
    push 12                             ; Length of "Hello World!"
    push OFFSET STR_HELLO               ; Buffer (the 9th argument; NtWriteFile pops 24h bytes)
    lea eax, [ioStatusBlock]
    push eax                            ; IoStatusBlock
    push NULL                           ; ApcContext
    push NULL                           ; ApcRoutine
    push NULL                           ; Event
    push hFile                          ; FileHandle
    ; 5. call NtWriteFile
    call NtWriteFile
    ; 6. parameters of NtClose
    push hFile
    ; 7. call NtClose
    call NtClose
    ; 8. Exit
    ; ExitProcess stays disabled so the executable keeps an empty import table.
    ;INVOKE ExitProcess, 0
    xor eax, eax                        ; return 0
    ret                                 ; return to the caller of the entry point instead of falling off the PROC
main ENDP

END main

Compile the code with:

# ml /c testnt.asm
# link /subsystem:console testnt.obj

The assembly code of NtCreateFile(), NtWriteFile() and NtClose() is copied directly from ntdll.dll. For NtCreateFile(), 25h is the system service number that is used to index into the KiServiceTable (SSDT, System Service Dispatch Table) to locate the kernel function that handles the call.

System service numbers vary between Windows versions. This is why it is not recommended to use them directly to invoke system calls; I only demonstrate the approach here. For Windows XP, the values of the three numbers are 25h, 112h and 19h, while for Windows 7 they are 42h, 18ch and 32h. Change them yourself if you’re running Windows 7. For a complete list of system service numbers, refer here or disassemble your ntdll.dll manually :). The output executable is a tiny one, only 3KB in size, since it eliminates the usage of the CRT. Moreover, it has an empty list of imported functions!

At 7ffe0300h is a pointer to the following code:

mov edx, esp    ; edx = user-mode stack pointer, so the kernel can locate the arguments
sysenter        ; fast transition into kernel mode
ret             ; KiFastSystemCallRet: execution resumes here and returns to the stub

NOTE: The assembly code works only when built as a 32-bit application. 64-bit mode is not tested and would need modifications to work.

One last point: it seems the STR_HELLO string is required to be aligned to an 8-byte boundary (hence the zero padding). Otherwise, you will get the error code 0x80000002 (STATUS_DATATYPE_MISALIGNMENT).

Categories: OS Tags: , ,

Compiler Intrinsic Functions

October 30th, 2013 No comments

Copied from Wikipedia:

An intrinsic function is a function available for use in a given programming language whose implementation is handled specially by the compiler. Typically, it substitutes a sequence of automatically generated instructions for the original function call, similar to an inline function. Unlike an inline function though, the compiler has an intimate knowledge of the intrinsic function and can therefore better integrate it and optimize it for the situation. This is also called builtin function in many languages.

A code snippet is written to check the code generation when intrinsic is enabled or not:

/*
 * # gcc -S intrinsic.c -o intrinsic.s
 * # gcc -S -fno-builtin intrinsic.c -o intrinsic2.s
 * # cl /c /Oi intrinsic.c /FAs /Faintrinsic.asm
 * # cl /c intrinsic.c /FAs /Faintrinsic2.asm
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

const char *c = "Hello World!";
char c2[16];    /* file-scope, so zero-initialized: stays NUL-terminated after the 12-byte copy */

/*
 * Calls abs() and memcpy() so that the generated assembly shows whether the
 * compiler expanded them as intrinsics or emitted real calls.
 * Prints "<abs(argc)>,Hello World!" and returns 0.
 */
int main(int argc, char *argv[])
{
    int a = abs(argc);
    memcpy(c2, c, 12);
    printf("%d,%s\n", a, c2);
    return 0;
}

Generated assembly:

    pushl   %ebp
    movl    %esp, %ebp
    andl    $-16, %esp
    subl    $32, %esp
    movl    8(%ebp), %eax
    sarl    $31, %eax
    movl    %eax, %edx
    xorl    8(%ebp), %edx
    movl    %edx, 28(%esp)
    subl    %eax, 28(%esp)
    movl    c, %eax
    movl    %eax, %edx
    movl    $c2, %eax
    movl    (%edx), %ecx
    movl    %ecx, (%eax)
    movl    4(%edx), %ecx
    movl    %ecx, 4(%eax)
    movl    8(%edx), %edx
    movl    %edx, 8(%eax)
    movl    $.LC1, %eax
    movl    $c2, 8(%esp)
    movl    28(%esp), %edx
    movl    %edx, 4(%esp)
    movl    %eax, (%esp)
    call    printf
    movl    $0, %eax

Only printf() appears in the code; there is neither abs() nor memcpy(), because they are intrinsics, as listed here in gcc’s online documentation.

Intrinsics can be explicitly disabled. For instance, CRT intrinsics must be disabled for kernel development. Add the -fno-builtin flag to gcc, or remove the /Oi switch in MSVC. Only the code generated by gcc is pasted here:

    pushl   %ebp
    movl    %esp, %ebp
    andl    $-16, %esp
    subl    $32, %esp
    movl    8(%ebp), %eax
    movl    %eax, (%esp)
    call    abs
    movl    %eax, 28(%esp)
    movl    c, %eax
    movl    %eax, %edx
    movl    $c2, %eax
    movl    $12, 8(%esp)
    movl    %edx, 4(%esp)
    movl    %eax, (%esp)
    call    memcpy
    movl    $.LC1, %eax
    movl    $c2, 8(%esp)
    movl    28(%esp), %edx
    movl    %edx, 4(%esp)
    movl    %eax, (%esp)
    call    printf
    movl    $0, %eax

There _are_ abs() and memcpy() now. General MSVC intrinsic can be found here.

Intrinsics are easier to use than inline assembly. They are used to increase performance in most cases. Both gcc and MSVC provide intrinsic support for Intel’s MMX, SSE and SSE2 instruction sets. Code snippet using MMX:

/*
 * # gcc -O2 -S -mmmx intrinsic_mmx.c -o intrinsic_mmx.s
 * # cl /O2 /c intrinsic_mmx.c /FAs /Faintrinsic_mmx.asm
 */
#include <stdio.h>
#include <mmintrin.h>

/*
 * Adds two pairs of 32-bit integers with a single MMX packed add and prints
 * both sums ("out1=666,out2=444").
 *
 * NOTE(review): production code should call _mm_empty() after MMX use and
 * before any floating-point code; it is left out here so the source keeps
 * matching the assembly listing in the article.
 */
int main()
{
    __m64 m1, m2, m3;
    int out1, out2;
    int in1[] = { 222, 111 };
    int in2[] = { 444, 333 };
    /* Two equivalent ways of loading the operands; the second one is active. */
#if 0
    m1 = _mm_setr_pi32(in1[0], in1[1]);
    m2 = _mm_setr_pi32(in2[0], in2[1]);
#else
    m1 = *(__m64 *)in1;
    m2 = *(__m64 *)in2;
#endif
    m3 = _mm_add_pi32(m1, m2);          /* packed add of both 32-bit lanes */
    out1 = _mm_cvtsi64_si32(m3);        /* low lane */
    m3  = _mm_srli_si64(m3, 32);        /* shift the high lane down */
    out2 = _mm_cvtsi64_si32(m3);        /* former high lane */
    printf("out1=%d,out2=%d\n", out1, out2);
    return 0;
}

Assembly looks like:

    pushl   %ebp
    movl    %esp, %ebp
    andl    $-16, %esp
    subl    $16, %esp
    movq    .LC1, %mm0
    paddd   .LC2, %mm0
    movd    %mm0, 8(%esp)
    psrlq   $32, %mm0
    movd    %mm0, 12(%esp)
    movl    $.LC0, 4(%esp)
    movl    $1, (%esp)
    call    __printf_chk
    xorl    %eax, %eax

You see MMX registers and instructions this time. The -mmmx flag is required to build with gcc. MSVC also generates similar code. Reference for these instruction sets is available on Intel’s website.

A simple benchmark using SSE is available here.

Categories: C/C++ Tags: , , ,

Jump Instructions and EFLAGS

October 29th, 2013 No comments

I used to have a misconception about conditional jumps: that they check only the result of CMP and TEST instructions. So when one appeared after other instructions like ADD or SUB, I could find no clue as to how it worked.

Actually, a conditional jump checks flags in the EFLAGS control register. From Intel’s manual, vol 1, 3.4.3:

The status flags (bits 0, 2, 4, 6, 7, and 11) of the EFLAGS register indicate the results of arithmetic instructions, such as the ADD, SUB, MUL, and DIV instructions. The status flag functions are:

CF (bit 0) Carry flag: Set if an arithmetic operation generates a carry or a borrow out of the most-significant bit of the result; cleared otherwise. This flag indicates an overflow condition for unsigned-integer arithmetic. It is also used in multiple-precision arithmetic.

PF (bit 2) Parity flag: Set if the least-significant byte of the result contains an even number of 1 bits; cleared otherwise.
AF (bit 4) Adjust flag: Set if an arithmetic operation generates a carry or a borrow out of bit 3 of the result; cleared otherwise. This flag is used in binary-coded decimal (BCD) arithmetic.

ZF (bit 6) Zero flag: Set if the result is zero; cleared otherwise.

SF (bit 7) Sign flag: Set equal to the most-significant bit of the result, which is the sign bit of a signed integer. (0 indicates a positive value and 1 indicates a negative value.)

OF (bit 11) Overflow flag: Set if the integer result is too large a positive number or too small a negative number (excluding the sign-bit) to fit in the destination operand; cleared otherwise. This flag indicates an overflow condition for signed-integer (two’s complement) arithmetic.

And again from vol 2a, section Jcc Jump if Condition is met, more details. I just copy content from here:

| Instruction | Description | signed? | Flags | short jump opcode | near jump opcode |
|---|---|---|---|---|---|
| JO | Jump if overflow | | OF = 1 | 70 | 0F 80 |
| JNO | Jump if not overflow | | OF = 0 | 71 | 0F 81 |
| JS | Jump if sign | | SF = 1 | 78 | 0F 88 |
| JNS | Jump if not sign | | SF = 0 | 79 | 0F 89 |
| JE, JZ | Jump if equal; Jump if zero | | ZF = 1 | 74 | 0F 84 |
| JNE, JNZ | Jump if not equal; Jump if not zero | | ZF = 0 | 75 | 0F 85 |
| JB, JNAE, JC | Jump if below; Jump if not above or equal; Jump if carry | unsigned | CF = 1 | 72 | 0F 82 |
| JNB, JAE, JNC | Jump if not below; Jump if above or equal; Jump if not carry | unsigned | CF = 0 | 73 | 0F 83 |
| JBE, JNA | Jump if below or equal; Jump if not above | unsigned | CF = 1 or ZF = 1 | 76 | 0F 86 |
| JA, JNBE | Jump if above; Jump if not below or equal | unsigned | CF = 0 and ZF = 0 | 77 | 0F 87 |
| JL, JNGE | Jump if less; Jump if not greater or equal | signed | SF <> OF | 7C | 0F 8C |
| JGE, JNL | Jump if greater or equal; Jump if not less | signed | SF = OF | 7D | 0F 8D |
| JLE, JNG | Jump if less or equal; Jump if not greater | signed | ZF = 1 or SF <> OF | 7E | 0F 8E |
| JG, JNLE | Jump if greater; Jump if not less or equal | signed | ZF = 0 and SF = OF | 7F | 0F 8F |
| JP, JPE | Jump if parity; Jump if parity even | | PF = 1 | 7A | 0F 8A |
| JNP, JPO | Jump if not parity; Jump if parity odd | | PF = 0 | 7B | 0F 8B |
| JCXZ | Jump if %CX register is 0 | | %CX = 0 | E3 | (none) |
| JECXZ | Jump if %ECX register is 0 | | %ECX = 0 | E3 | (none) |

There are signed and unsigned versions when comparing: JA vs. JG, JB vs. JL, etc. Let’s take JA and JG to explain the difference. For JA, it’s clear that it requires CF=0 (no borrow) and ZF=0 (not equal). For JG, when the two operands have the same sign, the subtraction cannot overflow, so it requires ZF=0 and SF=OF=0. When the two operands have different signs, the first operand must be the positive one; the result is then either in range (SF=OF=0) or it overflows into the sign bit (SF=OF=1). In both cases ZF=0 and SF=OF.

Note that the following 2 lines (AT&T syntax) are equivalent. The CPU just does the arithmetic; it does not care whether a value is signed or unsigned. It only sets the flags. It is we who make the signed or unsigned jump decision.

movl $-1, %eax
movl $0xffffffff, %eax

Last, I’d like to use ndisasm (install the nasm package to get it) to illustrate how jump instructions are encoded, including short jumps, near jumps and far jumps:

# echo -e "\x74\x00" | ndisasm -
00000000  7400              jz 0x2
00000002  0A                db 0x0a
# echo -e "\x74\xfe" | ndisasm -
00000000  74FE              jz 0x0
00000002  0A                db 0x0a
# echo -e "\x0f\x84\x00\x00" | ndisasm -
00000000  0F840000          jz word 0x4
00000004  0A                db 0x0a
# echo -e "\x0f\x84\xfc\xff" | ndisasm -
00000000  0F84FCFF          jz word 0x0
00000004  0A                db 0x0a
# echo -e "\x0f\x84\x00\x00\x00\x00" | ndisasm - -b 32
00000000  0F8400000000      jz dword 0x6
00000006  0A                db 0x0a
# echo -e "\x0f\x84\xfa\xff\xff\xff" | ndisasm - -b 32
00000000  0F84FAFFFFFF      jz dword 0x0
00000006  0A                db 0x0a
# echo -e "\xeb\x00" | ndisasm -
00000000  EB00              jmp short 0x2
00000002  0A                db 0x0a
# echo -e "\xe9\x00\x00" | ndisasm -
00000000  E90000            jmp word 0x3
00000003  0A                db 0x0a
# echo -e "\xe9\x00\x00\x00\x00" | ndisasm - -b32
00000000  E900000000        jmp dword 0x5
00000005  0A                db 0x0a
# echo -e "\xea\x00\x00\x34\x12" | ndisasm -
00000000  EA00003412        jmp word 0x1234:0x0
00000005  0A                db 0x0a
# echo -e "\xea\x00\x00\x00\x00\x34\x12" | ndisasm - -b 32
00000000  EA000000003412    jmp dword 0x1234:0x0
00000007  0A                db 0x0a
Categories: C/C++ Tags: ,

BIOS Boot Sequence

October 17th, 2013 No comments

First, from Intel’s manuals 3A 9.1.4:

The first instruction that is fetched and executed following a hardware reset is located at physical address FFFFFFF0H. This address is 16 bytes below the processor’s uppermost physical address. The EPROM containing the software-initialization code must be located at this address.

The address FFFFFFF0H is beyond the 1-MByte addressable range of the processor while in real-address mode. The processor is initialized to this starting address as follows. The CS register has two parts: the visible segment selector part and the hidden base address part. In real-address mode, the base address is normally formed by shifting the 16-bit segment selector value 4 bits to the left to produce a 20-bit base address. However, during a hardware reset, the segment selector in the CS register is loaded with F000H and the base address is loaded with FFFF0000H. The starting address is thus formed by adding the base address to the value in the EIP register (that is, FFFF0000 + FFF0H = FFFFFFF0H).

The first time the CS register is loaded with a new value after a hardware reset, the processor will follow the normal rule for address translation in real-address mode(that is, [CS base address = CS segment selector * 16]). To insure that the base address in the CS register remains unchanged until the EPROM based software-initialization code is completed, the code must not contain a far jump or far call or allow an interrupt to occur (which would cause the CS selector value to be changed).

Two screenshots show the instructions at addresses FFFFFFF0H and FFFF0H (shadow BIOS, see below) and their jumps. The first one shows an AMI BIOS, the second a Phoenix BIOS. The high BIOS of AMI jumps directly to the shadowed one, and both the high and the shadowed one jump to the same address. But the high BIOS of Phoenix just keeps running at high addresses. The first instruction of both BIOSes after all the jumps is FAh, i.e. cli (disable interrupts). I’m not going to do more reverse engineering. 🙂

NOTE: Main memory is not initialized yet at this time. From here:

The motherboard ensures that the instruction at the reset vector is a jump to the memory location mapped to the BIOS entry point. This jump implicitly clears the hidden base address present at power up. All of these memory locations have the right contents needed by the CPU thanks to the memory map kept by the chipset. They are all mapped to flash memory containing the BIOS since at this point the RAM modules have random crap in them.

The reset vector is simply FFFFFFF0h. Now, POST is started as described here:

POST stands for Power On Self Test. It’s a series of individual functions or routines that perform various initialization and tests of the computers hardware. BIOS starts with a series of tests of the motherboard hardware. The CPU, math coprocessor, timer IC’s, DMA controllers, and IRQ controllers. The order in which these tests are performed varies from motherboard to motherboard. Next, the BIOS will look for the presence of video ROM between memory locations C000:000h and C780:000h. If a video BIOS is found, It’s contents will be tested with a checksum test. If this test is successful, the BIOS will initialize the video adapter. It will pass controller to the video BIOS, which will inturn initialize itself and then assume controller once it’s complete. At this point, you should see things like a manufacturers logo from the video card manufacturer video card description or the video card BIOS information. Next, the BIOS will scan memory from C800:000h to DF800:000h in 2KB increments. It’s searching for any other ROM’s that might be installed in the computer, such as network adapter cards or SCSI adapter cards. If a adapter ROM is found, it’s contents are tested with a checksum test. If the tests pass, the card is initialized. Controller will be passed to each ROM for initialization then the system BIOS will resume controller after each BIOS found is done initializing. If these tests fail, you should see a error message displayed telling you “XXXX ROM Error”. The XXXX indicates the segment address where the faulty ROM was detected. Next, BIOS will begin checking memory at 0000:0472h. This address contains a flag which will tell the BIOS if the system is booting from a cold boot or warm boot. A value of 1234h at this address tells the BIOS that the system was started from a warm boot. This signature value appears in Intel little endian format, that is, the least significant byte comes first, they appear in memory as the sequence 3412. 
In the event of a warm boot, the BIOS will will skip the POST routines remaining. If a cold start is indicated, the remaining POST routines will be run.

NOTE: Main memory is initialized in POST. Main part of memory initialization code is complicated, and is directly provided by Intel which is known as MRC(Memory Reference Code).

There’s one step in POST called BIOS Shadowing:

Shadowing refers to the technique of copying BIOS code from slow ROM chips into faster RAM chips during boot-up so that any access to BIOS routines will be faster. DOS and other operating systems may access BIOS routines frequently. System performance is greatly improved if the BIOS is accessed from RAM rather than from a slower ROM chip.

A DRAM control register PAM0(Programmable Attribute Map) makes it possible to independently redirect reads and writes in the BIOS ROM area to main memory. The idea is to allow for RAM shadowing which allows read-access for ROMs to come from main memory whereas writes will continue to go to ROMs. Refer to Intel’s MCH datasheet for details:

This register controls the read, write, and shadowing attributes of the BIOS area from 0F0000h–0FFFFFh. The (G)MCH allows programmable memory attributes on 13 Legacy memory segments of various sizes in the 768 KB to 1 MB address range. Seven Programmable Attribute Map (PAM) Registers are used to support these features. Cacheability of these areas is controlled via the MTRR registers in the processor.

Big real mode (or unreal mode) is used to address memory beyond 1M, as BIOS ROMs become larger and larger. In big real mode, one or more data segment registers have been loaded with 32-bit addresses and limits, but the code segment stays unchanged:

Real Mode Big Real Mode Protected Mode
Code segment(cs) 1M 1M 4G
Data segments(ds, es, fs, gs) 1M 4G 4G

Protected mode can also address 4G of memory. But since the BIOS is mainly written for real mode, big real mode is the better choice for addressing.

Then, the BIOS goes on to find a bootable device, see Wikipedia:

The BIOS selects candidate boot devices using information collected by POST and configuration information from EEPROM, CMOS RAM or, in the earliest PCs, DIP switches. Option ROMs may also influence or supplant the boot process defined by the motherboard BIOS ROM. The BIOS checks each device in order to see if it is bootable. For a disk drive or a device that logically emulates a disk drive, such as a USB Flash drive or perhaps a tape drive, to perform this check the BIOS attempts to load the first sector (boot sector) from the disk to address 7C00 hexadecimal, and checks for the boot sector signature 0x55 0xAA in the last two bytes of the sector. If the sector cannot be read (due to a missing or blank disk, or due to a hardware failure), or if the sector does not end with the boot signature, the BIOS considers the disk unbootable and proceeds to check the next device. Another device such as a network adapter attempts booting by a procedure that is defined by its option ROM (or the equivalent integrated into the motherboard BIOS ROM). The BIOS proceeds to test each device sequentially until a bootable device is found, at which time the BIOS transfers control to the loaded sector with a jump instruction to its first byte at address 7C00 hexadecimal (1 KiB below the 32 KiB mark).

After all of the above, BIOS initialization is finished. It’s your turn to take control of your system from address 0000:7c00!!

Why this address? It’s not defined by Intel nor Microsoft. It was decided by IBM PC 5150 BIOS developer team(David Bradley). See here:

BIOS developer team decided 0x7C00 because:

– They wanted to leave as much room as possible for the OS to load itself within the 32KB.
– 8086/8088 used 0x0 – 0x3FF for interrupts vector, and BIOS data area was after it.
– The boot sector was 512 bytes, and stack/data area for boot program needed more 512 bytes.
– So, 0x7C00, the last 1024B of 32KB was chosen.

Categories: Linux Tags: , , ,