Coding in assembly in linux
In the 90s, when I got my first PC with an 80286 CPU (12 MHz, 1 MB of RAM, VGA), I started coding in assembly. I ran my code either under DOS or on the bare machine without an OS, by letting the BIOS load and run the code stored in sector 0 (512 bytes) of the boot disk.
I was using DOS for accessing the files on the filesystem.
TASM (Turbo Assembler) was the assembler and Turbo Debugger was the debugger.
So recently, I decided to try coding in assembly in linux with nasm.
Resources
I collected some information sources for assembly coding:
- x86_64 tutorial
- i386 tutorial with a memory management example calling sys_brk
- nasm documentation
- linux syscall list for x86_64 and the source repo
- x86 and amd64 instruction reference: an easy to navigate Instruction reference
- Intel 64 and IA-32 Architectures Software Developer Manuals
- i386 assembly library
Coding
I found an ide for programming in assembly called sasm, there is a code editor and a debugger showing the memory and registers, to install run:
apt-get install sasm
Then go to settings, choose nasm and 64bit mode.
I didn't use sasm much because I usually use vim, the terminal, shell scripts and gdb.
I noticed a few good things in nasm:
- it is easy to declare sections and they can be declared anywhere in the code
- it is possible to compute the length of a buffer with `$-varname`
- it has structs like C (`struc`)
- it has macros
- it has a preprocessor
In my trial program, I used macros to call functions with parameters. The macros wrap the functions and save the registers used for the parameters.
Calling linux syscalls is quite simple:
- set syscall number in rax
- set the other parameters in registers (rdi, rsi, rdx, r10, r8, r9, in that order)
- execute syscall
Having Linux deal with the hardware makes things much easier than running code without an operating system, and Linux has a lot of drivers for all sorts of hardware.
Also when a program crashes, the process stops and there is no need to reboot the machine.
I created a program that prints the number of arguments and the arguments themselves. It finishes by printing 'Hello world' in 3 ways (call, call with struc, macro).
For a more advanced program, check out spartasm a spartan server.
spartasm (gemini)
spartasm (http)
Here is a library I created to try the nasm features (file: libInc.asm):
%ifndef libInc
%define libInc
; syscallpush / syscallpop — paired helpers that save and restore the two
; registers every wrapper in this file overwrites before a syscall:
; rax (syscall number) and rdi (first argument).
; They must always be used together: syscallpop pops in the reverse order
; of syscallpush.
%macro syscallpush 0
push rax
push rdi
%endmacro
%macro syscallpop 0
pop rdi
pop rax
%endmacro
; oscall — issue `syscall` while keeping rcx and r11 intact for the caller.
; The x86-64 `syscall` instruction overwrites rcx and r11, so both are
; parked on the stack for the duration of the call.
; The kernel's result is left in rax.
%macro oscall 0
        push    r11
        push    rcx
        syscall
        pop     rcx
        pop     r11
%endmacro
; putsParams — argument record consumed by putsStruc.
; Field names begin with a dot, so they are referenced as
; putsParams.buf / putsParams.len.
struc putsParams
        .buf:   resq    1               ; address of the bytes to write
        .len:   resq    1               ; number of bytes to write
endstruc
; puts — write a buffer to stdout with sys_write.
; In:    rsi = buffer address, rdx = byte count
; Out:   nothing (rax and rdi are restored, so the syscall result is dropped)
; Saved: rax, rdi here; rcx and r11 inside oscall
puts:
        push    rax
        push    rdi
        mov     edi, 1                  ; fd 1 = stdout
        mov     eax, 1                  ; syscall number 1 = sys_write
        oscall
        pop     rdi
        pop     rax
        ret
; putsStruc — write to stdout the buffer described by a putsParams record.
; In:    rdi = address of a putsParams record
; Out:   nothing (rax, rdi, rsi and rdx are all restored)
putsStruc:
        push    rax
        push    rdi
        push    rsi
        push    rdx
        mov     rsi, [rdi + putsParams.buf]     ; buffer address
        mov     rdx, [rdi + putsParams.len]     ; byte count
        mov     edi, 1                          ; fd 1 = stdout (record pointer no longer needed)
        mov     eax, 1                          ; syscall number 1 = sys_write
        oscall
        pop     rdx
        pop     rsi
        pop     rdi
        pop     rax
        ret
; putsMacro buf, len — inline sys_write to stdout.
;   %1 = buffer address, %2 = byte count
; rax, rdi, rsi and rdx are saved and restored, so the expansion leaves all
; general-purpose registers as it found them (the syscall result is dropped).
; %1 is loaded into rsi before %2 is read, so %2 must not be rsi.
%macro putsMacro 2
        push    rax
        push    rdi
        push    rsi
        push    rdx
        mov     rsi, %1                 ; buffer address
        mov     rdx, %2                 ; byte count
        mov     edi, 1                  ; fd 1 = stdout
        mov     eax, 1                  ; syscall number 1 = sys_write
        oscall
        pop     rdx
        pop     rsi
        pop     rdi
        pop     rax
%endmacro
; strlen — size of a NUL-terminated string, *including* the terminator.
; In:    rdi = address of the string
; Out:   rax = number of bytes up to and including the terminating 0
;        (one more than C's strlen would return)
; rdi is left untouched.
strlen:
        xor     eax, eax                ; rax = index of the byte being examined
        jmp     .check
.advance:
        inc     rax
.check:
        cmp     byte [rdi + rax], 0
        jne     .advance                ; keep scanning until the terminator
        inc     rax                     ; count the terminator as well
        ret
; printfunc — write a NUL-terminated string to stdout.
; In:    rdi = address of the string
; Out:   nothing; rax, rdi, rsi and rdx are restored (rcx/r11 by oscall)
;
; strlen in this file returns the size *including* the terminator, so one is
; subtracted before the write: the NUL byte is not part of the text and must
; not reach stdout (it shows up as a stray \0 when the output is piped).
; rdi is saved here as well, so calling printfunc directly (without the
; `print` macro) no longer loses the caller's rdi.
printfunc:
        push    rax
        push    rdi
        push    rsi
        push    rdx
        call    strlen                  ; rax = length + 1 (terminator counted)
        mov     rsi, rdi                ; buffer = the string itself
        lea     rdx, [rax - 1]          ; byte count without the terminator
        mov     eax, 1                  ; syscall number 1 = sys_write
        mov     edi, 1                  ; fd 1 = stdout
        oscall
        pop     rdx
        pop     rsi
        pop     rdi
        pop     rax
        ret
; print <operand> — print a NUL-terminated string.
; Wraps printfunc so the call site is a single line: the operand is loaded
; into rdi (printfunc's parameter register) and the caller's rdi is saved
; and restored around the call.
; %1 may be anything `mov rdi, %1` accepts (label, register, ...).
%macro print 1
push rdi
mov rdi, %1
call printfunc
pop rdi
%endmacro
; %macro print 1
; push rdi
; push rax
; push rsi
; push rdx
;
; mov rdi, %1
; call strlen
;
; mov rsi, rdi
; mov rdx, rax
; mov rax, 1 ; syscall sys_write
; mov rdi, 1 ; fd stdout
; oscall
;
; pop rdx
; pop rsi
; pop rax
; pop rdi
; %endmacro
;
; exit <code> — terminate the process with sys_exit (syscall 60).
;   %1 = exit status; anything `mov rdi, %1` accepts
; The status is loaded into rdi *before* rax receives the syscall number,
; so `exit rax` passes the caller's rax instead of the constant 60.
; Does not return.
%macro exit 1
        mov     rdi, %1                 ; exit status (read before rax is overwritten)
        mov     rax, 60                 ; syscall number 60 = sys_exit
        syscall
%endmacro
section .bss
putibufSize equ 30                      ; 20 digits (max for 64 bits) + newline + NUL fit with room to spare
putibuf resb putibufSize                ; scratch buffer shared by every itoa call (not reentrant)
section .text
; itoa — convert an unsigned 64-bit integer to decimal text plus a newline.
; In:    rax = value to convert
; Out:   r9  = address of the first (most significant) digit, inside putibuf
;        r11 = number of bytes from r9 through the newline (digit count + 1);
;              a NUL terminator follows the newline but is not counted
; Clobb: rax, rbx, rdx, flags
;
; The text is built backwards from the end of putibuf: NUL, newline, then the
; digits from least to most significant.
;
; Fixes over the previous version:
;  * r9 now points AT the first digit. It used to be decremented once more
;    after the last store, so callers printed the byte before the number —
;    a stale digit from an earlier, longer number (10 followed by 9 was
;    printed as "19").
;  * The text is anchored at the end of the buffer. It used to be anchored at
;    putibuf+10, leaving room for only 9 digits before writing below putibuf.
itoa:
        mov     rbx, 10                         ; divisor
        mov     r9, putibuf + putibufSize - 1   ; last byte of the buffer
        mov     byte [r9], 0                    ; NUL terminator
        dec     r9
        mov     byte [r9], 0x0A                 ; newline
        mov     r11, 1                          ; bytes emitted so far: the newline
.next_digit:
        xor     edx, edx                        ; rdx:rax is the dividend; the high half must be 0
        div     rbx                             ; rax = quotient, rdx = least significant digit
        add     dl, '0'                         ; digit -> ASCII
        dec     r9                              ; step to the next free slot (towards lower addresses)
        mov     [r9], dl
        inc     r11
        test    rax, rax
        jnz     .next_digit                     ; more digits while the quotient is non-zero
        ret                                     ; r9 is left on the most significant digit
; putifunc — print the integer in rax on stdout.
; In:    rax = value to print
; Out:   rax is NOT preserved (it is overwritten for the conversion and the
;        syscall); the `puti` macro saves it for its callers.
; Saved: rbx, rdx, r9, r11 (all written by itoa) and rsi, rdi (syscall
;        arguments); rcx/r11 are additionally protected inside oscall.
putifunc:
        push    rbx
        push    rdx
        push    rsi
        push    rdi
        push    r9
        push    r11
        call    itoa                    ; r9 = text address, r11 = byte count
        mov     rsi, r9                 ; buffer  = text produced by itoa
        mov     rdx, r11                ; length  = size reported by itoa
        mov     edi, 1                  ; fd 1 = stdout
        mov     eax, 1                  ; syscall number 1 = sys_write
        oscall
        pop     r11
        pop     r9
        pop     rdi
        pop     rsi
        pop     rdx
        pop     rbx
        ret
; puti <operand> — print an integer.
; Loads %1 into rax (putifunc's parameter register) and calls putifunc.
; The caller's rax is saved and restored here because putifunc returns with
; rax overwritten.
; %1 may be anything `mov rax, %1` accepts (register, immediate, ...).
%macro puti 1 ; print int argument
push rax
mov rax, %1
call putifunc
pop rax
%endmacro
section .data
newline db 0x0A ; a single line-feed byte, written by printnewline
section .text
; printnewline — write one line feed to stdout.
; Expands to putsMacro with the 1-byte `newline` buffer, so it inherits
; putsMacro's register saving (rax, rdi, rsi, rdx).
%macro printnewline 0
putsMacro newline, 1
%endmacro
; printnfunc — write a NUL-terminated string to stdout, then a newline.
; In:    rdi = address of the string
; Out:   nothing; rax, rdi, rsi and rdx are restored (rcx/r11 by oscall)
;
; strlen in this file returns the size *including* the terminator, so one is
; subtracted before the write: otherwise a NUL byte is emitted between the
; text and the newline. rdi is saved here too, so direct callers (not going
; through the `printn` macro) keep their rdi.
printnfunc:
        push    rax
        push    rdi
        push    rsi
        push    rdx
        call    strlen                  ; rax = length + 1 (terminator counted)
        mov     rsi, rdi                ; buffer = the string itself
        lea     rdx, [rax - 1]          ; byte count without the terminator
        mov     eax, 1                  ; syscall number 1 = sys_write
        mov     edi, 1                  ; fd 1 = stdout
        oscall
        printnewline                    ; saves and restores every register it touches
        pop     rdx
        pop     rsi
        pop     rdi
        pop     rax
        ret
; printn <operand> — print a NUL-terminated string followed by a newline.
; Loads %1 into rdi (printnfunc's parameter register) and calls printnfunc;
; the caller's rdi is saved and restored around the call.
; %1 may be anything `mov rdi, %1` accepts (label, register, ...).
%macro printn 1
push rdi
mov rdi, %1
call printnfunc
pop rdi
%endmacro
%endif
Here is the code for the main function (file: arguments.asm):
section .data
; msg:  length-counted greeting (no terminator); msglen covers the text and
;       its trailing newline.
msg:    db      "hello, world!", 10
msglen  equ     $ - msg
; msg2: NUL-terminated greeting for the strlen-based `print` macro; it
;       begins and ends with a newline.
msg2:   db      10, "Hello, world!", 10, 0
section .text
global _start
%include "libInc.asm"
section .data
; params — a putsParams instance describing msg, passed to putsStruc.
; The length covers the text only, without msg's trailing newline.
params:
istruc putsParams
        at putsParams.buf, dq msg               ; buffer address
        at putsParams.len, dq msglen - 1        ; = 13: greeting without the newline
iend
section .text
; _start — program entry point (no libc).
; The first pop yields argc; each following pop yields one argv pointer.
; For every argument the remaining count is printed (argc, argc-1, ..., 1)
; followed by the argument itself. The greeting is then printed four times:
; via `call puts`, via putsStruc with a parameter record, via putsMacro, and
; via the strlen-based `print` macro.
_start:
        pop     rcx                     ; rcx = argc
.printArgV:
        puti    rcx                     ; countdown of arguments still to print
        pop     rsi                     ; next argv pointer
        printn  rsi
        dec     rcx
        jnz     .printArgV              ; until every argument has been printed

        ; 1) plain call: buffer in rsi, length in rdx
        mov     rdx, msglen
        mov     rsi, msg
        call    puts

        ; 2) parameters passed through a putsParams record
        mov     rdi, params
        call    putsStruc

        ; 3) macro with inline arguments (text only, without the newline)
        putsMacro msg, msglen - 1

        ; 4) NUL-terminated string
        print   msg2

        exit    0
To compile run the commands:
nasm -f elf64 -o arguments.o arguments.asm
ld -o arguments arguments.o
DOS Assembly code
Here is some example code for the 80486 in 16-bit mode under DOS, written in Borland Turbo Assembler (TASM) style.
Open a file with DOS:
; Open an existing file through the DOS services interrupt (INT 21h).
; NOTE(review): this excerpt does not check the result after the call
; (DOS reports the handle or an error code in AX, with the carry flag set on error).
Mov AH,3Dh ; DOS function 3Dh = open file
Mov AL,01000010b ; access/sharing mode byte; low bits 010b request read/write access
Mov DX, OFFSET filename ; DS:DX -> file name (`filename` is defined elsewhere)
Int 21h
Generate a beep sound:
; Start a tone on the PC speaker: program the timer that drives the speaker,
; then switch the speaker on. Nothing in this excerpt turns the sound off again.
Mov AL,182 ; 182 = 0B6h: timer control word (standard PC: channel 2, low byte then high byte, square wave)
Out 43h,AL ; timer command port
Mov AX,170 ; 16-bit divisor that sets the pitch
Out 42h,AL ; send the low byte of the divisor ...
Mov AL,AH
Out 42h,AL ; ... then the high byte
In AL,61h ; read the current speaker control bits
Or AL,3 ; set bits 0 and 1 to connect the timer to the speaker
Out 61h,AL
Print a string:
; Print a string through DOS function 09h (INT 21h).
; The DOS API expects the string at DS:DX to be terminated by '$'.
mov ah,9 ; DOS function 09h = display string
mov dx,OFFSET string ; offset of the text (`string` is defined elsewhere)
int 21h
To set the graphical mode, the hardware had to be detected and then configured:
; Detect a VGA graphics card with video BIOS call INT 10h, AX=1A00h.
; The BIOS answers AL=1Ah when the function is supported.
mov ax,1A00h
int 10h
cmp al,1Ah
je vgaDetected ; `vgaDetected` is not part of this excerpt
; Set graphics mode 13h: 320x200 with 256 colors.
mov ax,0013h ; AH=00h (set video mode), AL=13h
int 10h
; Reprogram the VGA registers to switch from mode 13h to a 360x480
; 256-color mode, then clear video memory.
; Every `out dx,ax` writes a register index (AL) and its value (AH) in one go.
mov dx,3C4h ; sequencer index port (called SC_INDEX further down)
mov ax,0604h ; sequencer register 4 <- 06h (presumably disables chain-4 addressing — confirm against a VGA reference)
out dx,ax
mov ax,0100h ; sequencer register 0 <- 01h
out dx,ax
mov dx,3C2h
mov al,0E7h ; value written to port 3C2h (presumably selects dot clock / sync polarity — confirm)
out dx,al
mov dx,3C4h
mov ax,0300h ; sequencer register 0 <- 03h
out dx,ax
mov dx,3D4h
mov al,11h ; select register 11h on port 3D4h
out dx,al
inc dx ; 3D5h: data port for the selected register
in al,dx
and al,7Fh ; clear bit 7 of register 11h (presumably the write-protect bit — confirm)
out dx,al
dec dx ; back to 3D4h
cld ; string instructions move forward
mov si,offset vptbl ; table of index/value words (`vptbl` is defined elsewhere)
mov cx,11h ; number of words (17)
@b: lodsw ; ax = next index/value pair from the table
out dx,ax
loop @b
mov dx,3C4h ; SC_INDEX
mov al,2 ; MAP_MASK
mov ah,00001111b
out dx,ax ; this gives access to all 4 bit planes
mov ax,0A000h
mov es,ax ; es = video memory segment
sub di,di ; di = 0
mov ax,di ; ax = 0: fill value
mov cx,0A8C0h ; 360*480/4
cld
rep stosw ; NOTE(review): stosw stores words, so this writes 2*0A8C0h bytes although 360*480/4 is a byte count per plane — confirm the intended count
; Return to text mode (video mode 03h).
mov ax,0003 ; AH=00h (set video mode), AL=03h
int 10h
Switch the CPU to 32bit protected mode (short version):
; InitProtecM — switch the CPU into 32-bit protected mode and, from the
; RetourP label on, restore the real-mode environment ("short version"
; excerpt: the descriptor tables, TestA20 and the labels this code jumps to
; are defined elsewhere, and the closing ENDP is not part of the excerpt).
;
; Outline, as far as this excerpt shows:
;   1. leave immediately if the CPU is already in protected mode
;   2. compute linear base addresses (segment * 16) and patch them into the
;      GDTR/IDTR images and the segment descriptors
;   3. point all 256 IDT entries at a null handler
;   4. enable the A20 line through ports 64h/60h unless HMA is already active
;   5. mask the hardware interrupts, set the PE bit and far-jump to selector 8
;   6. (RetourP) reload the real-mode segments and IDT, unmask the
;      interrupts and put the A20 line back
;
; Change in this revision: the original comment text was French with its
; accented letters lost; it is translated to English, and a stray line
; ("1 /4") that had spilled out of the "Granularit..." comment — and would
; not assemble — is folded back into that comment. No instruction is changed.
InitProtecM PROC NEAR
    SMSW AX
    Test AX,1                           ; bit 0 of the machine status word
    Jz InitLeModeProtege                ; clear -> not yet in protected mode
    Jmp DejaEnModeProtege               ; already in protected mode
InitLeModeProtege:
    Mov DI,2
    Xor EAX,EAX
    Mov AX,DS
    Shl EAX,4                           ; segment * 16 = linear base of DS
    Mov [DI],EAX                        ; GDTR
    LGDT FWord Ptr [Offset IGDTR]
    Mov DI,Offset IIDTR+2
    Xor EAX,EAX
    Mov AX,DS
    Shl EAX,4
    Add EAX,Offset TabIDT               ; linear address of the IDT
    Mov [DI],EAX                        ; IDTR
    Mov SI,Offset TabIDT
    Mov CX,256                          ; one pass per interrupt vector
InitInterruptionProt:
    Mov Word Ptr [SI],Offset InterruptionNul ; handler offset of this entry = null handler
    Add SI,8                            ; next 8-byte entry
    Loop InitInterruptionProt
    Mov DI,Offset CODEP
    Xor EAX,EAX
    Mov AX,CODE2
    Shl EAX,4
    Or [DI+2],EAX                       ; base of the 32-bit protected-mode code segment
    Mov DI,Offset CODER
    Xor EAX,EAX
    Mov AX,CS
    Shl EAX,4
    Or [DI+2],EAX                       ; base of the 16-bit real-mode code segment
    Mov DI,Offset DATAR
    Xor EAX,EAX
    Mov AX,DATA
    Shl EAX,4
    Or [DI+2],EAX                       ; base of the real-mode data segment
    Mov Ax,TailleEtendue                ; initialise the segment that sits
    Shr AX,2                            ; above the first megabyte
    Mov Word Ptr [Offset DATAP],AX      ; (first word of the DATAP descriptor)
    Mov EAX,AdSeg32                     ; granularity 1/4 (original comment: "Granularit 1 /4")
    Mov EBX,EAX                         ; 4 KB page
    Shr EBX,24                          ; BL = top byte of the address
    And EAX,00FFFFFFh                   ; EAX = low 24 bits of the address
    Or DWord Ptr [Offset DATAP+2],EAX
    Mov Byte Ptr [Offset DATAP+7],BL
    Mov DI,Offset PILERP
    Xor EAX,EAX
    Mov AX,SS
    Shl EAX,4
    Or [DI+2],EAX                       ; base of the stack segment
    CLI
    Cmp XmsTaille,0
    Jne XMSHMA
ActiveHMA:
    In AL,61h                           ; start of the A20 line setup
    Call TestA20
    Jne ErreurA20
    Mov AL,0D1h
    Out 64h,AL
    Call TestA20
    Jne ErreurA20
    Mov AL,0DFh
    Out 60h,AL
    Call TestA20                        ; end of the A20 setup
    Jne ErreurA20
    Jmp ContinueLInitProt
XMSHMA:
    Cmp HMA,1
    Je ContinueLInitProt
    Jmp ActiveHMA
ContinueLInitProt:
    In AL,21h                           ; mask every hardware interrupt
    Mov IntMat,AL                       ; after saving
    Mov AL,0FFh                         ; the previous mask value
    Out 21h,AL
    SMSW AX
    Or AX,1
    LMSW AX                             ; switch to protected mode
    DB 0EAh                             ; Jmp Far ptr [8:0]
    DW 0,8                              ; segment CODE2, offset 0
RetourP:
    Mov AX,DATA
    Mov DS,AX
    Mov EAX,DWord PTR [Offset PILERP+2] ; stack: linear base stored earlier
    Shr EAX,4                           ; back to a real-mode segment value
    Mov SS,AX
    NOP
    LIDT Qword Ptr [IDTRR]              ; real-mode interrupt table
    Mov AL,IntMat                       ; re-enable the hardware
    Out 21h,AL                          ; interrupts (restore the saved mask)
    Cmp HMA,1
    Je FinGestionHMA
    Call TestA20                        ; put the A20 line back
    Jne ErreurA20
    Mov AL,0D1h
    Out 64h,AL
    Call TestA20
    Jne ErreurA20
    Mov AL,0DDh
    Out 60h,AL
    Call TestA20                        ; end of the A20 handling
    Jne ErreurA20
FinGestionHMA:
    STI
    Jmp FinDuModeProtegeRetourAuDos
hashtags: #assembly