FFT ON AMD64
2012-06-05
Fast Fourier Transform with x86-64 assembly language
This is an old application I did a while ago. I did this in 2005 when I got my first 64bit CPU (AMD).
The first thing I did after installing my new CPU was to open VI and start coding an FFT using 64 bit registers. This is old news, but 64 bit at that time was awesome. Not only can you store 64 bits in a register, but you get 16 general purpose registers (plus 16 XMM registers)!
The only really annoying thing with this architecture is that they don't provide a bit reversal instruction. I don't understand why a simple RISC processor like the AVR32 (lookup "brev") has one but not a high end CISC like Intel or AMD. I don't actually show the bit reversal part of the FFT in here though.
By the way, I remember doing some tests with this algorithm and, although I don't remember the results exactly (7 years ago), I remember that it was running at least 5 times faster than most other FFTs in other libraries.
//; ---------------------------------------------------------------------------
//; x8664realfft (float* source,float** spectrum,long size)
//; x8664realifft(float* source,float** spectrum,long size)
//;
//; GAS / AT&T syntax. Arguments are read from rdi (source), rsi (spectrum) and
//; rdx (size), i.e. the System V AMD64 argument registers.
//;
//; Both entry points share the body at "fftentry"; they differ only in the
//; per-lane sign mask they leave in xmm10:
//;   x8664realfft  : xmm10 =  1, 1, 1, 1
//;   x8664realifft : xmm10 = -1, 1,-1, 1   (lane 0 first)
//; The twiddle factor is multiplied by this mask in the inner loop, and lane 0
//; of the mask selects the final step (copy vs. divide by size).
//;
//; Works in place on the two float arrays spectrum[0] and spectrum[1] (the
//; comments below call them .re and .im). The data must already be in
//; bit-reversed order; that step is not part of this routine.
//;   fft  path: after the butterflies, both arrays are divided by size in place.
//;   ifft path: after the butterflies, size floats are copied from spectrum[0]
//;              to source.
//;
//; spectrum[0], spectrum[1] and source are accessed with movaps / movntdq, so
//; they have to be 16-byte aligned.
//; Saved and restored: rbp, r8-r15. Modified and not restored: rax, rcx, rdx,
//; rsi, rdi, xmm0-xmm3, xmm5, xmm6, xmm10 and the x87 stack (left balanced).
//;
//; Lane lists in the comments below run from the highest lane to the lowest.
//;
//; NOTE(review): there is no guard for small sizes. With size < 4 the "loop"
//; counter at cp/nrm starts at 0 and wraps; with log2(size) == 0 the dec/jnz
//; counters of l1/l2 wrap the same way. Presumably size is expected to be a
//; power of two >= 4 -- confirm against callers.
//; ---------------------------------------------------------------------------
x8664realifft:
//; xmm10 = 1.0 in all four lanes ...
mov $1,%eax
cvtsi2ss %eax,%xmm10
pshufd $0b00000000,%xmm10,%xmm10
//; ... then lane 0 = -1.0 and the shuffle copies lane 0 into lane 2,
//; giving lanes 0..3 = -1, 1, -1, 1
mov $-1,%eax
cvtsi2ss %eax,%xmm10
pshufd $0b11000100,%xmm10,%xmm10
jmp fftentry
x8664realfft:
//; xmm10 = 1.0 in all four lanes
mov $1,%eax
cvtsi2ss %eax,%xmm10
pshufd $0b00000000,%xmm10,%xmm10
fftentry:
//; standard frame, then a second copy of the frame base is pushed so that rbp
//; itself can be reused as the scratch-buffer pointer; the epilogue pops that
//; copy back before "leave"
pushq %rbp
movq %rsp,%rbp
pushq %rbp
//; reserve 0xFF bytes of scratch space
//; NOTE(review): 0xFF is odd, so rsp is not even 8-byte aligned from here on;
//; nothing is called from this routine, but 0x100 would keep the pushes aligned
subq $0xFF,%rsp
movq %rsp,%rbp
//; make a 16bytes aligned buffer: rbp = (rsp + 16) rounded down to a multiple
//; of 16, which lies inside the reserved area. (%rbp)..15(%rbp) holds four
//; floats for movaps, 16(%rbp) holds a 64-bit integer temporary.
addq $16,%rbp
andq $0xFFFFFFFFFFFFFFF0,%rbp
//; save r8-r15 (restored in reverse order at cleanexit)
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %r11
pushq %r10
pushq %r9
pushq %r8
//; rcx = size (third argument); kept on the stack until the final step
movq %rdx,%rcx
pushq %rcx
//; rdx = source (first argument); kept on the stack until the final step
mov %rdi,%rdx
pushq %rdx
//; rdi = spectrum[0]
movq (%rsi), %rdi
addq $8, %rsi
//; rsi = spectrum[1]
movq (%rsi), %rsi
//; r8 = log2(N), r14= N
//; computed on the x87: st(1) = 1.0, st(0) = N, fyl2x leaves 1.0 * log2(N).
//; The slot that receives the integer result is pushed as zero first, so the
//; bytes that fistp does not write are already 0 when the slot is popped.
pushq %rcx
fld1
fild (%rsp)
xorq %r8,%r8
pushq %r8
fyl2x
fistp (%rsp)
popq %r8
popq %r14
//; bit reversal has already been done prior to calling this function
//; r9 = nLargeSpectrum        (starts at N, halved at the top of every pass)
//; r10 = nPointsLargeSpectrum (starts at 1, doubled at the top of every pass)
//; r11 = distance in elements between point1 and point2, and inner loop count
//;       (starts at 1, doubled at the end of every pass)
//; r14/r15 = base pointers of spectrum[0]/spectrum[1] for the rest of the routine
movq %r14,%r9
movq $1,%r10
movq $1,%r11
mov %rdi,%r14
mov %rsi,%r15
//;load 2PI in st(0) (pi + pi); it stays on the x87 stack until cleanexit
fldpi
fldpi
faddp %st(0),%st(1)
//; outer loop l1: one pass per bit, rcx = log2(N) passes
movq %r8,%rcx
l1: pushq %rcx
shrq $1,%r9
shlq $1,%r10
//; duplicate 2pi and divide the copy by r10 (passed through the stack);
//; afterwards st(0) = theta = 2pi / r10, st(1) = 2pi
fld %st(0)
pushq %r10
fidiv (%rsp)
popq %r10
//;xmm0 = 2*costheta[0],2*costheta[0],2*costheta[0],2*costheta[0]
//; cos(theta) is computed on a copy, so st(0) = theta, st(1) = 2pi still hold.
//; The rax push/pop only provides a stack slot to move the value from x87 to SSE.
pushq %rax
fld %st(0)
fcos
fstp (%rsp)
movss (%rsp),%xmm0
pshufd $0b00000000,%xmm0,%xmm0
popq %rax
//; double it: xmm0 = 2*cos(theta) in every lane
addps %xmm0,%xmm0
//; middle loop l2: rcx counts down from r9, one iteration per group
movq %r9,%rcx
l2: pushq %rcx
//; r12 = point1 (index *4bytes) r13 = point2 (index *4bytes)
//; point1 = r10 * (r9 - rcx), point2 = point1 + r11. mulq writes rdx:rax, so
//; rdx is saved around it.
movq %r10,%r12
movq %r9,%rax
subq %rcx,%rax
pushq %rdx
mulq %r12
popq %rdx
movq %rax,%r12
movq %r11,%r13
addq %r12,%r13
shlq $2,%r13
shlq $2,%r12
//; xmm2 = costheta[2],sintheta[2],costheta[1],sintheta[1]
//; Seed values for the recurrence in l3: the integer temporary is set to
//; r12 - 1, then r12 - 2, and theta is multiplied by it before each fsincos.
//; fsincos leaves the cosine in st(0) and the sine in st(1), so the buffer
//; receives cos, sin of the first angle at 0/4 and of the second at 8/12.
//; NOTE(review): r12 is the byte offset here (already shifted by 2); this
//; presumably relies on the periodicity of sin/cos -- confirm.
movq %r12,16(%rbp)
decq 16(%rbp)
fld %st(0)
fimul 16(%rbp)
fsincos
fstp (%rbp)
fstp 4(%rbp)
decq 16(%rbp)
fld %st(0)
fimul 16(%rbp)
fsincos
fstp 8(%rbp)
fstp 12(%rbp)
movaps (%rbp),%xmm2
//; swap the members of each pair so that the sine sits in the lower lane
pshufd $0b10110001 ,%xmm2,%xmm2
//; xmm1 (lower two lanes) = upper two lanes of xmm2, i.e. the cos/sin pair of
//; the second angle; movhlps leaves the upper two lanes of xmm1 unchanged
movhlps %xmm2,%xmm1
//; inner loop l3: r11 butterflies per group
movq %r11,%rcx
l3:
//; recurrence formula
//; xmm3 = w.re,w.im,w.re,w.im
//; new = 2*cos(theta) * xmm2 - xmm1; the lower half of the result is then
//; duplicated into the upper half, xmm1/xmm2 are shifted to hold the two most
//; recent values, and the copy in xmm3 is multiplied by the sign mask in xmm10
movaps %xmm2,%xmm3
mulps %xmm0,%xmm3
subps %xmm1,%xmm3
movlhps %xmm3,%xmm3
movaps %xmm2,%xmm1
movaps %xmm3,%xmm2
mulps %xmm10,%xmm3
//; xmm5 := c.im,c.re,c.re,c.im
//; c is the element at point2: one float from spectrum[0] and one from
//; spectrum[1], both at byte offset r13
movq %r14,%rdi
movq %r15,%rsi
addq %r13,%rdi
addq %r13,%rsi
movss (%rdi),%xmm5
pshufd $0b00000011,%xmm5,%xmm5
addss (%rsi),%xmm5
pshufd $0b00101000,%xmm5,%xmm5
//; xmm3 := inner product: re,re,im,im
//; (multiplication of c by the twiddle factor in xmm3)
mulps %xmm3,%xmm5
pshufd $0b11011101 ,%xmm5,%xmm3
pshufd $0b10001000 ,%xmm5,%xmm5
addsubps %xmm5,%xmm3
pshufd $0b10101111,%xmm3,%xmm3
//;xmm6 := sortedArray[point1].re,sortedArray[point1].re,sortedArray[point1].im,sortedArray[point1].im
//; loaded from spectrum[0] and spectrum[1] at byte offset r12
movq %r14,%rdi
movq %r15,%rsi
addq %r12,%rdi
addq %r12,%rsi
movss (%rdi),%xmm6
pshufd $0b00001111,%xmm6,%xmm6
addss (%rsi),%xmm6
pshufd $0b11100000,%xmm6,%xmm6
//; butterfly: one addsubps produces both point1 -/+ product results; the
//; shuffles below rotate each of the four floats into lane 0 for movss
addsubps %xmm3,%xmm6
pshufd $0b00100111,%xmm6,%xmm6
//; store to spectrum[0][point1], then spectrum[1][point1]
movss %xmm6,(%rdi)
pshufd $0b11100001,%xmm6,%xmm6
movss %xmm6,(%rsi)
//; store to spectrum[0][point2], then spectrum[1][point2]
movq %r14,%rdi
movq %r15,%rsi
addq %r13,%rdi
addq %r13,%rsi
pshufd $0b01001110,%xmm6,%xmm6
movss %xmm6,(%rdi)
pshufd $0b11100001,%xmm6,%xmm6
movss %xmm6,(%rsi)
//; increase point1 and point2 by 4 bytes (each index represent a float)
addq $4,%r12
addq $4,%r13
decq %rcx
jnz l3
//; end of l2 iteration: restore the group counter pushed at l2
popq %rcx
decq %rcx
jnz l2
//; remove theta from fpu stack (2pi becomes st(0) again)
fstp %st(0)
shlq $1,%r11
//; end of l1 iteration: restore the pass counter pushed at l1
popq %rcx
decq %rcx
jnz l1
//; rdx = source again (pushed in the prologue)
popq %rdx
//; rcx is already pushed in stack
//; xmm1 = (float)size in all four lanes, read from the saved copy of size
cvtsi2ss (%rsp),%xmm1
pshufd $0b00000000,%xmm1,%xmm1
popq %rcx
//; rcx = size / 4 = number of 16-byte vectors to process below
shrq $2,%rcx
movq %r14,%rdi
movq %r15,%rsi
//; is this a ifft or a fft?
//; lane 0 of xmm10 is -1 for x8664realifft and 1 for x8664realfft
cvtss2si %xmm10,%eax
cmp $-1,%eax
jne nrm
//; ifft: copy spectrum[0] to source, four floats per iteration
//; NOTE(review): "loop" decrements rcx before testing it, so rcx == 0 on entry
//; (size < 4) would wrap instead of skipping the copy
cp: movaps (%rdi),%xmm2
movntdq %xmm2,(%rdx)
addq $16,%rdi
addq $16,%rdx
loop cp
jmp cleanexit
//; fft: divide spectrum[0] and spectrum[1] by size in place, four floats per
//; iteration (same remark about rcx == 0 as for cp)
nrm:
movaps (%rdi),%xmm2
movaps (%rsi),%xmm3
divps %xmm1,%xmm2
divps %xmm1,%xmm3
movntdq %xmm2,(%rdi)
movntdq %xmm3,(%rsi)
addq $16,%rdi
addq $16,%rsi
loop nrm
cleanexit:
//; remove 2pi from the fpu stack
fstp %st(0)
//; restore r8-r15 in reverse push order
popq %r8
popq %r9
popq %r10
popq %r11
popq %r12
popq %r13
popq %r14
popq %r15
//; release the scratch area, reload the frame base saved by the second
//; "pushq %rbp" of the prologue, then tear down the frame
addq $0xFF,%rsp
popq %rbp
leave
ret
CLONING A HARD DRIVE
2012-05-17
Cloning a hard drive
In one of my computers, I have one hard drive that contains 2 partitions: 1 for the root filesystem and one for my /home partition. When I bought a new hard drive, I needed to clone the old one on the new one. This can be easily done with "dd" as long as your partitions are the same size. So I decided to keep the root filesystem with the same size, but wanted to grow the /home partition.
Create the partitions
First, you need to create the partitions on the new drive using fdisk. Remember to keep the same size for the partitions you wanna clone. If you create them smaller, you will end up with a corrupted filesystem. If you create them larger, you will not be able to access the extra space so it will be wasted. After creating the partitions, you don't need to create a filesystem on them (mkfs) since "dd" will copy the filesystem of the old partition along with its data. But of course, you will need to create a FS for the other partitions that won't be cloned.
Clone
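You need to clone your master boot record (which contains lilo/grub). We need to copy the first 512 bytes (the first sector). Be aware that the first sector also holds the partition table (its last 66 bytes: 64 bytes of table plus a 2-byte signature), so this copy replaces the partition table you just created with the old one; use bs=446 instead if you only want the boot loader code: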
dd if=/dev/sda of=/dev/sdb bs=512 count=1
Then, we can clone the partition:
dd if=/dev/sda1 of=/dev/sdb1 bs=4096 conv=noerror
At this point, my root partition was cloned successfully. For the other partition (/dev/sdb2), I had to create a new filesystem (mkfs) because my partition needed to be larger. After that, I copied the files manually using "cp".
CONFIGURING AND USING KVM-QEMU
2012-02-28
KVM Qemu
I was tired of Vmware Server's sloooooow web interface that only works half of the time.
I just couldn't take it anymore. So I started looking for other virtualization solutions. I found KVM.
KVM/QEmu is, by far, easier to use than VMWARE Server. The thing I like
about qemu is that there are no virtual machine files. You only create a virtual disk file but
the machine itself is built from the command line when invoking qemu. That means you have to "rebuild"
the machine every time you reload it. It looks painful but you just have to save your command in a script and invoke it.
So it comes down to this: a shell script is to qemu what a VMX file is to vmware. Don't ask me why,
but this is a strong point for me.
Installing and preparing KVM Qemu
- Compile kernel using KVM (see flags VIRTUALIZATION,KVM,KVM_AMD,KVM_INTEL)
- Download and Install qemu-kvm
- Install "tunctl"
- Make a network bridge script. You will need to create a script that is run after every reboot (put it in rc.local):
# Host-side network setup: puts eth0 and a TAP interface (tap0) into a
# bridge (br0) so the guest attached to tap0 shares the host's LAN.
# load the tun driver and create the TAP interface named tap0
modprobe tun
tunctl -t tap0
# bring eth0 down before reconfiguring it; it is brought back up below
# without an address, in promiscuous mode, as a member of the bridge
ifconfig eth0 down
# create the bridge device br0
brctl addbr br0
# eth0 and tap0 get no IP address of their own (0.0.0.0); both are set
# promiscuous and brought up
ifconfig eth0 0.0.0.0 promisc up
ifconfig tap0 0.0.0.0 promisc up
# set the IP address of the bridge interface. This is the interface that we will use from now on. So use
# an IP address on your LAN. This is the address of the host computer, not the guest.
ifconfig br0 192.168.1.2 netmask 255.255.255.0 broadcast 192.168.1.255 up
# bridge parameters: spanning tree protocol off, forward delay 1 and
# hello time 1
brctl stp br0 off
brctl setfd br0 1
brctl sethello br0 1
# add eth0 and tap0 as members of the bridge
brctl addif br0 eth0
brctl addif br0 tap0
# setup default gateway (use the address of the router on your LAN).
route add default gw 192.168.1.1
Note that you will need to run that on every reboot. So you might want to save this as a boot script.
Create a VM
- Create a 10g disk: qemu-img create -f qcow2 vdisk.img 10G.
- install OS: qemu-system-x86_64 -hda vdisk.img -cdrom /path/to/boot-media.iso -boot d -m 512 -vnc :1. Let's analyze that command:
- "-hda vdisk.img": use vdisk.img as primary disk
- "-cdrom /path/to/boot-media.iso": the cdrom drive should be mounted with boot-media.iso
- "-boot d": Boot from D drive, the cdrom
- "-m 512": 512 mb of RAM
- "-vnc :1" : The display will be on VNC port index number 1. Depending on your settings, if your base port is 5900, then the TCP port used in that case will be (5900 + 1).
So you can now use a VNC client to connect to port 5901 on your host to have access to the display. The VM will
boot from the OS install CD you have provided so you will be able to install the OS like you would on a real computer.
Use a VM
- Run: qemu-system-x86_64 -usbdevice tablet -daemonize -enable-kvm --hda /virtual-machines/vdisk.img -boot c -m 512 -vnc :1 -monitor telnet:127.0.0.1:3010,server,nowait,ipv4 -net tap,ifname=tap0,script=no -net nic
Let's analyze that command:
- -usbdevice tablet: I had problems with my mouse cursor when using VNC if I didn't use that option.
- -daemonize: Run as background process
- -enable-kvm: Enable the use of kernel-based virtualization.
- "-hda vdisk.img": use vdisk.img as primary disk
- "-boot c": Boot from C drive, the primary disk
- "-m 512": 512 mb of RAM
- "-vnc :1" : The display will be on VNC port index number 1.
- -monitor telnet:127.0.0.1:3010,server,nowait,ipv4: Listen on 127.0.0.1:3010 for the telnet configuration.
- -net tap,ifname=tap0,script=no: Use tap0, and don't run network setup script.
- Install a vnc viewer on some other computer (TightVNC). Connect to host on port 5901
- Configure network on guest (If windows, enable remote desktop and disable firewall or poke a hole in it)
You should now have access to your VM through remote desktop or SSH or whatever you configured in that last step.
Managing the VM
You can telnet in the VM console to manage it. use the port you have setup with option "-monitor telnet". To exit
the monitor, use 'ctrl-]' and press 'q'. If you type 'q' without 'ctrl-]', you will kill the VM.
Change CD in cdrom
telnet in management console and: change ide1-cd0 /shared/newimg.iso
Changing specs
Of course, if you want to add more RAM or change other system specs, you can do it from the command line
when invoking qemu.
USING COUCHDB
2012-02-27
Introduction
Before using this information, you need to know how the JSON format works. JSON is kind of like XML, it is a way of
representing data. I won't go into more details in here.
Concepts
If you are switching from a SQL database like MySQL to couchdb, then chances are you will be wondering where are the tables and how do I query them? Well there is no table. To make things simple, try to think of it this way:
- CouchDB is like a database that contains only one table and one column. Each row is filled with a JSON document. You could easily do that with MySQL, except that the server doesn't understand JSON, so it can't do any special processing based on what your JSON document contains.
- Everything is done through a web interface using a REST API. This doesn't mean that you query the DB directly from your website (you still make the queries from the server side). And for that matter, it doesn't mean that CouchDB is only made for websites.
- If you are searching for "stored procedures", you wanna use "views" with couchDB.
So consider this: If you are building a simple blog where each post contains a timestamp, a title and a content, then you will probably create a table like this in MySQL:
ID | TimeStamp | Title | Content |
1 | 330133439 | A Post | oh yeah |
2 | 330133439 | Another post | blah blah blah |
... |
What happens if you wanna add a "tag" column at one point? You'd have to modify your schema. So instead, for flexibility, you will decide to use one column only and store each post with a format you like, maybe you'll choose XML:
Data |
<post> <id>1</id> <title>A post</title> <timestamp>330133439</timestamp> <content>oh yeah</content> </post> |
<post> <id>2</id> <title>Another post</title> <timestamp>330133439</timestamp> <content>blah blah blah</content> </post> |
... |
This is exactly what couchDB is used for. Except that instead of a row, it calls it a document. Instead of using XML, it uses the JSON format. You might be wondering what's the point of using couchdb over mysql if both can do the same thing then. Couch DB adds more functionalities, like adding attachments to a document, creating views with javascript and so much more. You will find a lot of blogs with people debating SQL vs NoSQL, so I won't cover this here. I just wanted to explain what CouchDB is.
Cheatsheet
- Check if DB exists: curl -X GET http://127.0.0.1:5984/database1/
where 'database1' is the name of your database
Will return an error if DB does not exist
- Create a database: curl -X PUT http://127.0.0.1:5984/database1/
where 'database1' is the name of your database
- Create a document: curl -X PUT http://127.0.0.1:5984/database1/document1 -H "Content-Type: application/json" -d '{"field1":"value1","field2":"value2"}'
where 'database1' is the name of your database
where 'document1' is the ID of the document to create
- Retrieve a document: curl -X GET http://127.0.0.1:5984/database1/document1
where 'database1' is the name of your database
where 'document1' is the ID of the document to retrieve
- Create a view: curl -X PUT http://127.0.0.1:5984/database1/_design/designdocument1 -H "Content-Type: application/json" -d '{JSON_REPRESENTATION_OF_VIEW}'
where 'designdocument1' is the name of your designdocument
Note that a design document can contain more than one view. A view contains a map function and a reduce function.
The following is an example of what could be included as the "JSON_REPRESENTATION_OF_VIEW"
{
"language": "javascript",
"views": {
"view1": {
"map": "function(doc){emit(doc._id,doc);}"
},
"view2": {
"map": "function(doc){emit(doc._id,doc);}",
"reduce": "function (key, values){return null;}"
}
}
}
- Query a view: http://127.0.0.1:5984/database1/_design/designdocument1/_view/view2?reduce=true&group=true&skip=2&limit=5
where 'database1' is the name of your database
This will return the results of the view "view2" in "designdocument1". We have also provided parameters in the
URL that say: we want the reduce function to be executed, we want results grouped, we want to skip the first 2
documents returned by the view, we want a maximum of 5 documents in total.
using the results in php
If we query curl -X GET http://127.0.0.1:5984/database1/document1
and we get the result
{
"_id": "document1",
"_rev": "1-a227e6b8d34d14fbc59c4dde72e53848",
"field1": "value1",
"field2": {"sub1":"val1","sub2":"val2"},
"field3": ["val1","val2","val3"]
}
Then we can take that result and decode it using json_decode
$obj = json_decode($jsonString);
We get:
- $obj->field1="value1"
- $obj->field2->sub2="val2" ($obj->field2 is an object)
- $obj->field3[1]="val2" ($obj->field3 is an array)
Text Search
Consider this SQL query: SELECT * FROM posts WHERE content LIKE '%test%'. With
CouchDB, it gets a little more complicated. First, you need to create a view that emits a map of ALL
the words in your documents.
function(doc) {
var tokens;
if (doc.content) {
var st = doc.content.replace(/<(?:.|\n)*?>/gm, '');
tokens = st.split(/[^A-Z0-9\-_]+/i);
var uniqueTokens = {};
for (var i=0;i<tokens.length;i++)
{
var key = (tokens[i]);
if (key!="") uniqueTokens[key] = key;
}
for (var token in uniqueTokens){
emit(token,doc.title);
}
}
}
So if you have the following documents in your database:
{"title":"doc1","content":"hello this is a test"}
{"title":"doc2","content":"another document"}
Your view would output the following:
"hello",doc1
"this",doc1
"is",doc1
"a",doc1
"test",doc1
"another",doc2
"document",doc2
So if you want to retrieve only the documents that contain the word "test", then you could invoke the following:
http://127.0.0.1:5984/database1/_design/designdocument1/_view/view1?keys=["test"]