Pythonの練習。 #more|| cui01.c: #code||> #include <stdio.h> int main(int argc, char *argv[]) { printf("Hello, World!\n"); return 0; } ||< コンパイル: > cl cui01.c IMAGE_DOS_HEADER部分を取り出してみる。 show_dos_header.py: #code|python|> import sys, logging, struct, ctypes WORD = ctypes.c_ushort LONG = ctypes.c_long class IMAGE_DOS_HEADER(ctypes.Structure): _fields_ = [ ('e_magic', WORD), # Magic number ('e_cblp', WORD), # Bytes on last page of file ('e_cp', WORD), # Pages in file ('e_crlc', WORD), # Relocations ('e_cparhdr', WORD), # Size of header in paragraphs ('e_minalloc', WORD), # Minimum extra paragraphs needed ('e_maxalloc', WORD), # Maximum extra paragraphs needed ('e_ss', WORD), # Initial (relative) SS value ('e_sp', WORD), # Initial SP value ('e_csum', WORD), # Checksum ('e_ip', WORD), # Initial IP value ('e_cs', WORD), # Initial (relative) CS value ('e_lfarlc', WORD), # File address of relocation table ('e_ovno', WORD), # Overlay number ('e_res', WORD * 4), # Reserved words ('e_oemid', WORD), # OEM identifier (for e_oeminfo) ('e_oeminfo', WORD), # OEM information; e_oemid specific ('e_res2', WORD * 10), # Reserved words ('e_lfanew', LONG), # File address of new exe header ] if 2 != len(sys.argv): print 'usage: python %s filename' % sys.argv[0] quit() file_name = sys.argv[1] logging.basicConfig( level=logging.DEBUG, format='%(asctime)s %(name)s %(levelname)s %(message)s', ) log = logging.getLogger('main') try: f = open(file_name, 'rb') except IOError, e: log.error("open(%s) faileed." 
% file_name) log.error(e) raise e f.seek(0, 0) data = f.read(ctypes.sizeof(IMAGE_DOS_HEADER)) header = IMAGE_DOS_HEADER() fit = min(len(data), ctypes.sizeof(IMAGE_DOS_HEADER)) ctypes.memmove(ctypes.addressof(header), data, fit) print "IMAGE_DOS_HEADER:" print "\te_magic: %04X" % header.e_magic print "\te_cblp: %04X" % header.e_cblp print "\te_cp: %04X" % header.e_cp print "\te_crlc: %04X" % header.e_crlc print "\te_cparhdr: %04X" % header.e_cparhdr print "\te_minalloc: %04X" % header.e_minalloc print "\te_maxalloc: %04X" % header.e_maxalloc print "\te_ss: %04X" % header.e_ss print "\te_sp: %04X" % header.e_sp print "\te_csum: %04X" % header.e_csum print "\te_ip: %04X" % header.e_ip print "\te_cs: %04X" % header.e_cs print "\te_lfarlc: %04X" % header.e_lfarlc print "\te_ovno: %04X" % header.e_ovno for i in range(4): print "\te_res[%d]: %04X" % (i, header.e_res[i]) print "\te_oemid: %04X" % header.e_oemid print "\te_oeminfo: %04X" % header.e_oeminfo for i in range(10): print "\te_res2[%d]: %04X" % (i, header.e_res2[i]) print "\te_lfanew: %08X" % header.e_lfanew ||< ctypes.Structureにファイルから読んだ生データ(string)を取り込ませるtipsについては下記参照: - How to pack and unpack using ctypes (Structure <-> str) - Stack Overflow -- http://stackoverflow.com/questions/1825715/how-to-pack-and-unpack-using-ctypes-structure-str 続いて、DOSのStub実行データ部分を抜き出す・・・というか、正確にはIMAGE_DOS_HEADERとそのe_lfanewの示すファイル位置の間に存在するデータ部分を抜き出す。 IMAGE_DOS_HEADER DOS_STUB IMAGE_NT_HEADERS の順に並んでる筈だから・・・多分Stub実行イメージ本体で良いんだよね?セグメントとかその辺が絡んできそうなのであまり突っ込まないが、とりあえずIMAGE_DOS_HEADERとIMAGE_NT_HEADERSの間を抜き出すプログラム: extract_dos_stub.py: #code|python|> import sys, logging, struct, ctypes WORD = ctypes.c_ushort LONG = ctypes.c_long class IMAGE_DOS_HEADER(ctypes.Structure): _fields_ = [ ('e_magic', WORD), # Magic number ('e_cblp', WORD), # Bytes on last page of file ('e_cp', WORD), # Pages in file ('e_crlc', WORD), # Relocations ('e_cparhdr', WORD), # Size of header in paragraphs ('e_minalloc', WORD), # Minimum extra paragraphs needed 
('e_maxalloc', WORD), # Maximum extra paragraphs needed ('e_ss', WORD), # Initial (relative) SS value ('e_sp', WORD), # Initial SP value ('e_csum', WORD), # Checksum ('e_ip', WORD), # Initial IP value ('e_cs', WORD), # Initial (relative) CS value ('e_lfarlc', WORD), # File address of relocation table ('e_ovno', WORD), # Overlay number ('e_res', WORD * 4), # Reserved words ('e_oemid', WORD), # OEM identifier (for e_oeminfo) ('e_oeminfo', WORD), # OEM information; e_oemid specific ('e_res2', WORD * 10), # Reserved words ('e_lfanew', LONG), # File address of new exe header ] if 3 != len(sys.argv): print 'usage: python %s filename output' % sys.argv[0] quit() file_name = sys.argv[1] out_file = sys.argv[2] logging.basicConfig( level=logging.DEBUG, format='%(asctime)s %(name)s %(levelname)s %(message)s', ) log = logging.getLogger('main') try: f = open(file_name, 'rb') except IOError, e: log.error("open(%s) faileed." % file_name) log.error(e) raise e f.seek(0, 0) dos_header_sz = ctypes.sizeof(IMAGE_DOS_HEADER) data = f.read(dos_header_sz) header = IMAGE_DOS_HEADER() fit = min(len(data), ctypes.sizeof(IMAGE_DOS_HEADER)) ctypes.memmove(ctypes.addressof(header), data, fit) pe_offset = header.e_lfanew stub_size = pe_offset - dos_header_sz f.seek(dos_header_sz, 0) stub_data = f.read(stub_size) try: f_out = open(out_file, 'wb') f_out.write(stub_data) f_out.close() print "DOS Stub executable binary image is saved to %s." % out_file print "%d - %d (%d bytes) are extracted from %s" % ( dos_header_sz, dos_header_sz + stub_size, stub_size, file_name) except IOError, e: log.error("open(%s) faileed." % file_name) log.error(e) raise e ||< 実際に抜き出してみる。 > extract_dos_stub.py cui01.exe dos_stub ndisasmで逆アセンブルしてみる。 #pre||> > ndisasm -b 16 dos_stub 00000000 0E push cs 00000001 1F pop ds 00000002 BA0E00 mov dx,0xe 00000005 B409 mov ah,0x9 00000007 CD21 int 0x21 00000009 B8014C mov ax,0x4c01 0000000C CD21 int 0x21 0000000E 54 push sp 0000000F 686973 push word 0x7369 (...) 
||< ここで 0000000E 54 push sp 以下は丁度 This program cannot be run in DOS mode. の文字列になる。その後ろにもバイト列が幾つか続いているが、よく分からないのでスルー。 実行コードとしてはこれだけになる・・・のだろう、多分。 00000000 0E push cs 00000001 1F pop ds 00000002 BA0E00 mov dx,0xe 00000005 B409 mov ah,0x9 00000007 CD21 int 0x21 00000009 B8014C mov ax,0x4c01 0000000C CD21 int 0x21 0x7の"int 0x21"は、AHが0x9なので文字列表示。CS(Code Segment)をDSレジスタにコピーした後、DXレジスタに"0xe"、つまり"This program ..."の先頭のアドレスが指定される。これはAH=09hのINT21呼び出しでは、文字列の先頭アドレスを"DS:DX"で指定する為である。 - Int 21/AH=09h -- http://www.ctyme.com/intr/rb-2562.htm 文字列表示が終われば、今度は0xCの"int 0x21"。こんどはAXが0x4c01になる。つまりAH=4C, AL=01。なのでreturn codeは1になる。 - Int 21/AH=4Ch -- http://www.ctyme.com/intr/rb-2974.htm 以上。 ちなみに、バイナリエディタでe_lfanewのところを適当な値に書き換えると、"PE\00\00"が見つからなくなるためか、DOSモードでの実行になる。