src/jit/dis_x86.lua - luajit-2.0-src

Functions defined

Source code

  1. ----------------------------------------------------------------------------
  2. -- LuaJIT x86/x64 disassembler module.
  3. --
  4. -- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
  5. -- Released under the MIT license. See Copyright Notice in luajit.h
  6. ----------------------------------------------------------------------------
  7. -- This is a helper module used by the LuaJIT machine code dumper module.
  8. --
  9. -- Sending small code snippets to an external disassembler and mixing the
  10. -- output with our own stuff was too fragile. So I had to bite the bullet
  11. -- and write yet another x86 disassembler. Oh well ...
  12. --
  13. -- The output format is very similar to what ndisasm generates. But it has
  14. -- been developed independently by looking at the opcode tables from the
  15. -- Intel and AMD manuals. The supported instruction set is quite extensive
  16. -- and reflects what a current generation Intel or AMD CPU implements in
  17. -- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
  18. -- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM)
  19. -- instructions.
  20. --
  21. -- Notes:
  22. -- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
  23. -- * No attempt at optimization has been made -- it's fast enough for my needs.
  24. -- * The public API may change when more architectures are added.
  25. ------------------------------------------------------------------------------

  26. local type = type
  27. local sub, byte, format = string.sub, string.byte, string.format
  28. local match, gmatch, gsub = string.match, string.gmatch, string.gsub
  29. local lower, rep = string.lower, string.rep
  30. local bit = require("bit")
  31. local tohex = bit.tohex

  32. -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
  --
  -- The table is positional: the index is the opcode byte, starting at [0].
  -- Each entry is a pattern string interpreted by dispatch():
  --   * a lowercase name (letters, digits, spaces), followed by
  --   * action/operand chars; the first one selects the handler in map_act:
  --       (none)  opcode without operands
  --       B W D Q V U T M X P F G  operand size char, handled by putpat
  --       :       prefix: "name:" sets ctx[name], "name:flag" sets ctx[flag]
  --       *       chain to the special handler called <name> (sz, fp, rex, ...)
  --       !       pick the entry of map_opcgroup[<name>] by the ModRM reg field
  --   * "a|b|c|d" holds prefix dependent variants: none | rep | o16 | repne
  --   * "reg$mem" holds variants for a register ModRM (>= 0xC0) vs. a memory
  --     operand
  -- Operand chars understood by putpat: m (ModRM r/m), r (ModRM reg),
  -- R (register in the low 3 bits of the opcode), a (first register of the
  -- current size), c ("cl"), d ("dx"), 1 ("1"), g (segment register),
  -- i/I/s/u/w (immediates), j (jump target), o (memory offset),
  -- S (append size letter to the name).
  33. local map_opc1_32 = {
  34. --0x
  35. [0]="addBmr","addVmr","addBrm","addVrm","addBai","addVai","push es","pop es",
  36. "orBmr","orVmr","orBrm","orVrm","orBai","orVai","push cs","opc2*",
  37. --1x
  38. "adcBmr","adcVmr","adcBrm","adcVrm","adcBai","adcVai","push ss","pop ss",
  39. "sbbBmr","sbbVmr","sbbBrm","sbbVrm","sbbBai","sbbVai","push ds","pop ds",
  40. --2x
  41. "andBmr","andVmr","andBrm","andVrm","andBai","andVai","es:seg","daa",
  42. "subBmr","subVmr","subBrm","subVrm","subBai","subVai","cs:seg","das",
  43. --3x
  44. "xorBmr","xorVmr","xorBrm","xorVrm","xorBai","xorVai","ss:seg","aaa",
  45. "cmpBmr","cmpVmr","cmpBrm","cmpVrm","cmpBai","cmpVai","ds:seg","aas",
  46. --4x
  47. "incVR","incVR","incVR","incVR","incVR","incVR","incVR","incVR",
  48. "decVR","decVR","decVR","decVR","decVR","decVR","decVR","decVR",
  49. --5x
  50. "pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR",
  51. "popUR","popUR","popUR","popUR","popUR","popUR","popUR","popUR",
  52. --6x
  53. "sz*pushaw,pusha","sz*popaw,popa","boundVrm","arplWmr",
  54. "fs:seg","gs:seg","o16:","a16",
  55. "pushUi","imulVrmi","pushBs","imulVrms",
  56. "insb","insVS","outsb","outsVS",
  57. --7x
  58. "joBj","jnoBj","jbBj","jnbBj","jzBj","jnzBj","jbeBj","jaBj",
  59. "jsBj","jnsBj","jpeBj","jpoBj","jlBj","jgeBj","jleBj","jgBj",
  60. --8x
  61. "arith!Bmi","arith!Vmi","arith!Bmi","arith!Vms",
  62. "testBmr","testVmr","xchgBrm","xchgVrm",
  63. "movBmr","movVmr","movBrm","movVrm",
  64. "movVmg","leaVrm","movWgm","popUm",
  65. --9x
  66. "nop*xchgVaR|pause|xchgWaR|repne nop","xchgVaR","xchgVaR","xchgVaR",
  67. "xchgVaR","xchgVaR","xchgVaR","xchgVaR",
  68. "sz*cbw,cwde,cdqe","sz*cwd,cdq,cqo","call farViw","wait",
  69. "sz*pushfw,pushf","sz*popfw,popf","sahf","lahf",
  70. --Ax
  71. "movBao","movVao","movBoa","movVoa",
  72. "movsb","movsVS","cmpsb","cmpsVS",
  73. "testBai","testVai","stosb","stosVS",
  74. "lodsb","lodsVS","scasb","scasVS",
  75. --Bx
  76. "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
  77. "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
  78. --Cx
  79. "shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi",
  80. "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
  81. --Dx
  82. "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
  83. "fp*0","fp*1","fp*2","fp*3","fp*4","fp*5","fp*6","fp*7",
  84. --Ex
  85. "loopneBj","loopeBj","loopBj","sz*jcxzBj,jecxzBj,jrcxzBj",
  86. "inBau","inVau","outBua","outVua",
  87. "callVj","jmpVj","jmp farViw","jmpBj","inBad","inVad","outBda","outVda",
  88. --Fx
  89. "lock:","int1","repne:rep","rep:","hlt","cmc","testb!Bm","testv!Vm",
  90. "clc","stc","cli","sti","cld","std","incb!Bm","incd!Vm",
  91. }
  -- Sanity check against a dropped or duplicated entry. The length is 255,
  -- not 256, because the entry at index [0] is not counted by '#'.
  92. assert(#map_opc1_32 == 255)

  -- First opcode byte in 64 bit mode. Only the differences to the 32 bit map
  -- are stored here; every other opcode is looked up in map_opc1_32 through
  -- the __index fallback.
  local map_opc1_64 = {
    [0x63] = "movsxdVrDmt",
    [0x67] = "a32:",
  }
  do
    -- Opcodes that are not valid in 64 bit mode. They are stored as false
    -- (not nil), so the lookup does not fall through to the 32 bit map.
    local invalid = {
      0x06, 0x07, 0x0e, 0x16, 0x17, 0x1e, 0x1f,
      0x27, 0x2f, 0x37, 0x3f,
      0x60, 0x61, 0x62,
      0x82, 0x9a, 0xc4, 0xc5, 0xce,
      0xd4, 0xd5, 0xd6, 0xea,
    }
    for _, opcode in ipairs(invalid) do
      map_opc1_64[opcode] = false
    end
    -- 0x40-0x4f are REX prefixes. The low nibble holds the W, R, X and B
    -- bits (8, 4, 2, 1); the set bits are spelled out after "rex*".
    for bits = 0, 15 do
      local pattern = "rex*"
      if bits >= 8 then pattern = pattern.."w" end
      if bits % 8 >= 4 then pattern = pattern.."r" end
      if bits % 4 >= 2 then pattern = pattern.."x" end
      if bits % 2 == 1 then pattern = pattern.."b" end
      map_opc1_64[0x40 + bits] = pattern
    end
  end
  setmetatable(map_opc1_64, { __index = map_opc1_32 })

  106. -- Map for 2nd opcode byte (0F xx). True CISC hell. Hey, I told you.
  107. -- Prefix dependent MMX/SSE opcodes: (none)|rep|o16|repne, -|F3|66|F2
  -- The table is positional: the index is the second opcode byte, starting
  -- at [0]. A nil entry is an undefined opcode and is reported as "(unknown)"
  -- by dispatch(). An empty variant (e.g. the first two fields of
  -- "||punpcklqdqXrm") is reported as "(unknown)" as well, i.e. the opcode only
  -- exists together with the prefix of a non-empty field.
  -- "reg$mem" entries select between a register ModRM (>= 0xC0) and a memory
  -- operand; "name!..." entries are looked up in map_opcgroup.
  108. local map_opc2 = {
  109. --0x
  110. [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
  111. "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
  112. --1x
  113. "movupsXrm|movssXrm|movupdXrm|movsdXrm",
  114. "movupsXmr|movssXmr|movupdXmr|movsdXmr",
  115. "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
  116. "movlpsXmr||movlpdXmr",
  117. "unpcklpsXrm||unpcklpdXrm",
  118. "unpckhpsXrm||unpckhpdXrm",
  119. "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
  120. "movhpsXmr||movhpdXmr",
  121. "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
  122. "hintnopVm","hintnopVm","hintnopVm","hintnopVm",
  123. --2x
  124. "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
  125. "movapsXrm||movapdXrm",
  126. "movapsXmr||movapdXmr",
  127. "cvtpi2psXrMm|cvtsi2ssXrVmt|cvtpi2pdXrMm|cvtsi2sdXrVmt",
  128. "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
  129. "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
  130. "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
  131. "ucomissXrm||ucomisdXrm",
  132. "comissXrm||comisdXrm",
  133. --3x
  134. "wrmsr","rdtsc","rdmsr","rdpmc","sysenter","sysexit",nil,"getsec",
  135. "opc3*38",nil,"opc3*3a",nil,nil,nil,nil,nil,
  136. --4x
  137. "cmovoVrm","cmovnoVrm","cmovbVrm","cmovnbVrm",
  138. "cmovzVrm","cmovnzVrm","cmovbeVrm","cmovaVrm",
  139. "cmovsVrm","cmovnsVrm","cmovpeVrm","cmovpoVrm",
  140. "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
  141. --5x
  142. "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
  143. "rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm",
  144. "andpsXrm||andpdXrm","andnpsXrm||andnpdXrm",
  145. "orpsXrm||orpdXrm","xorpsXrm||xorpdXrm",
  146. "addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm",
  147. "cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm",
  148. "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
  149. "subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm",
  150. "divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm",
  151. --6x
  152. "punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm",
  153. "pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm",
  154. "punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm",
  155. "||punpcklqdqXrm","||punpckhqdqXrm",
  156. "movPrVSm","movqMrm|movdquXrm|movdqaXrm",
  157. --7x
  158. "pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu",
  159. "pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu",
  160. "pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|",
  161. "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
  162. nil,nil,
  163. "||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm",
  164. "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
  165. --8x
  166. "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
  167. "jsVj","jnsVj","jpeVj","jpoVj","jlVj","jgeVj","jleVj","jgVj",
  168. --9x
  169. "setoBm","setnoBm","setbBm","setnbBm","setzBm","setnzBm","setbeBm","setaBm",
  170. "setsBm","setnsBm","setpeBm","setpoBm","setlBm","setgeBm","setleBm","setgBm",
  171. --Ax
  172. "push fs","pop fs","cpuid","btVmr","shldVmru","shldVmrc",nil,nil,
  173. "push gs","pop gs","rsm","btsVmr","shrdVmru","shrdVmrc","fxsave!Dmp","imulVrm",
  174. --Bx
  175. "cmpxchgBmr","cmpxchgVmr","$lssVrm","btrVmr",
  176. "$lfsVrm","$lgsVrm","movzxVrBmt","movzxVrWmt",
  177. "|popcntVrm","ud2Dp","bt!Vmu","btcVmr",
  178. "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
  179. --Cx
  180. "xaddBmr","xaddVmr",
  181. "cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|",
  182. "pinsrwPrWmu","pextrwDrPmu",
  183. "shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp",
  184. "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
  185. --Dx
  186. "||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm",
  187. "paddqPrm","pmullwPrm",
  188. "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
  189. "psubusbPrm","psubuswPrm","pminubPrm","pandPrm",
  190. "paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm",
  191. --Ex
  192. "pavgbPrm","psrawPrm","psradPrm","pavgwPrm",
  193. "pmulhuwPrm","pmulhwPrm",
  194. "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
  195. "psubsbPrm","psubswPrm","pminswPrm","porPrm",
  196. "paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm",
  197. --Fx
  198. "|||lddquXrm","psllwPrm","pslldPrm","psllqPrm",
  199. "pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$",
  200. "psubbPrm","psubwPrm","psubdPrm","psubqPrm",
  201. "paddbPrm","paddwPrm","padddPrm","ud",
  202. }
  -- Sanity check against a dropped or duplicated entry: the last positional
  -- entry must end up at index 255 (opcode 0F FF).
  203. assert(map_opc2[255] == "ud")

  -- Map for three-byte opcodes. Can't wait for their next invention.
  --
  -- The outer key is the pattern suffix of the "opc3*38" / "opc3*3a" entries
  -- in the two-byte map, i.e. the second escape byte as a string. The inner
  -- tables are indexed by the third opcode byte, starting at [0]; nil marks
  -- an undefined opcode. The entries use the same prefix dependent variant
  -- syntax as the two-byte map: (none)|rep|o16|repne.
  local map_opc3 = {
  ["38"] = { -- [66] 0f 38 xx
  --0x
  [0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm",
  "pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm",
  "psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm",
  nil,nil,nil,nil,
  --1x
  "||pblendvbXrma",nil,nil,nil,
  "||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm",
  nil,nil,nil,nil,
  "pabsbPrm","pabswPrm","pabsdPrm",nil,
  --2x
  "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
  "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
  "||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm",
  nil,nil,nil,nil,
  --3x
  "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
  "||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm",
  "||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm",
  "||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm",
  --4x
  -- 66 0f 38 40 is PMULLD (packed multiply low dwords). It used to be
  -- misspelled "pmuldd", a mnemonic which does not exist.
  "||pmulldXrm","||phminposuwXrm",
  --Fx
  [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
  },

  ["3a"] = { -- [66] 0f 3a xx
  --0x
  [0x00]=nil,nil,nil,nil,nil,nil,nil,nil,
  "||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu",
  "||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu",
  --1x
  nil,nil,nil,nil,
  "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
  nil,nil,nil,nil,nil,nil,nil,nil,
  --2x
  "||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil,
  --4x
  [0x40] = "||dppsXrmu",
  [0x41] = "||dppdXrmu",
  [0x42] = "||mpsadbwXrmu",
  --6x
  [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
  [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
  },
  }

  -- VMX/SVM opcodes 0F 01 C0-FF, i.e. the sgdt group with a register ModRM.
  -- The key is the complete ModRM byte; bytes without an entry are unknown.
  local map_opcvm = {
    [0xc1] = "vmcall",
    [0xc2] = "vmlaunch",
    [0xc3] = "vmresume",
    [0xc4] = "vmxoff",

    [0xc8] = "monitor",
    [0xc9] = "mwait",

    [0xd8] = "vmrun",
    [0xd9] = "vmmcall",
    [0xda] = "vmload",
    [0xdb] = "vmsave",
    [0xdc] = "stgi",
    [0xdd] = "clgi",
    [0xde] = "skinit",
    [0xdf] = "invlpga",

    [0xf8] = "swapgs",
    [0xf9] = "rdtscp",
  }

  -- Map for FP opcodes. And you thought stack machines are simple?
  --
  -- The index is (opcode - 0xD8)*8 + ModRM reg field, plus 64 if the ModRM
  -- byte is >= 0xC0 (register form). An entry is either a pattern string or,
  -- for opcodes which are fully determined by the ModRM byte, a table of
  -- patterns indexed by the ModRM r/m field + 1. nil marks an undefined opcode.
  local map_opcfp = {
  -- D8-DF 00-BF: opcodes with a memory operand.
  -- D8
  [0]="faddFm","fmulFm","fcomFm","fcompFm","fsubFm","fsubrFm","fdivFm","fdivrFm",
  -- D9
  "fldFm",nil,"fstFm","fstpFm","fldenvVm","fldcwWm","fnstenvVm","fnstcwWm",
  -- DA
  "fiaddDm","fimulDm","ficomDm","ficompDm",
  "fisubDm","fisubrDm","fidivDm","fidivrDm",
  -- DB
  "fildDm","fisttpDm","fistDm","fistpDm",nil,"fld twordFmp",nil,"fstp twordFmp",
  -- DC
  "faddGm","fmulGm","fcomGm","fcompGm","fsubGm","fsubrGm","fdivGm","fdivrGm",
  -- DD
  "fldGm","fisttpQm","fstGm","fstpGm","frstorDmp",nil,"fnsaveDmp","fnstswWm",
  -- DE
  "fiaddWm","fimulWm","ficomWm","ficompWm",
  "fisubWm","fisubrWm","fidivWm","fidivrWm",
  -- DF
  "fildWm","fisttpWm","fistWm","fistpWm",
  "fbld twordFmp","fildQm","fbstp twordFmp","fistpQm",
  -- xx C0-FF: opcodes with a pseudo-register operand.
  -- D8
  "faddFf","fmulFf","fcomFf","fcompFf","fsubFf","fsubrFf","fdivFf","fdivrFf",
  -- D9
  "fldFf","fxchFf",{"fnop"},nil,
  {"fchs","fabs",nil,nil,"ftst","fxam"},
  {"fld1","fldl2t","fldl2e","fldpi","fldlg2","fldln2","fldz"},
  {"f2xm1","fyl2x","fptan","fpatan","fxtract","fprem1","fdecstp","fincstp"},
  {"fprem","fyl2xp1","fsqrt","fsincos","frndint","fscale","fsin","fcos"},
  -- DA
  "fcmovbFf","fcmoveFf","fcmovbeFf","fcmovuFf",nil,{nil,"fucompp"},nil,nil,
  -- DB
  "fcmovnbFf","fcmovneFf","fcmovnbeFf","fcmovnuFf",
  {nil,nil,"fnclex","fninit"},"fucomiFf","fcomiFf",nil,
  -- DC
  -- With st(i) as the destination the reversed forms come first:
  -- DC E0+i = fsubr, DC E8+i = fsub, DC F0+i = fdivr, DC F8+i = fdiv.
  -- This is the same ordering as the popping forms in the DE row below.
  "fadd toFf","fmul toFf",nil,nil,
  "fsubr toFf","fsub toFf","fdivr toFf","fdiv toFf",
  -- DD
  "ffreeFf",nil,"fstFf","fstpFf","fucomFf","fucompFf",nil,nil,
  -- DE
  "faddpFf","fmulpFf",nil,{nil,"fcompp"},
  "fsubrpFf","fsubpFf","fdivrpFf","fdivpFf",
  -- DF
  nil,nil,nil,nil,{"fnstsw ax"},"fucomipFf","fcomipFf",nil,
  }
  -- Sanity check against a dropped or duplicated entry: DF F0+i must end up at
  -- index 7*8 + 6 + 64.
  assert(map_opcfp[126] == "fcomipFf")

  -- Opcode groups, referenced by "<group>!..." patterns in the opcode maps.
  -- Each group is indexed by the reg field of the ModRM byte plus one.
  -- Missing slots are undefined opcodes. Entries without operand chars
  -- inherit the operand pattern following the "!" of the referencing entry.
  local map_opcgroup = {
    arith = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" },
    shift = { "rol", "ror", "rcl", "rcr", "shl", "shr", "sal", "sar" },
    testb = {
      "testBmi", "testBmi", "not", "neg",
      "mul", "imul", "div", "idiv",
    },
    testv = {
      "testVmi", "testVmi", "not", "neg",
      "mul", "imul", "div", "idiv",
    },
    incb = { "inc", "dec" },
    incd = {
      "inc", "dec",
      "callUmp", "$call farDmp",
      "jmpUmp", "$jmp farDmp",
      "pushUm",
    },
    sldt = { "sldt", "str", "lldt", "ltr", "verr", "verw" },
    sgdt = {
      [1] = "vm*$sgdt", [2] = "vm*$sidt", [3] = "$lgdt", [4] = "vm*$lidt",
      [5] = "smsw", [7] = "lmsw", [8] = "vm*$invlpg",
    },
    bt = { [5] = "bt", [6] = "bts", [7] = "btr", [8] = "btc" },
    cmpxchg = {
      [2] = "sz*,cmpxchg8bQmp,cmpxchg16bXmp",
      [7] = "vmptrld|vmxon|vmclear",
      [8] = "vmptrst",
    },
    pshiftw = { [3] = "psrlw", [5] = "psraw", [7] = "psllw" },
    pshiftd = { [3] = "psrld", [5] = "psrad", [7] = "pslld" },
    pshiftq = { [3] = "psrlq", [7] = "psllq" },
    pshiftdq = {
      [3] = "psrlq", [4] = "psrldq",
      [7] = "psllq", [8] = "pslldq",
    },
    fxsave = {
      [1] = "$fxsave", [2] = "$fxrstor", [3] = "$ldmxcsr", [4] = "$stmxcsr",
      [6] = "lfenceDp$", [7] = "mfenceDp$", [8] = "sfenceDp$clflush",
    },
    prefetch = { "prefetch", "prefetchw" },
    prefetcht = { "prefetchnta", "prefetcht0", "prefetcht1", "prefetcht2" },
  }

  331. ------------------------------------------------------------------------------

  -- Register names per operand size char, indexed by register number + 1.
  -- Numbers 8-15 are the x64 extended registers. B64 is the byte register
  -- set which putpat selects whenever a REX prefix is present.
  local map_regs = {
    B   = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh" },
    B64 = { "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil" },
    W   = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" },
    D   = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi" },
    Q   = { "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi" },
    M   = {},
    X   = {},
  }
  for lo = 0, 7 do
    local hi = lo + 8
    -- General purpose registers r8-r15 with their size suffixes.
    map_regs.B[hi+1] = "r"..hi.."b"
    map_regs.B64[hi+1] = "r"..hi.."b"
    map_regs.W[hi+1] = "r"..hi.."w"
    map_regs.D[hi+1] = "r"..hi.."d"
    map_regs.Q[hi+1] = "r"..hi
    -- MMX has no x64 extension: numbers 8-15 alias mm0-mm7.
    map_regs.M[lo+1] = "mm"..lo
    map_regs.M[hi+1] = "mm"..lo
    map_regs.X[lo+1] = "xmm"..lo
    map_regs.X[hi+1] = "xmm"..hi
  end

  -- Segment register names, indexed by the ModRM reg field + 1.
  local map_segregs = {
    "es", "cs", "ss", "ds",
    "fs", "gs", "segr6", "segr7",
  }

  -- Operand size in bytes for each size char that can carry an immediate.
  local map_sz2n = { B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16 }

  -- Size keyword printed in front of a memory operand, per size char.
  local map_sz2prefix = {
    B = "byte",
    W = "word",
    D = "dword",
    Q = "qword",
    M = "qword",
    X = "xword",
    -- FP operands: only the prefix is needed, there are no byte sizes or
    -- register names for these two.
    F = "dword",
    G = "qword",
  }

  360. ------------------------------------------------------------------------------

  -- Emit one output line for the instruction spanning ctx.start .. ctx.pos-1.
  --
  -- The line consists of the address, an optional hex dump column (at most
  -- ctx.hexdump chars, 0 disables it), the opcode text and the operands.
  -- Prefixes which are still pending (not consumed by the operand decoder)
  -- are printed in front of the opcode and reset. If a 32 bit immediate or
  -- jump target was seen and ctx.symtab has a name for it, the name is
  -- appended. Afterwards the context is set up for the next instruction.
  local function putop(ctx, text, operands)
    local pos = ctx.pos
    local hexcol = ""
    local width = ctx.hexdump
    if width > 0 then
      local code = ctx.code
      for idx = ctx.start, pos - 1 do
        hexcol = hexcol..format("%02X", byte(code, idx, idx))
      end
      local used = #hexcol
      if used > width then
        -- Too long: truncate and mark with a dot.
        hexcol = sub(hexcol, 1, width)..". "
      else
        hexcol = hexcol..rep(" ", width - used + 2)
      end
    end

    if operands then
      text = text.." "..operands
    end

    -- Leftover prefixes, innermost first.
    if ctx.o16 then
      text = "o16 "..text
      ctx.o16 = false
    end
    if ctx.a32 then
      text = "a32 "..text
      ctx.a32 = false
    end
    if ctx.rep then
      text = ctx.rep.." "..text
      ctx.rep = false
    end
    if ctx.rex then
      -- Only the REX bits nobody consumed are shown. All of them are reset.
      local unused = ""
      for bit_name in gmatch("wrxb", ".") do
        local key = "rex"..bit_name
        if ctx[key] then unused = unused..bit_name end
        ctx[key] = false
      end
      if unused ~= "" then
        text = "rex."..unused.." "..text
      end
      ctx.rex = false
    end
    local seg = ctx.seg
    if seg then
      -- Put the override into every memory operand, or in front of the
      -- opcode if there is none.
      local patched, count = gsub(text, "%[", "["..seg..":")
      if count == 0 then
        text = seg.." "..text
      else
        text = patched
      end
      ctx.seg = false
    end
    if ctx.lock then
      text = "lock "..text
      ctx.lock = false
    end

    local target = ctx.imm
    if target then
      local symbol = ctx.symtab[target]
      if symbol then
        text = text.."\t->"..symbol
      end
    end

    ctx.out(format("%08x  %s%s\n", ctx.addr + ctx.start, hexcol, text))

    -- Get ready for the next instruction.
    ctx.start = pos
    ctx.mrm = false
    ctx.imm = nil
  end

  -- Reset every pending prefix flag of the context to false.
  local function clearprefixes(ctx)
    -- Legacy prefixes: operand/address size, segment override, lock, rep.
    ctx.o16, ctx.a32, ctx.seg, ctx.lock, ctx.rep = false, false, false, false, false
    -- REX prefix and its four bits.
    ctx.rex, ctx.rexw, ctx.rexr, ctx.rexx, ctx.rexb = false, false, false, false, false
  end

  -- Fallback for an instruction which is cut off by the end of the block:
  -- drop all prefixes, consume everything up to ctx.stop and print the
  -- remaining bytes as a single "(incomplete)" line.
  local function incomplete(ctx)
    clearprefixes(ctx)
    ctx.pos = ctx.stop + 1
    return putop(ctx, "(incomplete)")
  end

  -- Fallback for opcodes without a (valid) pattern: drop all prefixes and
  -- print the bytes consumed so far as a single "(unknown)" line.
  local function unknown(ctx)
    clearprefixes(ctx)
    return putop(ctx, "(unknown)")
  end

  -- Read a little-endian immediate of n bytes (1, 2 or otherwise 4) at pos.
  --
  -- Returns the unsigned value. A 4 byte immediate is also remembered in
  -- ctx.imm. If the immediate extends beyond ctx.stop, the instruction is
  -- reported via incomplete() and nothing is returned, so callers have to
  -- check for a nil result.
  local function getimm(ctx, pos, n)
    local last = pos + n - 1
    if last > ctx.stop then return incomplete(ctx) end
    local code = ctx.code
    if n == 1 then
      return (byte(code, pos, pos))
    end
    if n == 2 then
      local lo, hi = byte(code, pos, pos+1)
      return lo + hi*256
    end
    local b1, b2, b3, b4 = byte(code, pos, pos+3)
    local value = b1 + b2*256 + b3*65536 + b4*16777216
    ctx.imm = value
    return value
  end

  433. -- Process pattern string and generate the operands.
  --
  -- ctx:  disassembler context; ctx.pos points behind the opcode byte(s).
  -- name: opcode name (the "S" pattern char appends a size letter to it).
  -- pat:  operand pattern, processed char by char from left to right.
  --
  -- Size chars (B W D Q M X F G V U T P) set the current operand size and
  -- register set, operand chars append an operand. The ModRM byte (plus SIB
  -- byte and displacement) is decoded once, when the first char needing it is
  -- seen. Prefix flags which influence an operand are reset when they are
  -- consumed, so putop() only prints the unused ones.
  -- The line is always emitted: normally through putop(), or as
  -- "(incomplete)" if the code ends in the middle of the instruction.
  434. local function putpat(ctx, name, pat)
  435.   local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
  436.   local code, pos, stop = ctx.code, ctx.pos, ctx.stop

  437.   -- Chars used: 1BDFGIMPQRSTUVWXacdfgijmoprstuwxyz
  438.   for p in gmatch(pat, ".") do
  439.     local x = nil
           -- Variable operand size: REX.W selects Q, o16 selects W, "U" in
           -- x64 mode defaults to Q, everything else is D.
  440.     if p == "V" or p == "U" then
  441.       if ctx.rexw then sz = "Q"; ctx.rexw = false
  442.       elseif ctx.o16 then sz = "W"; ctx.o16 = false
  443.       elseif p == "U" and ctx.x64 then sz = "Q"
  444.       else sz = "D" end
  445.       regs = map_regs[sz]
           -- Like "V", but the o16 prefix is ignored.
  446.     elseif p == "T" then
  447.       if ctx.rexw then sz = "Q"; ctx.rexw = false else sz = "D" end
  448.       regs = map_regs[sz]
           -- Byte size. Any REX prefix switches to the register set with
           -- spl/bpl/sil/dil instead of ah/ch/dh/bh.
  449.     elseif p == "B" then
  450.       sz = "B"
  451.       regs = ctx.rex and map_regs.B64 or map_regs.B
           -- Fixed operand size. F and G have no entry in map_regs, so regs
           -- becomes nil for them.
  452.     elseif match(p, "[WDQMXFG]") then
  453.       sz = p
  454.       regs = map_regs[sz]
           -- MMX or SSE operand: the o16 prefix selects xmm registers.
  455.     elseif p == "P" then
  456.       sz = ctx.o16 and "X" or "M"; ctx.o16 = false
  457.       regs = map_regs[sz]
           -- Append the lowercase letter of the current size to the name.
  458.     elseif p == "S" then
  459.       name = name..lower(sz)
           -- Signed byte immediate, printed with an explicit sign.
  460.     elseif p == "s" then
  461.       local imm = getimm(ctx, pos, 1); if not imm then return end
  462.       x = imm <= 127 and format("+0x%02x", imm)
  463.                      or format("-0x%02x", 256-imm)
  464.       pos = pos+1
           -- Unsigned byte immediate.
  465.     elseif p == "u" then
  466.       local imm = getimm(ctx, pos, 1); if not imm then return end
  467.       x = format("0x%02x", imm)
  468.       pos = pos+1
           -- 16 bit immediate.
  469.     elseif p == "w" then
  470.       local imm = getimm(ctx, pos, 2); if not imm then return end
  471.       x = format("0x%x", imm)
  472.       pos = pos+2
           -- Absolute memory offset: 64 bits in x64 mode (read as two 32 bit
           -- halves, low half first), 32 bits otherwise.
  473.     elseif p == "o" then -- [offset]
  474.       if ctx.x64 then
  475.         local imm1 = getimm(ctx, pos, 4); if not imm1 then return end
  476.         local imm2 = getimm(ctx, pos+4, 4); if not imm2 then return end
  477.         x = format("[0x%08x%08x]", imm2, imm1)
  478.         pos = pos+8
  479.       else
  480.         local imm = getimm(ctx, pos, 4); if not imm then return end
  481.         x = format("[0x%08x]", imm)
  482.         pos = pos+4
  483.       end
           -- Immediate of the current operand size. Only "I" with a 64 bit
           -- operand in x64 mode reads a full 64 bit immediate; otherwise a
           -- 64 bit operand carries a 32 bit immediate, which is printed as a
           -- negative number if its sign bit is set.
  484.     elseif p == "i" or p == "I" then
  485.       local n = map_sz2n[sz]
  486.       if n == 8 and ctx.x64 and p == "I" then
  487.         local imm1 = getimm(ctx, pos, 4); if not imm1 then return end
  488.         local imm2 = getimm(ctx, pos+4, 4); if not imm2 then return end
  489.         x = format("0x%08x%08x", imm2, imm1)
  490.       else
  491.         if n == 8 then n = 4 end
  492.         local imm = getimm(ctx, pos, n); if not imm then return end
  493.         if sz == "Q" and (imm < 0 or imm > 0x7fffffff) then
  494.           imm = (0xffffffff+1)-imm
  495.           x = format(imm > 65535 and "-0x%08x" or "-0x%x", imm)
  496.         else
  497.           x = format(imm > 65535 and "0x%08x" or "0x%x", imm)
  498.         end
  499.       end
  500.       pos = pos+n
           -- Relative jump target: sign-extend the displacement, then add the
           -- position behind it and the base address. The result is kept in
           -- ctx.imm, so putop() can look it up in the symbol table.
  501.     elseif p == "j" then
  502.       local n = map_sz2n[sz]
  503.       if n == 8 then n = 4 end
  504.       local imm = getimm(ctx, pos, n); if not imm then return end
  505.       if sz == "B" and imm > 127 then imm = imm-256
  506.       elseif imm > 2147483647 then imm = imm-4294967296 end
  507.       pos = pos+n
  508.       imm = imm + pos + ctx.addr
             -- Wrap around at 4GB in 32 bit mode.
  509.       if imm > 4294967295 and not ctx.x64 then imm = imm-4294967296 end
  510.       ctx.imm = imm
  511.       if sz == "W" then
  512.         x = format("word 0x%04x", imm%65536)
  513.       elseif ctx.x64 then
               -- Printed in two pieces (upper part, lower 24 bits).
  514.         local lo = imm % 0x1000000
  515.         x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
  516.       else
  517.         x = "0x"..tohex(imm)
  518.       end
           -- Register encoded in the low 3 bits of the preceding byte,
           -- extended by REX.B.
  519.     elseif p == "R" then
  520.       local r = byte(code, pos-1, pos-1)%8
  521.       if ctx.rexb then r = r + 8; ctx.rexb = false end
  522.       x = regs[r+1]
           -- Fixed operands: first register of the current size, cl, dx, 1.
  523.     elseif p == "a" then x = regs[1]
  524.     elseif p == "c" then x = "cl"
  525.     elseif p == "d" then x = "dx"
  526.     elseif p == "1" then x = "1"
  527.     else
             -- All remaining chars need the ModRM byte. Decode it (together
             -- with the SIB byte and the displacement) on first use. It may
             -- have been fetched already by getmrm() and cached in ctx.mrm.
  528.       if not mode then
  529.         mode = ctx.mrm
  530.         if not mode then
  531.           if pos > stop then return incomplete(ctx) end
  532.           mode = byte(code, pos, pos)
  533.           pos = pos+1
  534.         end
               -- Split into r/m (bits 0-2), reg (bits 3-5), mode (bits 6-7).
  535.         rm = mode%8; mode = (mode-rm)/8
  536.         sp = mode%8; mode = (mode-sp)/8
  537.         sdisp = ""
  538.         if mode < 3 then
                 -- r/m 4 announces a SIB byte: base (bits 0-2) replaces rm,
                 -- index (bits 3-5) goes to rx, scale (bits 6-7) to sc.
                 -- Index 4 (checked after applying REX.X) means no index.
  539.           if rm == 4 then
  540.             if pos > stop then return incomplete(ctx) end
  541.             sc = byte(code, pos, pos)
  542.             pos = pos+1
  543.             rm = sc%8; sc = (sc-rm)/8
  544.             rx = sc%8; sc = (sc-rx)/8
  545.             if ctx.rexx then rx = rx + 8; ctx.rexx = false end
  546.             if rx == 4 then rx = nil end
  547.           end
                 -- Displacement: 1 byte for mode 1, 4 bytes for mode 2 and
                 -- for mode 0 with r/m (or SIB base) 5.
  548.           if mode > 0 or rm == 5 then
  549.             local dsz = mode
  550.             if dsz ~= 1 then dsz = 4 end
  551.             local disp = getimm(ctx, pos, dsz); if not disp then return end
                   -- Mode 0 with r/m 5 has no base register.
  552.             if mode == 0 then rm = nil end
                   -- Relative to a register (or to rip: no SIB byte in x64
                   -- mode without a32): print as a signed offset. Otherwise
                   -- print as an absolute address, with the upper 32 bits
                   -- filled in for a negative value in 64 bit addressing.
  553.             if rm or rx or (not sc and ctx.x64 and not ctx.a32) then
  554.               if dsz == 1 and disp > 127 then
  555.                 sdisp = format("-0x%x", 256-disp)
  556.               elseif disp >= 0 and disp <= 0x7fffffff then
  557.                 sdisp = format("+0x%x", disp)
  558.               else
  559.                 sdisp = format("-0x%x", (0xffffffff+1)-disp)
  560.               end
  561.             else
  562.               sdisp = format(ctx.x64 and not ctx.a32 and
  563.                 not (disp >= 0 and disp <= 0x7fffffff)
  564.                 and "0xffffffff%08x" or "0x%08x", disp)
  565.             end
  566.             pos = pos+dsz
  567.           end
  568.         end
               -- REX.B extends r/m (or the SIB base), REX.R extends reg.
  569.         if rm and ctx.rexb then rm = rm + 8; ctx.rexb = false end
  570.         if ctx.rexr then sp = sp + 8; ctx.rexr = false end
  571.       end
             -- ModRM r/m operand: a register for mode 3, else a memory operand.
  572.       if p == "m" then
  573.         if mode == 3 then x = regs[rm+1]
  574.         else
                 -- The a32 prefix selects the 32 bit address registers.
  575.           local aregs = ctx.a32 and map_regs.D or ctx.aregs
  576.           local srm, srx = "", ""
  577.           if rm then srm = aregs[rm+1]
  578.           elseif not sc and ctx.x64 and not ctx.a32 then srm = "rip" end
  579.           ctx.a32 = false
  580.           if rx then
  581.             if rm then srm = srm.."+" end
  582.             srx = aregs[rx+1]
  583.             if sc > 0 then srx = srx.."*"..(2^sc) end
  584.           end
  585.           x = format("[%s%s%s]", srm, srx, sdisp)
  586.         end
               -- A memory operand gets a size keyword, unless the pattern has
               -- another operand which implies the size (a, R, r, g) or
               -- suppresses it (p). The "t" char forces the keyword.
  587.         if mode < 3 and
  588.            (not match(pat, "[aRrgp]") or match(pat, "t")) then -- Yuck.
  589.           x = map_sz2prefix[sz].." "..x
  590.         end
             -- Register from the ModRM reg field, for the current size.
  591.       elseif p == "r" then x = regs[sp+1]
             -- Segment register from the ModRM reg field.
  592.       elseif p == "g" then x = map_segregs[sp+1]
  593.       elseif p == "p" then -- Suppress prefix.
             -- FP stack pseudo-register from the ModRM r/m field.
  594.       elseif p == "f" then x = "st"..rm
             -- Control register. Outside of x64 mode a lock prefix turns
             -- CR0 into CR8 and is consumed.
  595.       elseif p == "x" then
  596.         if sp == 0 and ctx.lock and not ctx.x64 then
  597.           x = "CR8"; ctx.lock = false
  598.         else
  599.           x = "CR"..sp
  600.         end
             -- Debug and test registers.
  601.       elseif p == "y" then x = "DR"..sp
  602.       elseif p == "z" then x = "TR"..sp
             -- "t" only forces the size keyword, see the "m" case above.
  603.       elseif p == "t" then
  604.       else
  605.         error("bad pattern `"..pat.."'")
  606.       end
  607.     end
           -- Operands are separated by ", ".
  608.     if x then operands = operands and operands..", "..x or x end
  609.   end
  610.   ctx.pos = pos
  611.   return putop(ctx, name, operands)
  612. end

  613. -- Forward declaration. dispatch() looks up its handlers in map_act, but
  -- the table can only be filled in after dispatch() and the other helpers
  -- used by the handlers have been defined.
  614. local map_act

  -- Return the ModRM byte of the current instruction.
  --
  -- The first call reads the byte at ctx.pos, advances ctx.pos and caches
  -- the byte in ctx.mrm; later calls return the cached byte. Returns nil if
  -- the byte would lie beyond ctx.stop.
  local function getmrm(ctx)
    local cached = ctx.mrm
    if cached then return cached end
    local at = ctx.pos
    if at > ctx.stop then return nil end
    local mrm = byte(ctx.code, at, at)
    ctx.pos = at + 1
    ctx.mrm = mrm
    return mrm
  end

  -- Resolve the variants of an opcode pattern and call its handler.
  --
  -- opat:   pattern from an opcode map or group (nil/false: unknown opcode).
  -- patgrp: operand pattern inherited from an opcode group reference. It is
  --         used if the selected pattern consists of a name only.
  local function dispatch(ctx, opat, patgrp)
    if not opat then return unknown(ctx) end

    -- MMX/SSE variants depending on prefix: (none)|rep|o16|repne.
    -- The prefix which selects the variant is consumed.
    if match(opat, "%|") then
      local selector
      local rep_kind = ctx.rep
      if rep_kind then
        if rep_kind == "rep" then
          selector = "%|([^%|]*)"
        else
          selector = "%|[^%|]*%|[^%|]*%|([^%|]*)"
        end
        ctx.rep = false
      elseif ctx.o16 then
        selector = "%|[^%|]*%|([^%|]*)"
        ctx.o16 = false
      else
        selector = "^[^%|]*"
      end
      opat = match(opat, selector)
      if not opat then return unknown(ctx) end
      -- Resetting both ctx.rep and ctx.o16 here was tried, but:
      --XXX fails for 66 f2 0f 38 f1 06  crc32 eax,WORD PTR [esi]
      --XXX remove in branches?
    end

    -- reg$mem variants: a ModRM byte >= 0xC0 selects the part before the "$".
    if match(opat, "%$") then
      local mrm = getmrm(ctx)
      if not mrm then return incomplete(ctx) end
      if mrm >= 192 then
        opat = match(opat, "^[^%$]*")
      else
        opat = match(opat, "%$(.*)")
      end
    end

    -- An empty variant means: not defined for this prefix or operand kind.
    if opat == "" then return unknown(ctx) end

    local name, pat = match(opat, "^([a-z0-9 ]*)(.*)")
    if pat == "" and patgrp then
      pat = patgrp
    end
    -- The first char after the name picks the handler.
    return map_act[sub(pat, 1, 1)](ctx, name, pat)
  end

  -- Get a pattern from an opcode map and dispatch to handler.
  --
  -- Reads the opcode byte at ctx.pos, advances ctx.pos behind it and passes
  -- the pattern found in opcmap (nil for an undefined opcode) to dispatch().
  --
  -- If ctx.pos is already beyond ctx.stop there is no opcode byte left. This
  -- happens when the block ends right after a 0F, 0F 38 or 0F 3A escape byte.
  -- The instruction is then reported as "(incomplete)". Without this check a
  -- byte outside of the block would be decoded, or -- at the very end of the
  -- code string -- the hex dump in putop() would fail on a missing byte.
  local function dispatchmap(ctx, opcmap)
    local pos = ctx.pos
    if pos > ctx.stop then return incomplete(ctx) end
    local opat = opcmap[byte(ctx.code, pos, pos)]
    ctx.pos = pos + 1
    return dispatch(ctx, opat)
  end

  -- Map for action codes. The key is either the first char after the opcode
  -- name in a pattern, or the name of a special handler reached through "*".
  -- Every handler is called as handler(ctx, name, pat).
  do
    local act = {}

    -- Simple opcodes without operands.
    act[""] = function(ctx, name, pat)
      return putop(ctx, name)
    end

    -- Operand size chars fall right through to the operand decoder.
    for size_char in gmatch("BWDQVUTMXPFG", ".") do
      act[size_char] = putpat
    end

    -- Collect prefixes: "name:" sets ctx[name], "name:flag" sets ctx[flag].
    act[":"] = function(ctx, name, pat)
      local flag = name
      if pat ~= ":" then flag = sub(pat, 2) end
      ctx[flag] = name
      -- Limit #prefixes.
      if ctx.pos - ctx.start > 5 then return unknown(ctx) end
    end

    -- Chain to special handler specified by name.
    act["*"] = function(ctx, name, pat)
      local special = map_act[name]
      return special(ctx, name, sub(pat, 2))
    end

    -- Use named subtable for opcode group, indexed by the ModRM reg field.
    -- The rest of the pattern is passed on as the default operand pattern.
    act["!"] = function(ctx, name, pat)
      local mrm = getmrm(ctx)
      if not mrm then return incomplete(ctx) end
      local reg = ((mrm - (mrm % 8)) / 8) % 8
      local group = map_opcgroup[name]
      return dispatch(ctx, group[reg + 1], sub(pat, 2))
    end

    -- o16,o32[,o64] variants, separated by commas.
    act.sz = function(ctx, name, pat)
      if ctx.o16 then
        -- First variant; the prefix is consumed.
        ctx.o16 = false
      else
        pat = match(pat, ",(.*)")
        -- REX.W selects the third variant, but only if there is one.
        local wide = ctx.rexw and match(pat, ",(.*)")
        if wide then
          pat = wide
          ctx.rexw = false
        end
      end
      return dispatch(ctx, (match(pat, "^[^,]*")))
    end

    -- Two-byte opcode dispatch.
    act.opc2 = function(ctx, name, pat)
      return dispatchmap(ctx, map_opc2)
    end

    -- Three-byte opcode dispatch. The pattern names the sub-map.
    act.opc3 = function(ctx, name, pat)
      return dispatchmap(ctx, map_opc3[pat])
    end

    -- VMX/SVM dispatch on the complete (already fetched) ModRM byte.
    act.vm = function(ctx, name, pat)
      return dispatch(ctx, map_opcvm[ctx.mrm])
    end

    -- Floating point opcode dispatch. The pattern is the low digit of the
    -- opcode (0-7 for D8-DF).
    act.fp = function(ctx, name, pat)
      local mrm = getmrm(ctx)
      if not mrm then return incomplete(ctx) end
      local rm = mrm % 8
      local reg = ((mrm - rm) / 8) % 8
      local idx = pat*8 + reg
      if mrm >= 192 then
        -- Register forms live in the second half of the map.
        idx = idx + 64
      end
      local opat = map_opcfp[idx]
      if type(opat) == "table" then
        opat = opat[rm + 1]
      end
      return dispatch(ctx, opat)
    end

    -- REX prefix. The pattern lists the bits which are set.
    act.rex = function(ctx, name, pat)
      -- Only 1 REX prefix allowed.
      if ctx.rex then return unknown(ctx) end
      for i = 1, #pat do
        ctx["rex"..sub(pat, i, i)] = true
      end
      ctx.rex = true
    end

    -- Special case for nop with REX prefix: use the regular pattern then.
    act.nop = function(ctx, name, pat)
      if ctx.rex then
        return dispatch(ctx, pat)
      end
      return dispatch(ctx, "nop")
    end

    map_act = act
  end

  732. ------------------------------------------------------------------------------

  -- Disassemble a block of code.
  --
  -- ofs: zero-based offset of the first byte in ctx.code (default: 0).
  -- len: number of bytes to disassemble (default: up to the end of the code).
  -- One line per instruction is written through ctx.out. Leftover bytes at
  -- the end (e.g. dangling prefixes) are reported as "(incomplete)".
  local function disass_block(ctx, ofs, len)
    ofs = ofs or 0
    local stop
    if len then
      stop = ofs + len
    else
      stop = #ctx.code
    end
    -- Positions are one-based string indexes from here on.
    local first = ofs + 1
    ctx.start, ctx.pos, ctx.stop = first, first, stop
    ctx.imm = nil
    ctx.mrm = false
    clearprefixes(ctx)
    while ctx.pos <= stop do
      dispatchmap(ctx, ctx.map1)
    end
    if ctx.pos ~= ctx.start then
      incomplete(ctx)
    end
  end

  -- Extended API: create a disassembler context for 32 bit x86 code.
  -- Then call ctx:disass(ofs, len).
  --
  -- code: string with the machine code.
  -- addr: address of the first byte of the code (default: 0).
  -- out:  function called with each output line (default: io.write).
  -- The fields symtab (address -> name) and hexdump (width of the hex dump
  -- column, 0 to disable) may be changed before disassembling.
  local function create(code, addr, out)
    return {
      code = code,
      -- Biased by one, because code positions are one-based string indexes.
      addr = (addr or 0) - 1,
      out = out or io.write,
      symtab = {},
      disass = disass_block,
      hexdump = 16,
      x64 = false,
      map1 = map_opc1_32,
      aregs = map_regs.D,
    }
  end

  -- Extended API: create a disassembler context for x64 code. Same as
  -- create(), but with the 64 bit opcode map and 64 bit address registers.
  local function create64(code, addr, out)
    local ctx = create(code, addr, out)
    ctx.x64, ctx.map1, ctx.aregs = true, map_opc1_64, map_regs.Q
    return ctx
  end

  -- Simple API: disassemble all of code (a string of 32 bit x86 machine
  -- code) located at address addr and output each line via out.
  local function disass(code, addr, out)
    local ctx = create(code, addr, out)
    ctx:disass()
  end

  -- Simple API: disassemble all of code (a string of x64 machine code)
  -- located at address addr and output each line via out.
  local function disass64(code, addr, out)
    local ctx = create64(code, addr, out)
    ctx:disass()
  end

  -- Return register name for RID (32 bit x86): RIDs 0-7 are the 32 bit
  -- general purpose registers, RIDs from 8 on are the xmm registers.
  local function regname(r)
    local bank, idx = map_regs.X, r - 7
    if r < 8 then
      bank, idx = map_regs.D, r + 1
    end
    return bank[idx]
  end

  -- Return register name for RID (x64): RIDs 0-15 are the 64 bit general
  -- purpose registers, RIDs from 16 on are the xmm registers.
  local function regname64(r)
    local bank, idx = map_regs.X, r - 15
    if r < 16 then
      bank, idx = map_regs.Q, r + 1
    end
    return bank[idx]
  end

  -- Public module functions, each in a 32 bit x86 and an x64 flavour.
  return {
    -- Extended API: create a context, then call ctx:disass(ofs, len).
    create = create, create64 = create64,
    -- Simple API: disassemble a whole code string in one go.
    disass = disass, disass64 = disass64,
    -- Map a register ID to its name.
    regname = regname, regname64 = regname64,
  }