12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392 |
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- """pefile, Portable Executable reader module
- All the PE file basic structures are available with their default names as
- attributes of the instance returned.
- Processed elements such as the import table are made available with lowercase
- names, to differentiate them from the upper case basic structure names.
- pefile has been tested against many edge cases such as corrupted and malformed
- PEs as well as malware, which often attempts to abuse the format way beyond its
- standard use. To the best of my knowledge most of the abuse is handled
- gracefully.
- Copyright (c) 2005-2021 Ero Carrera <ero.carrera@gmail.com>
- """
- __author__ = "Ero Carrera"
- __version__ = "2021.9.3"
- __contact__ = "ero.carrera@gmail.com"
- import collections
- import os
- import struct
- import codecs
- import time
- import math
- import string
- import mmap
- from collections import Counter
- from hashlib import sha1
- from hashlib import sha256
- from hashlib import sha512
- from hashlib import md5
- import functools
- import copy as copymod
- import ordlookup
- codecs.register_error("backslashreplace_", codecs.lookup_error("backslashreplace"))
- long = int
- # lru_cache with a shallow copy of the objects returned (list, dicts, ..)
- # we don't use deepcopy as it's _really_ slow and the data we retrieved using
- # this is enough with copy.copy taken from
- # https://stackoverflow.com/questions/54909357
def lru_cache(maxsize=128, typed=False, copy=False):
    """Variant of functools.lru_cache that can hand out shallow copies.

    With ``copy=False`` this is exactly ``functools.lru_cache``.  With
    ``copy=True`` every call returns ``copy.copy()`` of the cached value,
    so callers mutating the returned list/dict cannot corrupt the cache.
    ``deepcopy`` is deliberately avoided — it is far slower, and a shallow
    copy is sufficient for the data cached here (idea taken from
    https://stackoverflow.com/questions/54909357).
    """
    if not copy:
        return functools.lru_cache(maxsize, typed)

    def decorator(func):
        memoized = functools.lru_cache(maxsize, typed)(func)

        @functools.wraps(func)
        def copying_wrapper(*args, **kwargs):
            # Shallow copy only; deepcopy was measured to be too slow here.
            return copymod.copy(memoized(*args, **kwargs))

        return copying_wrapper

    return decorator
@lru_cache(maxsize=2048)
def cache_adjust_FileAlignment(val, file_alignment):
    """Round *val* down to the standard 0x200 file alignment.

    If the declared file alignment is below the hardcoded minimum
    (FILE_ALIGNMENT_HARDCODED_VALUE), the offset is used unmodified,
    matching the Windows loader's behavior for small alignments.
    """
    if file_alignment < FILE_ALIGNMENT_HARDCODED_VALUE:
        return val
    # Use integer floor division: the previous int(val / 0x200) went
    # through a binary float and silently loses precision for values
    # >= 2**53, producing a wrongly rounded offset.
    return (val // 0x200) * 0x200
@lru_cache(maxsize=2048)
def cache_adjust_SectionAlignment(val, section_alignment, file_alignment):
    """Round *val* down to the effective section alignment.

    Alignments below the page size (0x1000) fall back to the file
    alignment, as the loader does.  Already-aligned values (and a zero
    effective alignment) are returned unchanged.
    """
    if section_alignment < 0x1000:  # page size
        section_alignment = file_alignment

    # 0x200 is the minimum valid FileAlignment according to the documentation
    # although ntoskrnl.exe has an alignment of 0x80 in some Windows versions
    #
    # elif section_alignment < 0x80:
    #     section_alignment = 0x80

    if section_alignment and val % section_alignment:
        # Integer floor division: the previous int(val / section_alignment)
        # went through a binary float and loses precision for values
        # >= 2**53, yielding a wrongly rounded address.
        return section_alignment * (val // section_alignment)
    return val
def count_zeroes(data):
    """Return how many NUL elements *data* contains.

    Works for both string-like buffers, whose ``count()`` expects the
    one-character string ``"\\0"``, and Python 3 ``bytes``, whose
    ``count()`` expects the integer ``0``.
    """
    try:
        # String-like buffers (e.g. str) take a one-char substring.
        return data.count("\0")
    except TypeError:
        # bytes/bytearray in Python 3 take an int instead.
        return data.count(0)
# Default for PE(..., fast_load=...): full parse unless overridden.
fast_load = False
# This will set a maximum length of a string to be retrieved from the file.
# It's there to prevent loading massive amounts of data from memory mapped
# files. Strings longer than 1MB should be rather rare.
MAX_STRING_LENGTH = 0x100000  # 2^20
# Maximum number of imports to parse.
MAX_IMPORT_SYMBOLS = 0x2000
# Limit maximum length for specific string types separately
MAX_IMPORT_NAME_LENGTH = 0x200
MAX_DLL_LENGTH = 0x200
MAX_SYMBOL_NAME_LENGTH = 0x200
# Limit maximum number of sections before processing of sections will stop
MAX_SECTIONS = 0x800
# The global maximum number of resource entries to parse per file
MAX_RESOURCE_ENTRIES = 0x8000
# The maximum depth of nested resource tables
MAX_RESOURCE_DEPTH = 32
# Limit number of exported symbols
MAX_SYMBOL_EXPORT_COUNT = 0x2000
# File-format magic numbers (little-endian as read from the headers).
IMAGE_DOS_SIGNATURE = 0x5A4D  # 'MZ'
IMAGE_DOSZM_SIGNATURE = 0x4D5A  # byte-swapped 'ZM' variant
IMAGE_NE_SIGNATURE = 0x454E  # 16-bit New Executable
IMAGE_LE_SIGNATURE = 0x454C  # Linear Executable
IMAGE_LX_SIGNATURE = 0x584C  # OS/2 LX executable
IMAGE_TE_SIGNATURE = 0x5A56  # Terse Executables have a 'VZ' signature
IMAGE_NT_SIGNATURE = 0x00004550  # 'PE\0\0'
IMAGE_NUMBEROF_DIRECTORY_ENTRIES = 16
# High bit of an import thunk marks an import by ordinal (32/64-bit).
IMAGE_ORDINAL_FLAG = 0x80000000
IMAGE_ORDINAL_FLAG64 = 0x8000000000000000
# Optional-header magic distinguishing PE32 from PE32+ (64-bit).
OPTIONAL_HEADER_MAGIC_PE = 0x10B
OPTIONAL_HEADER_MAGIC_PE_PLUS = 0x20B
def two_way_dict(pairs):
    """Build a bidirectional lookup dict from (name, value) *pairs*.

    The result maps both name -> value and value -> name. When several
    names share one value, the name -> value entries all survive, while
    the single value -> name entry keeps the last name seen.
    """
    mapping = {value: name for name, value in pairs}
    mapping.update(pairs)
    return mapping
# Indices into the OPTIONAL_HEADER DataDirectory array.
directory_entry_types = [
    ("IMAGE_DIRECTORY_ENTRY_EXPORT", 0),
    ("IMAGE_DIRECTORY_ENTRY_IMPORT", 1),
    ("IMAGE_DIRECTORY_ENTRY_RESOURCE", 2),
    ("IMAGE_DIRECTORY_ENTRY_EXCEPTION", 3),
    ("IMAGE_DIRECTORY_ENTRY_SECURITY", 4),
    ("IMAGE_DIRECTORY_ENTRY_BASERELOC", 5),
    ("IMAGE_DIRECTORY_ENTRY_DEBUG", 6),
    # Architecture on non-x86 platforms
    ("IMAGE_DIRECTORY_ENTRY_COPYRIGHT", 7),
    ("IMAGE_DIRECTORY_ENTRY_GLOBALPTR", 8),
    ("IMAGE_DIRECTORY_ENTRY_TLS", 9),
    ("IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG", 10),
    ("IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT", 11),
    ("IMAGE_DIRECTORY_ENTRY_IAT", 12),
    ("IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT", 13),
    ("IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR", 14),
    ("IMAGE_DIRECTORY_ENTRY_RESERVED", 15),
]

# Two-way lookup: name -> index and index -> name.
DIRECTORY_ENTRY = two_way_dict(directory_entry_types)

# COFF file header Characteristics bit flags.
image_characteristics = [
    ("IMAGE_FILE_RELOCS_STRIPPED", 0x0001),
    ("IMAGE_FILE_EXECUTABLE_IMAGE", 0x0002),
    ("IMAGE_FILE_LINE_NUMS_STRIPPED", 0x0004),
    ("IMAGE_FILE_LOCAL_SYMS_STRIPPED", 0x0008),
    ("IMAGE_FILE_AGGRESIVE_WS_TRIM", 0x0010),
    ("IMAGE_FILE_LARGE_ADDRESS_AWARE", 0x0020),
    ("IMAGE_FILE_16BIT_MACHINE", 0x0040),
    ("IMAGE_FILE_BYTES_REVERSED_LO", 0x0080),
    ("IMAGE_FILE_32BIT_MACHINE", 0x0100),
    ("IMAGE_FILE_DEBUG_STRIPPED", 0x0200),
    ("IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP", 0x0400),
    ("IMAGE_FILE_NET_RUN_FROM_SWAP", 0x0800),
    ("IMAGE_FILE_SYSTEM", 0x1000),
    ("IMAGE_FILE_DLL", 0x2000),
    ("IMAGE_FILE_UP_SYSTEM_ONLY", 0x4000),
    ("IMAGE_FILE_BYTES_REVERSED_HI", 0x8000),
]

IMAGE_CHARACTERISTICS = two_way_dict(image_characteristics)

# Section header Characteristics bit flags. Several values are shared by
# more than one name (e.g. 0x00004000, 0x00008000, 0x00020000); in the
# reverse (value -> name) direction the later entry in this list wins.
section_characteristics = [
    ("IMAGE_SCN_TYPE_REG", 0x00000000),  # reserved
    ("IMAGE_SCN_TYPE_DSECT", 0x00000001),  # reserved
    ("IMAGE_SCN_TYPE_NOLOAD", 0x00000002),  # reserved
    ("IMAGE_SCN_TYPE_GROUP", 0x00000004),  # reserved
    ("IMAGE_SCN_TYPE_NO_PAD", 0x00000008),  # reserved
    ("IMAGE_SCN_TYPE_COPY", 0x00000010),  # reserved
    ("IMAGE_SCN_CNT_CODE", 0x00000020),
    ("IMAGE_SCN_CNT_INITIALIZED_DATA", 0x00000040),
    ("IMAGE_SCN_CNT_UNINITIALIZED_DATA", 0x00000080),
    ("IMAGE_SCN_LNK_OTHER", 0x00000100),
    ("IMAGE_SCN_LNK_INFO", 0x00000200),
    ("IMAGE_SCN_LNK_OVER", 0x00000400),  # reserved
    ("IMAGE_SCN_LNK_REMOVE", 0x00000800),
    ("IMAGE_SCN_LNK_COMDAT", 0x00001000),
    ("IMAGE_SCN_MEM_PROTECTED", 0x00004000),  # obsolete
    ("IMAGE_SCN_NO_DEFER_SPEC_EXC", 0x00004000),
    ("IMAGE_SCN_GPREL", 0x00008000),
    ("IMAGE_SCN_MEM_FARDATA", 0x00008000),
    ("IMAGE_SCN_MEM_SYSHEAP", 0x00010000),  # obsolete
    ("IMAGE_SCN_MEM_PURGEABLE", 0x00020000),
    ("IMAGE_SCN_MEM_16BIT", 0x00020000),
    ("IMAGE_SCN_MEM_LOCKED", 0x00040000),
    ("IMAGE_SCN_MEM_PRELOAD", 0x00080000),
    ("IMAGE_SCN_ALIGN_1BYTES", 0x00100000),
    ("IMAGE_SCN_ALIGN_2BYTES", 0x00200000),
    ("IMAGE_SCN_ALIGN_4BYTES", 0x00300000),
    ("IMAGE_SCN_ALIGN_8BYTES", 0x00400000),
    ("IMAGE_SCN_ALIGN_16BYTES", 0x00500000),  # default alignment
    ("IMAGE_SCN_ALIGN_32BYTES", 0x00600000),
    ("IMAGE_SCN_ALIGN_64BYTES", 0x00700000),
    ("IMAGE_SCN_ALIGN_128BYTES", 0x00800000),
    ("IMAGE_SCN_ALIGN_256BYTES", 0x00900000),
    ("IMAGE_SCN_ALIGN_512BYTES", 0x00A00000),
    ("IMAGE_SCN_ALIGN_1024BYTES", 0x00B00000),
    ("IMAGE_SCN_ALIGN_2048BYTES", 0x00C00000),
    ("IMAGE_SCN_ALIGN_4096BYTES", 0x00D00000),
    ("IMAGE_SCN_ALIGN_8192BYTES", 0x00E00000),
    ("IMAGE_SCN_ALIGN_MASK", 0x00F00000),
    ("IMAGE_SCN_LNK_NRELOC_OVFL", 0x01000000),
    ("IMAGE_SCN_MEM_DISCARDABLE", 0x02000000),
    ("IMAGE_SCN_MEM_NOT_CACHED", 0x04000000),
    ("IMAGE_SCN_MEM_NOT_PAGED", 0x08000000),
    ("IMAGE_SCN_MEM_SHARED", 0x10000000),
    ("IMAGE_SCN_MEM_EXECUTE", 0x20000000),
    ("IMAGE_SCN_MEM_READ", 0x40000000),
    ("IMAGE_SCN_MEM_WRITE", 0x80000000),
]

SECTION_CHARACTERISTICS = two_way_dict(section_characteristics)
# Debug directory entry Type values.
debug_types = [
    ("IMAGE_DEBUG_TYPE_UNKNOWN", 0),
    ("IMAGE_DEBUG_TYPE_COFF", 1),
    ("IMAGE_DEBUG_TYPE_CODEVIEW", 2),
    ("IMAGE_DEBUG_TYPE_FPO", 3),
    ("IMAGE_DEBUG_TYPE_MISC", 4),
    ("IMAGE_DEBUG_TYPE_EXCEPTION", 5),
    ("IMAGE_DEBUG_TYPE_FIXUP", 6),
    ("IMAGE_DEBUG_TYPE_OMAP_TO_SRC", 7),
    ("IMAGE_DEBUG_TYPE_OMAP_FROM_SRC", 8),
    ("IMAGE_DEBUG_TYPE_BORLAND", 9),
    ("IMAGE_DEBUG_TYPE_RESERVED10", 10),
    ("IMAGE_DEBUG_TYPE_CLSID", 11),
    ("IMAGE_DEBUG_TYPE_VC_FEATURE", 12),
    ("IMAGE_DEBUG_TYPE_POGO", 13),
    ("IMAGE_DEBUG_TYPE_ILTCG", 14),
    ("IMAGE_DEBUG_TYPE_MPX", 15),
    ("IMAGE_DEBUG_TYPE_REPRO", 16),
    ("IMAGE_DEBUG_TYPE_EX_DLLCHARACTERISTICS", 20),
]

DEBUG_TYPE = two_way_dict(debug_types)

# Optional-header Subsystem values.
subsystem_types = [
    ("IMAGE_SUBSYSTEM_UNKNOWN", 0),
    ("IMAGE_SUBSYSTEM_NATIVE", 1),
    ("IMAGE_SUBSYSTEM_WINDOWS_GUI", 2),
    ("IMAGE_SUBSYSTEM_WINDOWS_CUI", 3),
    ("IMAGE_SUBSYSTEM_OS2_CUI", 5),
    ("IMAGE_SUBSYSTEM_POSIX_CUI", 7),
    ("IMAGE_SUBSYSTEM_NATIVE_WINDOWS", 8),
    ("IMAGE_SUBSYSTEM_WINDOWS_CE_GUI", 9),
    ("IMAGE_SUBSYSTEM_EFI_APPLICATION", 10),
    ("IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER", 11),
    ("IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER", 12),
    ("IMAGE_SUBSYSTEM_EFI_ROM", 13),
    ("IMAGE_SUBSYSTEM_XBOX", 14),
    ("IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION", 16),
]

SUBSYSTEM_TYPE = two_way_dict(subsystem_types)

# COFF header Machine values. ALPHA64 and AXP64 share 0x0284; the reverse
# mapping keeps the later name (AXP64).
machine_types = [
    ("IMAGE_FILE_MACHINE_UNKNOWN", 0),
    ("IMAGE_FILE_MACHINE_I386", 0x014C),
    ("IMAGE_FILE_MACHINE_R3000", 0x0162),
    ("IMAGE_FILE_MACHINE_R4000", 0x0166),
    ("IMAGE_FILE_MACHINE_R10000", 0x0168),
    ("IMAGE_FILE_MACHINE_WCEMIPSV2", 0x0169),
    ("IMAGE_FILE_MACHINE_ALPHA", 0x0184),
    ("IMAGE_FILE_MACHINE_SH3", 0x01A2),
    ("IMAGE_FILE_MACHINE_SH3DSP", 0x01A3),
    ("IMAGE_FILE_MACHINE_SH3E", 0x01A4),
    ("IMAGE_FILE_MACHINE_SH4", 0x01A6),
    ("IMAGE_FILE_MACHINE_SH5", 0x01A8),
    ("IMAGE_FILE_MACHINE_ARM", 0x01C0),
    ("IMAGE_FILE_MACHINE_THUMB", 0x01C2),
    ("IMAGE_FILE_MACHINE_ARMNT", 0x01C4),
    ("IMAGE_FILE_MACHINE_AM33", 0x01D3),
    ("IMAGE_FILE_MACHINE_POWERPC", 0x01F0),
    ("IMAGE_FILE_MACHINE_POWERPCFP", 0x01F1),
    ("IMAGE_FILE_MACHINE_IA64", 0x0200),
    ("IMAGE_FILE_MACHINE_MIPS16", 0x0266),
    ("IMAGE_FILE_MACHINE_ALPHA64", 0x0284),
    ("IMAGE_FILE_MACHINE_AXP64", 0x0284),  # same
    ("IMAGE_FILE_MACHINE_MIPSFPU", 0x0366),
    ("IMAGE_FILE_MACHINE_MIPSFPU16", 0x0466),
    ("IMAGE_FILE_MACHINE_TRICORE", 0x0520),
    ("IMAGE_FILE_MACHINE_CEF", 0x0CEF),
    ("IMAGE_FILE_MACHINE_EBC", 0x0EBC),
    ("IMAGE_FILE_MACHINE_AMD64", 0x8664),
    ("IMAGE_FILE_MACHINE_M32R", 0x9041),
    ("IMAGE_FILE_MACHINE_ARM64", 0xAA64),
    ("IMAGE_FILE_MACHINE_CEE", 0xC0EE),
]

MACHINE_TYPE = two_way_dict(machine_types)

# Base relocation entry types. MIPS_JMPADDR16 and IA64_IMM64 share value 9;
# the reverse mapping keeps the later name.
relocation_types = [
    ("IMAGE_REL_BASED_ABSOLUTE", 0),
    ("IMAGE_REL_BASED_HIGH", 1),
    ("IMAGE_REL_BASED_LOW", 2),
    ("IMAGE_REL_BASED_HIGHLOW", 3),
    ("IMAGE_REL_BASED_HIGHADJ", 4),
    ("IMAGE_REL_BASED_MIPS_JMPADDR", 5),
    ("IMAGE_REL_BASED_SECTION", 6),
    ("IMAGE_REL_BASED_REL", 7),
    ("IMAGE_REL_BASED_MIPS_JMPADDR16", 9),
    ("IMAGE_REL_BASED_IA64_IMM64", 9),
    ("IMAGE_REL_BASED_DIR64", 10),
    ("IMAGE_REL_BASED_HIGH3ADJ", 11),
]

RELOCATION_TYPE = two_way_dict(relocation_types)

# Optional-header DllCharacteristics bit flags.
dll_characteristics = [
    ("IMAGE_LIBRARY_PROCESS_INIT", 0x0001),  # reserved
    ("IMAGE_LIBRARY_PROCESS_TERM", 0x0002),  # reserved
    ("IMAGE_LIBRARY_THREAD_INIT", 0x0004),  # reserved
    ("IMAGE_LIBRARY_THREAD_TERM", 0x0008),  # reserved
    ("IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA", 0x0020),
    ("IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE", 0x0040),
    ("IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY", 0x0080),
    ("IMAGE_DLLCHARACTERISTICS_NX_COMPAT", 0x0100),
    ("IMAGE_DLLCHARACTERISTICS_NO_ISOLATION", 0x0200),
    ("IMAGE_DLLCHARACTERISTICS_NO_SEH", 0x0400),
    ("IMAGE_DLLCHARACTERISTICS_NO_BIND", 0x0800),
    ("IMAGE_DLLCHARACTERISTICS_APPCONTAINER", 0x1000),
    ("IMAGE_DLLCHARACTERISTICS_WDM_DRIVER", 0x2000),
    ("IMAGE_DLLCHARACTERISTICS_GUARD_CF", 0x4000),
    ("IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE", 0x8000),
]

DLL_CHARACTERISTICS = two_way_dict(dll_characteristics)

# Minimum valid FileAlignment; used by cache_adjust_FileAlignment above.
FILE_ALIGNMENT_HARDCODED_VALUE = 0x200

# Unwind info-related enums
unwind_info_flags = [
    ("UNW_FLAG_EHANDLER", 0x01),
    ("UNW_FLAG_UHANDLER", 0x02),
    ("UNW_FLAG_CHAININFO", 0x04),
]

UNWIND_INFO_FLAGS = two_way_dict(unwind_info_flags)
# x64 register numbering as used in unwind codes.
registers = [
    ("RAX", 0),
    ("RCX", 1),
    ("RDX", 2),
    ("RBX", 3),
    ("RSP", 4),
    ("RBP", 5),
    ("RSI", 6),
    ("RDI", 7),
    ("R8", 8),
    ("R9", 9),
    ("R10", 10),
    ("R11", 11),
    ("R12", 12),
    ("R13", 13),
    ("R14", 14),
    ("R15", 15),
]

REGISTERS = two_way_dict(registers)

# enum _UNWIND_OP_CODES
UWOP_PUSH_NONVOL = 0
UWOP_ALLOC_LARGE = 1
UWOP_ALLOC_SMALL = 2
UWOP_SET_FPREG = 3
UWOP_SAVE_NONVOL = 4
UWOP_SAVE_NONVOL_FAR = 5
UWOP_EPILOG = 6
UWOP_SAVE_XMM128 = 8
UWOP_SAVE_XMM128_FAR = 9
UWOP_PUSH_MACHFRAME = 10

# Resource types
resource_type = [
    ("RT_CURSOR", 1),
    ("RT_BITMAP", 2),
    ("RT_ICON", 3),
    ("RT_MENU", 4),
    ("RT_DIALOG", 5),
    ("RT_STRING", 6),
    ("RT_FONTDIR", 7),
    ("RT_FONT", 8),
    ("RT_ACCELERATOR", 9),
    ("RT_RCDATA", 10),
    ("RT_MESSAGETABLE", 11),
    ("RT_GROUP_CURSOR", 12),
    ("RT_GROUP_ICON", 14),
    ("RT_VERSION", 16),
    ("RT_DLGINCLUDE", 17),
    ("RT_PLUGPLAY", 19),
    ("RT_VXD", 20),
    ("RT_ANICURSOR", 21),
    ("RT_ANIICON", 22),
    ("RT_HTML", 23),
    ("RT_MANIFEST", 24),
]

RESOURCE_TYPE = two_way_dict(resource_type)
# Language definitions (primary language identifiers). LANG_CROATIAN and
# LANG_SERBIAN share 0x1A; the reverse mapping keeps the later name.
lang = [
    ("LANG_NEUTRAL", 0x00),
    ("LANG_INVARIANT", 0x7F),
    ("LANG_AFRIKAANS", 0x36),
    ("LANG_ALBANIAN", 0x1C),
    ("LANG_ARABIC", 0x01),
    ("LANG_ARMENIAN", 0x2B),
    ("LANG_ASSAMESE", 0x4D),
    ("LANG_AZERI", 0x2C),
    ("LANG_BASQUE", 0x2D),
    ("LANG_BELARUSIAN", 0x23),
    ("LANG_BENGALI", 0x45),
    ("LANG_BULGARIAN", 0x02),
    ("LANG_CATALAN", 0x03),
    ("LANG_CHINESE", 0x04),
    ("LANG_CROATIAN", 0x1A),
    ("LANG_CZECH", 0x05),
    ("LANG_DANISH", 0x06),
    ("LANG_DIVEHI", 0x65),
    ("LANG_DUTCH", 0x13),
    ("LANG_ENGLISH", 0x09),
    ("LANG_ESTONIAN", 0x25),
    ("LANG_FAEROESE", 0x38),
    ("LANG_FARSI", 0x29),
    ("LANG_FINNISH", 0x0B),
    ("LANG_FRENCH", 0x0C),
    ("LANG_GALICIAN", 0x56),
    ("LANG_GEORGIAN", 0x37),
    ("LANG_GERMAN", 0x07),
    ("LANG_GREEK", 0x08),
    ("LANG_GUJARATI", 0x47),
    ("LANG_HEBREW", 0x0D),
    ("LANG_HINDI", 0x39),
    ("LANG_HUNGARIAN", 0x0E),
    ("LANG_ICELANDIC", 0x0F),
    ("LANG_INDONESIAN", 0x21),
    ("LANG_ITALIAN", 0x10),
    ("LANG_JAPANESE", 0x11),
    ("LANG_KANNADA", 0x4B),
    ("LANG_KASHMIRI", 0x60),
    ("LANG_KAZAK", 0x3F),
    ("LANG_KONKANI", 0x57),
    ("LANG_KOREAN", 0x12),
    ("LANG_KYRGYZ", 0x40),
    ("LANG_LATVIAN", 0x26),
    ("LANG_LITHUANIAN", 0x27),
    ("LANG_MACEDONIAN", 0x2F),
    ("LANG_MALAY", 0x3E),
    ("LANG_MALAYALAM", 0x4C),
    ("LANG_MANIPURI", 0x58),
    ("LANG_MARATHI", 0x4E),
    ("LANG_MONGOLIAN", 0x50),
    ("LANG_NEPALI", 0x61),
    ("LANG_NORWEGIAN", 0x14),
    ("LANG_ORIYA", 0x48),
    ("LANG_POLISH", 0x15),
    ("LANG_PORTUGUESE", 0x16),
    ("LANG_PUNJABI", 0x46),
    ("LANG_ROMANIAN", 0x18),
    ("LANG_RUSSIAN", 0x19),
    ("LANG_SANSKRIT", 0x4F),
    ("LANG_SERBIAN", 0x1A),
    ("LANG_SINDHI", 0x59),
    ("LANG_SLOVAK", 0x1B),
    ("LANG_SLOVENIAN", 0x24),
    ("LANG_SPANISH", 0x0A),
    ("LANG_SWAHILI", 0x41),
    ("LANG_SWEDISH", 0x1D),
    ("LANG_SYRIAC", 0x5A),
    ("LANG_TAMIL", 0x49),
    ("LANG_TATAR", 0x44),
    ("LANG_TELUGU", 0x4A),
    ("LANG_THAI", 0x1E),
    ("LANG_TURKISH", 0x1F),
    ("LANG_UKRAINIAN", 0x22),
    ("LANG_URDU", 0x20),
    ("LANG_UZBEK", 0x43),
    ("LANG_VIETNAMESE", 0x2A),
    ("LANG_GAELIC", 0x3C),
    ("LANG_MALTESE", 0x3A),
    ("LANG_MAORI", 0x28),
    ("LANG_RHAETO_ROMANCE", 0x17),
    ("LANG_SAAMI", 0x3B),
    ("LANG_SORBIAN", 0x2E),
    ("LANG_SUTU", 0x30),
    ("LANG_TSONGA", 0x31),
    ("LANG_TSWANA", 0x32),
    ("LANG_VENDA", 0x33),
    ("LANG_XHOSA", 0x34),
    ("LANG_ZULU", 0x35),
    ("LANG_ESPERANTO", 0x8F),
    ("LANG_WALON", 0x90),
    ("LANG_CORNISH", 0x91),
    ("LANG_WELSH", 0x92),
    ("LANG_BRETON", 0x93),
]

LANG = two_way_dict(lang)
# Sublanguage definitions. Unlike the other tables, a sublanguage value is
# shared by many languages, so SUBLANG maps name -> value and
# value -> list-of-names (see the construction below).
sublang = [
    ("SUBLANG_NEUTRAL", 0x00),
    ("SUBLANG_DEFAULT", 0x01),
    ("SUBLANG_SYS_DEFAULT", 0x02),
    ("SUBLANG_ARABIC_SAUDI_ARABIA", 0x01),
    ("SUBLANG_ARABIC_IRAQ", 0x02),
    ("SUBLANG_ARABIC_EGYPT", 0x03),
    ("SUBLANG_ARABIC_LIBYA", 0x04),
    ("SUBLANG_ARABIC_ALGERIA", 0x05),
    ("SUBLANG_ARABIC_MOROCCO", 0x06),
    ("SUBLANG_ARABIC_TUNISIA", 0x07),
    ("SUBLANG_ARABIC_OMAN", 0x08),
    ("SUBLANG_ARABIC_YEMEN", 0x09),
    ("SUBLANG_ARABIC_SYRIA", 0x0A),
    ("SUBLANG_ARABIC_JORDAN", 0x0B),
    ("SUBLANG_ARABIC_LEBANON", 0x0C),
    ("SUBLANG_ARABIC_KUWAIT", 0x0D),
    ("SUBLANG_ARABIC_UAE", 0x0E),
    ("SUBLANG_ARABIC_BAHRAIN", 0x0F),
    ("SUBLANG_ARABIC_QATAR", 0x10),
    ("SUBLANG_AZERI_LATIN", 0x01),
    ("SUBLANG_AZERI_CYRILLIC", 0x02),
    ("SUBLANG_CHINESE_TRADITIONAL", 0x01),
    ("SUBLANG_CHINESE_SIMPLIFIED", 0x02),
    ("SUBLANG_CHINESE_HONGKONG", 0x03),
    ("SUBLANG_CHINESE_SINGAPORE", 0x04),
    ("SUBLANG_CHINESE_MACAU", 0x05),
    ("SUBLANG_DUTCH", 0x01),
    ("SUBLANG_DUTCH_BELGIAN", 0x02),
    ("SUBLANG_ENGLISH_US", 0x01),
    ("SUBLANG_ENGLISH_UK", 0x02),
    ("SUBLANG_ENGLISH_AUS", 0x03),
    ("SUBLANG_ENGLISH_CAN", 0x04),
    ("SUBLANG_ENGLISH_NZ", 0x05),
    ("SUBLANG_ENGLISH_EIRE", 0x06),
    ("SUBLANG_ENGLISH_SOUTH_AFRICA", 0x07),
    ("SUBLANG_ENGLISH_JAMAICA", 0x08),
    ("SUBLANG_ENGLISH_CARIBBEAN", 0x09),
    ("SUBLANG_ENGLISH_BELIZE", 0x0A),
    ("SUBLANG_ENGLISH_TRINIDAD", 0x0B),
    ("SUBLANG_ENGLISH_ZIMBABWE", 0x0C),
    ("SUBLANG_ENGLISH_PHILIPPINES", 0x0D),
    ("SUBLANG_FRENCH", 0x01),
    ("SUBLANG_FRENCH_BELGIAN", 0x02),
    ("SUBLANG_FRENCH_CANADIAN", 0x03),
    ("SUBLANG_FRENCH_SWISS", 0x04),
    ("SUBLANG_FRENCH_LUXEMBOURG", 0x05),
    ("SUBLANG_FRENCH_MONACO", 0x06),
    ("SUBLANG_GERMAN", 0x01),
    ("SUBLANG_GERMAN_SWISS", 0x02),
    ("SUBLANG_GERMAN_AUSTRIAN", 0x03),
    ("SUBLANG_GERMAN_LUXEMBOURG", 0x04),
    ("SUBLANG_GERMAN_LIECHTENSTEIN", 0x05),
    ("SUBLANG_ITALIAN", 0x01),
    ("SUBLANG_ITALIAN_SWISS", 0x02),
    ("SUBLANG_KASHMIRI_SASIA", 0x02),
    ("SUBLANG_KASHMIRI_INDIA", 0x02),
    ("SUBLANG_KOREAN", 0x01),
    ("SUBLANG_LITHUANIAN", 0x01),
    ("SUBLANG_MALAY_MALAYSIA", 0x01),
    ("SUBLANG_MALAY_BRUNEI_DARUSSALAM", 0x02),
    ("SUBLANG_NEPALI_INDIA", 0x02),
    ("SUBLANG_NORWEGIAN_BOKMAL", 0x01),
    ("SUBLANG_NORWEGIAN_NYNORSK", 0x02),
    ("SUBLANG_PORTUGUESE", 0x02),
    ("SUBLANG_PORTUGUESE_BRAZILIAN", 0x01),
    ("SUBLANG_SERBIAN_LATIN", 0x02),
    ("SUBLANG_SERBIAN_CYRILLIC", 0x03),
    ("SUBLANG_SPANISH", 0x01),
    ("SUBLANG_SPANISH_MEXICAN", 0x02),
    ("SUBLANG_SPANISH_MODERN", 0x03),
    ("SUBLANG_SPANISH_GUATEMALA", 0x04),
    ("SUBLANG_SPANISH_COSTA_RICA", 0x05),
    ("SUBLANG_SPANISH_PANAMA", 0x06),
    ("SUBLANG_SPANISH_DOMINICAN_REPUBLIC", 0x07),
    ("SUBLANG_SPANISH_VENEZUELA", 0x08),
    ("SUBLANG_SPANISH_COLOMBIA", 0x09),
    ("SUBLANG_SPANISH_PERU", 0x0A),
    ("SUBLANG_SPANISH_ARGENTINA", 0x0B),
    ("SUBLANG_SPANISH_ECUADOR", 0x0C),
    ("SUBLANG_SPANISH_CHILE", 0x0D),
    ("SUBLANG_SPANISH_URUGUAY", 0x0E),
    ("SUBLANG_SPANISH_PARAGUAY", 0x0F),
    ("SUBLANG_SPANISH_BOLIVIA", 0x10),
    ("SUBLANG_SPANISH_EL_SALVADOR", 0x11),
    ("SUBLANG_SPANISH_HONDURAS", 0x12),
    ("SUBLANG_SPANISH_NICARAGUA", 0x13),
    ("SUBLANG_SPANISH_PUERTO_RICO", 0x14),
    ("SUBLANG_SWEDISH", 0x01),
    ("SUBLANG_SWEDISH_FINLAND", 0x02),
    ("SUBLANG_URDU_PAKISTAN", 0x01),
    ("SUBLANG_URDU_INDIA", 0x02),
    ("SUBLANG_UZBEK_LATIN", 0x01),
    ("SUBLANG_UZBEK_CYRILLIC", 0x02),
    ("SUBLANG_DUTCH_SURINAM", 0x03),
    ("SUBLANG_ROMANIAN", 0x01),
    ("SUBLANG_ROMANIAN_MOLDAVIA", 0x02),
    ("SUBLANG_RUSSIAN", 0x01),
    ("SUBLANG_RUSSIAN_MOLDAVIA", 0x02),
    ("SUBLANG_CROATIAN", 0x01),
    ("SUBLANG_LITHUANIAN_CLASSIC", 0x02),
    ("SUBLANG_GAELIC", 0x01),
    ("SUBLANG_GAELIC_SCOTTISH", 0x02),
    ("SUBLANG_GAELIC_MANX", 0x03),
]

# Initialize the dictionary with all the name->value pairs.
# (The previous code first built a two_way_dict here and immediately threw
# it away -- that redundant pass has been removed.)
SUBLANG = dict(sublang)
# Now add all the value->name information, handling duplicates appropriately:
# each value maps to the list of names sharing it, in table order.
for sublang_name, sublang_value in sublang:
    SUBLANG.setdefault(sublang_value, []).append(sublang_name)
- # Resolve a sublang name given the main lang name
- #
def get_sublang_name_for_lang(lang_value, sublang_value):
    """Resolve a sublang name given the main lang value.

    Prefers the sublang whose name contains the main language's name;
    otherwise falls back to the first name known for *sublang_value*,
    or "*unknown*" when the value is not known at all.
    """
    lang_name = LANG.get(lang_value, "*unknown*")
    sublang_names = SUBLANG.get(sublang_value, [])
    # If the main language is a substring of a sublang's name, return it.
    for candidate in sublang_names:
        if lang_name in candidate:
            return candidate
    # Otherwise return the first sublang name, if any is known.
    if sublang_names:
        return sublang_names[0]
    return "*unknown*"
- # Ange Albertini's code to process resources' strings
- #
def parse_strings(data, counter, l):
    """Parse a resource strings blob into the dict *l*, keyed by *counter*.

    Each entry in *data* is a 16-bit little-endian length prefix followed
    by that many UTF-16LE characters. Parsing stops early after three
    decode errors or when the data is exhausted/truncated.
    (Ange Albertini's code to process resources' strings.)
    """
    i = 0
    error_count = 0
    while i < len(data):
        data_slice = data[i : i + 2]
        if len(data_slice) < 2:
            # Truncated length prefix: nothing more to parse.
            break

        # Signed 16-bit read; the sanity check below rejects negatives.
        len_ = struct.unpack("<h", data_slice)[0]
        i += 2
        if len_ != 0 and 0 <= len_ * 2 <= len(data):
            try:
                l[counter] = b(data[i : i + len_ * 2]).decode("utf-16le")
            except UnicodeDecodeError:
                error_count += 1
                pass
            if error_count >= 3:
                # Too many bad entries: assume the table is corrupt.
                break
            i += len_ * 2
        counter += 1
def retrieve_flags(flag_dict, flag_filter):
    """Read the flags from a dictionary and return them in a usable form.

    Returns a list of (flag, value) tuples for every key of *flag_dict*
    that is a string (or bytes) starting with *flag_filter* -- the
    reverse value->name entries of a two-way dict are skipped.
    """
    matching = []
    for flag, value in flag_dict.items():
        if isinstance(flag, (str, bytes)) and flag.startswith(flag_filter):
            matching.append((flag, value))
    return matching
def set_flags(obj, flag_field, flags):
    """Will process the flags and set attributes in the object accordingly.

    For each (flag, value) pair in *flags*, *obj* gains an attribute named
    after the flag, True when value's bits are present in *flag_field*
    and False otherwise.
    """
    for flag, value in flags:
        # Write straight into __dict__ to bypass any custom __setattr__.
        obj.__dict__[flag] = bool(value & flag_field)
def power_of_two(val):
    """Return True when *val* is a non-zero power of two."""
    # A power of two has a single bit set, so val & (val - 1) clears it.
    return bool(val) and not (val & (val - 1))
def b(x):
    """Coerce *x* to bytes: bytes-likes are copied, text is cp1252-encoded."""
    return bytes(x) if isinstance(x, (bytes, bytearray)) else codecs.encode(x, "cp1252")
class UnicodeStringWrapperPostProcessor:
    """This class attempts to help the process of identifying strings
    that might be plain Unicode or Pascal. A list of strings will be
    wrapped on it with the hope the overlappings will help make the
    decision about their type."""

    def __init__(self, pe, rva_ptr):
        # The owning PE instance; all data reads go through it.
        self.pe = pe
        # RVA where the candidate string starts.
        self.rva_ptr = rva_ptr
        # Raw string data as returned by pe.get_string_u_at_rva; filled in
        # by one of the render_* methods, None until then.
        self.string = None

    def get_rva(self):
        """Get the RVA of the string."""
        return self.rva_ptr

    def __str__(self):
        """Return the escaped UTF-8 representation of the string."""
        # NOTE(review): "backslashreplace_" (trailing underscore) must be a
        # custom codecs error handler registered elsewhere in this module --
        # confirm it is defined before this is called.
        return self.decode("utf-8", "backslashreplace_")

    def decode(self, *args):
        # Empty result when nothing has been rendered (or rendering failed).
        if not self.string:
            return ""
        return self.string.decode(*args)

    def invalidate(self):
        """Make this instance None, to express it's no known string type."""
        # NOTE(review): rebinding the local name 'self' has no effect on the
        # instance or the caller's reference -- this is effectively a no-op.
        self = None

    def render_pascal_16(self):
        # Pascal-style string: 16-bit length prefix followed by the data.
        try:
            self.string = self.pe.get_string_u_at_rva(
                self.rva_ptr + 2, max_length=self.get_pascal_16_length()
            )
        except PEFormatError:
            self.pe.get_warnings().append(
                "Failed rendering pascal string, "
                "attempting to read from RVA 0x{0:x}".format(self.rva_ptr + 2)
            )

    def get_pascal_16_length(self):
        # The length prefix lives at the wrapped RVA itself.
        return self.__get_word_value_at_rva(self.rva_ptr)

    def __get_word_value_at_rva(self, rva):
        # Little-endian 16-bit read; returns False (not 0) when the data
        # cannot be read or is truncated -- callers treat it as falsy.
        try:
            data = self.pe.get_data(rva, 2)
        except PEFormatError:
            return False
        if len(data) < 2:
            return False
        return struct.unpack("<H", data)[0]

    def ask_unicode_16(self, next_rva_ptr):
        """The next RVA is taken to be the one immediately following this one.

        Such RVA could indicate the natural end of the string and will be checked
        to see if there's a Unicode NULL character there.
        """
        if self.__get_word_value_at_rva(next_rva_ptr - 2) == 0:
            self.length = next_rva_ptr - self.rva_ptr
            return True
        return False

    def render_unicode_16(self):
        # Plain NUL-terminated UTF-16 string at the wrapped RVA.
        try:
            self.string = self.pe.get_string_u_at_rva(self.rva_ptr)
        except PEFormatError:
            self.pe.get_warnings().append(
                "Failed rendering unicode string, "
                "attempting to read from RVA 0x{0:x}".format(self.rva_ptr)
            )
class PEFormatError(Exception):
    """Generic PE format error exception."""

    def __init__(self, value):
        # Keep the offending value; str() renders its repr.
        self.value = value

    def __str__(self):
        return f"{self.value!r}"
class Dump:
    """Convenience class for dumping the PE information."""

    def __init__(self):
        # Output fragments, concatenated on demand by get_text().
        self.text = []

    def add_lines(self, txt, indent=0):
        """Adds a list of lines.

        The list can be indented with the optional argument 'indent'.
        """
        for line in txt:
            self.add_line(line, indent)

    def add_line(self, txt, indent=0):
        """Adds a line.

        The line can be indented with the optional argument 'indent'.
        """
        self.add(txt + "\n", indent)

    def add(self, txt, indent=0):
        """Adds some text, no newline will be appended.

        The text can be indented with the optional argument 'indent'.
        """
        self.text.append(f"{' ' * indent}{txt}")

    def add_header(self, txt):
        """Adds a header element."""
        rule = "-" * 10
        self.add_line(f"{rule}{txt}{rule}\n")

    def add_newline(self):
        """Adds a newline."""
        self.text.append("\n")

    def get_text(self):
        """Get the text in its current state."""
        return "".join(str(fragment) for fragment in self.text)
# Size in bytes of each struct format code handled by sizeof_type() below.
STRUCT_SIZEOF_TYPES = {
    "x": 1,
    "c": 1,
    "b": 1,
    "B": 1,
    "h": 2,
    "H": 2,
    "i": 4,
    "I": 4,
    "l": 4,
    "L": 4,
    "f": 4,
    "q": 8,
    "Q": 8,
    "d": 8,
    "s": 1,
}
@lru_cache(maxsize=2048)
def sizeof_type(t):
    """Return the size in bytes of the struct format fragment *t*.

    A fragment with a leading decimal repeat count (e.g. "4s", "2I")
    multiplies the size of its base type code.
    """
    if t[0] not in string.digits:
        return STRUCT_SIZEOF_TYPES[t]
    # Split the fragment into its numeric repeat count and its type code.
    count = int("".join(ch for ch in t if ch in string.digits))
    base_type = "".join(ch for ch in t if ch not in string.digits)
    return STRUCT_SIZEOF_TYPES[base_type] * count
# copy=True: the cached tuple contains mutable lists/dicts, so the module's
# lru_cache wrapper presumably hands each caller a copy -- confirm against
# the wrapper defined at the top of the file.
@lru_cache(maxsize=2048, copy=True)
def set_format(format):
    """Parse a Structure format tuple into struct-unpacking metadata.

    Each element of *format* is a "type,name[,name...]" string; elements
    without a comma are skipped. Returns a 5-tuple:
    (struct format string, placeholder list for unpacked values,
    field-name -> offset dict, list of per-element name lists,
    total packed length in bytes).
    """
    __format__ = "<"
    __unpacked_data_elms__ = []
    __field_offsets__ = {}
    __keys__ = []
    __format_length__ = 0
    offset = 0
    for elm in format:
        if "," in elm:
            elm_type, elm_name = elm.split(",", 1)
            __format__ += elm_type
            __unpacked_data_elms__.append(None)
            elm_names = elm_name.split(",")
            names = []
            for elm_name in elm_names:
                # NOTE(review): __keys__ holds *lists* of names, so a bare
                # string can never test as a member -- this duplicate-name
                # suffixing branch appears unreachable; confirm before
                # relying on "_N"-suffixed field names.
                if elm_name in __keys__:
                    search_list = [x[: len(elm_name)] for x in __keys__]
                    occ_count = search_list.count(elm_name)
                    elm_name = "{0}_{1:d}".format(elm_name, occ_count)
                names.append(elm_name)
                __field_offsets__[elm_name] = offset
            offset += sizeof_type(elm_type)
            # Some PE header structures have unions on them, so a certain
            # value might have different names, so each key has a list of
            # all the possible members referring to the data.
            __keys__.append(names)
    __format_length__ = struct.calcsize(__format__)
    return (
        __format__,
        __unpacked_data_elms__,
        __field_offsets__,
        __keys__,
        __format_length__,
    )
class Structure:
    """Prepare structure object to extract members from data.

    Format is a list containing definitions for the elements
    of the structure.
    """

    def __init__(self, format, name=None, file_offset=None):
        # format: (structure name, iterable of "type,name[,name...]" specs).
        # Format is forced little endian, for big endian non Intel platforms
        self.__format__ = "<"
        self.__keys__ = []
        self.__format_length__ = 0
        self.__field_offsets__ = {}
        self.__unpacked_data_elms__ = []
        d = format[1]
        # need a tuple to be hashable in set_format using lru cache
        if not isinstance(format[1], tuple):
            d = tuple(format[1])
        (
            self.__format__,
            self.__unpacked_data_elms__,
            self.__field_offsets__,
            self.__keys__,
            self.__format_length__,
        ) = set_format(d)
        self.__all_zeroes__ = False
        self.__file_offset__ = file_offset
        if name:
            self.name = name
        else:
            self.name = format[0]

    def __get_format__(self):
        # Raw struct format string (little-endian prefixed).
        return self.__format__

    def get_field_absolute_offset(self, field_name):
        """Return the offset within the field for the requested field in the structure."""
        return self.__file_offset__ + self.__field_offsets__[field_name]

    def get_field_relative_offset(self, field_name):
        """Return the offset within the structure for the requested field."""
        return self.__field_offsets__[field_name]

    def get_file_offset(self):
        # File offset where this structure was read from (may be None).
        return self.__file_offset__

    def set_file_offset(self, offset):
        self.__file_offset__ = offset

    def all_zeroes(self):
        """Returns True if the unpacked data is all zeros."""
        return self.__all_zeroes__

    def sizeof(self):
        """Return size of the structure."""
        return self.__format_length__

    def __unpack__(self, data):
        """Unpack *data* into attributes named after the structure fields.

        Raises PEFormatError when *data* is shorter than the structure.
        """
        data = b(data)
        # Silently truncate over-long input to the structure's length.
        if len(data) > self.__format_length__:
            data = data[: self.__format_length__]
        # OC Patch:
        # Some malware have incorrect header lengths.
        # Fail gracefully if this occurs
        # Buggy malware: a29b0118af8b7408444df81701ad5a7f
        #
        elif len(data) < self.__format_length__:
            raise PEFormatError("Data length less than expected header length.")
        if count_zeroes(data) == len(data):
            self.__all_zeroes__ = True
        self.__unpacked_data_elms__ = struct.unpack(self.__format__, data)
        for idx, val in enumerate(self.__unpacked_data_elms__):
            # A value may be known under several names (unions); expose
            # every alias as an attribute.
            for key in self.__keys__[idx]:
                setattr(self, key, val)

    def __pack__(self):
        """Pack the current attribute values back into bytes."""
        new_values = []
        for idx, val in enumerate(self.__unpacked_data_elms__):
            for key in self.__keys__[idx]:
                new_val = getattr(self, key)
                # In the case of unions, when the first changed value
                # is picked the loop is exited
                if new_val != val:
                    break
            new_values.append(new_val)
        return struct.pack(self.__format__, *new_values)

    def __str__(self):
        return "\n".join(self.dump())

    def __repr__(self):
        return "<Structure: %s>" % (
            " ".join([" ".join(s.split()) for s in self.dump()])
        )

    def dump(self, indentation=0):
        """Returns a string representation of the structure."""
        dump = []
        dump.append("[{0}]".format(self.name))
        # Bytes rendered as text keep printable, non-whitespace characters
        # and escape everything else as \xNN.
        printable_bytes = [
            ord(i) for i in string.printable if i not in string.whitespace
        ]
        # Refer to the __set_format__ method for an explanation
        # of the following construct.
        for keys in self.__keys__:
            for key in keys:
                val = getattr(self, key)
                # NOTE(review): "long" must be an int alias defined earlier
                # in the module (Python 2 compatibility shim) -- confirm.
                if isinstance(val, (int, long)):
                    if key.startswith("Signature_"):
                        val_str = "{:<8X}".format(val)
                    else:
                        val_str = "0x{:<8X}".format(val)
                    if key == "TimeDateStamp" or key == "dwTimeStamp":
                        try:
                            val_str += " [%s UTC]" % time.asctime(time.gmtime(val))
                        except ValueError:
                            val_str += " [INVALID TIME]"
                else:
                    val_str = bytearray(val)
                    if key.startswith("Signature"):
                        val_str = "".join(
                            ["{:02X}".format(i) for i in val_str.rstrip(b"\x00")]
                        )
                    else:
                        val_str = "".join(
                            [
                                chr(i)
                                if (i in printable_bytes)
                                else "\\x{0:02x}".format(i)
                                for i in val_str.rstrip(b"\x00")
                            ]
                        )
                dump.append(
                    "0x%-8X 0x%-3X %-30s %s"
                    % (
                        self.__field_offsets__[key] + self.__file_offset__,
                        self.__field_offsets__[key],
                        key + ":",
                        val_str,
                    )
                )
        return dump

    def dump_dict(self):
        """Returns a dictionary representation of the structure."""
        dump_dict = {}
        dump_dict["Structure"] = self.name
        # Refer to the __set_format__ method for an explanation
        # of the following construct.
        for keys in self.__keys__:
            for key in keys:
                val = getattr(self, key)
                # NOTE(review): same "long" alias dependency as in dump().
                if isinstance(val, (int, long)):
                    if key == "TimeDateStamp" or key == "dwTimeStamp":
                        try:
                            val = "0x%-8X [%s UTC]" % (
                                val,
                                time.asctime(time.gmtime(val)),
                            )
                        except ValueError:
                            val = "0x%-8X [INVALID TIME]" % val
                else:
                    val = "".join(
                        chr(d) if chr(d) in string.printable else "\\x%02x" % d
                        for d in [ord(c) if not isinstance(c, int) else c for c in val]
                    )
                dump_dict[key] = {
                    "FileOffset": self.__field_offsets__[key] + self.__file_offset__,
                    "Offset": self.__field_offsets__[key],
                    "Value": val,
                }
        return dump_dict
- class SectionStructure(Structure):
- """Convenience section handling class."""
- def __init__(self, *argl, **argd):
- if "pe" in argd:
- self.pe = argd["pe"]
- del argd["pe"]
- Structure.__init__(self, *argl, **argd)
- self.PointerToRawData_adj = None
- self.VirtualAddress_adj = None
- def get_PointerToRawData_adj(self):
- if self.PointerToRawData_adj is None:
- if self.PointerToRawData is not None:
- self.PointerToRawData_adj = self.pe.adjust_FileAlignment(
- self.PointerToRawData, self.pe.OPTIONAL_HEADER.FileAlignment
- )
- return self.PointerToRawData_adj
- def get_VirtualAddress_adj(self):
- if self.VirtualAddress_adj is None:
- if self.VirtualAddress is not None:
- self.VirtualAddress_adj = self.pe.adjust_SectionAlignment(
- self.VirtualAddress,
- self.pe.OPTIONAL_HEADER.SectionAlignment,
- self.pe.OPTIONAL_HEADER.FileAlignment,
- )
- return self.VirtualAddress_adj
- def get_data(self, start=None, length=None):
- """Get data chunk from a section.
- Allows to query data from the section by passing the
- addresses where the PE file would be loaded by default.
- It is then possible to retrieve code and data by their real
- addresses as they would be if loaded.
- Returns bytes() under Python 3.x and set() under Python 2.7
- """
- if start is None:
- offset = self.get_PointerToRawData_adj()
- else:
- offset = (
- start - self.get_VirtualAddress_adj()
- ) + self.get_PointerToRawData_adj()
- if length is not None:
- end = offset + length
- else:
- end = offset + self.SizeOfRawData
- # PointerToRawData is not adjusted here as we might want to read any possible
- # extra bytes that might get cut off by aligning the start (and hence cutting
- # something off the end)
- if end > self.PointerToRawData + self.SizeOfRawData:
- end = self.PointerToRawData + self.SizeOfRawData
- return self.pe.__data__[offset:end]
- def __setattr__(self, name, val):
- if name == "Characteristics":
- section_flags = retrieve_flags(SECTION_CHARACTERISTICS, "IMAGE_SCN_")
- # Set the section's flags according to the Characteristics member
- set_flags(self, val, section_flags)
- elif "IMAGE_SCN_" in name and hasattr(self, name):
- if val:
- self.__dict__["Characteristics"] |= SECTION_CHARACTERISTICS[name]
- else:
- self.__dict__["Characteristics"] ^= SECTION_CHARACTERISTICS[name]
- self.__dict__[name] = val
- def get_rva_from_offset(self, offset):
- return offset - self.get_PointerToRawData_adj() + self.get_VirtualAddress_adj()
- def get_offset_from_rva(self, rva):
- return rva - self.get_VirtualAddress_adj() + self.get_PointerToRawData_adj()
- def contains_offset(self, offset):
- """Check whether the section contains the file offset provided."""
- if self.PointerToRawData is None:
- # bss and other sections containing only uninitialized data must have 0
- # and do not take space in the file
- return False
- PointerToRawData_adj = self.get_PointerToRawData_adj()
- return (
- PointerToRawData_adj <= offset < PointerToRawData_adj + self.SizeOfRawData
- )
- def contains_rva(self, rva):
- """Check whether the section contains the address provided."""
- VirtualAddress_adj = self.get_VirtualAddress_adj()
- # Check if the SizeOfRawData is realistic. If it's bigger than the size of
- # the whole PE file minus the start address of the section it could be
- # either truncated or the SizeOfRawData contains a misleading value.
- # In either of those cases we take the VirtualSize
- #
- if len(self.pe.__data__) - self.get_PointerToRawData_adj() < self.SizeOfRawData:
- # PECOFF documentation v8 says:
- # VirtualSize: The total size of the section when loaded into memory.
- # If this value is greater than SizeOfRawData, the section is zero-padded.
- # This field is valid only for executable images and should be set to zero
- # for object files.
- #
- size = self.Misc_VirtualSize
- else:
- size = max(self.SizeOfRawData, self.Misc_VirtualSize)
- # Check whether there's any section after the current one that starts before
- # the calculated end for the current one. If so, cut the current section's size
- # to fit in the range up to where the next section starts.
- if (
- self.next_section_virtual_address is not None
- and self.next_section_virtual_address > self.VirtualAddress
- and VirtualAddress_adj + size > self.next_section_virtual_address
- ):
- size = self.next_section_virtual_address - VirtualAddress_adj
- return VirtualAddress_adj <= rva < VirtualAddress_adj + size
    def contains(self, rva):
        """Alias for contains_rva()."""
        return self.contains_rva(rva)
    def get_entropy(self):
        """Calculate and return the entropy for the section."""
        # Delegates to entropy_H() over the section's raw data bytes.
        return self.entropy_H(self.get_data())
- def get_hash_sha1(self):
- """Get the SHA-1 hex-digest of the section's data."""
- if sha1 is not None:
- return sha1(self.get_data()).hexdigest()
- def get_hash_sha256(self):
- """Get the SHA-256 hex-digest of the section's data."""
- if sha256 is not None:
- return sha256(self.get_data()).hexdigest()
- def get_hash_sha512(self):
- """Get the SHA-512 hex-digest of the section's data."""
- if sha512 is not None:
- return sha512(self.get_data()).hexdigest()
- def get_hash_md5(self):
- """Get the MD5 hex-digest of the section's data."""
- if md5 is not None:
- return md5(self.get_data()).hexdigest()
- def entropy_H(self, data):
- """Calculate the entropy of a chunk of data."""
- if not data:
- return 0.0
- occurences = Counter(bytearray(data))
- entropy = 0
- for x in occurences.values():
- p_x = float(x) / len(data)
- entropy -= p_x * math.log(p_x, 2)
- return entropy
@lru_cache(maxsize=2048, copy=False)
def set_bitfields_format(format):
    """Build packing metadata for a structure format containing bitfields
    (e.g. "B:4,LowerHalf").

    Consecutive bitfield specs sharing a storage type are merged into one
    artificial compound field (name prefixed with "~") that struct can pack.

    Returns a tuple:
        (format_str, format_length, field_offsets, keys, extended_keys,
         comp_fields)
    where comp_fields maps the index of each compound field within *keys* to
    (storage type, [(subfield name, bit count), ...]).
    """

    class Accumulator:
        """Gathers consecutive bitfield specs into one compound field."""

        def __init__(self, fmt, comp_fields):
            self._subfields = []
            # add a prefix to distinguish the artificially created compound
            # field from regular fields
            self._name = "~"
            self._type = None
            self._bits_left = 0
            self._comp_fields = comp_fields
            self._format = fmt

        def wrap_up(self):
            # Flush the compound field accumulated so far, if any.
            if self._type is None:
                return
            self._format.append(self._type + "," + self._name)
            self._comp_fields[len(self._format) - 1] = (self._type, self._subfields)
            self._name = "~"
            self._type = None
            self._subfields = []

        def new_type(self, tp):
            self._bits_left = STRUCT_SIZEOF_TYPES[tp] * 8
            self._type = tp

        def add_subfield(self, name, bitcnt):
            self._name += name
            self._bits_left -= bitcnt
            self._subfields.append((name, bitcnt))

        def get_type(self):
            return self._type

        def get_name(self):
            return self._name

        def get_bits_left(self):
            return self._bits_left

    old_fmt = []
    comp_fields = {}
    ac = Accumulator(old_fmt, comp_fields)
    for elm in format[1]:
        if ":" not in elm:
            # Plain (non-bitfield) field: flush any pending compound field
            # and copy this spec through unchanged.
            ac.wrap_up()
            old_fmt.append(elm)
            continue
        elm_type, elm_name = elm.split(",", 1)
        if "," in elm_name:
            raise NotImplementedError(
                "Structures with bitfields do not support unions yet"
            )
        elm_type, elm_bits = elm_type.split(":", 1)
        elm_bits = int(elm_bits)
        # Start a new compound field when the storage type changes or the
        # current one has no room left for this subfield.
        if elm_type != ac.get_type() or elm_bits > ac.get_bits_left():
            ac.wrap_up()
            ac.new_type(elm_type)
        ac.add_subfield(elm_name, elm_bits)
    # Flush the trailing compound field, if any.
    ac.wrap_up()

    format_str, _, field_offsets, keys, format_length = set_format(tuple(old_fmt))
    extended_keys = []
    for idx, val in enumerate(keys):
        if idx not in comp_fields:
            extended_keys.append(val)
            continue
        _, sbf = comp_fields[idx]
        bf_names = [[f[StructureWithBitfields.BTF_NAME_IDX]] for f in sbf]
        extended_keys.extend(bf_names)
        for n in bf_names:
            # Subfields share the file offset of their compound field.
            field_offsets[n[0]] = field_offsets[val[0]]

    return (format_str, format_length, field_offsets, keys, extended_keys, comp_fields)
class StructureWithBitfields(Structure):
    """
    Extends Structure's functionality with support for bitfields such as:
        ('B:4,LowerHalf', 'B:4,UpperHalf')

    To this end, two lists are maintained:
        * self.__keys__ that contains compound fields, for example
          ('B,~LowerHalfUpperHalf'), and is used during packing/unpacking
        * self.__keys_ext__ containing a separate key for each field (ex., LowerHalf,
          UpperHalf) to simplify implementation of dump()

    This way the implementation of unpacking/packing and dump() from Structure can be
    reused.

    In addition, we create a dictionary:
        <compound_field_index_in_keys> -->
            (data type, [ (subfield name, length in bits)+ ] )
    that facilitates bitfield packing and unpacking.

    With lru_cache() creating only one instance per format string, the memory
    overhead is negligible.
    """

    # Indices into the (name, bit count) subfield tuples.
    BTF_NAME_IDX = 0
    BTF_BITCNT_IDX = 1
    # Indices into the (type, subfields) compound-field tuples.
    CF_TYPE_IDX = 0
    CF_SUBFLD_IDX = 1

    def __init__(self, format, name=None, file_offset=None):
        (
            self.__format__,
            self.__format_length__,
            self.__field_offsets__,
            self.__keys__,
            self.__keys_ext__,
            self.__compound_fields__,
        ) = set_bitfields_format(format)
        # create our own unpacked_data_elms to ensure they are not shared among
        # StructureWithBitfields instances with the same format string
        self.__unpacked_data_elms__ = [None for i in range(self.__format_length__)]
        self.__all_zeroes__ = False
        self.__file_offset__ = file_offset
        self.name = name if name is not None else format[0]

    def __unpack__(self, data):
        # calling the original routine to deal with special cases/spurious data
        # structures
        super(StructureWithBitfields, self).__unpack__(data)
        self._unpack_bitfield_attributes()

    def __pack__(self):
        self._pack_bitfield_attributes()
        try:
            data = super(StructureWithBitfields, self).__pack__()
        finally:
            # Restore the per-subfield attributes even if packing failed.
            self._unpack_bitfield_attributes()
        return data

    def dump(self, indentation=0):
        # Temporarily swap in the expanded key list so each bitfield is
        # reported individually.
        tk = self.__keys__
        self.__keys__ = self.__keys_ext__
        try:
            ret = super(StructureWithBitfields, self).dump(indentation)
        finally:
            self.__keys__ = tk
        return ret

    def dump_dict(self):
        # Same key-list swap as dump(), restored on exit.
        tk = self.__keys__
        self.__keys__ = self.__keys_ext__
        try:
            ret = super(StructureWithBitfields, self).dump_dict()
        finally:
            self.__keys__ = tk
        return ret

    def _unpack_bitfield_attributes(self):
        """Replace compound attributes corresponding to bitfields with separate
        sub-fields.
        """
        for i in self.__compound_fields__.keys():
            cf_name = self.__keys__[i][0]
            cval = getattr(self, cf_name)
            delattr(self, cf_name)
            offst = 0
            for sf in self.__compound_fields__[i][StructureWithBitfields.CF_SUBFLD_IDX]:
                # Extract this subfield's bits from the compound value.
                mask = (1 << sf[StructureWithBitfields.BTF_BITCNT_IDX]) - 1
                mask <<= offst
                setattr(
                    self,
                    sf[StructureWithBitfields.BTF_NAME_IDX],
                    (cval & mask) >> offst,
                )
                offst += sf[StructureWithBitfields.BTF_BITCNT_IDX]

    def _pack_bitfield_attributes(self):
        """Pack attributes into a compound bitfield"""
        for i in self.__compound_fields__.keys():
            cf_name = self.__keys__[i][0]
            offst, acc_val = 0, 0
            for sf in self.__compound_fields__[i][StructureWithBitfields.CF_SUBFLD_IDX]:
                # Mask each subfield to its declared width before merging.
                mask = (1 << sf[StructureWithBitfields.BTF_BITCNT_IDX]) - 1
                field_val = (
                    getattr(self, sf[StructureWithBitfields.BTF_NAME_IDX]) & mask
                )
                acc_val |= field_val << offst
                offst += sf[StructureWithBitfields.BTF_BITCNT_IDX]
            setattr(self, cf_name, acc_val)
class DataContainer:
    """Generic bag of named attributes built from keyword arguments."""

    def __init__(self, **args):
        # Use object's raw __setattr__ so subclass __setattr__ overrides
        # (which may touch not-yet-existing attributes) are bypassed.
        raw_setattr = super(DataContainer, self).__setattr__
        for key, value in list(args.items()):
            raw_setattr(key, value)
class ImportDescData(DataContainer):
    """Holds import descriptor information.

    dll:        name of the imported DLL
    imports:    list of imported symbols (ImportData instances)
    struct:     IMAGE_IMPORT_DESCRIPTOR structure
    """
class ImportData(DataContainer):
    """Holds imported symbol's information.

    ordinal:    Ordinal of the symbol
    name:       Name of the symbol
    bound:      If the symbol is bound, this contains
                the address.

    Assigning to ordinal/bound/address/name after initialization writes the
    change through to the underlying thunk structures / file data.
    """

    def __setattr__(self, name, val):
        # If the instance doesn't yet have an ordinal attribute
        # it's not fully initialized so can't do any of the
        # following
        #
        if (
            hasattr(self, "ordinal")
            and hasattr(self, "bound")
            and hasattr(self, "name")
        ):
            if name == "ordinal":
                if self.pe.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
                    ordinal_flag = IMAGE_ORDINAL_FLAG
                elif self.pe.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
                    ordinal_flag = IMAGE_ORDINAL_FLAG64
                # NOTE(review): if PE_TYPE matches neither magic, ordinal_flag
                # is unbound and the next line raises — confirm callers
                # guarantee one of the two magics.
                # Set the ordinal and flag the entry as importing by ordinal
                self.struct_table.Ordinal = ordinal_flag | (val & 0xFFFF)
                # Mirror the value into every member of the thunk union.
                self.struct_table.AddressOfData = self.struct_table.Ordinal
                self.struct_table.Function = self.struct_table.Ordinal
                self.struct_table.ForwarderString = self.struct_table.Ordinal
            elif name == "bound":
                if self.struct_iat is not None:
                    # Mirror the bound address into every union member.
                    # (A redundant self-assignment of AddressOfData was
                    # removed here; behavior is unchanged.)
                    self.struct_iat.AddressOfData = val
                    self.struct_iat.Function = self.struct_iat.AddressOfData
                    self.struct_iat.ForwarderString = self.struct_iat.AddressOfData
            elif name == "address":
                # Mirror the address into every member of the thunk union.
                self.struct_table.AddressOfData = val
                self.struct_table.Ordinal = self.struct_table.AddressOfData
                self.struct_table.Function = self.struct_table.AddressOfData
                self.struct_table.ForwarderString = self.struct_table.AddressOfData
            elif name == "name":
                # Make sure we reset the entry in case the import had been set to
                # import by ordinal
                if self.name_offset:
                    name_rva = self.pe.get_rva_from_offset(self.name_offset)
                    # High bit clear == import by name rather than by ordinal.
                    self.pe.set_dword_at_offset(
                        self.ordinal_offset, (0 << 31) | name_rva
                    )

                    # Complain if the length of the new name is longer than the
                    # existing one
                    if len(val) > len(self.name):
                        raise PEFormatError(
                            "The import name provided is longer than the existing one."
                        )

                    self.pe.set_bytes_at_offset(self.name_offset, val)

        self.__dict__[name] = val
class ExportDirData(DataContainer):
    """Holds export directory information.

    struct:     IMAGE_EXPORT_DIRECTORY structure
    symbols:    list of exported symbols (ExportData instances)"""
class ExportData(DataContainer):
    """Holds exported symbols' information.

    ordinal:    ordinal of the symbol
    address:    address of the symbol
    name:       name of the symbol (None if the symbol is
                exported by ordinal only)
    forwarder:  if the symbol is forwarded it will
                contain the name of the target symbol,
                None otherwise.

    Assigning to ordinal/address/name/forwarder after initialization writes
    the change through to the underlying file data.
    """

    def __setattr__(self, name, val):
        # If the instance doesn't yet have an ordinal attribute
        # it's not fully initialized so can't do any of the
        # following
        #
        if (
            hasattr(self, "ordinal")
            and hasattr(self, "address")
            and hasattr(self, "forwarder")
            and hasattr(self, "name")
        ):
            if name == "ordinal":
                self.pe.set_word_at_offset(self.ordinal_offset, val)
            elif name == "address":
                self.pe.set_dword_at_offset(self.address_offset, val)
            elif name == "name":
                # Complain if the length of the new name is longer than the
                # existing one
                if len(val) > len(self.name):
                    raise PEFormatError(
                        "The export name provided is longer than the existing one."
                    )
                self.pe.set_bytes_at_offset(self.name_offset, val)
            elif name == "forwarder":
                # Complain if the length of the new name is longer than the
                # existing one
                if len(val) > len(self.forwarder):
                    raise PEFormatError(
                        "The forwarder name provided is longer than the existing one."
                    )
                self.pe.set_bytes_at_offset(self.forwarder_offset, val)

        self.__dict__[name] = val
class ResourceDirData(DataContainer):
    """Holds resource directory information.

    struct:     IMAGE_RESOURCE_DIRECTORY structure
    entries:    list of entries (ResourceDirEntryData instances)
    """
class ResourceDirEntryData(DataContainer):
    """Holds resource directory entry data.

    struct:     IMAGE_RESOURCE_DIRECTORY_ENTRY structure
    name:       If the resource is identified by name this
                attribute will contain the name string. None
                otherwise. If identified by id, the id is
                available at 'struct.Id'
    id:         the id, also in struct.Id
    directory:  If this entry has a lower level directory
                this attribute will point to the
                ResourceDirData instance representing it.
    data:       If this entry has no further lower directories
                and points to the actual resource data, this
                attribute will reference the corresponding
                ResourceDataEntryData instance.

    (Either of the 'directory' or 'data' attribute will exist,
    but not both.)
    """
class ResourceDataEntryData(DataContainer):
    """Holds resource data entry information.

    struct:     IMAGE_RESOURCE_DATA_ENTRY structure
    lang:       Primary language ID
    sublang:    Sublanguage ID
    """
class DebugData(DataContainer):
    """Holds debug information.

    struct:     IMAGE_DEBUG_DIRECTORY structure
    entries:    list of entries (IMAGE_DEBUG_TYPE instances)
    """
class BaseRelocationData(DataContainer):
    """Holds base relocation information.

    struct:     IMAGE_BASE_RELOCATION structure
    entries:    list of relocation data (RelocationData instances)
    """
class RelocationData(DataContainer):
    """Holds relocation information.

    type:       Type of relocation
                The type string can be obtained by
                RELOCATION_TYPE[type]
    rva:        RVA of the relocation

    Assigning to 'type' or 'rva' after initialization re-encodes the
    underlying relocation word (4-bit type in the high nibble, 12-bit
    page-relative offset in the low bits).
    """

    def __setattr__(self, name, val):
        # If the instance doesn't yet have a struct attribute
        # it's not fully initialized so can't do any of the
        # following
        #
        if hasattr(self, "struct"):
            # Get the word containing the type and data
            #
            word = self.struct.Data

            if name == "type":
                # Type occupies the top 4 bits of the relocation word.
                word = (val << 12) | (word & 0xFFF)
            elif name == "rva":
                # The low 12 bits hold the offset relative to the page's
                # base RVA; clamp negative deltas to zero.
                offset = max(val - self.base_rva, 0)
                word = (word & 0xF000) | (offset & 0xFFF)

            # Store the modified data
            #
            self.struct.Data = word

        self.__dict__[name] = val
class TlsData(DataContainer):
    """Holds TLS information.

    struct:     IMAGE_TLS_DIRECTORY structure
    """
class BoundImportDescData(DataContainer):
    """Holds bound import descriptor data.

    This directory entry will provide information on the
    DLLs this PE file has been bound to (if bound at all).
    The structure will contain the name and timestamp of the
    DLL at the time of binding so that the loader can know
    whether it differs from the one currently present in the
    system and must, therefore, re-bind the PE's imports.

    struct:     IMAGE_BOUND_IMPORT_DESCRIPTOR structure
    name:       DLL name
    entries:    list of entries (BoundImportRefData instances)
                the entries will exist if this DLL has forwarded
                symbols. If so, the destination DLL will have an
                entry in this list.
    """
class LoadConfigData(DataContainer):
    """Holds Load Config data.

    struct:     IMAGE_LOAD_CONFIG_DIRECTORY structure
    name:       dll name
    """
class BoundImportRefData(DataContainer):
    """Holds bound import forwarder reference data.

    Contains the same information as the bound descriptor but
    for forwarded DLLs, if any.

    struct:     IMAGE_BOUND_FORWARDER_REF structure
    name:       dll name
    """
class ExceptionsDirEntryData(DataContainer):
    """Holds the data related to SEH (and stack unwinding, in particular)

    struct:     an instance of RUNTIME_FUNCTION
    unwindinfo: an instance of UNWIND_INFO
    """
class UnwindInfo(StructureWithBitfields):
    """Handles the complexities of UNWIND_INFO structure:
    * variable number of UWIND_CODEs
    * optional ExceptionHandler and FunctionEntry fields
    """

    def __init__(self, file_offset=0):
        super(UnwindInfo, self).__init__(
            (
                "UNWIND_INFO",
                (
                    "B:3,Version",
                    "B:5,Flags",
                    "B,SizeOfProlog",
                    "B,CountOfCodes",
                    "B:4,FrameRegister",
                    "B:4,FrameOffset",
                ),
            ),
            file_offset=file_offset,
        )
        # Holds only the fixed header size until unpack_in_stages() computes
        # the real total size.
        self._full_size = super(UnwindInfo, self).sizeof()
        # Name of the trailing optional DWORD field, if any
        # ("ExceptionHandler" or "FunctionEntry").
        self._opt_field_name = None
        # Scratch structure reused to unpack each UNWIND_CODE slot.
        self._code_info = StructureWithBitfields(
            ("UNWIND_CODE", ("B,CodeOffset", "B:4,UnwindOp", "B:4,OpInfo")),
            file_offset=0,
        )
        self._chained_entry = None
        self._finished_unpacking = False

    def unpack_in_stages(self, data):
        """Unpacks the UNWIND_INFO "in two calls", with the first call establishing
        a full size of the structure and the second, performing the actual unpacking.

        Returns None on success (or when more data is needed), an error
        string otherwise.
        """
        if self._finished_unpacking:
            return None

        super(UnwindInfo, self).__unpack__(data)
        # The unwind-code array is padded to an even number of slots.
        codes_cnt_max = (self.CountOfCodes + 1) & ~1
        hdlr_offset = (
            super(UnwindInfo, self).sizeof() + codes_cnt_max * self._code_info.sizeof()
        )
        # The optional trailing DWORD exists only when some flag is set.
        self._full_size = hdlr_offset + (
            0 if self.Flags == 0 else STRUCT_SIZEOF_TYPES["I"]
        )

        # First stage done: caller must call again with at least
        # self._full_size bytes.
        if len(data) < self._full_size:
            return None

        if self.Version != 1 and self.Version != 2:
            return "Unsupported version of UNWIND_INFO at " + hex(self.__file_offset__)

        self.UnwindCodes = []
        # Read offset into data, starting right after the fixed header.
        ro = super(UnwindInfo, self).sizeof()
        codes_left = self.CountOfCodes
        while codes_left > 0:
            self._code_info.__unpack__(data[ro : ro + self._code_info.sizeof()])
            ucode = PrologEpilogOpsFactory.create(self._code_info)
            if ucode is None:
                return "Unknown UNWIND_CODE at " + hex(self.__file_offset__ + ro)

            # Some opcodes span more than one UNWIND_CODE slot.
            len_in_codes = ucode.length_in_code_structures(self._code_info, self)
            opc_size = self._code_info.sizeof() * len_in_codes
            ucode.initialize(
                self._code_info,
                data[ro : ro + opc_size],
                self,
                self.__file_offset__ + ro,
            )
            ro += opc_size
            codes_left -= len_in_codes
            self.UnwindCodes.append(ucode)

        # FunctionEntry (chained info) takes precedence over a handler.
        if self.UNW_FLAG_EHANDLER or self.UNW_FLAG_UHANDLER:
            self._opt_field_name = "ExceptionHandler"
        if self.UNW_FLAG_CHAININFO:
            self._opt_field_name = "FunctionEntry"

        if self._opt_field_name != None:
            setattr(
                self,
                self._opt_field_name,
                struct.unpack(
                    "<I", data[hdlr_offset : hdlr_offset + STRUCT_SIZEOF_TYPES["I"]]
                )[0],
            )

        self._finished_unpacking = True

        return None

    def dump(self, indentation=0):
        # Because __keys_ext__ are shared among all the instances with the same
        # format string, we have to add and subsequently remove the optional field
        # each time.
        # It saves space (as compared to keeping a copy self.__keys_ext__ per
        # UnwindInfo instance), but makes our dump() implementation thread-unsafe.
        if self._opt_field_name != None:
            self.__field_offsets__[self._opt_field_name] = (
                self._full_size - STRUCT_SIZEOF_TYPES["I"]
            )
            self.__keys_ext__.append([self._opt_field_name])
        try:
            dump = super(UnwindInfo, self).dump(indentation)
        finally:
            if self._opt_field_name != None:
                self.__keys_ext__.pop()

        # Append the decoded flag names and the textual unwind codes.
        dump.append(
            "Flags: "
            + ", ".join([s[0] for s in unwind_info_flags if getattr(self, s[0])])
        )
        dump.append(
            "Unwind codes: "
            + "; ".join([str(c) for c in self.UnwindCodes if c.is_valid()])
        )
        return dump

    def dump_dict(self):
        # Same shared-__keys_ext__ dance as dump(); see the comment there.
        if self._opt_field_name != None:
            self.__field_offsets__[self._opt_field_name] = (
                self._full_size - STRUCT_SIZEOF_TYPES["I"]
            )
            self.__keys_ext__.append([self._opt_field_name])
        try:
            ret = super(UnwindInfo, self).dump_dict()
        finally:
            if self._opt_field_name != None:
                self.__keys_ext__.pop()
        return ret

    def __setattr__(self, name, val):
        if name == "Flags":
            # Refresh the individual UNW_FLAG_* attributes from the raw byte.
            set_flags(self, val, unwind_info_flags)
        elif "UNW_FLAG_" in name and hasattr(self, name):
            # Keep the raw Flags byte in sync when one flag is toggled.
            if val:
                self.__dict__["Flags"] |= UNWIND_INFO_FLAGS[name]
            else:
                # NOTE(review): XOR only clears the bit if it was set —
                # assigning False to an already-clear flag would set it.
                # Presumably callers never do that; confirm.
                self.__dict__["Flags"] ^= UNWIND_INFO_FLAGS[name]
        self.__dict__[name] = val

    def sizeof(self):
        # Total size including unwind codes and the optional trailing DWORD.
        return self._full_size

    def __pack__(self):
        data = bytearray(self._full_size)
        data[0 : super(UnwindInfo, self).sizeof()] = super(UnwindInfo, self).__pack__()
        cur_offset = super(UnwindInfo, self).sizeof()

        for uc in self.UnwindCodes:
            # Stop rather than overflow the preallocated buffer.
            if cur_offset + uc.struct.sizeof() > self._full_size:
                break
            data[cur_offset : cur_offset + uc.struct.sizeof()] = uc.struct.__pack__()
            cur_offset += uc.struct.sizeof()

        if self._opt_field_name != None:
            data[
                self._full_size - STRUCT_SIZEOF_TYPES["I"] : self._full_size
            ] = struct.pack("<I", getattr(self, self._opt_field_name))

        return data

    def get_chained_function_entry(self):
        return self._chained_entry

    def set_chained_function_entry(self, entry):
        # The chained entry may be set at most once.
        if self._chained_entry != None:
            raise PEFormatError("Chained function entry cannot be changed")
        self._chained_entry = entry
class PrologEpilogOp:
    """Meant as an abstract class representing a generic unwind code.

    There is a subclass of PrologEpilogOp for each member of UNWIND_OP_CODES enum.
    """

    def initialize(self, unw_code, data, unw_info, file_offset):
        """Unpack this opcode's record from *data*."""
        record_format = self._get_format(unw_code)
        self.struct = StructureWithBitfields(record_format, file_offset=file_offset)
        self.struct.__unpack__(data)

    def length_in_code_structures(self, unw_code, unw_info):
        """Computes how many UNWIND_CODE structures UNWIND_CODE occupies.

        May be called before initialize() and, for that reason, should not rely on
        the values of instance attributes.
        """
        return 1

    def is_valid(self):
        """Generic opcodes are always treated as valid."""
        return True

    def _get_format(self, unw_code):
        return ("UNWIND_CODE", ("B,CodeOffset", "B:4,UnwindOp", "B:4,OpInfo"))
class PrologEpilogOpPushReg(PrologEpilogOp):
    """UWOP_PUSH_NONVOL"""

    def _get_format(self, unw_code):
        fields = ("B,CodeOffset", "B:4,UnwindOp", "B:4,Reg")
        return ("UNWIND_CODE_PUSH_NONVOL", fields)

    def __str__(self):
        return ".PUSHREG " + REGISTERS[self.struct.Reg]
class PrologEpilogOpAllocLarge(PrologEpilogOp):
    """UWOP_ALLOC_LARGE"""

    def _get_format(self, unw_code):
        # OpInfo == 0: 16-bit size in quadwords; otherwise a 32-bit byte size.
        size_field = "H,AllocSizeInQwords" if unw_code.OpInfo == 0 else "I,AllocSize"
        return (
            "UNWIND_CODE_ALLOC_LARGE",
            ("B,CodeOffset", "B:4,UnwindOp", "B:4,OpInfo", size_field),
        )

    def length_in_code_structures(self, unw_code, unw_info):
        if unw_code.OpInfo == 0:
            return 2
        return 3

    def get_alloc_size(self):
        """Allocation size in bytes."""
        if self.struct.OpInfo == 0:
            return self.struct.AllocSizeInQwords * 8
        return self.struct.AllocSize

    def __str__(self):
        return ".ALLOCSTACK " + hex(self.get_alloc_size())
class PrologEpilogOpAllocSmall(PrologEpilogOp):
    """UWOP_ALLOC_SMALL"""

    def _get_format(self, unw_code):
        fields = ("B,CodeOffset", "B:4,UnwindOp", "B:4,AllocSizeInQwordsMinus8")
        return ("UNWIND_CODE_ALLOC_SMALL", fields)

    def get_alloc_size(self):
        """Allocation size in bytes (the encoded value is biased by one qword)."""
        return (self.struct.AllocSizeInQwordsMinus8 + 1) * 8

    def __str__(self):
        return ".ALLOCSTACK " + hex(self.get_alloc_size())
class PrologEpilogOpSetFP(PrologEpilogOp):
    """UWOP_SET_FPREG"""

    def initialize(self, unw_code, data, unw_info, file_offset):
        super(PrologEpilogOpSetFP, self).initialize(
            unw_code, data, unw_info, file_offset
        )
        # Snapshot the frame register and scaled offset from the owning
        # UNWIND_INFO at unpack time.
        self._frame_register = unw_info.FrameRegister
        self._frame_offset = unw_info.FrameOffset * 16

    def __str__(self):
        register_name = REGISTERS[self._frame_register]
        return ".SETFRAME " + register_name + ", " + hex(self._frame_offset)
class PrologEpilogOpSaveReg(PrologEpilogOp):
    """UWOP_SAVE_NONVOL"""

    def length_in_code_structures(self, unwcode, unw_info):
        return 2

    def get_offset(self):
        """Stack offset, in bytes, at which the register is saved."""
        return self.struct.OffsetInQwords * 8

    def _get_format(self, unw_code):
        fields = ("B,CodeOffset", "B:4,UnwindOp", "B:4,Reg", "H,OffsetInQwords")
        return ("UNWIND_CODE_SAVE_NONVOL", fields)

    def __str__(self):
        return ".SAVEREG " + REGISTERS[self.struct.Reg] + ", " + hex(self.get_offset())
class PrologEpilogOpSaveRegFar(PrologEpilogOp):
    """UWOP_SAVE_NONVOL_FAR"""

    def length_in_code_structures(self, unw_code, unw_info):
        return 3

    def get_offset(self):
        """Unscaled 32-bit stack offset at which the register is saved."""
        return self.struct.Offset

    def _get_format(self, unw_code):
        fields = ("B,CodeOffset", "B:4,UnwindOp", "B:4,Reg", "I,Offset")
        return ("UNWIND_CODE_SAVE_NONVOL_FAR", fields)

    def __str__(self):
        return ".SAVEREG " + REGISTERS[self.struct.Reg] + ", " + hex(self.struct.Offset)
class PrologEpilogOpSaveXMM(PrologEpilogOp):
    """UWOP_SAVE_XMM128"""

    def _get_format(self, unw_code):
        fields = ("B,CodeOffset", "B:4,UnwindOp", "B:4,Reg", "H,OffsetIn2Qwords")
        return ("UNWIND_CODE_SAVE_XMM128", fields)

    def length_in_code_structures(self, unw_code, unw_info):
        return 2

    def get_offset(self):
        """Stack offset in bytes (encoded in 16-byte units)."""
        return self.struct.OffsetIn2Qwords * 16

    def __str__(self):
        return ".SAVEXMM128 XMM" + str(self.struct.Reg) + ", " + hex(self.get_offset())
class PrologEpilogOpSaveXMMFar(PrologEpilogOp):
    """UWOP_SAVE_XMM128_FAR"""

    def _get_format(self, unw_code):
        fields = ("B,CodeOffset", "B:4,UnwindOp", "B:4,Reg", "I,Offset")
        return ("UNWIND_CODE_SAVE_XMM128_FAR", fields)

    def length_in_code_structures(self, unw_code, unw_info):
        return 3

    def get_offset(self):
        """Unscaled 32-bit stack offset."""
        return self.struct.Offset

    def __str__(self):
        return ".SAVEXMM128 XMM" + str(self.struct.Reg) + ", " + hex(self.struct.Offset)
class PrologEpilogOpPushFrame(PrologEpilogOp):
    """UWOP_PUSH_MACHFRAME"""

    def __str__(self):
        suffix = " <code>" if self.struct.OpInfo else ""
        return ".PUSHFRAME" + suffix
class PrologEpilogOpEpilogMarker(PrologEpilogOp):
    """UWOP_EPILOG"""

    def initialize(self, unw_code, data, unw_info, file_offset):
        # Whether the offset is split across OffsetLow/OffsetHigh.
        self._long_offst = True
        # The first epilog code encountered carries the epilog size.
        self._first = not hasattr(unw_info, "SizeOfEpilog")
        super(PrologEpilogOpEpilogMarker, self).initialize(
            unw_code, data, unw_info, file_offset
        )
        if self._first:
            # Cache the size on the owning UNWIND_INFO so subsequent epilog
            # codes can read it.
            setattr(unw_info, "SizeOfEpilog", self.struct.Size)
            # OpInfo bit 0 set means the offset fits in OffsetLow alone.
            self._long_offst = unw_code.OpInfo & 1 == 0
        self._epilog_size = unw_info.SizeOfEpilog

    def _get_format(self, unw_code):
        # check if it is the first epilog code among encountered; then its record
        # will contain size of the epilog
        if self._first:
            return (
                "UNWIND_CODE_EPILOG",
                ("B,OffsetLow,Size", "B:4,UnwindOp", "B:4,Flags")
                if unw_code.OpInfo & 1 == 1
                else (
                    "B,Size",
                    "B:4,UnwindOp",
                    "B:4,Flags",
                    "B,OffsetLow",
                    "B:4,Unused",
                    "B:4,OffsetHigh",
                ),
            )
        else:
            return (
                "UNWIND_CODE_EPILOG",
                ("B,OffsetLow", "B:4,UnwindOp", "B:4,OffsetHigh"),
            )

    def length_in_code_structures(self, unw_code, unw_info):
        # Only the first epilog record with a long offset spans two slots.
        return (
            2
            if not hasattr(unw_info, "SizeOfEpilog") and (unw_code.OpInfo & 1) == 0
            else 1
        )

    def get_offset(self):
        return self.struct.OffsetLow | (
            self.struct.OffsetHigh << 8 if self._long_offst else 0
        )

    def is_valid(self):
        # A zero offset marks an all-zeros terminator entry.
        return self.get_offset() > 0

    def __str__(self):
        # the EPILOG sequence may have a terminating all-zeros entry
        return (
            "EPILOG: size="
            + hex(self._epilog_size)
            + ", offset from the end=-"
            + hex(self.get_offset())
            if self.get_offset() > 0
            else ""
        )
class PrologEpilogOpsFactory:
    """A factory for creating unwind codes based on the value of UnwindOp"""

    # Maps each UNWIND_OP_CODES value to its handler class.
    _class_dict = {
        UWOP_PUSH_NONVOL: PrologEpilogOpPushReg,
        UWOP_ALLOC_LARGE: PrologEpilogOpAllocLarge,
        UWOP_ALLOC_SMALL: PrologEpilogOpAllocSmall,
        UWOP_SET_FPREG: PrologEpilogOpSetFP,
        UWOP_SAVE_NONVOL: PrologEpilogOpSaveReg,
        UWOP_SAVE_NONVOL_FAR: PrologEpilogOpSaveRegFar,
        UWOP_SAVE_XMM128: PrologEpilogOpSaveXMM,
        UWOP_SAVE_XMM128_FAR: PrologEpilogOpSaveXMMFar,
        UWOP_PUSH_MACHFRAME: PrologEpilogOpPushFrame,
        UWOP_EPILOG: PrologEpilogOpEpilogMarker,
    }

    @staticmethod
    def create(unwcode):
        """Return a fresh handler instance for *unwcode*, or None if the
        opcode is unknown."""
        op_class = PrologEpilogOpsFactory._class_dict.get(unwcode.UnwindOp)
        return op_class() if op_class is not None else None
# Valid FAT32 8.3 short filename characters according to:
# http://en.wikipedia.org/wiki/8.3_filename
# This will help decide whether DLL ASCII names are likely
# to be valid or otherwise corrupt data
#
# The filename length is not checked because the DLLs filename
# can be longer than the 8.3
# (b() presumably converts the str alphabet to bytes — defined earlier
# in this module.)
allowed_filename = b(
    string.ascii_lowercase
    + string.ascii_uppercase
    + string.digits
    + "!#$%&'()-@^_`{}~+,.;=[]"
)
def is_valid_dos_filename(s):
    """Check whether *s* contains only characters valid in a DOS 8.3-style
    filename (path separators are allowed, as import names can contain
    directories).

    Returns False for None and non-string input. str input is normalized
    to bytes first: previously a str argument raised TypeError because its
    characters were tested against a bytes alphabet; non-ASCII-encodable
    names are now simply reported invalid.
    """
    if s is None or not isinstance(s, (str, bytes, bytearray)):
        return False
    if isinstance(s, str):
        # Normalize so the membership test below compares ints with ints.
        try:
            s = s.encode("ascii")
        except UnicodeEncodeError:
            return False
    # Allow path separators as import names can contain directories.
    allowed = allowed_filename + b"\\/"
    return all(c in allowed for c in set(s))
# Check if an imported name uses the valid accepted characters expected in
# mangled function names. If the symbol's characters don't fall within this
# charset we will assume the name is invalid.
# The set covers plain C identifiers plus the extra characters used by
# C++/MSVC name mangling.
allowed_function_name = b(
    string.ascii_lowercase + string.ascii_uppercase + string.digits + "_?@$()<>"
)
@lru_cache(maxsize=2048)
def is_valid_function_name(s):
    """Check whether *s* looks like a valid (possibly mangled) function name.

    NOTE(review): for str input each character is tested against a bytes
    alphabet, which raises TypeError — callers appear to pass bytes only;
    confirm.
    """
    return (
        s is not None
        and isinstance(s, (str, bytes, bytearray))
        and all(c in allowed_function_name for c in set(s))
    )
- class PE:
- """A Portable Executable representation.
- This class provides access to most of the information in a PE file.
- It expects to be supplied the name of the file to load or PE data
- to process and an optional argument 'fast_load' (False by default)
- which controls whether to load all the directories information,
- which can be quite time consuming.
- pe = pefile.PE('module.dll')
- pe = pefile.PE(name='module.dll')
- would load 'module.dll' and process it. If the data is already
- available in a buffer the same can be achieved with:
- pe = pefile.PE(data=module_dll_data)
- The "fast_load" can be set to a default by setting its value in the
- module itself by means, for instance, of a "pefile.fast_load = True".
- That will make all the subsequent instances not to load the
- whole PE structure. The "full_load" method can be used to parse
- the missing data at a later stage.
- Basic headers information will be available in the attributes:
- DOS_HEADER
- NT_HEADERS
- FILE_HEADER
- OPTIONAL_HEADER
- All of them will contain among their attributes the members of the
- corresponding structures as defined in WINNT.H
- The raw data corresponding to the header (from the beginning of the
- file up to the start of the first section) will be available in the
- instance's attribute 'header' as a string.
- The sections will be available as a list in the 'sections' attribute.
- Each entry will contain as attributes all the structure's members.
- Directory entries will be available as attributes (if they exist):
- (no other entries are processed at this point)
- DIRECTORY_ENTRY_IMPORT (list of ImportDescData instances)
- DIRECTORY_ENTRY_EXPORT (ExportDirData instance)
- DIRECTORY_ENTRY_RESOURCE (ResourceDirData instance)
- DIRECTORY_ENTRY_DEBUG (list of DebugData instances)
- DIRECTORY_ENTRY_BASERELOC (list of BaseRelocationData instances)
- DIRECTORY_ENTRY_TLS
- DIRECTORY_ENTRY_BOUND_IMPORT (list of BoundImportData instances)
- The following dictionary attributes provide ways of mapping different
- constants. They will accept the numeric value and return the string
- representation and the opposite, feed in the string and get the
- numeric constant:
- DIRECTORY_ENTRY
- IMAGE_CHARACTERISTICS
- SECTION_CHARACTERISTICS
- DEBUG_TYPE
- SUBSYSTEM_TYPE
- MACHINE_TYPE
- RELOCATION_TYPE
- RESOURCE_TYPE
- LANG
- SUBLANG
- """
- #
- # Format specifications for PE structures.
- #
- __IMAGE_DOS_HEADER_format__ = (
- "IMAGE_DOS_HEADER",
- (
- "H,e_magic",
- "H,e_cblp",
- "H,e_cp",
- "H,e_crlc",
- "H,e_cparhdr",
- "H,e_minalloc",
- "H,e_maxalloc",
- "H,e_ss",
- "H,e_sp",
- "H,e_csum",
- "H,e_ip",
- "H,e_cs",
- "H,e_lfarlc",
- "H,e_ovno",
- "8s,e_res",
- "H,e_oemid",
- "H,e_oeminfo",
- "20s,e_res2",
- "I,e_lfanew",
- ),
- )
- __IMAGE_FILE_HEADER_format__ = (
- "IMAGE_FILE_HEADER",
- (
- "H,Machine",
- "H,NumberOfSections",
- "I,TimeDateStamp",
- "I,PointerToSymbolTable",
- "I,NumberOfSymbols",
- "H,SizeOfOptionalHeader",
- "H,Characteristics",
- ),
- )
- __IMAGE_DATA_DIRECTORY_format__ = (
- "IMAGE_DATA_DIRECTORY",
- ("I,VirtualAddress", "I,Size"),
- )
# ---------------------------------------------------------------------------
# Structure format definitions consumed by Structure()/__unpack_data__().
# Each definition is a 2-tuple: (structure name, tuple of field specs).
# A field spec is "<struct-module format char>,<name>[,<alias>...]" where
# "B" = uint8, "H" = uint16, "I" = uint32, "Q" = uint64, "8s" = 8 raw bytes.
# Extra comma-separated names are aliases sharing the same value (union
# members in the corresponding Windows headers).
# ---------------------------------------------------------------------------

# PE32 (32-bit) optional header, through NumberOfRvaAndSizes.
__IMAGE_OPTIONAL_HEADER_format__ = (
    "IMAGE_OPTIONAL_HEADER",
    (
        "H,Magic",
        "B,MajorLinkerVersion",
        "B,MinorLinkerVersion",
        "I,SizeOfCode",
        "I,SizeOfInitializedData",
        "I,SizeOfUninitializedData",
        "I,AddressOfEntryPoint",
        "I,BaseOfCode",
        "I,BaseOfData",
        "I,ImageBase",
        "I,SectionAlignment",
        "I,FileAlignment",
        "H,MajorOperatingSystemVersion",
        "H,MinorOperatingSystemVersion",
        "H,MajorImageVersion",
        "H,MinorImageVersion",
        "H,MajorSubsystemVersion",
        "H,MinorSubsystemVersion",
        "I,Reserved1",
        "I,SizeOfImage",
        "I,SizeOfHeaders",
        "I,CheckSum",
        "H,Subsystem",
        "H,DllCharacteristics",
        "I,SizeOfStackReserve",
        "I,SizeOfStackCommit",
        "I,SizeOfHeapReserve",
        "I,SizeOfHeapCommit",
        "I,LoaderFlags",
        "I,NumberOfRvaAndSizes",
    ),
)

# PE32+ (64-bit) optional header: no BaseOfData; ImageBase and the
# stack/heap size fields widen to 64 bits.
__IMAGE_OPTIONAL_HEADER64_format__ = (
    "IMAGE_OPTIONAL_HEADER64",
    (
        "H,Magic",
        "B,MajorLinkerVersion",
        "B,MinorLinkerVersion",
        "I,SizeOfCode",
        "I,SizeOfInitializedData",
        "I,SizeOfUninitializedData",
        "I,AddressOfEntryPoint",
        "I,BaseOfCode",
        "Q,ImageBase",
        "I,SectionAlignment",
        "I,FileAlignment",
        "H,MajorOperatingSystemVersion",
        "H,MinorOperatingSystemVersion",
        "H,MajorImageVersion",
        "H,MinorImageVersion",
        "H,MajorSubsystemVersion",
        "H,MinorSubsystemVersion",
        "I,Reserved1",
        "I,SizeOfImage",
        "I,SizeOfHeaders",
        "I,CheckSum",
        "H,Subsystem",
        "H,DllCharacteristics",
        "Q,SizeOfStackReserve",
        "Q,SizeOfStackCommit",
        "Q,SizeOfHeapReserve",
        "Q,SizeOfHeapCommit",
        "I,LoaderFlags",
        "I,NumberOfRvaAndSizes",
    ),
)

# Only the PE\0\0 signature; FILE_HEADER / OPTIONAL_HEADER are attached later.
__IMAGE_NT_HEADERS_format__ = ("IMAGE_NT_HEADERS", ("I,Signature",))

# Section table entry; Misc is a union of PhysicalAddress / VirtualSize.
__IMAGE_SECTION_HEADER_format__ = (
    "IMAGE_SECTION_HEADER",
    (
        "8s,Name",
        "I,Misc,Misc_PhysicalAddress,Misc_VirtualSize",
        "I,VirtualAddress",
        "I,SizeOfRawData",
        "I,PointerToRawData",
        "I,PointerToRelocations",
        "I,PointerToLinenumbers",
        "H,NumberOfRelocations",
        "H,NumberOfLinenumbers",
        "I,Characteristics",
    ),
)

# Delay-load import descriptor (ImgDelayDescr).
__IMAGE_DELAY_IMPORT_DESCRIPTOR_format__ = (
    "IMAGE_DELAY_IMPORT_DESCRIPTOR",
    (
        "I,grAttrs",
        "I,szName",
        "I,phmod",
        "I,pIAT",
        "I,pINT",
        "I,pBoundIAT",
        "I,pUnloadIAT",
        "I,dwTimeStamp",
    ),
)

# Regular import descriptor; OriginalFirstThunk aliases Characteristics.
__IMAGE_IMPORT_DESCRIPTOR_format__ = (
    "IMAGE_IMPORT_DESCRIPTOR",
    (
        "I,OriginalFirstThunk,Characteristics",
        "I,TimeDateStamp",
        "I,ForwarderChain",
        "I,Name",
        "I,FirstThunk",
    ),
)

# Export directory table.
__IMAGE_EXPORT_DIRECTORY_format__ = (
    "IMAGE_EXPORT_DIRECTORY",
    (
        "I,Characteristics",
        "I,TimeDateStamp",
        "H,MajorVersion",
        "H,MinorVersion",
        "I,Name",
        "I,Base",
        "I,NumberOfFunctions",
        "I,NumberOfNames",
        "I,AddressOfFunctions",
        "I,AddressOfNames",
        "I,AddressOfNameOrdinals",
    ),
)

# Resource directory table header (entries follow it).
__IMAGE_RESOURCE_DIRECTORY_format__ = (
    "IMAGE_RESOURCE_DIRECTORY",
    (
        "I,Characteristics",
        "I,TimeDateStamp",
        "H,MajorVersion",
        "H,MinorVersion",
        "H,NumberOfNamedEntries",
        "H,NumberOfIdEntries",
    ),
)

# One resource directory entry: name/ID plus offset to data or subdirectory.
__IMAGE_RESOURCE_DIRECTORY_ENTRY_format__ = (
    "IMAGE_RESOURCE_DIRECTORY_ENTRY",
    ("I,Name", "I,OffsetToData"),
)

# Leaf resource data entry.
__IMAGE_RESOURCE_DATA_ENTRY_format__ = (
    "IMAGE_RESOURCE_DATA_ENTRY",
    ("I,OffsetToData", "I,Size", "I,CodePage", "I,Reserved"),
)

# Version-information pseudo-structures: all share the Length/ValueLength/Type
# header; the szKey string and children are parsed separately.
__VS_VERSIONINFO_format__ = (
    "VS_VERSIONINFO",
    ("H,Length", "H,ValueLength", "H,Type"),
)

# Fixed file-version block inside VS_VERSIONINFO.
__VS_FIXEDFILEINFO_format__ = (
    "VS_FIXEDFILEINFO",
    (
        "I,Signature",
        "I,StrucVersion",
        "I,FileVersionMS",
        "I,FileVersionLS",
        "I,ProductVersionMS",
        "I,ProductVersionLS",
        "I,FileFlagsMask",
        "I,FileFlags",
        "I,FileOS",
        "I,FileType",
        "I,FileSubtype",
        "I,FileDateMS",
        "I,FileDateLS",
    ),
)

__StringFileInfo_format__ = (
    "StringFileInfo",
    ("H,Length", "H,ValueLength", "H,Type"),
)
__StringTable_format__ = ("StringTable", ("H,Length", "H,ValueLength", "H,Type"))
__String_format__ = ("String", ("H,Length", "H,ValueLength", "H,Type"))
__Var_format__ = ("Var", ("H,Length", "H,ValueLength", "H,Type"))

# Import thunk: a 32/64-bit union of the possible interpretations.
__IMAGE_THUNK_DATA_format__ = (
    "IMAGE_THUNK_DATA",
    ("I,ForwarderString,Function,Ordinal,AddressOfData",),
)
__IMAGE_THUNK_DATA64_format__ = (
    "IMAGE_THUNK_DATA",
    ("Q,ForwarderString,Function,Ordinal,AddressOfData",),
)

# Debug directory entry.
__IMAGE_DEBUG_DIRECTORY_format__ = (
    "IMAGE_DEBUG_DIRECTORY",
    (
        "I,Characteristics",
        "I,TimeDateStamp",
        "H,MajorVersion",
        "H,MinorVersion",
        "I,Type",
        "I,SizeOfData",
        "I,AddressOfRawData",
        "I,PointerToRawData",
    ),
)

# Base relocation block header, followed by 16-bit entries.
__IMAGE_BASE_RELOCATION_format__ = (
    "IMAGE_BASE_RELOCATION",
    ("I,VirtualAddress", "I,SizeOfBlock"),
)
__IMAGE_BASE_RELOCATION_ENTRY_format__ = (
    "IMAGE_BASE_RELOCATION_ENTRY",
    ("H,Data",),
)

# TLS directory, 32- and 64-bit variants (pointers widen to Q in the latter).
__IMAGE_TLS_DIRECTORY_format__ = (
    "IMAGE_TLS_DIRECTORY",
    (
        "I,StartAddressOfRawData",
        "I,EndAddressOfRawData",
        "I,AddressOfIndex",
        "I,AddressOfCallBacks",
        "I,SizeOfZeroFill",
        "I,Characteristics",
    ),
)
__IMAGE_TLS_DIRECTORY64_format__ = (
    "IMAGE_TLS_DIRECTORY",
    (
        "Q,StartAddressOfRawData",
        "Q,EndAddressOfRawData",
        "Q,AddressOfIndex",
        "Q,AddressOfCallBacks",
        "I,SizeOfZeroFill",
        "I,Characteristics",
    ),
)

# Load-config directory, 32-bit layout (through GuardFlags).
__IMAGE_LOAD_CONFIG_DIRECTORY_format__ = (
    "IMAGE_LOAD_CONFIG_DIRECTORY",
    (
        "I,Size",
        "I,TimeDateStamp",
        "H,MajorVersion",
        "H,MinorVersion",
        "I,GlobalFlagsClear",
        "I,GlobalFlagsSet",
        "I,CriticalSectionDefaultTimeout",
        "I,DeCommitFreeBlockThreshold",
        "I,DeCommitTotalFreeThreshold",
        "I,LockPrefixTable",
        "I,MaximumAllocationSize",
        "I,VirtualMemoryThreshold",
        "I,ProcessHeapFlags",
        "I,ProcessAffinityMask",
        "H,CSDVersion",
        "H,Reserved1",
        "I,EditList",
        "I,SecurityCookie",
        "I,SEHandlerTable",
        "I,SEHandlerCount",
        "I,GuardCFCheckFunctionPointer",
        "I,Reserved2",
        "I,GuardCFFunctionTable",
        "I,GuardCFFunctionCount",
        "I,GuardFlags",
    ),
)

# Load-config directory, 64-bit layout. Note ProcessHeapFlags and
# ProcessAffinityMask are swapped relative to the 32-bit struct, matching
# the Windows header.
__IMAGE_LOAD_CONFIG_DIRECTORY64_format__ = (
    "IMAGE_LOAD_CONFIG_DIRECTORY",
    (
        "I,Size",
        "I,TimeDateStamp",
        "H,MajorVersion",
        "H,MinorVersion",
        "I,GlobalFlagsClear",
        "I,GlobalFlagsSet",
        "I,CriticalSectionDefaultTimeout",
        "Q,DeCommitFreeBlockThreshold",
        "Q,DeCommitTotalFreeThreshold",
        "Q,LockPrefixTable",
        "Q,MaximumAllocationSize",
        "Q,VirtualMemoryThreshold",
        "Q,ProcessAffinityMask",
        "I,ProcessHeapFlags",
        "H,CSDVersion",
        "H,Reserved1",
        "Q,EditList",
        "Q,SecurityCookie",
        "Q,SEHandlerTable",
        "Q,SEHandlerCount",
        "Q,GuardCFCheckFunctionPointer",
        "Q,Reserved2",
        "Q,GuardCFFunctionTable",
        "Q,GuardCFFunctionCount",
        "I,GuardFlags",
    ),
)

# Bound import descriptor and its forwarder references.
__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__ = (
    "IMAGE_BOUND_IMPORT_DESCRIPTOR",
    ("I,TimeDateStamp", "H,OffsetModuleName", "H,NumberOfModuleForwarderRefs"),
)
__IMAGE_BOUND_FORWARDER_REF_format__ = (
    "IMAGE_BOUND_FORWARDER_REF",
    ("I,TimeDateStamp", "H,OffsetModuleName", "H,Reserved"),
)

# x64/Itanium exception-directory function entry.
__RUNTIME_FUNCTION_format__ = (
    "RUNTIME_FUNCTION",
    ("I,BeginAddress", "I,EndAddress", "I,UnwindData"),
)
def __init__(
    self,
    name=None,
    data=None,
    fast_load=None,
    max_symbol_exports=MAX_SYMBOL_EXPORT_COUNT,
    max_repeated_symbol=120,
):
    """Initialize the PE instance from a file path or an in-memory buffer.

    Exactly one of ``name`` (path to a PE file) or ``data`` (bytes-like
    contents) must be supplied, otherwise ValueError is raised.
    ``fast_load`` skips parsing of the data directories (they can be
    loaded later via full_load()); when None it falls back to the
    module-level ``fast_load`` default. ``max_symbol_exports`` and
    ``max_repeated_symbol`` bound export parsing to resist malformed files.
    """
    self.max_symbol_exports = max_symbol_exports
    self.max_repeated_symbol = max_repeated_symbol
    self.sections = []
    self.__warnings = []
    self.PE_TYPE = None
    if name is None and data is None:
        raise ValueError("Must supply either name or data")
    # This list will keep track of all the structures created.
    # That will allow for an easy iteration through the list
    # in order to save the modifications made
    self.__structures__ = []
    self.__from_file = None
    # We only want to print these warnings once
    self.FileAlignment_Warning = False
    self.SectionAlignment_Warning = False
    # Count of total resource entries across nested tables
    self.__total_resource_entries_count = 0
    # Sum of the size of all resource entries parsed, which should not
    # exceed the file size.
    self.__total_resource_bytes = 0
    # The number of imports parsed in this file
    self.__total_import_symbols = 0
    # Fall back to the module-level default when no explicit value is given.
    fast_load = fast_load or globals()["fast_load"]
    try:
        self.__parse__(name, data, fast_load)
    except:
        # Intentionally broad: release the mmap/file before re-raising
        # whatever __parse__ failed with.
        self.close()
        raise
def close(self):
    """Close the underlying memory map when the data came from a file,
    then drop the reference to the data buffer."""
    looks_like_mmap = False
    if hasattr(self, "__data__"):
        # mmap.mmap may be monkey-patched in some environments, hence the
        # extra repr() based detection as a fallback.
        looks_like_mmap = (
            isinstance(mmap.mmap, type) and isinstance(self.__data__, mmap.mmap)
        ) or "mmap.mmap" in repr(type(self.__data__))
    if self.__from_file is True and looks_like_mmap:
        self.__data__.close()
    del self.__data__
def __unpack_data__(self, format, data, file_offset):
    """Apply structure format to raw data.

    On success the unpacked Structure is registered in __structures__
    (so it is written back by write()) and returned; if the header is
    corrupt a warning is recorded and None is returned instead.
    """
    structure = Structure(format, file_offset=file_offset)
    try:
        structure.__unpack__(data)
    except PEFormatError as err:
        warning = 'Corrupt header "{0}" at file offset {1}. Exception: {2}'.format(
            format[0], file_offset, err
        )
        self.__warnings.append(warning)
        return None
    self.__structures__.append(structure)
    return structure
def __parse__(self, fname, data, fast_load):
    """Parse a Portable Executable file.

    Loads a PE file, parsing all its structures and making them available
    through the instance's attributes. Either ``fname`` (path) or ``data``
    (bytes-like buffer) provides the raw contents.
    """
    # --- Acquire the raw image: mmap the file, or adopt the given buffer ---
    if fname is not None:
        stat = os.stat(fname)
        if stat.st_size == 0:
            raise PEFormatError("The file is empty")
        fd = None
        try:
            fd = open(fname, "rb")
            self.fileno = fd.fileno()
            if hasattr(mmap, "MAP_PRIVATE"):
                # Unix
                self.__data__ = mmap.mmap(self.fileno, 0, mmap.MAP_PRIVATE)
            else:
                # Windows
                self.__data__ = mmap.mmap(self.fileno, 0, access=mmap.ACCESS_READ)
            self.__from_file = True
        except IOError as excp:
            exception_msg = "{0}".format(excp)
            exception_msg = exception_msg and (": %s" % exception_msg)
            raise Exception(
                "Unable to access file '{0}'{1}".format(fname, exception_msg)
            )
        finally:
            # The mmap holds its own handle; the original fd can be closed
            # regardless of success or failure.
            if fd is not None:
                fd.close()
    elif data is not None:
        self.__data__ = data
        self.__from_file = False
    # Resources should not overlap each other, so they should not exceed the
    # file size.
    self.__resource_size_limit_upperbounds = len(self.__data__)
    self.__resource_size_limit_reached = False
    if not fast_load:
        # Heuristic truncation/malformation check on byte frequencies.
        for byte, byte_count in Counter(bytearray(self.__data__)).items():
            # Only report the cases where a byte makes up for more than 50% (if
            # zero) or 15% (if non-zero) of the file's contents. There are
            # legitimate PEs where 0x00 bytes are close to 50% of the whole
            # file's contents.
            if (byte == 0 and 1.0 * byte_count / len(self.__data__) > 0.5) or (
                byte != 0 and 1.0 * byte_count / len(self.__data__) > 0.15
            ):
                self.__warnings.append(
                    (
                        "Byte 0x{0:02x} makes up {1:.4f}% of the file's contents."
                        " This may indicate truncation / malformation."
                    ).format(byte, 100.0 * byte_count / len(self.__data__))
                )
    # --- DOS header ---
    dos_header_data = self.__data__[:64]
    if len(dos_header_data) != 64:
        raise PEFormatError(
            "Unable to read the DOS Header, possibly a truncated file."
        )
    self.DOS_HEADER = self.__unpack_data__(
        self.__IMAGE_DOS_HEADER_format__, dos_header_data, file_offset=0
    )
    # NOTE(review): e_magic is dereferenced before the None-check below; if
    # __unpack_data__ returned None this raises AttributeError rather than
    # PEFormatError — confirm whether that ordering is intentional.
    if self.DOS_HEADER.e_magic == IMAGE_DOSZM_SIGNATURE:
        raise PEFormatError("Probably a ZM Executable (not a PE file).")
    if not self.DOS_HEADER or self.DOS_HEADER.e_magic != IMAGE_DOS_SIGNATURE:
        raise PEFormatError("DOS Header magic not found.")
    # OC Patch:
    # Check for sane value in e_lfanew
    #
    if self.DOS_HEADER.e_lfanew > len(self.__data__):
        raise PEFormatError("Invalid e_lfanew value, probably not a PE file")
    # --- NT headers signature ---
    nt_headers_offset = self.DOS_HEADER.e_lfanew
    self.NT_HEADERS = self.__unpack_data__(
        self.__IMAGE_NT_HEADERS_format__,
        self.__data__[nt_headers_offset : nt_headers_offset + 8],
        file_offset=nt_headers_offset,
    )
    # We better check the signature right here, before the file screws
    # around with sections:
    # OC Patch:
    # Some malware will cause the Signature value to not exist at all
    if not self.NT_HEADERS or not self.NT_HEADERS.Signature:
        raise PEFormatError("NT Headers not found.")
    # Reject other executable container formats sharing the MZ stub.
    if (0xFFFF & self.NT_HEADERS.Signature) == IMAGE_NE_SIGNATURE:
        raise PEFormatError("Invalid NT Headers signature. Probably a NE file")
    if (0xFFFF & self.NT_HEADERS.Signature) == IMAGE_LE_SIGNATURE:
        raise PEFormatError("Invalid NT Headers signature. Probably a LE file")
    if (0xFFFF & self.NT_HEADERS.Signature) == IMAGE_LX_SIGNATURE:
        raise PEFormatError("Invalid NT Headers signature. Probably a LX file")
    if (0xFFFF & self.NT_HEADERS.Signature) == IMAGE_TE_SIGNATURE:
        raise PEFormatError("Invalid NT Headers signature. Probably a TE file")
    if self.NT_HEADERS.Signature != IMAGE_NT_SIGNATURE:
        raise PEFormatError("Invalid NT Headers signature.")
    # --- COFF file header ---
    self.FILE_HEADER = self.__unpack_data__(
        self.__IMAGE_FILE_HEADER_format__,
        self.__data__[nt_headers_offset + 4 : nt_headers_offset + 4 + 32],
        file_offset=nt_headers_offset + 4,
    )
    image_flags = retrieve_flags(IMAGE_CHARACTERISTICS, "IMAGE_FILE_")
    if not self.FILE_HEADER:
        raise PEFormatError("File Header missing")
    # Set the image's flags according the the Characteristics member
    set_flags(self.FILE_HEADER, self.FILE_HEADER.Characteristics, image_flags)
    # --- Optional header (PE32 first; re-parsed as PE32+ below if needed) ---
    optional_header_offset = nt_headers_offset + 4 + self.FILE_HEADER.sizeof()
    # Note: location of sections can be controlled from PE header:
    sections_offset = optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader
    self.OPTIONAL_HEADER = self.__unpack_data__(
        self.__IMAGE_OPTIONAL_HEADER_format__,
        # Read up to 256 bytes to allow creating a copy of too much data
        self.__data__[optional_header_offset : optional_header_offset + 256],
        file_offset=optional_header_offset,
    )
    # According to solardesigner's findings for his
    # Tiny PE project, the optional header does not
    # need fields beyond "Subsystem" in order to be
    # loadable by the Windows loader (given that zeros
    # are acceptable values and the header is loaded
    # in a zeroed memory page)
    # If trying to parse a full Optional Header fails
    # we try to parse it again with some 0 padding
    #
    MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69
    if (
        self.OPTIONAL_HEADER is None
        and len(
            self.__data__[optional_header_offset : optional_header_offset + 0x200]
        )
        >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE
    ):
        # Add enough zeros to make up for the unused fields
        #
        padding_length = 128
        # Create padding
        #
        padded_data = self.__data__[
            optional_header_offset : optional_header_offset + 0x200
        ] + (b"\0" * padding_length)
        self.OPTIONAL_HEADER = self.__unpack_data__(
            self.__IMAGE_OPTIONAL_HEADER_format__,
            padded_data,
            file_offset=optional_header_offset,
        )
    # Check the Magic in the OPTIONAL_HEADER and set the PE file
    # type accordingly
    #
    if self.OPTIONAL_HEADER is not None:
        if self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE:
            self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE
        elif self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE_PLUS:
            self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE_PLUS
            # Re-parse with the 64-bit layout.
            self.OPTIONAL_HEADER = self.__unpack_data__(
                self.__IMAGE_OPTIONAL_HEADER64_format__,
                self.__data__[
                    optional_header_offset : optional_header_offset + 0x200
                ],
                file_offset=optional_header_offset,
            )
            # Again, as explained above, we try to parse
            # a reduced form of the Optional Header which
            # is still valid despite not including all
            # structure members
            #
            MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69 + 4
            if (
                self.OPTIONAL_HEADER is None
                and len(
                    self.__data__[
                        optional_header_offset : optional_header_offset + 0x200
                    ]
                )
                >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE
            ):
                padding_length = 128
                padded_data = self.__data__[
                    optional_header_offset : optional_header_offset + 0x200
                ] + (b"\0" * padding_length)
                self.OPTIONAL_HEADER = self.__unpack_data__(
                    self.__IMAGE_OPTIONAL_HEADER64_format__,
                    padded_data,
                    file_offset=optional_header_offset,
                )
    if not self.FILE_HEADER:
        raise PEFormatError("File Header missing")
    # OC Patch:
    # Die gracefully if there is no OPTIONAL_HEADER field
    # 975440f5ad5e2e4a92c4d9a5f22f75c1
    if self.OPTIONAL_HEADER is None:
        raise PEFormatError("No Optional Header found, invalid PE32 or PE32+ file.")
    if self.PE_TYPE is None:
        self.__warnings.append(
            "Invalid type 0x{0:04x} in Optional Header.".format(
                self.OPTIONAL_HEADER.Magic
            )
        )
    dll_characteristics_flags = retrieve_flags(
        DLL_CHARACTERISTICS, "IMAGE_DLLCHARACTERISTICS_"
    )
    # Set the Dll Characteristics flags according the the DllCharacteristics member
    set_flags(
        self.OPTIONAL_HEADER,
        self.OPTIONAL_HEADER.DllCharacteristics,
        dll_characteristics_flags,
    )
    self.OPTIONAL_HEADER.DATA_DIRECTORY = []
    # offset = (optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader)
    offset = optional_header_offset + self.OPTIONAL_HEADER.sizeof()
    self.NT_HEADERS.FILE_HEADER = self.FILE_HEADER
    self.NT_HEADERS.OPTIONAL_HEADER = self.OPTIONAL_HEADER
    # Windows 8 specific check
    #
    if (
        self.OPTIONAL_HEADER.AddressOfEntryPoint
        < self.OPTIONAL_HEADER.SizeOfHeaders
    ):
        self.__warnings.append(
            "SizeOfHeaders is smaller than AddressOfEntryPoint: this file "
            "cannot run under Windows 8."
        )
    # The NumberOfRvaAndSizes is sanitized to stay within
    # reasonable limits so can be casted to an int
    #
    if self.OPTIONAL_HEADER.NumberOfRvaAndSizes > 0x10:
        self.__warnings.append(
            "Suspicious NumberOfRvaAndSizes in the Optional Header. "
            "Normal values are never larger than 0x10, the value is: 0x%x"
            % self.OPTIONAL_HEADER.NumberOfRvaAndSizes
        )
    # --- Data directory entries ---
    # NOTE(review): despite the name, this constant is used as a byte count
    # for the slice below, not as an entry count — confirm intent.
    MAX_ASSUMED_VALID_NUMBER_OF_RVA_AND_SIZES = 0x100
    for i in range(int(0x7FFFFFFF & self.OPTIONAL_HEADER.NumberOfRvaAndSizes)):
        if len(self.__data__) - offset == 0:
            break
        if len(self.__data__) - offset < 8:
            # Zero-pad a truncated trailing entry so it still unpacks.
            data = self.__data__[offset:] + b"\0" * 8
        else:
            data = self.__data__[
                offset : offset + MAX_ASSUMED_VALID_NUMBER_OF_RVA_AND_SIZES
            ]
        dir_entry = self.__unpack_data__(
            self.__IMAGE_DATA_DIRECTORY_format__, data, file_offset=offset
        )
        if dir_entry is None:
            break
        # Would fail if missing an entry
        # 1d4937b2fa4d84ad1bce0309857e70ca offending sample
        try:
            dir_entry.name = DIRECTORY_ENTRY[i]
        except (KeyError, AttributeError):
            break
        offset += dir_entry.sizeof()
        self.OPTIONAL_HEADER.DATA_DIRECTORY.append(dir_entry)
        # If the offset goes outside the optional header,
        # the loop is broken, regardless of how many directories
        # NumberOfRvaAndSizes says there are
        #
        # We assume a normally sized optional header, hence that we do
        # a sizeof() instead of reading SizeOfOptionalHeader.
        # Then we add a default number of directories times their size,
        # if we go beyond that, we assume the number of directories
        # is wrong and stop processing
        if offset >= (
            optional_header_offset + self.OPTIONAL_HEADER.sizeof() + 8 * 16
        ):
            break
    # --- Section table ---
    offset = self.parse_sections(sections_offset)
    # OC Patch:
    # There could be a problem if there are no raw data sections
    # greater than 0
    # fc91013eb72529da005110a3403541b6 example
    # Should this throw an exception in the minimum header offset
    # can't be found?
    #
    rawDataPointers = [
        self.adjust_FileAlignment(
            s.PointerToRawData, self.OPTIONAL_HEADER.FileAlignment
        )
        for s in self.sections
        if s.PointerToRawData > 0
    ]
    if len(rawDataPointers) > 0:
        lowest_section_offset = min(rawDataPointers)
    else:
        lowest_section_offset = None
    # self.header covers everything before the first section's raw data.
    if not lowest_section_offset or lowest_section_offset < offset:
        self.header = self.__data__[:offset]
    else:
        self.header = self.__data__[:lowest_section_offset]
    # Check whether the entry point lies within a section
    #
    if (
        self.get_section_by_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint)
        is not None
    ):
        # Check whether the entry point lies within the file
        #
        ep_offset = self.get_offset_from_rva(
            self.OPTIONAL_HEADER.AddressOfEntryPoint
        )
        if ep_offset > len(self.__data__):
            self.__warnings.append(
                "Possibly corrupt file. AddressOfEntryPoint lies outside the"
                " file. AddressOfEntryPoint: 0x%x"
                % self.OPTIONAL_HEADER.AddressOfEntryPoint
            )
    else:
        self.__warnings.append(
            "AddressOfEntryPoint lies outside the sections' boundaries. "
            "AddressOfEntryPoint: 0x%x" % self.OPTIONAL_HEADER.AddressOfEntryPoint
        )
    # Unless fast loading was requested, parse the data directories now.
    if not fast_load:
        self.full_load()
def parse_rich_header(self):
    """Parses the rich header
    see http://www.ntcore.com/files/richsign.htm for more information

    Structure:
    00 DanS ^ checksum, checksum, checksum, checksum
    10 Symbol RVA ^ checksum, Symbol size ^ checksum...
    ...
    XX Rich, checksum, 0, 0,...

    Returns a dict with keys: "key", "raw_data", "clear_data", "checksum"
    and "values" (flat list of decoded id/count pairs), or None when no
    valid Rich header is present.
    """
    # Rich Header constants
    #
    DANS = 0x536E6144  # 'DanS' as dword
    RICH = 0x68636952  # 'Rich' as dword
    # The Rich header lives between the DOS stub (assumed to start at 0x80)
    # and the NT headers; search only within that window.
    rich_index = self.__data__.find(
        b"Rich", 0x80, self.OPTIONAL_HEADER.get_file_offset()
    )
    if rich_index == -1:
        return None
    # Read a block of data
    try:
        # The end of the structure is 8 bytes after the start of the Rich
        # string.
        rich_data = self.__data__[0x80 : rich_index + 8]
        # Make the data have length a multiple of 4, otherwise the
        # subsequent parsing will fail. It's not impossible that we retrieve
        # truncated data that it's not a multiple.
        rich_data = rich_data[: 4 * int(len(rich_data) / 4)]
        data = list(
            struct.unpack("<{0}I".format(int(len(rich_data) / 4)), rich_data)
        )
        if RICH not in data:
            return None
    except PEFormatError:
        return None
    # get key, raw_data and clear_data
    # The XOR key is the dword immediately after the 'Rich' marker.
    key = struct.pack("<L", data[data.index(RICH) + 1])
    result = {"key": key}
    raw_data = rich_data[: rich_data.find(b"Rich")]
    result["raw_data"] = raw_data
    # Normalize bytes/str iteration differences (Py2/Py3 compatibility).
    ord_ = lambda c: ord(c) if not isinstance(c, int) else c
    # Decode by XORing every byte with the repeating 4-byte key.
    clear_data = bytearray()
    for idx, val in enumerate(raw_data):
        clear_data.append((ord_(val) ^ ord_(key[idx % len(key)])))
    result["clear_data"] = bytes(clear_data)
    # the checksum should be present 3 times after the DanS signature
    #
    checksum = data[1]
    if data[0] ^ checksum != DANS or data[2] != checksum or data[3] != checksum:
        return None
    result["checksum"] = checksum
    headervalues = []
    result["values"] = headervalues
    # Skip the DanS marker and the three checksum dwords.
    data = data[4:]
    for i in range(int(len(data) / 2)):
        # Stop until the Rich footer signature is found
        #
        if data[2 * i] == RICH:
            # it should be followed by the checksum
            #
            if data[2 * i + 1] != checksum:
                self.__warnings.append("Rich Header is malformed")
            break
        # header values come by pairs
        #
        headervalues += [data[2 * i] ^ checksum, data[2 * i + 1] ^ checksum]
    return result
def get_warnings(self):
    """Return the accumulated list of parser warnings.

    Non-critical problems found while parsing the PE file are collected
    rather than raised; this accessor exposes that full list.
    """
    warnings = self.__warnings
    return warnings
def show_warnings(self):
    """Print each accumulated parser warning to standard output.

    Non-critical problems found while parsing are collected in a list;
    this helper prints them all, one per line, prefixed with ">".
    """
    for message in self.__warnings:
        print(">", message)
def full_load(self):
    """Process the data directories.

    Loads the data directories that may have been skipped when the
    "fast_load" option was used, then populates RICH_HEADER from the
    parsed Rich header (or sets it to None when absent).
    """
    self.parse_data_directories()

    class RichHeader:
        pass

    parsed = self.parse_rich_header()
    if not parsed:
        self.RICH_HEADER = None
        return
    header = RichHeader()
    for field in ("checksum", "values", "key", "raw_data", "clear_data"):
        setattr(header, field, parsed.get(field, None))
    self.RICH_HEADER = header
def write(self, filename=None):
    """Write the PE file.

    This function will process all headers and components of the PE file,
    apply all changes made (by just assigning to attributes in the PE
    objects), and write the result back.

    Args:
        filename: destination path. Optional; when not provided the
            patched image is returned as a bytearray instead of being
            written to disk.

    Returns:
        The patched file contents (bytearray) when ``filename`` is falsy,
        otherwise None.
    """
    file_data = bytearray(self.__data__)
    # Re-pack every parsed structure over the original bytes so that any
    # attribute modifications are reflected in the output.
    for structure in self.__structures__:
        struct_data = bytearray(structure.__pack__())
        offset = structure.get_file_offset()
        file_data[offset : offset + len(struct_data)] = struct_data
    # Patch modified version-information strings in place.
    if hasattr(self, "VS_VERSIONINFO") and hasattr(self, "FileInfo"):
        for finfo in self.FileInfo:
            for entry in finfo:
                if not hasattr(entry, "StringTable"):
                    continue
                for st_entry in entry.StringTable:
                    for key, value in list(st_entry.entries.items()):
                        # Offsets and lengths of the keys and values.
                        # Each value in the dictionary is a tuple:
                        # (key length, value length)
                        # The lengths are in characters, not in bytes.
                        offsets = st_entry.entries_offsets[key]
                        lengths = st_entry.entries_lengths[key]
                        encoded_data = value.decode("utf-8").encode("utf-16le")
                        if len(value) > lengths[1]:
                            # New value is longer than the original slot:
                            # truncate it to the slot's size.
                            file_data[
                                offsets[1] : offsets[1] + lengths[1] * 2
                            ] = encoded_data[: lengths[1] * 2]
                        else:
                            file_data[
                                offsets[1] : offsets[1] + len(encoded_data)
                            ] = encoded_data
    if not filename:
        return file_data
    # BUG FIX: use a context manager so the handle is closed even if the
    # write raises (previously open/write/close leaked on failure).
    with open(filename, "wb+") as f:
        f.write(file_data)
    return None
def parse_sections(self, offset):
    """Fetch the PE file sections.

    The sections will be readily available in the "sections" attribute.
    Its attributes will contain all the section information plus "data"
    a buffer containing the section's data.

    The "Characteristics" member will be processed and attributes
    representing the section characteristics (with the 'IMAGE_SCN_'
    string trimmed from the constant's names) will be added to the
    section instance.

    Refer to the SectionStructure class for additional info.

    Returns the file offset just past the section table (or ``offset``
    unchanged when there are no sections).
    """
    self.sections = []
    # Abort a section once it accumulates this many suspicious findings.
    MAX_SIMULTANEOUS_ERRORS = 3
    for i in range(self.FILE_HEADER.NumberOfSections):
        if i >= MAX_SECTIONS:
            self.__warnings.append(
                "Too many sections {0} (>={1})".format(
                    self.FILE_HEADER.NumberOfSections, MAX_SECTIONS
                )
            )
            break
        simultaneous_errors = 0
        section = SectionStructure(self.__IMAGE_SECTION_HEADER_format__, pe=self)
        if not section:
            break
        # Section headers are laid out contiguously after the optional header.
        section_offset = offset + section.sizeof() * i
        section.set_file_offset(section_offset)
        section_data = self.__data__[
            section_offset : section_offset + section.sizeof()
        ]
        # Check if the section is all nulls and stop if so.
        if count_zeroes(section_data) == section.sizeof():
            self.__warnings.append(f"Invalid section {i}. Contents are null-bytes.")
            break
        if not section_data:
            self.__warnings.append(
                f"Invalid section {i}. No data in the file (is this corkami's "
                "virtsectblXP?)."
            )
            break
        section.__unpack__(section_data)
        self.__structures__.append(section)
        # --- Sanity checks; each failure only warns, up to the error cap ---
        if section.SizeOfRawData + section.PointerToRawData > len(self.__data__):
            simultaneous_errors += 1
            self.__warnings.append(
                f"Error parsing section {i}. SizeOfRawData is larger than file."
            )
        if self.adjust_FileAlignment(
            section.PointerToRawData, self.OPTIONAL_HEADER.FileAlignment
        ) > len(self.__data__):
            simultaneous_errors += 1
            self.__warnings.append(
                f"Error parsing section {i}. PointerToRawData points beyond "
                "the end of the file."
            )
        if section.Misc_VirtualSize > 0x10000000:
            simultaneous_errors += 1
            self.__warnings.append(
                f"Suspicious value found parsing section {i}. VirtualSize is "
                "extremely large > 256MiB."
            )
        if (
            self.adjust_SectionAlignment(
                section.VirtualAddress,
                self.OPTIONAL_HEADER.SectionAlignment,
                self.OPTIONAL_HEADER.FileAlignment,
            )
            > 0x10000000
        ):
            simultaneous_errors += 1
            self.__warnings.append(
                f"Suspicious value found parsing section {i}. VirtualAddress is "
                "beyond 0x10000000."
            )
        if (
            self.OPTIONAL_HEADER.FileAlignment != 0
            and (section.PointerToRawData % self.OPTIONAL_HEADER.FileAlignment) != 0
        ):
            simultaneous_errors += 1
            self.__warnings.append(
                (
                    f"Error parsing section {i}. "
                    "PointerToRawData should normally be "
                    "a multiple of FileAlignment, this might imply the file "
                    "is trying to confuse tools which parse this incorrectly."
                )
            )
        if simultaneous_errors >= MAX_SIMULTANEOUS_ERRORS:
            self.__warnings.append("Too many warnings parsing section. Aborting.")
            break
        section_flags = retrieve_flags(SECTION_CHARACTERISTICS, "IMAGE_SCN_")
        # Set the section's flags according the the Characteristics member
        set_flags(section, section.Characteristics, section_flags)
        # Writable + executable sections are a packer heuristic.
        if section.__dict__.get(
            "IMAGE_SCN_MEM_WRITE", False
        ) and section.__dict__.get("IMAGE_SCN_MEM_EXECUTE", False):
            if section.Name.rstrip(b"\x00") == b"PAGE" and self.is_driver():
                # Drivers can have a PAGE section with those flags set without
                # implying that it is malicious
                pass
            else:
                self.__warnings.append(
                    f"Suspicious flags set for section {i}. "
                    "Both IMAGE_SCN_MEM_WRITE and IMAGE_SCN_MEM_EXECUTE are set. "
                    "This might indicate a packed executable."
                )
        self.sections.append(section)
    # Sort the sections by their VirtualAddress and add a field to each of them
    # with the VirtualAddress of the next section. This will allow to check
    # for potentially overlapping sections in badly constructed PEs.
    self.sections.sort(key=lambda a: a.VirtualAddress)
    for idx, section in enumerate(self.sections):
        if idx == len(self.sections) - 1:
            section.next_section_virtual_address = None
        else:
            section.next_section_virtual_address = self.sections[
                idx + 1
            ].VirtualAddress
    if self.FILE_HEADER.NumberOfSections > 0 and self.sections:
        return (
            offset + self.sections[0].sizeof() * self.FILE_HEADER.NumberOfSections
        )
    else:
        return offset
def parse_data_directories(
    self, directories=None, forwarded_exports_only=False, import_dllnames_only=False
):
    """Parse and process the PE file's data directories.

    If the optional argument 'directories' is given, only
    the directories at the specified indexes will be parsed.
    Such functionality allows parsing of areas of interest
    without the burden of having to parse all others.
    The directories can then be specified as:

    For export / import only:
      directories = [ 0, 1 ]
    or (more verbosely):
      directories = [ DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT'],
        DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT'] ]

    If 'directories' is a list, the ones that are processed will be removed,
    leaving only the ones that are not present in the image.

    If `forwarded_exports_only` is True, the IMAGE_DIRECTORY_ENTRY_EXPORT
    attribute will only contain exports that are forwarded to another DLL.

    If `import_dllnames_only` is True, symbols will not be parsed from
    the import table and the entries in the IMAGE_DIRECTORY_ENTRY_IMPORT
    attribute will not have a `symbols` attribute.
    """
    # (directory name, parser) pairs; the parsed value is stored on the
    # instance under the name with the leading "IMAGE_" stripped.
    directory_parsing = (
        ("IMAGE_DIRECTORY_ENTRY_IMPORT", self.parse_import_directory),
        ("IMAGE_DIRECTORY_ENTRY_EXPORT", self.parse_export_directory),
        ("IMAGE_DIRECTORY_ENTRY_RESOURCE", self.parse_resources_directory),
        ("IMAGE_DIRECTORY_ENTRY_DEBUG", self.parse_debug_directory),
        ("IMAGE_DIRECTORY_ENTRY_BASERELOC", self.parse_relocations_directory),
        ("IMAGE_DIRECTORY_ENTRY_TLS", self.parse_directory_tls),
        ("IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG", self.parse_directory_load_config),
        ("IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT", self.parse_delay_import_directory),
        ("IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT", self.parse_directory_bound_imports),
        ("IMAGE_DIRECTORY_ENTRY_EXCEPTION", self.parse_exceptions_directory),
    )
    if directories is not None:
        if not isinstance(directories, (tuple, list)):
            directories = [directories]
    for entry in directory_parsing:
        # OC Patch:
        #
        try:
            directory_index = DIRECTORY_ENTRY[entry[0]]
            dir_entry = self.OPTIONAL_HEADER.DATA_DIRECTORY[directory_index]
        except IndexError:
            break
        # Only process all the directories if no individual ones have
        # been chosen
        #
        if directories is None or directory_index in directories:
            # BUG FIX: `value` was previously left unbound (NameError on the
            # first directory) or stale from a previous iteration when the
            # parser raised PEFormatError below.
            value = None
            if dir_entry.VirtualAddress:
                if (
                    forwarded_exports_only
                    and entry[0] == "IMAGE_DIRECTORY_ENTRY_EXPORT"
                ):
                    value = entry[1](
                        dir_entry.VirtualAddress,
                        dir_entry.Size,
                        forwarded_only=True,
                    )
                elif (
                    import_dllnames_only
                    and entry[0] == "IMAGE_DIRECTORY_ENTRY_IMPORT"
                ):
                    value = entry[1](
                        dir_entry.VirtualAddress, dir_entry.Size, dllnames_only=True
                    )
                else:
                    try:
                        value = entry[1](dir_entry.VirtualAddress, dir_entry.Size)
                    except PEFormatError as excp:
                        # BUG FIX: message typo "directoty" corrected.
                        self.__warnings.append(
                            f'Failed to process directory "{entry[0]}": {excp}'
                        )
                if value:
                    setattr(self, entry[0][6:], value)
        # BUG FIX: the removal previously tested for the directory *name*
        # but removed the *index* (ValueError for name-form callers, no-op
        # for index-form ones). Remove whichever form the caller supplied.
        if (directories is not None) and isinstance(directories, list):
            if directory_index in directories:
                directories.remove(directory_index)
            elif entry[0] in directories:
                directories.remove(entry[0])
def parse_exceptions_directory(self, rva, size):
    """Parses exception directory

    All the code related to handling exception directories is documented in
    https://auscitte.github.io/systems%20blog/Exception-Directory-pefile#implementation-details

    Returns a list of ExceptionsDirEntryData (RUNTIME_FUNCTION plus its
    UNWIND_INFO, when present), or None for unsupported architectures.
    """
    # "For x64 and Itanium platforms; the format is different for other platforms"
    if (
        self.FILE_HEADER.Machine != MACHINE_TYPE["IMAGE_FILE_MACHINE_AMD64"]
        and self.FILE_HEADER.Machine != MACHINE_TYPE["IMAGE_FILE_MACHINE_IA64"]
    ):
        return None
    rf = Structure(self.__RUNTIME_FUNCTION_format__)
    rf_size = rf.sizeof()
    rva2rt = {}  # BeginAddress -> entry, for resolving chained entries below
    rt_funcs = []
    rva2infos = {}  # UNWIND_INFO cache, keyed by its RVA
    for _ in range(size // rf_size):
        rf = self.__unpack_data__(
            self.__RUNTIME_FUNCTION_format__,
            self.get_data(rva, rf_size),
            file_offset=self.get_offset_from_rva(rva),
        )
        if rf is None:
            break
        ui = None
        if (rf.UnwindData & 0x1) == 0:
            # according to "Improving Automated Analysis of Windows x64 Binaries",
            # if the lowest bit is set, (UnwindData & ~0x1) should point to the
            # chained RUNTIME_FUNCTION instead of UNWIND_INFO
            if (
                rf.UnwindData in rva2infos
            ):  # unwind info data structures can be shared among functions
                ui = rva2infos[rf.UnwindData]
            else:
                ui = UnwindInfo(file_offset=self.get_offset_from_rva(rf.UnwindData))
                rva2infos[rf.UnwindData] = ui
            # NOTE(review): unpack_in_stages is deliberately called twice —
            # presumably ui.sizeof() grows after the first stage is parsed;
            # confirm against the UnwindInfo implementation.
            ws = ui.unpack_in_stages(self.get_data(rf.UnwindData, ui.sizeof()))
            if ws is not None:  # idiom fix: was `ws != None`
                self.__warnings.append(ws)
                break
            ws = ui.unpack_in_stages(self.get_data(rf.UnwindData, ui.sizeof()))
            if ws is not None:
                self.__warnings.append(ws)
                break
            self.__structures__.append(ui)
        entry = ExceptionsDirEntryData(struct=rf, unwindinfo=ui)
        rt_funcs.append(entry)
        rva2rt[rf.BeginAddress] = entry
        rva += rf_size
    # each chained function entry holds a reference to the function first in chain
    for rf in rt_funcs:
        if rf.unwindinfo is None:  # idiom fix: was `== None`
            # TODO: have not encountered such a binary yet;
            # in theory, (UnwindData & ~0x1) should point to the chained
            # RUNTIME_FUNCTION which could be used to locate the corresponding
            # ExceptionsDirEntryData and set_chained_function_entry()
            continue
        if not hasattr(rf.unwindinfo, "FunctionEntry"):
            continue
        if rf.unwindinfo.FunctionEntry not in rva2rt:  # idiom fix: `not in`
            self.__warnings.append(
                f"FunctionEntry of UNWIND_INFO at {rf.struct.get_file_offset():x}"
                " points to an entry that does not exist"
            )
            continue
        try:
            rf.unwindinfo.set_chained_function_entry(
                rva2rt[rf.unwindinfo.FunctionEntry]
            )
        except PEFormatError as excp:
            self.__warnings.append(
                "Failed parsing FunctionEntry of UNWIND_INFO at "
                f"{rf.struct.get_file_offset():x}: {excp}"
            )
            continue
    return rt_funcs
    def parse_directory_bound_imports(self, rva, size):
        """Parse the bound-imports directory.

        Walks the array of IMAGE_BOUND_IMPORT_DESCRIPTOR structures (terminated
        by an all-zero descriptor), reading each module's name and its
        IMAGE_BOUND_FORWARDER_REF entries.

        Returns a list of BoundImportDescData, or None (implicitly) when the
        directory cannot be parsed — a warning is recorded instead of raising.
        """
        bnd_descr = Structure(self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__)
        bnd_descr_size = bnd_descr.sizeof()
        # OffsetModuleName values are relative to the start of the directory.
        start = rva

        bound_imports = []
        while True:
            bnd_descr = self.__unpack_data__(
                self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__,
                self.__data__[rva : rva + bnd_descr_size],
                file_offset=rva,
            )
            if bnd_descr is None:
                # If can't parse directory then silently return.
                # This directory does not necessarily have to be valid to
                # still have a valid PE file
                self.__warnings.append(
                    "The Bound Imports directory exists but can't be parsed."
                )
                return

            if bnd_descr.all_zeroes():
                break

            rva += bnd_descr.sizeof()

            section = self.get_section_by_offset(rva)
            file_offset = self.get_offset_from_rva(rva)
            if section is None:
                # Descriptor lies outside any section: bound the amount of
                # forwarder-ref data we are willing to read by the distance to
                # the next section start (or to the end of the file).
                safety_boundary = len(self.__data__) - file_offset
                sections_after_offset = [
                    s.PointerToRawData
                    for s in self.sections
                    if s.PointerToRawData > file_offset
                ]
                if sections_after_offset:
                    # Find the first section starting at a later offset than that
                    # specified by 'rva'
                    first_section_after_offset = min(sections_after_offset)
                    section = self.get_section_by_offset(first_section_after_offset)
                    if section is not None:
                        safety_boundary = section.PointerToRawData - file_offset
            else:
                safety_boundary = (
                    section.PointerToRawData + len(section.get_data()) - file_offset
                )
            if not section:
                self.__warnings.append(
                    (
                        "RVA of IMAGE_BOUND_IMPORT_DESCRIPTOR points "
                        "to an invalid address: {0:x}"
                    ).format(rva)
                )
                return

            forwarder_refs = []
            # 8 is the size of __IMAGE_BOUND_IMPORT_DESCRIPTOR_format__
            for _ in range(
                min(bnd_descr.NumberOfModuleForwarderRefs, int(safety_boundary / 8))
            ):
                # Both structures IMAGE_BOUND_IMPORT_DESCRIPTOR and
                # IMAGE_BOUND_FORWARDER_REF have the same size.
                bnd_frwd_ref = self.__unpack_data__(
                    self.__IMAGE_BOUND_FORWARDER_REF_format__,
                    self.__data__[rva : rva + bnd_descr_size],
                    file_offset=rva,
                )
                # OC Patch:
                if not bnd_frwd_ref:
                    raise PEFormatError("IMAGE_BOUND_FORWARDER_REF cannot be read")
                rva += bnd_frwd_ref.sizeof()

                offset = start + bnd_frwd_ref.OffsetModuleName
                name_str = self.get_string_from_data(
                    0, self.__data__[offset : offset + MAX_STRING_LENGTH]
                )

                # OffsetModuleName points to a DLL name. These shouldn't be too long.
                # Anything longer than a safety length of 128 will be taken to indicate
                # a corrupt entry and abort the processing of these entries.
                # Names shorter than 4 characters will be taken as invalid as well.
                if name_str:
                    invalid_chars = [
                        c for c in bytearray(name_str) if chr(c) not in string.printable
                    ]
                    if len(name_str) > 256 or invalid_chars:
                        break

                forwarder_refs.append(
                    BoundImportRefData(struct=bnd_frwd_ref, name=name_str)
                )

            offset = start + bnd_descr.OffsetModuleName
            name_str = self.get_string_from_data(
                0, self.__data__[offset : offset + MAX_STRING_LENGTH]
            )

            # Same sanity checks as above, applied to the descriptor's own
            # module name; an over-long or non-printable name aborts the walk.
            if name_str:
                invalid_chars = [
                    c for c in bytearray(name_str) if chr(c) not in string.printable
                ]
                if len(name_str) > 256 or invalid_chars:
                    break

            if not name_str:
                break
            bound_imports.append(
                BoundImportDescData(
                    struct=bnd_descr, name=name_str, entries=forwarder_refs
                )
            )

        return bound_imports
- def parse_directory_tls(self, rva, size):
- """"""
- # By default let's pretend the format is a 32-bit PE. It may help
- # produce some output for files where the Magic in the Optional Header
- # is incorrect.
- format = self.__IMAGE_TLS_DIRECTORY_format__
- if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
- format = self.__IMAGE_TLS_DIRECTORY64_format__
- try:
- tls_struct = self.__unpack_data__(
- format,
- self.get_data(rva, Structure(format).sizeof()),
- file_offset=self.get_offset_from_rva(rva),
- )
- except PEFormatError:
- self.__warnings.append(
- "Invalid TLS information. Can't read " "data at RVA: 0x%x" % rva
- )
- tls_struct = None
- if not tls_struct:
- return None
- return TlsData(struct=tls_struct)
- def parse_directory_load_config(self, rva, size):
- """"""
- if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
- format = self.__IMAGE_LOAD_CONFIG_DIRECTORY_format__
- elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
- format = self.__IMAGE_LOAD_CONFIG_DIRECTORY64_format__
- else:
- self.__warnings.append(
- "Don't know how to parse LOAD_CONFIG information for non-PE32/"
- "PE32+ file"
- )
- return None
- load_config_struct = None
- try:
- load_config_struct = self.__unpack_data__(
- format,
- self.get_data(rva, Structure(format).sizeof()),
- file_offset=self.get_offset_from_rva(rva),
- )
- except PEFormatError:
- self.__warnings.append(
- "Invalid LOAD_CONFIG information. Can't read " "data at RVA: 0x%x" % rva
- )
- if not load_config_struct:
- return None
- return LoadConfigData(struct=load_config_struct)
- def parse_relocations_directory(self, rva, size):
- """"""
- rlc_size = Structure(self.__IMAGE_BASE_RELOCATION_format__).sizeof()
- end = rva + size
- relocations = []
- while rva < end:
- # OC Patch:
- # Malware that has bad RVA entries will cause an error.
- # Just continue on after an exception
- #
- try:
- rlc = self.__unpack_data__(
- self.__IMAGE_BASE_RELOCATION_format__,
- self.get_data(rva, rlc_size),
- file_offset=self.get_offset_from_rva(rva),
- )
- except PEFormatError:
- self.__warnings.append(
- "Invalid relocation information. Can't read "
- "data at RVA: 0x%x" % rva
- )
- rlc = None
- if not rlc:
- break
- # rlc.VirtualAddress must lie within the Image
- if rlc.VirtualAddress > self.OPTIONAL_HEADER.SizeOfImage:
- self.__warnings.append(
- "Invalid relocation information. VirtualAddress outside"
- " of Image: 0x%x" % rlc.VirtualAddress
- )
- break
- # rlc.SizeOfBlock must be less or equal than the size of the image
- # (It's a rather loose sanity test)
- if rlc.SizeOfBlock > self.OPTIONAL_HEADER.SizeOfImage:
- self.__warnings.append(
- "Invalid relocation information. SizeOfBlock too large"
- ": %d" % rlc.SizeOfBlock
- )
- break
- reloc_entries = self.parse_relocations(
- rva + rlc_size, rlc.VirtualAddress, rlc.SizeOfBlock - rlc_size
- )
- relocations.append(BaseRelocationData(struct=rlc, entries=reloc_entries))
- if not rlc.SizeOfBlock:
- break
- rva += rlc.SizeOfBlock
- return relocations
- def parse_relocations(self, data_rva, rva, size):
- """"""
- try:
- data = self.get_data(data_rva, size)
- file_offset = self.get_offset_from_rva(data_rva)
- except PEFormatError:
- self.__warnings.append(f"Bad RVA in relocation data: 0x{data_rva:x}")
- return []
- entries = []
- offsets_and_type = []
- for idx in range(int(len(data) / 2)):
- entry = self.__unpack_data__(
- self.__IMAGE_BASE_RELOCATION_ENTRY_format__,
- data[idx * 2 : (idx + 1) * 2],
- file_offset=file_offset,
- )
- if not entry:
- break
- word = entry.Data
- reloc_type = word >> 12
- reloc_offset = word & 0x0FFF
- if (reloc_offset, reloc_type) in offsets_and_type:
- self.__warnings.append(
- "Overlapping offsets in relocation data "
- "at RVA: 0x%x" % (reloc_offset + rva)
- )
- break
- if len(offsets_and_type) >= 1000:
- offsets_and_type.pop()
- offsets_and_type.insert(0, (reloc_offset, reloc_type))
- entries.append(
- RelocationData(
- struct=entry, type=reloc_type, base_rva=rva, rva=reloc_offset + rva
- )
- )
- file_offset += entry.sizeof()
- return entries
    def parse_debug_directory(self, rva, size):
        """Parse the debug directory.

        Iterates the IMAGE_DEBUG_DIRECTORY entries and, for CODEVIEW (type 2)
        and MISC (type 4) entries, additionally decodes the raw data they
        point to.

        Returns a list of DebugData, or None if an entry cannot be read.
        """
        dbg_size = Structure(self.__IMAGE_DEBUG_DIRECTORY_format__).sizeof()

        debug = []
        for idx in range(int(size / dbg_size)):
            try:
                data = self.get_data(rva + dbg_size * idx, dbg_size)
            except PEFormatError:
                self.__warnings.append(
                    "Invalid debug information. Can't read " "data at RVA: 0x%x" % rva
                )
                return None
            dbg = self.__unpack_data__(
                self.__IMAGE_DEBUG_DIRECTORY_format__,
                data,
                file_offset=self.get_offset_from_rva(rva + dbg_size * idx),
            )
            if not dbg:
                return None

            # apply structure according to DEBUG_TYPE
            # http://www.debuginfo.com/articles/debuginfomatch.html
            #
            dbg_type = None

            if dbg.Type == 1:
                # IMAGE_DEBUG_TYPE_COFF
                pass

            elif dbg.Type == 2:
                # if IMAGE_DEBUG_TYPE_CODEVIEW
                dbg_type_offset = dbg.PointerToRawData
                dbg_type_size = dbg.SizeOfData
                dbg_type_data = self.__data__[
                    dbg_type_offset : dbg_type_offset + dbg_type_size
                ]

                if dbg_type_data[:4] == b"RSDS":
                    # pdb7.0
                    # The format list is built (and possibly extended below)
                    # per entry because the PdbFileName field's length varies.
                    __CV_INFO_PDB70_format__ = [
                        "CV_INFO_PDB70",
                        [
                            "I,CvSignature",
                            "I,Signature_Data1",  # Signature is of GUID type
                            "H,Signature_Data2",
                            "H,Signature_Data3",
                            "8s,Signature_Data4",
                            # 'H,Signature_Data5',
                            # 'I,Signature_Data6',
                            "I,Age",
                        ],
                    ]
                    pdbFileName_size = (
                        dbg_type_size - Structure(__CV_INFO_PDB70_format__).sizeof()
                    )

                    # pdbFileName_size can be negative here, as seen in the malware
                    # sample with hash
                    # MD5: 7c297600870d026c014d42596bb9b5fd
                    # SHA256:
                    # 83f4e63681fcba8a9d7bbb1688c71981b1837446514a1773597e0192bba9fac3
                    # Checking for positive size here to ensure proper parsing.
                    if pdbFileName_size > 0:
                        __CV_INFO_PDB70_format__[1].append(
                            "{0}s,PdbFileName".format(pdbFileName_size)
                        )
                    dbg_type = self.__unpack_data__(
                        __CV_INFO_PDB70_format__, dbg_type_data, dbg_type_offset
                    )

                elif dbg_type_data[:4] == b"NB10":
                    # pdb2.0
                    __CV_INFO_PDB20_format__ = [
                        "CV_INFO_PDB20",
                        [
                            "I,CvHeaderSignature",
                            "I,CvHeaderOffset",
                            "I,Signature",
                            "I,Age",
                        ],
                    ]
                    pdbFileName_size = (
                        dbg_type_size - Structure(__CV_INFO_PDB20_format__).sizeof()
                    )

                    # As with the PDB 7.0 case, ensuring a positive size for
                    # pdbFileName_size to ensure proper parsing.
                    if pdbFileName_size > 0:
                        # Add the last variable-length string field.
                        __CV_INFO_PDB20_format__[1].append(
                            "{0}s,PdbFileName".format(pdbFileName_size)
                        )
                    dbg_type = self.__unpack_data__(
                        __CV_INFO_PDB20_format__, dbg_type_data, dbg_type_offset
                    )

            elif dbg.Type == 4:
                # IMAGE_DEBUG_TYPE_MISC
                dbg_type_offset = dbg.PointerToRawData
                dbg_type_size = dbg.SizeOfData
                dbg_type_data = self.__data__[
                    dbg_type_offset : dbg_type_offset + dbg_type_size
                ]

                ___IMAGE_DEBUG_MISC_format__ = [
                    "IMAGE_DEBUG_MISC",
                    [
                        "I,DataType",
                        "I,Length",
                        "B,Unicode",
                        "B,Reserved1",
                        "H,Reserved2",
                    ],
                ]
                dbg_type_partial = self.__unpack_data__(
                    ___IMAGE_DEBUG_MISC_format__, dbg_type_data, dbg_type_offset
                )

                # Need to check that dbg_type_partial contains a correctly unpacked data
                # structure, as the malware sample with the following hash
                # MD5: 5e7d6707d693108de5a303045c17d95b
                # SHA256:
                # 5dd94a95025f3b6e3dd440d52f7c6d2964fdd1aa119e0ee92e38c7bf83829e5c
                # contains a value of None for dbg_type_partial after unpacking,
                # presumably due to a malformed DEBUG entry.
                if dbg_type_partial:
                    # The Unicode bool should be set to 0 or 1.
                    if dbg_type_partial.Unicode in (0, 1):
                        data_size = (
                            dbg_type_size
                            - Structure(___IMAGE_DEBUG_MISC_format__).sizeof()
                        )

                        # As with the PDB case, ensuring a positive size for data_size
                        # here to ensure proper parsing.
                        if data_size > 0:
                            ___IMAGE_DEBUG_MISC_format__[1].append(
                                "{0}s,Data".format(data_size)
                            )

                        dbg_type = self.__unpack_data__(
                            ___IMAGE_DEBUG_MISC_format__, dbg_type_data, dbg_type_offset
                        )

            debug.append(DebugData(struct=dbg, entry=dbg_type))

        return debug
    def parse_resources_directory(self, rva, size=0, base_rva=None, level=0, dirs=None):
        """Parse the resources directory.

        Given the RVA of the resources directory, it will process all
        its entries.

        The root will have the corresponding member of its structure,
        IMAGE_RESOURCE_DIRECTORY plus 'entries', a list of all the
        entries in the directory.

        Those entries will have, correspondingly, all the structure's
        members (IMAGE_RESOURCE_DIRECTORY_ENTRY) and an additional one,
        "directory", pointing to the IMAGE_RESOURCE_DIRECTORY structure
        representing upper layers of the tree. This one will also have
        an 'entries' attribute, pointing to the 3rd, and last, level.
        Another directory with more entries. Those last entries will
        have a new attribute (both 'leaf' or 'data_entry' can be used to
        access it). This structure finally points to the resource data.
        All the members of this structure, IMAGE_RESOURCE_DATA_ENTRY,
        are available as its attributes.

        'dirs' carries the RVAs of directories already visited on this path
        and is used to break recursion loops crafted into malicious files.
        """
        # OC Patch:
        if dirs is None:
            dirs = [rva]

        if base_rva is None:
            base_rva = rva

        if level > MAX_RESOURCE_DEPTH:
            self.__warnings.append(
                "Error parsing the resources directory. "
                "Excessively nested table depth %d (>%s)" % (level, MAX_RESOURCE_DEPTH)
            )
            return None

        try:
            # If the RVA is invalid all would blow up. Some EXEs seem to be
            # specially nasty and have an invalid RVA.
            data = self.get_data(
                rva, Structure(self.__IMAGE_RESOURCE_DIRECTORY_format__).sizeof()
            )
        except PEFormatError:
            self.__warnings.append(
                "Invalid resources directory. Can't read "
                "directory data at RVA: 0x%x" % rva
            )
            return None

        # Get the resource directory structure, that is, the header
        # of the table preceding the actual entries
        #
        resource_dir = self.__unpack_data__(
            self.__IMAGE_RESOURCE_DIRECTORY_format__,
            data,
            file_offset=self.get_offset_from_rva(rva),
        )
        if resource_dir is None:
            # If we can't parse resources directory then silently return.
            # This directory does not necessarily have to be valid to
            # still have a valid PE file
            self.__warnings.append(
                "Invalid resources directory. Can't parse "
                "directory data at RVA: 0x%x" % rva
            )
            return None

        dir_entries = []

        # Advance the RVA to the position immediately following the directory
        # table header and pointing to the first entry in the table
        #
        rva += resource_dir.sizeof()

        number_of_entries = (
            resource_dir.NumberOfNamedEntries + resource_dir.NumberOfIdEntries
        )

        # Set a hard limit on the maximum reasonable number of entries
        MAX_ALLOWED_ENTRIES = 4096
        if number_of_entries > MAX_ALLOWED_ENTRIES:
            self.__warnings.append(
                "Error parsing the resources directory. "
                "The directory contains %d entries (>%s)"
                % (number_of_entries, MAX_ALLOWED_ENTRIES)
            )
            return None

        # Global (per-file) cap on resource entries, accumulated across the
        # whole recursive walk.
        self.__total_resource_entries_count += number_of_entries
        if self.__total_resource_entries_count > MAX_RESOURCE_ENTRIES:
            self.__warnings.append(
                "Error parsing the resources directory. "
                "The file contains at least %d entries (>%d)"
                % (self.__total_resource_entries_count, MAX_RESOURCE_ENTRIES)
            )
            return None

        strings_to_postprocess = []

        # Keep track of the last name's start and end offsets in order
        # to be able to detect overlapping entries that might suggest
        # and invalid or corrupt directory.
        last_name_begin_end = None

        for idx in range(number_of_entries):
            # Warn (once) when the accumulated resource bytes exceed the
            # upper bound derived from the file size — overlapping resources.
            if (
                not self.__resource_size_limit_reached
                and self.__total_resource_bytes > self.__resource_size_limit_upperbounds
            ):
                self.__resource_size_limit_reached = True
                self.__warnings.append(
                    "Resource size 0x%x exceeds file size 0x%x, overlapping "
                    "resources found."
                    % (
                        self.__total_resource_bytes,
                        self.__resource_size_limit_upperbounds,
                    )
                )

            res = self.parse_resource_entry(rva)
            if res is None:
                self.__warnings.append(
                    "Error parsing the resources directory, "
                    "Entry %d is invalid, RVA = 0x%x. " % (idx, rva)
                )
                break

            entry_name = None
            entry_id = None

            name_is_string = (res.Name & 0x80000000) >> 31
            if not name_is_string:
                entry_id = res.Name
            else:
                ustr_offset = base_rva + res.NameOffset
                try:
                    entry_name = UnicodeStringWrapperPostProcessor(self, ustr_offset)
                    self.__total_resource_bytes += entry_name.get_pascal_16_length()
                    # If the last entry's offset points before the current's but its end
                    # is past the current's beginning, assume the overlap indicates a
                    # corrupt name.
                    if last_name_begin_end and (
                        last_name_begin_end[0] < ustr_offset
                        and last_name_begin_end[1] >= ustr_offset
                    ):
                        # Remove the previous overlapping entry as it's likely to be
                        # already corrupt data.
                        strings_to_postprocess.pop()
                        self.__warnings.append(
                            "Error parsing the resources directory, "
                            "attempting to read entry name. "
                            "Entry names overlap 0x%x" % (ustr_offset)
                        )
                        break

                    last_name_begin_end = (
                        ustr_offset,
                        ustr_offset + entry_name.get_pascal_16_length(),
                    )

                    strings_to_postprocess.append(entry_name)

                except PEFormatError:
                    self.__warnings.append(
                        "Error parsing the resources directory, "
                        "attempting to read entry name. "
                        "Can't read unicode string at offset 0x%x" % (ustr_offset)
                    )

            if res.DataIsDirectory:
                # OC Patch:
                #
                # One trick malware can do is to recursively reference
                # the next directory. This causes hilarity to ensue when
                # trying to parse everything correctly.
                # If the original RVA given to this function is equal to
                # the next one to parse, we assume that it's a trick.
                # Instead of raising a PEFormatError this would skip some
                # reasonable data so we just break.
                #
                # 9ee4d0a0caf095314fd7041a3e4404dc is the offending sample
                if base_rva + res.OffsetToDirectory in dirs:
                    break

                entry_directory = self.parse_resources_directory(
                    base_rva + res.OffsetToDirectory,
                    size - (rva - base_rva),  # size
                    base_rva=base_rva,
                    level=level + 1,
                    dirs=dirs + [base_rva + res.OffsetToDirectory],
                )

                if not entry_directory:
                    break

                # Ange Albertini's code to process resources' strings
                #
                strings = None
                if entry_id == RESOURCE_TYPE["RT_STRING"]:
                    strings = {}
                    for resource_id in entry_directory.entries:
                        if hasattr(resource_id, "directory"):
                            resource_strings = {}

                            for resource_lang in resource_id.directory.entries:
                                if (
                                    resource_lang is None
                                    or not hasattr(resource_lang, "data")
                                    or resource_lang.data.struct.Size is None
                                    or resource_id.id is None
                                ):
                                    continue

                                string_entry_rva = (
                                    resource_lang.data.struct.OffsetToData
                                )
                                string_entry_size = resource_lang.data.struct.Size
                                string_entry_id = resource_id.id

                                # XXX: has been raising exceptions preventing parsing
                                try:
                                    string_entry_data = self.get_data(
                                        string_entry_rva, string_entry_size
                                    )
                                except:
                                    self.__warnings.append(
                                        f"Error parsing resource of type RT_STRING at "
                                        f"RVA 0x{string_entry_rva:x} with "
                                        f"size {string_entry_size}"
                                    )
                                    continue

                                parse_strings(
                                    string_entry_data,
                                    (int(string_entry_id) - 1) * 16,
                                    resource_strings,
                                )
                                strings.update(resource_strings)

                            resource_id.directory.strings = resource_strings

                dir_entries.append(
                    ResourceDirEntryData(
                        struct=res,
                        name=entry_name,
                        id=entry_id,
                        directory=entry_directory,
                    )
                )

            else:
                struct = self.parse_resource_data_entry(
                    base_rva + res.OffsetToDirectory
                )
                if struct:
                    self.__total_resource_bytes += struct.Size
                    entry_data = ResourceDataEntryData(
                        struct=struct, lang=res.Name & 0x3FF, sublang=res.Name >> 10
                    )

                    dir_entries.append(
                        ResourceDirEntryData(
                            struct=res, name=entry_name, id=entry_id, data=entry_data
                        )
                    )

                else:
                    break

            # Check if this entry contains version information
            #
            if level == 0 and res.Id == RESOURCE_TYPE["RT_VERSION"]:
                if dir_entries:
                    last_entry = dir_entries[-1]

                    try:
                        version_entries = last_entry.directory.entries[0].directory.entries
                    except:
                        # Maybe a malformed directory structure...?
                        # Let's ignore it
                        pass
                    else:
                        for version_entry in version_entries:
                            rt_version_struct = None
                            try:
                                rt_version_struct = version_entry.data.struct
                            except:
                                # Maybe a malformed directory structure...?
                                # Let's ignore it
                                pass

                            if rt_version_struct is not None:
                                self.parse_version_information(rt_version_struct)

            rva += res.sizeof()

        # NOTE(review): string_rvas is computed and sorted but not otherwise
        # used here — presumably a leftover; confirm before removing.
        string_rvas = [s.get_rva() for s in strings_to_postprocess]
        string_rvas.sort()

        for idx, s in enumerate(strings_to_postprocess):
            s.render_pascal_16()

        resource_directory_data = ResourceDirData(
            struct=resource_dir, entries=dir_entries
        )

        return resource_directory_data
- def parse_resource_data_entry(self, rva):
- """Parse a data entry from the resources directory."""
- try:
- # If the RVA is invalid all would blow up. Some EXEs seem to be
- # specially nasty and have an invalid RVA.
- data = self.get_data(
- rva, Structure(self.__IMAGE_RESOURCE_DATA_ENTRY_format__).sizeof()
- )
- except PEFormatError:
- self.__warnings.append(
- "Error parsing a resource directory data entry, "
- "the RVA is invalid: 0x%x" % (rva)
- )
- return None
- data_entry = self.__unpack_data__(
- self.__IMAGE_RESOURCE_DATA_ENTRY_format__,
- data,
- file_offset=self.get_offset_from_rva(rva),
- )
- return data_entry
- def parse_resource_entry(self, rva):
- """Parse a directory entry from the resources directory."""
- try:
- data = self.get_data(
- rva, Structure(self.__IMAGE_RESOURCE_DIRECTORY_ENTRY_format__).sizeof()
- )
- except PEFormatError:
- # A warning will be added by the caller if this method returns None
- return None
- resource = self.__unpack_data__(
- self.__IMAGE_RESOURCE_DIRECTORY_ENTRY_format__,
- data,
- file_offset=self.get_offset_from_rva(rva),
- )
- if resource is None:
- return None
- # resource.NameIsString = (resource.Name & 0x80000000L) >> 31
- resource.NameOffset = resource.Name & 0x7FFFFFFF
- resource.__pad = resource.Name & 0xFFFF0000
- resource.Id = resource.Name & 0x0000FFFF
- resource.DataIsDirectory = (resource.OffsetToData & 0x80000000) >> 31
- resource.OffsetToDirectory = resource.OffsetToData & 0x7FFFFFFF
- return resource
    def parse_version_information(self, version_struct):
        """Parse version information structure.

        The data will be made available in three attributes of the PE object.

        VS_VERSIONINFO will contain the first three fields of the main structure:
        'Length', 'ValueLength', and 'Type'

        VS_FIXEDFILEINFO will hold the rest of the fields, accessible as sub-attributes:
        'Signature', 'StrucVersion', 'FileVersionMS', 'FileVersionLS',
        'ProductVersionMS', 'ProductVersionLS', 'FileFlagsMask', 'FileFlags',
        'FileOS', 'FileType', 'FileSubtype', 'FileDateMS', 'FileDateLS'

        FileInfo is a list of all StringFileInfo and VarFileInfo structures.

        StringFileInfo structures will have a list as an attribute named 'StringTable'
        containing all the StringTable structures. Each of those structures contains a
        dictionary 'entries' with all the key / value version information string pairs.

        VarFileInfo structures will have a list as an attribute named 'Var' containing
        all Var structures. Each Var structure will have a dictionary as an attribute
        named 'entry' which will contain the name and value of the Var.
        """
        # Retrieve the data for the version info resource
        #
        try:
            start_offset = self.get_offset_from_rva(version_struct.OffsetToData)
        except PEFormatError:
            self.__warnings.append(
                "Error parsing the version information, "
                "attempting to read OffsetToData with RVA: 0x{:x}".format(
                    version_struct.OffsetToData
                )
            )
            return
        raw_data = self.__data__[start_offset : start_offset + version_struct.Size]

        # Map the main structure and the subsequent string
        #
        versioninfo_struct = self.__unpack_data__(
            self.__VS_VERSIONINFO_format__, raw_data, file_offset=start_offset
        )
        if versioninfo_struct is None:
            return

        ustr_offset = version_struct.OffsetToData + versioninfo_struct.sizeof()
        section = self.get_section_by_rva(ustr_offset)
        section_end = None
        if section:
            section_end = section.VirtualAddress + max(
                section.SizeOfRawData, section.Misc_VirtualSize
            )

        versioninfo_string = None
        # These should return 'ascii' decoded data. For the case when it's
        # garbled data the ascii string will retain the byte values while
        # encoding it to something else may yield values that don't match the
        # file's contents.
        try:
            if section_end is None:
                versioninfo_string = self.get_string_u_at_rva(
                    ustr_offset, encoding="ascii"
                )
            else:
                # Cap the read at the end of the enclosing section.
                versioninfo_string = self.get_string_u_at_rva(
                    ustr_offset, (section_end - ustr_offset) >> 1, encoding="ascii"
                )
        except PEFormatError:
            self.__warnings.append(
                "Error parsing the version information, "
                "attempting to read VS_VERSION_INFO string. Can't "
                "read unicode string at offset 0x%x" % (ustr_offset)
            )

        if versioninfo_string == None:
            self.__warnings.append(
                "Invalid VS_VERSION_INFO block: {0}".format(versioninfo_string)
            )
            return

        # If the structure does not contain the expected name, it's assumed to
        # be invalid
        if versioninfo_string is not None and versioninfo_string != b"VS_VERSION_INFO":
            if len(versioninfo_string) > 128:
                excerpt = versioninfo_string[:128].decode("ascii")
                # Don't leave any half-escaped characters
                excerpt = excerpt[: excerpt.rfind("\\u")]
                versioninfo_string = b(
                    "{0} ... ({1} bytes, too long to display)".format(
                        excerpt, len(versioninfo_string)
                    )
                )
            self.__warnings.append(
                "Invalid VS_VERSION_INFO block: {0}".format(
                    versioninfo_string.decode("ascii").replace("\00", "\\00")
                )
            )
            return

        if not hasattr(self, "VS_VERSIONINFO"):
            self.VS_VERSIONINFO = []

        # Set the PE object's VS_VERSIONINFO to this one
        vinfo = versioninfo_struct

        # Set the Key attribute to point to the unicode string identifying the structure
        vinfo.Key = versioninfo_string

        self.VS_VERSIONINFO.append(vinfo)

        # NOTE(review): versioninfo_string cannot be None here — the
        # '== None' check above already returned; dead guard kept as-is.
        if versioninfo_string is None:
            versioninfo_string = ""
        # Process the fixed version information, get the offset and structure
        fixedfileinfo_offset = self.dword_align(
            versioninfo_struct.sizeof() + 2 * (len(versioninfo_string) + 1),
            version_struct.OffsetToData,
        )
        fixedfileinfo_struct = self.__unpack_data__(
            self.__VS_FIXEDFILEINFO_format__,
            raw_data[fixedfileinfo_offset:],
            file_offset=start_offset + fixedfileinfo_offset,
        )
        if not fixedfileinfo_struct:
            return

        if not hasattr(self, "VS_FIXEDFILEINFO"):
            self.VS_FIXEDFILEINFO = []

        # Set the PE object's VS_FIXEDFILEINFO to this one
        self.VS_FIXEDFILEINFO.append(fixedfileinfo_struct)

        # Start parsing all the StringFileInfo and VarFileInfo structures
        # Get the first one
        stringfileinfo_offset = self.dword_align(
            fixedfileinfo_offset + fixedfileinfo_struct.sizeof(),
            version_struct.OffsetToData,
        )

        # Set the PE object's attribute that will contain them all.
        if not hasattr(self, "FileInfo"):
            self.FileInfo = []

        finfo = []
        while True:
            # Process the StringFileInfo/VarFileInfo structure
            stringfileinfo_struct = self.__unpack_data__(
                self.__StringFileInfo_format__,
                raw_data[stringfileinfo_offset:],
                file_offset=start_offset + stringfileinfo_offset,
            )
            if stringfileinfo_struct is None:
                self.__warnings.append(
                    "Error parsing StringFileInfo/VarFileInfo struct"
                )
                return None

            # Get the subsequent string defining the structure.
            ustr_offset = (
                version_struct.OffsetToData
                + stringfileinfo_offset
                + versioninfo_struct.sizeof()
            )
            try:
                stringfileinfo_string = self.get_string_u_at_rva(ustr_offset)
            except PEFormatError:
                self.__warnings.append(
                    "Error parsing the version information, "
                    "attempting to read StringFileInfo string. Can't "
                    "read unicode string at offset 0x{0:x}".format(ustr_offset)
                )
                break

            # Set such string as the Key attribute
            stringfileinfo_struct.Key = stringfileinfo_string

            # Append the structure to the PE object's list
            finfo.append(stringfileinfo_struct)

            # Parse a StringFileInfo entry
            if stringfileinfo_string and stringfileinfo_string.startswith(
                b"StringFileInfo"
            ):
                if (
                    stringfileinfo_struct.Type in (0, 1)
                    and stringfileinfo_struct.ValueLength == 0
                ):
                    stringtable_offset = self.dword_align(
                        stringfileinfo_offset
                        + stringfileinfo_struct.sizeof()
                        + 2 * (len(stringfileinfo_string) + 1),
                        version_struct.OffsetToData,
                    )

                    stringfileinfo_struct.StringTable = []

                    # Process the String Table entries
                    while True:
                        stringtable_struct = self.__unpack_data__(
                            self.__StringTable_format__,
                            raw_data[stringtable_offset:],
                            file_offset=start_offset + stringtable_offset,
                        )

                        if not stringtable_struct:
                            break

                        ustr_offset = (
                            version_struct.OffsetToData
                            + stringtable_offset
                            + stringtable_struct.sizeof()
                        )
                        try:
                            stringtable_string = self.get_string_u_at_rva(ustr_offset)
                        except PEFormatError:
                            self.__warnings.append(
                                "Error parsing the version information, "
                                "attempting to read StringTable string. Can't "
                                "read unicode string at offset 0x{0:x}".format(
                                    ustr_offset
                                )
                            )
                            break

                        stringtable_struct.LangID = stringtable_string
                        stringtable_struct.entries = {}
                        stringtable_struct.entries_offsets = {}
                        stringtable_struct.entries_lengths = {}
                        stringfileinfo_struct.StringTable.append(stringtable_struct)

                        entry_offset = self.dword_align(
                            stringtable_offset
                            + stringtable_struct.sizeof()
                            + 2 * (len(stringtable_string) + 1),
                            version_struct.OffsetToData,
                        )

                        # Process all entries in the string table
                        while (
                            entry_offset
                            < stringtable_offset + stringtable_struct.Length
                        ):
                            string_struct = self.__unpack_data__(
                                self.__String_format__,
                                raw_data[entry_offset:],
                                file_offset=start_offset + entry_offset,
                            )

                            if not string_struct:
                                break

                            ustr_offset = (
                                version_struct.OffsetToData
                                + entry_offset
                                + string_struct.sizeof()
                            )
                            try:
                                key = self.get_string_u_at_rva(ustr_offset)
                                key_offset = self.get_offset_from_rva(ustr_offset)
                            except PEFormatError:
                                self.__warnings.append(
                                    "Error parsing the version information, "
                                    "attempting to read StringTable Key string. Can't "
                                    "read unicode string at offset 0x{0:x}".format(
                                        ustr_offset
                                    )
                                )
                                break

                            value_offset = self.dword_align(
                                2 * (len(key) + 1)
                                + entry_offset
                                + string_struct.sizeof(),
                                version_struct.OffsetToData,
                            )
                            ustr_offset = version_struct.OffsetToData + value_offset
                            try:
                                value = self.get_string_u_at_rva(
                                    ustr_offset, max_length=string_struct.ValueLength
                                )
                                value_offset = self.get_offset_from_rva(ustr_offset)
                            except PEFormatError:
                                self.__warnings.append(
                                    "Error parsing the version information, attempting "
                                    "to read StringTable Value string. Can't read "
                                    f"unicode string at offset 0x{ustr_offset:x}"
                                )
                                break

                            # A zero-length entry would never advance; jump to
                            # the end of the table to terminate the loop.
                            if string_struct.Length == 0:
                                entry_offset = (
                                    stringtable_offset + stringtable_struct.Length
                                )
                            else:
                                entry_offset = self.dword_align(
                                    string_struct.Length + entry_offset,
                                    version_struct.OffsetToData,
                                )

                            stringtable_struct.entries[key] = value
                            stringtable_struct.entries_offsets[key] = (
                                key_offset,
                                value_offset,
                            )
                            stringtable_struct.entries_lengths[key] = (
                                len(key),
                                len(value),
                            )

                        new_stringtable_offset = self.dword_align(
                            stringtable_struct.Length + stringtable_offset,
                            version_struct.OffsetToData,
                        )
                        # Check if the entry is crafted in a way that would lead
                        # to an infinite loop and break if so.
                        if new_stringtable_offset == stringtable_offset:
                            break
                        stringtable_offset = new_stringtable_offset

                        if stringtable_offset >= stringfileinfo_struct.Length:
                            break

            # Parse a VarFileInfo entry
            elif stringfileinfo_string and stringfileinfo_string.startswith(
                b"VarFileInfo"
            ):
                varfileinfo_struct = stringfileinfo_struct
                varfileinfo_struct.name = "VarFileInfo"

                if (
                    varfileinfo_struct.Type in (0, 1)
                    and varfileinfo_struct.ValueLength == 0
                ):
                    var_offset = self.dword_align(
                        stringfileinfo_offset
                        + varfileinfo_struct.sizeof()
                        + 2 * (len(stringfileinfo_string) + 1),
                        version_struct.OffsetToData,
                    )

                    varfileinfo_struct.Var = []

                    # Process all entries
                    while True:
                        var_struct = self.__unpack_data__(
                            self.__Var_format__,
                            raw_data[var_offset:],
                            file_offset=start_offset + var_offset,
                        )

                        if not var_struct:
                            break

                        ustr_offset = (
                            version_struct.OffsetToData
                            + var_offset
                            + var_struct.sizeof()
                        )
                        try:
                            var_string = self.get_string_u_at_rva(ustr_offset)
                        except PEFormatError:
                            self.__warnings.append(
                                "Error parsing the version information, "
                                "attempting to read VarFileInfo Var string. "
                                "Can't read unicode string at offset 0x{0:x}".format(
                                    ustr_offset
                                )
                            )
                            break

                        if var_string is None:
                            break

                        varfileinfo_struct.Var.append(var_struct)

                        varword_offset = self.dword_align(
                            2 * (len(var_string) + 1)
                            + var_offset
                            + var_struct.sizeof(),
                            version_struct.OffsetToData,
                        )
                        orig_varword_offset = varword_offset
                        # Var values are pairs of 16-bit words; format each
                        # pair as "0xAAAA 0xBBBB" keyed by the Var's name.
                        while (
                            varword_offset
                            < orig_varword_offset + var_struct.ValueLength
                        ):
                            word1 = self.get_word_from_data(
                                raw_data[varword_offset : varword_offset + 2], 0
                            )
                            word2 = self.get_word_from_data(
                                raw_data[varword_offset + 2 : varword_offset + 4], 0
                            )
                            varword_offset += 4

                            if isinstance(word1, int) and isinstance(word2, int):
                                var_struct.entry = {
                                    var_string: "0x%04x 0x%04x" % (word1, word2)
                                }

                        var_offset = self.dword_align(
                            var_offset + var_struct.Length, version_struct.OffsetToData
                        )

                        # NOTE(review): this condition is always true for a
                        # non-negative Length, so only one Var entry is ever
                        # processed per VarFileInfo — confirm whether the
                        # intended test was on the *previous* var_offset.
                        if var_offset <= var_offset + var_struct.Length:
                            break

            # Increment and align the offset
            stringfileinfo_offset = self.dword_align(
                stringfileinfo_struct.Length + stringfileinfo_offset,
                version_struct.OffsetToData,
            )

            # Check if all the StringFileInfo and VarFileInfo items have been processed
            if (
                stringfileinfo_struct.Length == 0
                or stringfileinfo_offset >= versioninfo_struct.Length
            ):
                break

        self.FileInfo.append(finfo)
def parse_export_directory(self, rva, size, forwarded_only=False):
    """Parse the export directory.

    Given the RVA of the export directory, it will process all
    its entries.

    The exports will be made available as a list of ExportData
    instances in the 'IMAGE_DIRECTORY_ENTRY_EXPORT' PE attribute.
    """
    try:
        export_dir = self.__unpack_data__(
            self.__IMAGE_EXPORT_DIRECTORY_format__,
            self.get_data(
                rva, Structure(self.__IMAGE_EXPORT_DIRECTORY_format__).sizeof()
            ),
            file_offset=self.get_offset_from_rva(rva),
        )
    except PEFormatError:
        self.__warnings.append(
            "Error parsing export directory at RVA: 0x%x" % (rva)
        )
        return
    if not export_dir:
        return

    # We keep track of the bytes left in the file and use it to set a upper
    # bound in the number of items that can be read from the different
    # arrays.
    def length_until_eof(rva):
        return len(self.__data__) - self.get_offset_from_rva(rva)

    try:
        # Each of the three arrays is clamped to whichever is smaller: the
        # count declared in the export directory, or the bytes left in the
        # file — protects against corrupt NumberOfNames/NumberOfFunctions.
        address_of_names = self.get_data(
            export_dir.AddressOfNames,
            min(
                length_until_eof(export_dir.AddressOfNames),
                export_dir.NumberOfNames * 4,
            ),
        )
        address_of_name_ordinals = self.get_data(
            export_dir.AddressOfNameOrdinals,
            min(
                length_until_eof(export_dir.AddressOfNameOrdinals),
                export_dir.NumberOfNames * 4,
            ),
        )
        address_of_functions = self.get_data(
            export_dir.AddressOfFunctions,
            min(
                length_until_eof(export_dir.AddressOfFunctions),
                export_dir.NumberOfFunctions * 4,
            ),
        )
    except PEFormatError:
        self.__warnings.append(
            "Error parsing export directory at RVA: 0x%x" % (rva)
        )
        return
    exports = []
    max_failed_entries_before_giving_up = 10
    section = self.get_section_by_rva(export_dir.AddressOfNames)
    # Overly generous upper bound
    safety_boundary = len(self.__data__)
    if section:
        safety_boundary = (
            section.VirtualAddress
            + len(section.get_data())
            - export_dir.AddressOfNames
        )
    symbol_counts = collections.defaultdict(int)
    export_parsing_loop_completed_normally = True
    # First pass: walk the name table (named exports).
    for i in range(min(export_dir.NumberOfNames, int(safety_boundary / 4))):
        symbol_ordinal = self.get_word_from_data(address_of_name_ordinals, i)
        if symbol_ordinal is not None and symbol_ordinal * 4 < len(
            address_of_functions
        ):
            symbol_address = self.get_dword_from_data(
                address_of_functions, symbol_ordinal
            )
        else:
            # Corrupt? a bad pointer... we assume it's all
            # useless, no exports
            return None
        if symbol_address is None or symbol_address == 0:
            continue
        # If the function's RVA points within the export directory
        # it will point to a string with the forwarded symbol's string
        # instead of pointing the the function start address.
        if symbol_address >= rva and symbol_address < rva + size:
            forwarder_str = self.get_string_at_rva(symbol_address)
            try:
                forwarder_offset = self.get_offset_from_rva(symbol_address)
            except PEFormatError:
                continue
        else:
            if forwarded_only:
                continue
            forwarder_str = None
            forwarder_offset = None
        symbol_name_address = self.get_dword_from_data(address_of_names, i)
        if symbol_name_address is None:
            max_failed_entries_before_giving_up -= 1
            if max_failed_entries_before_giving_up <= 0:
                export_parsing_loop_completed_normally = False
                break
        symbol_name = self.get_string_at_rva(
            symbol_name_address, MAX_SYMBOL_NAME_LENGTH
        )
        if not is_valid_function_name(symbol_name):
            export_parsing_loop_completed_normally = False
            break
    
        try:
            symbol_name_offset = self.get_offset_from_rva(symbol_name_address)
        except PEFormatError:
            max_failed_entries_before_giving_up -= 1
            if max_failed_entries_before_giving_up <= 0:
                export_parsing_loop_completed_normally = False
                break
            # Retry once before skipping the entry.
            try:
                symbol_name_offset = self.get_offset_from_rva(symbol_name_address)
            except PEFormatError:
                max_failed_entries_before_giving_up -= 1
                if max_failed_entries_before_giving_up <= 0:
                    export_parsing_loop_completed_normally = False
                    break
                continue
        # File 0b1d3d3664915577ab9a32188d29bbf3542b86c7b9ce333e245496c3018819f1
        # was being parsed as potentially containing millions of exports.
        # Checking for duplicates addresses the issue.
        symbol_counts[(symbol_name, symbol_address)] += 1
        if symbol_counts[(symbol_name, symbol_address)] > 10:
            self.__warnings.append(
                f"Export directory contains more than 10 repeated entries "
                f"({symbol_name}, {symbol_address:#02x}). Assuming corrupt."
            )
            break
        elif len(symbol_counts) > self.max_symbol_exports:
            self.__warnings.append(
                "Export directory contains more than {} symbol entries. "
                "Assuming corrupt.".format(self.max_symbol_exports)
            )
            break
        exports.append(
            ExportData(
                pe=self,
                ordinal=export_dir.Base + symbol_ordinal,
                ordinal_offset=self.get_offset_from_rva(
                    export_dir.AddressOfNameOrdinals + 2 * i
                ),
                address=symbol_address,
                address_offset=self.get_offset_from_rva(
                    export_dir.AddressOfFunctions + 4 * symbol_ordinal
                ),
                name=symbol_name,
                name_offset=symbol_name_offset,
                forwarder=forwarder_str,
                forwarder_offset=forwarder_offset,
            )
        )
    if not export_parsing_loop_completed_normally:
        self.__warnings.append(
            f"RVA AddressOfNames in the export directory points to an invalid "
            f"address: {export_dir.AddressOfNames:x}"
        )
    ordinals = {exp.ordinal for exp in exports}
    max_failed_entries_before_giving_up = 10
    section = self.get_section_by_rva(export_dir.AddressOfFunctions)
    # Overly generous upper bound
    safety_boundary = len(self.__data__)
    if section:
        safety_boundary = (
            section.VirtualAddress
            + len(section.get_data())
            - export_dir.AddressOfFunctions
        )
    symbol_counts = collections.defaultdict(int)
    export_parsing_loop_completed_normally = True
    # Second pass: walk the function table for ordinal-only exports
    # (those not already collected through the name table above).
    for idx in range(min(export_dir.NumberOfFunctions, int(safety_boundary / 4))):
        if not idx + export_dir.Base in ordinals:
            try:
                symbol_address = self.get_dword_from_data(address_of_functions, idx)
            except PEFormatError:
                symbol_address = None
            if symbol_address is None:
                max_failed_entries_before_giving_up -= 1
                if max_failed_entries_before_giving_up <= 0:
                    export_parsing_loop_completed_normally = False
                    break
            if symbol_address == 0:
                continue
            # Checking for forwarder again.
            if (
                symbol_address is not None
                and symbol_address >= rva
                and symbol_address < rva + size
            ):
                forwarder_str = self.get_string_at_rva(symbol_address)
            else:
                forwarder_str = None
            # File 0b1d3d3664915577ab9a32188d29bbf3542b86c7b9ce333e245496c3018819f1
            # was being parsed as potentially containing millions of exports.
            # Checking for duplicates addresses the issue.
            symbol_counts[symbol_address] += 1
            if symbol_counts[symbol_address] > self.max_repeated_symbol:
                # if most_common and most_common[0][1] > 10:
                self.__warnings.append(
                    "Export directory contains more than {} repeated "
                    "ordinal entries (0x{:x}). Assuming corrupt.".format(
                        self.max_repeated_symbol, symbol_address
                    )
                )
                break
            elif len(symbol_counts) > self.max_symbol_exports:
                self.__warnings.append(
                    "Export directory contains more than "
                    f"{self.max_symbol_exports} ordinal entries. Assuming corrupt."
                )
                break
            exports.append(
                ExportData(
                    ordinal=export_dir.Base + idx,
                    address=symbol_address,
                    name=None,
                    forwarder=forwarder_str,
                )
            )
    if not export_parsing_loop_completed_normally:
        self.__warnings.append(
            "RVA AddressOfFunctions in the export directory points to an invalid "
            f"address: {export_dir.AddressOfFunctions:x}"
        )
        return
    if not exports and export_dir.all_zeroes():
        return None
    return ExportDirData(
        struct=export_dir,
        symbols=exports,
        name=self.get_string_at_rva(export_dir.Name),
    )
def dword_align(self, offset, base):
    """Align *offset* to a DWORD (4-byte) boundary relative to *base*.

    Rounds (offset + base) up to the next multiple of four and expresses
    the result relative to the 4-byte-aligned start of *base*.
    """
    rounded_up = (offset + base + 3) & 0xFFFFFFFC
    base_aligned_down = base & 0xFFFFFFFC
    return rounded_up - base_aligned_down
def normalize_import_va(self, va):
    """Convert an absolute VA to an RVA when it lies inside the image.

    Values outside [ImageBase, ImageBase + SizeOfImage) — including
    zero — are considered bogus and returned unchanged.
    """
    # Setup image range
    image_start = self.OPTIONAL_HEADER.ImageBase
    image_end = image_start + self.OPTIONAL_HEADER.SizeOfImage
    # Try to avoid bogus VAs, which are out of the image.
    # This also filters out entries that are zero
    if image_start <= va < image_end:
        return va - image_start
    return va
def parse_delay_import_directory(self, rva, size):
    """Walk and parse the delay import directory.

    Iterates the array of delay-load import descriptors starting at
    *rva* until an all-zero terminator (or an error) is found, parsing
    the thunk arrays of each descriptor.

    Returns a list of ImportDescData instances, one per delay-loaded
    DLL. Parsing problems are recorded as warnings rather than raised.
    """
    import_descs = []
    error_count = 0
    while True:
        try:
            # If the RVA is invalid all would blow up. Some PEs seem to be
            # specially nasty and have an invalid RVA.
            data = self.get_data(
                rva,
                Structure(self.__IMAGE_DELAY_IMPORT_DESCRIPTOR_format__).sizeof(),
            )
        except PEFormatError:
            self.__warnings.append(
                "Error parsing the Delay import directory at RVA: 0x%x" % (rva)
            )
            break
        file_offset = self.get_offset_from_rva(rva)
        import_desc = self.__unpack_data__(
            self.__IMAGE_DELAY_IMPORT_DESCRIPTOR_format__,
            data,
            file_offset=file_offset,
        )
        # If the structure is all zeros, we reached the end of the list
        if not import_desc or import_desc.all_zeroes():
            break
        contains_addresses = False
        # Handle old import descriptor that has Virtual Addresses instead of RVAs
        # This version of import descriptor is created by old Visual Studio versions
        # (pre 6.0)
        # Can only be present in 32-bit binaries (no 64-bit compiler existed at the
        # time)
        # Sample: e8d3bff0c1a9a6955993f7a441121a2692261421e82fdfadaaded45d3bea9980
        if (
            import_desc.grAttrs == 0
            and self.FILE_HEADER.Machine == MACHINE_TYPE["IMAGE_FILE_MACHINE_I386"]
        ):
            import_desc.pBoundIAT = self.normalize_import_va(import_desc.pBoundIAT)
            import_desc.pIAT = self.normalize_import_va(import_desc.pIAT)
            import_desc.pINT = self.normalize_import_va(import_desc.pINT)
            import_desc.pUnloadIAT = self.normalize_import_va(
                import_desc.pUnloadIAT
            )
            # FIX: previously this normalized pUnloadIAT a second time and
            # stored the result into phmod, clobbering the module-handle
            # field with an unrelated value.
            import_desc.phmod = self.normalize_import_va(import_desc.phmod)
            import_desc.szName = self.normalize_import_va(import_desc.szName)
            contains_addresses = True
        rva += import_desc.sizeof()
        # If the array of thunks is somewhere earlier than the import
        # descriptor we can set a maximum length for the array. Otherwise
        # just set a maximum length of the size of the file
        max_len = len(self.__data__) - file_offset
        if rva > import_desc.pINT or rva > import_desc.pIAT:
            max_len = max(rva - import_desc.pINT, rva - import_desc.pIAT)
        import_data = []
        try:
            import_data = self.parse_imports(
                import_desc.pINT,
                import_desc.pIAT,
                None,
                max_len,
                contains_addresses,
            )
        except PEFormatError as excp:
            self.__warnings.append(
                "Error parsing the Delay import directory. "
                "Invalid import data at RVA: 0x{0:x} ({1})".format(rva, excp.value)
            )
        if error_count > 5:
            self.__warnings.append(
                "Too many errors parsing the Delay import directory. "
                "Invalid import data at RVA: 0x{0:x}".format(rva)
            )
            break
        if not import_data:
            error_count += 1
            continue
        # Upper bound on the total number of import symbols across the file.
        if self.__total_import_symbols > MAX_IMPORT_SYMBOLS:
            self.__warnings.append(
                "Error, too many imported symbols %d (>%s)"
                % (self.__total_import_symbols, MAX_IMPORT_SYMBOLS)
            )
            break
        dll = self.get_string_at_rva(import_desc.szName, MAX_DLL_LENGTH)
        if not is_valid_dos_filename(dll):
            dll = b("*invalid*")
        if dll:
            # Resolve names for symbols imported by ordinal, where known.
            for symbol in import_data:
                if symbol.name is None:
                    funcname = ordlookup.ordLookup(dll.lower(), symbol.ordinal)
                    if funcname:
                        symbol.name = funcname
            import_descs.append(
                ImportDescData(struct=import_desc, imports=import_data, dll=dll)
            )
    return import_descs
def get_rich_header_hash(self, algorithm="md5"):
    """Return the hex digest of the Rich header's clear data.

    Supported algorithms: "md5" (default), "sha1", "sha256", "sha512".
    Returns an empty string when no Rich header is present.
    """
    rich_header = getattr(self, "RICH_HEADER", None)
    if rich_header is None:
        return ""
    digests = {"md5": md5, "sha1": sha1, "sha256": sha256, "sha512": sha512}
    hasher = digests.get(algorithm)
    if hasher is not None:
        return hasher(rich_header.clear_data).hexdigest()
    raise Exception("Invalid hashing algorithm specified")
def get_imphash(self):
    """Compute the import hash (imphash) of this PE.

    The imphash is the MD5 of a comma-separated, lower-cased list of
    "libname.funcname" entries built from the import table. Common
    library extensions (ocx/sys/dll) are stripped from the library name.
    Returns an empty string when there is no import directory.
    """
    if not hasattr(self, "DIRECTORY_ENTRY_IMPORT"):
        return ""
    strip_exts = ("ocx", "sys", "dll")
    entries = []
    for entry in self.DIRECTORY_ENTRY_IMPORT:
        raw_dll = entry.dll
        libname = (raw_dll.decode() if isinstance(raw_dll, bytes) else raw_dll).lower()
        pieces = libname.rsplit(".", 1)
        if len(pieces) > 1 and pieces[1] in strip_exts:
            libname = pieces[0]
        for imp in entry.imports:
            if imp.name:
                funcname = imp.name
            else:
                # Imported by ordinal: try to resolve a well-known name.
                funcname = ordlookup.ordLookup(
                    entry.dll.lower(), imp.ordinal, make_name=True
                )
                if not funcname:
                    raise PEFormatError(
                        f"Unable to look up ordinal {entry.dll}:{imp.ordinal:04x}"
                    )
            if not funcname:
                continue
            if isinstance(funcname, bytes):
                funcname = funcname.decode()
            entries.append("%s.%s" % (libname.lower(), funcname.lower()))
    return md5(",".join(entries).encode()).hexdigest()
def parse_import_directory(self, rva, size, dllnames_only=False):
    """Walk and parse the import directory.

    Iterates the IMAGE_IMPORT_DESCRIPTOR array at *rva* until an
    all-zero terminator, returning a list of ImportDescData instances.
    With dllnames_only=True, thunk arrays are not parsed.
    """
    import_descs = []
    error_count = 0
    while True:
        try:
            # If the RVA is invalid all would blow up. Some EXEs seem to be
            # specially nasty and have an invalid RVA.
            data = self.get_data(
                rva, Structure(self.__IMAGE_IMPORT_DESCRIPTOR_format__).sizeof()
            )
        except PEFormatError:
            self.__warnings.append(
                f"Error parsing the import directory at RVA: 0x{rva:x}"
            )
            break
        file_offset = self.get_offset_from_rva(rva)
        import_desc = self.__unpack_data__(
            self.__IMAGE_IMPORT_DESCRIPTOR_format__, data, file_offset=file_offset
        )
        # If the structure is all zeros, we reached the end of the list
        if not import_desc or import_desc.all_zeroes():
            break
        rva += import_desc.sizeof()
        # If the array of thunks is somewhere earlier than the import
        # descriptor we can set a maximum length for the array. Otherwise
        # just set a maximum length of the size of the file
        max_len = len(self.__data__) - file_offset
        if rva > import_desc.OriginalFirstThunk or rva > import_desc.FirstThunk:
            max_len = max(
                rva - import_desc.OriginalFirstThunk, rva - import_desc.FirstThunk
            )
        import_data = []
        if not dllnames_only:
            try:
                import_data = self.parse_imports(
                    import_desc.OriginalFirstThunk,
                    import_desc.FirstThunk,
                    import_desc.ForwarderChain,
                    max_length=max_len,
                )
            except PEFormatError as e:
                self.__warnings.append(
                    "Error parsing the import directory. "
                    f"Invalid Import data at RVA: 0x{rva:x} ({e.value})"
                )
            if error_count > 5:
                self.__warnings.append(
                    "Too many errors parsing the import directory. "
                    f"Invalid import data at RVA: 0x{rva:x}"
                )
                break
            if not import_data:
                error_count += 1
                # TODO: do not continue here
                continue
        dll = self.get_string_at_rva(import_desc.Name, MAX_DLL_LENGTH)
        if not is_valid_dos_filename(dll):
            dll = b("*invalid*")
        if dll:
            # Resolve names for symbols imported by ordinal, where known.
            for symbol in import_data:
                if symbol.name is None:
                    funcname = ordlookup.ordLookup(dll.lower(), symbol.ordinal)
                    if funcname:
                        symbol.name = funcname
            import_descs.append(
                ImportDescData(struct=import_desc, imports=import_data, dll=dll)
            )
    if not dllnames_only:
        # Heuristic: a tiny import table consisting essentially of
        # LoadLibrary* + GetProcAddress* is typical of packed binaries.
        suspicious_imports = set(["LoadLibrary", "GetProcAddress"])
        suspicious_imports_count = 0
        total_symbols = 0
        for imp_dll in import_descs:
            for symbol in imp_dll.imports:
                for suspicious_symbol in suspicious_imports:
                    if not symbol or not symbol.name:
                        continue
                    name = symbol.name
                    if type(symbol.name) == bytes:
                        name = symbol.name.decode("utf-8")
                    if name.startswith(suspicious_symbol):
                        suspicious_imports_count += 1
                        break
                total_symbols += 1
        if (
            suspicious_imports_count == len(suspicious_imports)
            and total_symbols < 20
        ):
            self.__warnings.append(
                "Imported symbols contain entries typical of packed executables."
            )
    return import_descs
def parse_imports(
    self,
    original_first_thunk,
    first_thunk,
    forwarder_chain,
    max_length=None,
    contains_addresses=False,
):
    """Parse the imported symbols.

    It will fill a list, which will be available as the dictionary
    attribute "imports". Its keys will be the DLL names and the values
    of all the symbols imported from that object.
    """
    imported_symbols = []
    # Import Lookup Table. Contains ordinals or pointers to strings.
    ilt = self.get_import_table(
        original_first_thunk, max_length, contains_addresses
    )
    # Import Address Table. May have identical content to ILT if
    # PE file is not bound. It will contain the address of the
    # imported symbols once the binary is loaded or if it is already
    # bound.
    iat = self.get_import_table(first_thunk, max_length, contains_addresses)
    # OC Patch:
    # Would crash if IAT or ILT had None type
    if (not iat or len(iat) == 0) and (not ilt or len(ilt) == 0):
        self.__warnings.append(
            "Damaged Import Table information. "
            "ILT and/or IAT appear to be broken. "
            f"OriginalFirstThunk: 0x{original_first_thunk:x} "
            f"FirstThunk: 0x{first_thunk:x}"
        )
        return []
    # Prefer the ILT; fall back to the IAT when the ILT is missing.
    table = None
    if ilt:
        table = ilt
    elif iat:
        table = iat
    else:
        return None
    # Thunk size and ordinal-mask depend on PE32 vs PE32+.
    imp_offset = 4
    address_mask = 0x7FFFFFFF
    if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
        ordinal_flag = IMAGE_ORDINAL_FLAG
    elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
        ordinal_flag = IMAGE_ORDINAL_FLAG64
        imp_offset = 8
        address_mask = 0x7FFFFFFFFFFFFFFF
    else:
        # Some PEs may have an invalid value in the Magic field of the
        # Optional Header. Just in case the remaining file is parseable
        # let's pretend it's a 32bit PE32 by default.
        ordinal_flag = IMAGE_ORDINAL_FLAG
    num_invalid = 0
    for idx, tbl_entry in enumerate(table):
        imp_ord = None
        imp_hint = None
        imp_name = None
        name_offset = None
        hint_name_table_rva = None
        if tbl_entry.AddressOfData:
            # If imported by ordinal, we will append the ordinal number
            #
            if tbl_entry.AddressOfData & ordinal_flag:
                import_by_ordinal = True
                imp_ord = tbl_entry.AddressOfData & 0xFFFF
                imp_name = None
                name_offset = None
            else:
                import_by_ordinal = False
                try:
                    hint_name_table_rva = tbl_entry.AddressOfData & address_mask
                    data = self.get_data(hint_name_table_rva, 2)
                    # Get the Hint
                    imp_hint = self.get_word_from_data(data, 0)
                    # The name string starts 2 bytes in, after the hint word.
                    imp_name = self.get_string_at_rva(
                        tbl_entry.AddressOfData + 2, MAX_IMPORT_NAME_LENGTH
                    )
                    if not is_valid_function_name(imp_name):
                        imp_name = b("*invalid*")
                    name_offset = self.get_offset_from_rva(
                        tbl_entry.AddressOfData + 2
                    )
                except PEFormatError:
                    pass
        # by nriva: we want the ThunkRVA and ThunkOffset
        thunk_offset = tbl_entry.get_file_offset()
        thunk_rva = self.get_rva_from_offset(thunk_offset)
        imp_address = (
            first_thunk + self.OPTIONAL_HEADER.ImageBase + idx * imp_offset
        )
        struct_iat = None
        try:
            # A differing IAT entry indicates the import is bound.
            if iat and ilt and ilt[idx].AddressOfData != iat[idx].AddressOfData:
                imp_bound = iat[idx].AddressOfData
                struct_iat = iat[idx]
            else:
                imp_bound = None
        except IndexError:
            imp_bound = None
        # The file with hashes:
        #
        # MD5: bfe97192e8107d52dd7b4010d12b2924
        # SHA256: 3d22f8b001423cb460811ab4f4789f277b35838d45c62ec0454c877e7c82c7f5
        #
        # has an invalid table built in a way that it's parseable but contains
        # invalid entries that lead pefile to take extremely long amounts of time to
        # parse. It also leads to extreme memory consumption.
        # To prevent similar cases, if invalid entries are found in the middle of a
        # table the parsing will be aborted
        #
        if imp_ord == None and imp_name == None:
            raise PEFormatError("Invalid entries, aborting parsing.")
        # Some PEs appear to interleave valid and invalid imports. Instead of
        # aborting the parsing altogether we will simply skip the invalid entries.
        # Although if we see 1000 invalid entries and no legit ones, we abort.
        if imp_name == b("*invalid*"):
            if num_invalid > 1000 and num_invalid == idx:
                raise PEFormatError("Too many invalid names, aborting parsing.")
            num_invalid += 1
            continue
        if imp_ord or imp_name:
            imported_symbols.append(
                ImportData(
                    pe=self,
                    struct_table=tbl_entry,
                    struct_iat=struct_iat,  # for bound imports if any
                    import_by_ordinal=import_by_ordinal,
                    ordinal=imp_ord,
                    ordinal_offset=tbl_entry.get_file_offset(),
                    hint=imp_hint,
                    name=imp_name,
                    name_offset=name_offset,
                    bound=imp_bound,
                    address=imp_address,
                    hint_name_table_rva=hint_name_table_rva,
                    thunk_offset=thunk_offset,
                    thunk_rva=thunk_rva,
                )
            )
    return imported_symbols
def get_import_table(self, rva, max_length=None, contains_addresses=False):
    """Read the array of thunk-data structures starting at *rva*.

    Walks IMAGE_THUNK_DATA entries until a zero terminator, a bound is
    exceeded, or the data looks corrupt. Returns a list of unpacked
    thunk structures, [] when the table is judged bogus, or None on an
    unreadable entry.
    """
    table = []
    # We need the ordinal flag for a simple heuristic
    # we're implementing within the loop
    #
    if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
        ordinal_flag = IMAGE_ORDINAL_FLAG
        format = self.__IMAGE_THUNK_DATA_format__
    elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
        ordinal_flag = IMAGE_ORDINAL_FLAG64
        format = self.__IMAGE_THUNK_DATA64_format__
    else:
        # Some PEs may have an invalid value in the Magic field of the
        # Optional Header. Just in case the remaining file is parseable
        # let's pretend it's a 32bit PE32 by default.
        ordinal_flag = IMAGE_ORDINAL_FLAG
        format = self.__IMAGE_THUNK_DATA_format__
    MAX_ADDRESS_SPREAD = 128 * 2 ** 20  # 128 MB
    MAX_REPEATED_ADDRESSES = 15
    repeated_address = 0
    addresses_of_data_set_64 = set()
    addresses_of_data_set_32 = set()
    start_rva = rva
    while rva:
        if max_length is not None and rva >= start_rva + max_length:
            self.__warnings.append(
                "Error parsing the import table. Entries go beyond bounds."
            )
            break
        # Enforce an upper bounds on import symbols.
        if self.__total_import_symbols > MAX_IMPORT_SYMBOLS:
            self.__warnings.append(
                "Excessive number of imports %d (>%s)"
                % (self.__total_import_symbols, MAX_IMPORT_SYMBOLS)
            )
            break
        self.__total_import_symbols += 1
        # if we see too many times the same entry we assume it could be
        # a table containing bogus data (with malicious intent or otherwise)
        if repeated_address >= MAX_REPEATED_ADDRESSES:
            return []
        # if the addresses point somewhere but the difference between the highest
        # and lowest address is larger than MAX_ADDRESS_SPREAD we assume a bogus
        # table as the addresses should be contained within a module
        if (
            addresses_of_data_set_32
            and max(addresses_of_data_set_32) - min(addresses_of_data_set_32)
            > MAX_ADDRESS_SPREAD
        ):
            return []
        if (
            addresses_of_data_set_64
            and max(addresses_of_data_set_64) - min(addresses_of_data_set_64)
            > MAX_ADDRESS_SPREAD
        ):
            return []
        failed = False
        try:
            data = self.get_data(rva, Structure(format).sizeof())
        except PEFormatError:
            failed = True
        # Note: `failed` short-circuits the `or`, so `data` is only
        # referenced when get_data() succeeded.
        if failed or len(data) != Structure(format).sizeof():
            self.__warnings.append(
                "Error parsing the import table. " "Invalid data at RVA: 0x%x" % rva
            )
            return None
        thunk_data = self.__unpack_data__(
            format, data, file_offset=self.get_offset_from_rva(rva)
        )
        # If the thunk data contains VAs instead of RVAs, we need to normalize them
        if contains_addresses:
            thunk_data.AddressOfData = self.normalize_import_va(
                thunk_data.AddressOfData
            )
            thunk_data.ForwarderString = self.normalize_import_va(
                thunk_data.ForwarderString
            )
            thunk_data.Function = self.normalize_import_va(thunk_data.Function)
            thunk_data.Ordinal = self.normalize_import_va(thunk_data.Ordinal)
        # Check if the AddressOfData lies within the range of RVAs that it's
        # being scanned, abort if that is the case, as it is very unlikely
        # to be legitimate data.
        # Seen in PE with SHA256:
        # 5945bb6f0ac879ddf61b1c284f3b8d20c06b228e75ae4f571fa87f5b9512902c
        if (
            thunk_data
            and thunk_data.AddressOfData >= start_rva
            and thunk_data.AddressOfData <= rva
        ):
            self.__warnings.append(
                "Error parsing the import table. "
                "AddressOfData overlaps with THUNK_DATA for "
                "THUNK at RVA 0x%x" % (rva)
            )
            break
        if thunk_data and thunk_data.AddressOfData:
            # If the entry looks like could be an ordinal...
            if thunk_data.AddressOfData & ordinal_flag:
                # but its value is beyond 2^16, we will assume it's a
                # corrupted and ignore it altogether
                if thunk_data.AddressOfData & 0x7FFFFFFF > 0xFFFF:
                    return []
            # and if it looks like it should be an RVA
            else:
                # keep track of the RVAs seen and store them to study their
                # properties. When certain non-standard features are detected
                # the parsing will be aborted
                if (
                    thunk_data.AddressOfData in addresses_of_data_set_32
                    or thunk_data.AddressOfData in addresses_of_data_set_64
                ):
                    repeated_address += 1
                if thunk_data.AddressOfData >= 2 ** 32:
                    addresses_of_data_set_64.add(thunk_data.AddressOfData)
                else:
                    addresses_of_data_set_32.add(thunk_data.AddressOfData)
        if not thunk_data or thunk_data.all_zeroes():
            break
        rva += thunk_data.sizeof()
        table.append(thunk_data)
    return table
def get_memory_mapped_image(self, max_virtual_address=0x10000000, ImageBase=None):
    """Returns the data corresponding to the memory layout of the PE file.

    The data includes the PE header and the sections loaded at offsets
    corresponding to their relative virtual addresses. (the VirtualAddress
    section header member).
    Any offset in this data corresponds to the absolute memory address
    ImageBase+offset.

    The optional argument 'max_virtual_address' provides with means of limiting
    which sections are processed.
    Any section with their VirtualAddress beyond this value will be skipped.
    Normally, sections with values beyond this range are just there to confuse
    tools. It's a common trick to see in packed executables.

    If the 'ImageBase' optional argument is supplied, the file's relocations
    will be applied to the image by calling the 'relocate_image()' method. Beware
    that the relocation information is applied permanently.
    """
    # Rebase if requested
    #
    if ImageBase is not None:
        # Keep a copy of the image's data before modifying it by rebasing it
        #
        original_data = self.__data__
        self.relocate_image(ImageBase)
    # Collect all sections in one code block
    # Start from a copy of the raw file data; the headers are mapped at
    # offset 0, and each section's data is appended at its adjusted VA.
    mapped_data = self.__data__[:]
    for section in self.sections:
        # Miscellaneous integrity tests.
        # Some packer will set these to bogus values to make tools go nuts.
        if section.Misc_VirtualSize == 0 and section.SizeOfRawData == 0:
            continue
        srd = section.SizeOfRawData
        prd = self.adjust_FileAlignment(
            section.PointerToRawData, self.OPTIONAL_HEADER.FileAlignment
        )
        VirtualAddress_adj = self.adjust_SectionAlignment(
            section.VirtualAddress,
            self.OPTIONAL_HEADER.SectionAlignment,
            self.OPTIONAL_HEADER.FileAlignment,
        )
        # Skip sections whose raw extents fall outside the file or whose
        # VA exceeds the caller-imposed limit.
        if (
            srd > len(self.__data__)
            or prd > len(self.__data__)
            or srd + prd > len(self.__data__)
            or VirtualAddress_adj >= max_virtual_address
        ):
            continue
        # Pad (or truncate) the mapped image so the section's data lands
        # exactly at its adjusted virtual address.
        padding_length = VirtualAddress_adj - len(mapped_data)
        if padding_length > 0:
            mapped_data += b"\0" * padding_length
        elif padding_length < 0:
            mapped_data = mapped_data[:padding_length]
        mapped_data += section.get_data()
    # If the image was rebased, restore it to its original form
    #
    if ImageBase is not None:
        self.__data__ = original_data
    return mapped_data
def get_resources_strings(self):
    """Collect every string found within the PE's resources.

    Scans all entries of the resource directory, if present, and returns
    them in a list; returns an empty list otherwise.
    """
    collected = []
    if not hasattr(self, "DIRECTORY_ENTRY_RESOURCE"):
        return collected
    for res_type in self.DIRECTORY_ENTRY_RESOURCE.entries:
        if not hasattr(res_type, "directory"):
            continue
        for resource_id in res_type.directory.entries:
            if not hasattr(resource_id, "directory"):
                continue
            strings = getattr(resource_id.directory, "strings", None)
            if strings:
                collected.extend(strings.values())
    return collected
def get_data(self, rva=0, length=None):
    """Get data regardless of the section where it lies on.

    Given a RVA and the size of the chunk to retrieve, this method
    will find the section where the data lies and return the data.
    """
    s = self.get_section_by_rva(rva)
    if length:
        end = rva + length
    else:
        # No length given: slice to the end of whichever buffer is used.
        end = None
    if not s:
        # The RVA is not covered by any section; it may still fall within
        # the headers, which map at offset 0.
        if rva < len(self.header):
            return self.header[rva:end]
        # Before we give up we check whether the file might
        # contain the data anyway. There are cases of PE files
        # without sections that rely on windows loading the first
        # 8291 bytes into memory and assume the data will be
        # there
        # A functional file with these characteristics is:
        # MD5: 0008892cdfbc3bda5ce047c565e52295
        # SHA-1: c7116b9ff950f86af256defb95b5d4859d4752a9
        #
        if rva < len(self.__data__):
            return self.__data__[rva:end]
        raise PEFormatError("data at RVA can't be fetched. Corrupt header?")
    return s.get_data(rva, length)
def get_rva_from_offset(self, offset):
    """Get the RVA corresponding to this file offset.

    Returns None when the offset falls between the lowest section RVA
    and the section data, i.e. cannot be mapped.
    """
    s = self.get_section_by_offset(offset)
    if not s:
        if self.sections:
            lowest_rva = min(
                [
                    self.adjust_SectionAlignment(
                        s.VirtualAddress,
                        self.OPTIONAL_HEADER.SectionAlignment,
                        self.OPTIONAL_HEADER.FileAlignment,
                    )
                    for s in self.sections
                ]
            )
            if offset < lowest_rva:
                # We will assume that the offset lies within the headers, or
                # at least points before where the earliest section starts
                # and we will simply return the offset as the RVA
                #
                # The case illustrating this behavior can be found at:
                # http://corkami.blogspot.com/2010/01/hey-hey-hey-whats-in-your-head.html
                # where the import table is not contained by any section
                # hence the RVA needs to be resolved to a raw offset
                return offset
            return None
        else:
            # No sections at all: RVAs and offsets are interchangeable.
            return offset
    return s.get_rva_from_offset(offset)
def get_offset_from_rva(self, rva):
    """Get the file offset corresponding to this RVA.

    Given a RVA, this method will find the section where the
    data lies and return the offset within the file.
    """
    section = self.get_section_by_rva(rva)
    if section:
        return section.get_offset_from_rva(rva)
    # Not inside any section: the RVA may still point to overlay data or
    # to data present in the file but not covered by any section header.
    # In those cases the RVA equals the file offset.
    if rva < len(self.__data__):
        return rva
    raise PEFormatError(f"data at RVA 0x{rva:x} can't be fetched")
def get_string_at_rva(self, rva, max_length=MAX_STRING_LENGTH):
    """Get an ASCII string located at the given address.

    Returns None when rva is None; otherwise delegates NUL-termination
    handling to get_string_from_data().
    """
    if rva is None:
        return None
    s = self.get_section_by_rva(rva)
    if not s:
        # Not inside any section: read straight from the raw file data,
        # treating the RVA as a file offset.
        return self.get_string_from_data(0, self.__data__[rva : rva + max_length])
    return self.get_string_from_data(0, s.get_data(rva, length=max_length))
- def get_bytes_from_data(self, offset, data):
- """."""
- if offset > len(data):
- return b""
- d = data[offset:]
- if isinstance(d, bytearray):
- return bytes(d)
- return d
- def get_string_from_data(self, offset, data):
- """Get an ASCII string from data."""
- s = self.get_bytes_from_data(offset, data)
- end = s.find(b"\0")
- if end >= 0:
- s = s[:end]
- return s
    def get_string_u_at_rva(self, rva, max_length=2 ** 16, encoding=None):
        """Get an Unicode string located at the given address.

        The string is read as UTF-16LE data, up to *max_length* 16-bit
        characters or a double-NUL terminator, whichever comes first.
        Returns bytes: the string re-encoded with *encoding* (or UTF-8
        when no encoding is given).
        """
        if max_length == 0:
            return b""

        # If the RVA is invalid let the exception reach the callers. All
        # call-sites of get_string_u_at_rva() will handle it.
        # (This probe read exists only to trigger that exception early.)
        data = self.get_data(rva, 2)

        # max_length is the maximum count of 16bit characters needs to be
        # doubled to get size in bytes
        max_length <<= 1

        # Read a small chunk first and grow on demand, so short strings do
        # not force a full max_length read.
        requested = min(max_length, 256)
        data = self.get_data(rva, requested)
        # try to find null-termination
        null_index = -1
        while True:
            null_index = data.find(b"\x00\x00", null_index + 1)
            if null_index == -1:
                data_length = len(data)
                if data_length < requested or data_length == max_length:
                    # EOF reached, or the cap was hit: take everything read
                    # so far (in 16-bit characters).
                    null_index = len(data) >> 1
                    break

                # Request remaining part of data limited by max_length
                data += self.get_data(rva + data_length, max_length - data_length)
                # Resume scanning just before the old/new data boundary so a
                # terminator straddling the boundary is not missed.
                null_index = requested - 1
                requested = max_length
            elif null_index % 2 == 0:
                # Only an even byte offset is a character boundary; an odd
                # hit is the second byte of one char plus the first of the
                # next, so keep scanning.
                null_index >>= 1
                break

        # convert selected part of the string to unicode
        uchrs = struct.unpack("<{:d}H".format(null_index), data[: null_index * 2])
        s = "".join(map(chr, uchrs))

        if encoding:
            return b(s.encode(encoding, "backslashreplace_"))

        return b(s.encode("utf-8", "backslashreplace_"))
- def get_section_by_offset(self, offset):
- """Get the section containing the given file offset."""
- for section in self.sections:
- if section.contains_offset(offset):
- return section
- return None
- def get_section_by_rva(self, rva):
- """Get the section containing the given address."""
- for section in self.sections:
- if section.contains_rva(rva):
- return section
- return None
- def __str__(self):
- return self.dump_info()
- def has_relocs(self):
- """Checks if the PE file has relocation directory"""
- return hasattr(self, "DIRECTORY_ENTRY_BASERELOC")
- def print_info(self, encoding="utf-8"):
- """Print all the PE header information in a human readable from."""
- print(self.dump_info(encoding=encoding))
    def dump_info(self, dump=None, encoding="ascii"):
        """Dump all the PE header information into human readable string.

        When *dump* is None a fresh Dump accumulator is created, otherwise
        the text is appended to the provided one.  *encoding* is used to
        decode byte strings found in the file (names, version strings, ...).
        """
        if dump is None:
            dump = Dump()

        warnings = self.get_warnings()
        if warnings:
            dump.add_header("Parsing Warnings")
            for warning in warnings:
                dump.add_line(warning)
                dump.add_newline()

        # Fixed headers: DOS, NT and FILE.
        dump.add_header("DOS_HEADER")
        dump.add_lines(self.DOS_HEADER.dump())
        dump.add_newline()

        dump.add_header("NT_HEADERS")
        dump.add_lines(self.NT_HEADERS.dump())
        dump.add_newline()

        dump.add_header("FILE_HEADER")
        dump.add_lines(self.FILE_HEADER.dump())

        image_flags = retrieve_flags(IMAGE_CHARACTERISTICS, "IMAGE_FILE_")

        dump.add("Flags: ")
        flags = []
        for flag in sorted(image_flags):
            if getattr(self.FILE_HEADER, flag[0]):
                flags.append(flag[0])
        dump.add_line(", ".join(flags))
        dump.add_newline()

        if hasattr(self, "OPTIONAL_HEADER") and self.OPTIONAL_HEADER is not None:
            dump.add_header("OPTIONAL_HEADER")
            dump.add_lines(self.OPTIONAL_HEADER.dump())

        dll_characteristics_flags = retrieve_flags(
            DLL_CHARACTERISTICS, "IMAGE_DLLCHARACTERISTICS_"
        )

        # NOTE(review): unlike the block above, this OPTIONAL_HEADER access
        # is not guarded by hasattr() -- confirm it is always present here.
        dump.add("DllCharacteristics: ")
        flags = []
        for flag in sorted(dll_characteristics_flags):
            if getattr(self.OPTIONAL_HEADER, flag[0]):
                flags.append(flag[0])
        dump.add_line(", ".join(flags))
        dump.add_newline()

        dump.add_header("PE Sections")

        section_flags = retrieve_flags(SECTION_CHARACTERISTICS, "IMAGE_SCN_")
        for section in self.sections:
            dump.add_lines(section.dump())
            dump.add("Flags: ")
            flags = []
            for flag in sorted(section_flags):
                if getattr(section, flag[0]):
                    flags.append(flag[0])
            dump.add_line(", ".join(flags))
            dump.add_line(
                "Entropy: {0:f} (Min=0.0, Max=8.0)".format(section.get_entropy())
            )
            # md5/sha* are module globals -- presumably None when the given
            # hash algorithm is unavailable (set elsewhere in the module).
            if md5 is not None:
                dump.add_line("MD5 hash: {0}".format(section.get_hash_md5()))
            if sha1 is not None:
                dump.add_line("SHA-1 hash: %s" % section.get_hash_sha1())
            if sha256 is not None:
                dump.add_line("SHA-256 hash: %s" % section.get_hash_sha256())
            if sha512 is not None:
                dump.add_line("SHA-512 hash: %s" % section.get_hash_sha512())
            dump.add_newline()

        if hasattr(self, "OPTIONAL_HEADER") and hasattr(
            self.OPTIONAL_HEADER, "DATA_DIRECTORY"
        ):
            dump.add_header("Directories")
            for directory in self.OPTIONAL_HEADER.DATA_DIRECTORY:
                if directory is not None:
                    dump.add_lines(directory.dump())
            dump.add_newline()

        # Version information resource(s), when parsed.
        if hasattr(self, "VS_VERSIONINFO"):
            for idx, vinfo_entry in enumerate(self.VS_VERSIONINFO):
                if len(self.VS_VERSIONINFO) > 1:
                    dump.add_header(f"Version Information {idx + 1}")
                else:
                    dump.add_header("Version Information")
                if vinfo_entry is not None:
                    dump.add_lines(vinfo_entry.dump())
                dump.add_newline()

                if hasattr(self, "VS_FIXEDFILEINFO"):
                    dump.add_lines(self.VS_FIXEDFILEINFO[idx].dump())
                    dump.add_newline()

                if hasattr(self, "FileInfo") and len(self.FileInfo) > idx:
                    for entry in self.FileInfo[idx]:
                        dump.add_lines(entry.dump())
                        dump.add_newline()

                        if hasattr(entry, "StringTable"):
                            for st_entry in entry.StringTable:
                                [dump.add_line(" " + line) for line in st_entry.dump()]
                                dump.add_line(
                                    " LangID: {0}".format(
                                        st_entry.LangID.decode(
                                            encoding, "backslashreplace_"
                                        )
                                    )
                                )
                                dump.add_newline()
                                for str_entry in sorted(list(st_entry.entries.items())):
                                    dump.add_line(
                                        " {0}: {1}".format(
                                            str_entry[0].decode(
                                                encoding, "backslashreplace_"
                                            ),
                                            str_entry[1].decode(
                                                encoding, "backslashreplace_"
                                            ),
                                        )
                                    )
                            dump.add_newline()

                        elif hasattr(entry, "Var"):
                            for var_entry in entry.Var:
                                if hasattr(var_entry, "entry"):
                                    [
                                        dump.add_line(" " + line)
                                        for line in var_entry.dump()
                                    ]
                                    dump.add_line(
                                        " {0}: {1}".format(
                                            list(var_entry.entry.keys())[0].decode(
                                                "utf-8", "backslashreplace_"
                                            ),
                                            list(var_entry.entry.values())[0],
                                        )
                                    )
                            dump.add_newline()

        if hasattr(self, "DIRECTORY_ENTRY_EXPORT"):
            dump.add_header("Exported symbols")
            dump.add_lines(self.DIRECTORY_ENTRY_EXPORT.struct.dump())
            dump.add_newline()
            dump.add_line("%-10s %-10s %s" % ("Ordinal", "RVA", "Name"))
            for export in self.DIRECTORY_ENTRY_EXPORT.symbols:
                if export.address is not None:
                    name = b("None")
                    if export.name:
                        name = export.name
                    dump.add(
                        "%-10d 0x%08X %s"
                        % (export.ordinal, export.address, name.decode(encoding))
                    )
                    if export.forwarder:
                        dump.add_line(
                            " forwarder: {0}".format(
                                export.forwarder.decode(encoding, "backslashreplace_")
                            )
                        )
                    else:
                        dump.add_newline()
            dump.add_newline()

        if hasattr(self, "DIRECTORY_ENTRY_IMPORT"):
            dump.add_header("Imported symbols")
            for module in self.DIRECTORY_ENTRY_IMPORT:
                dump.add_lines(module.struct.dump())
                # Print the name of the DLL if there are no imports.
                if not module.imports:
                    dump.add(
                        " Name -> {0}".format(
                            self.get_string_at_rva(module.struct.Name).decode(
                                encoding, "backslashreplace_"
                            )
                        )
                    )
                    dump.add_newline()
                dump.add_newline()
                for symbol in module.imports:
                    if symbol.import_by_ordinal is True:
                        if symbol.name is not None:
                            dump.add(
                                "{0}.{1} Ordinal[{2}] (Imported by Ordinal)".format(
                                    module.dll.decode("utf-8"),
                                    symbol.name.decode("utf-8"),
                                    symbol.ordinal,
                                )
                            )
                        else:
                            dump.add(
                                "{0} Ordinal[{1}] (Imported by Ordinal)".format(
                                    module.dll.decode("utf-8"), symbol.ordinal
                                )
                            )
                    else:
                        dump.add(
                            "{0}.{1} Hint[{2:d}]".format(
                                module.dll.decode(encoding, "backslashreplace_"),
                                symbol.name.decode(encoding, "backslashreplace_"),
                                symbol.hint,
                            )
                        )
                    if symbol.bound:
                        dump.add_line(" Bound: 0x{0:08X}".format(symbol.bound))
                    else:
                        dump.add_newline()
                dump.add_newline()

        if hasattr(self, "DIRECTORY_ENTRY_BOUND_IMPORT"):
            dump.add_header("Bound imports")
            for bound_imp_desc in self.DIRECTORY_ENTRY_BOUND_IMPORT:
                dump.add_lines(bound_imp_desc.struct.dump())
                dump.add_line(
                    "DLL: {0}".format(
                        bound_imp_desc.name.decode(encoding, "backslashreplace_")
                    )
                )
                dump.add_newline()
                for bound_imp_ref in bound_imp_desc.entries:
                    dump.add_lines(bound_imp_ref.struct.dump(), 4)
                    dump.add_line(
                        "DLL: {0}".format(
                            bound_imp_ref.name.decode(encoding, "backslashreplace_")
                        ),
                        4,
                    )
                    dump.add_newline()

        if hasattr(self, "DIRECTORY_ENTRY_DELAY_IMPORT"):
            dump.add_header("Delay Imported symbols")
            for module in self.DIRECTORY_ENTRY_DELAY_IMPORT:
                dump.add_lines(module.struct.dump())
                dump.add_newline()
                for symbol in module.imports:
                    if symbol.import_by_ordinal is True:
                        dump.add(
                            "{0} Ordinal[{1:d}] (Imported by Ordinal)".format(
                                module.dll.decode(encoding, "backslashreplace_"),
                                symbol.ordinal,
                            )
                        )
                    else:
                        dump.add(
                            "{0}.{1} Hint[{2}]".format(
                                module.dll.decode(encoding, "backslashreplace_"),
                                symbol.name.decode(encoding, "backslashreplace_"),
                                symbol.hint,
                            )
                        )
                    if symbol.bound:
                        dump.add_line(" Bound: 0x{0:08X}".format(symbol.bound))
                    else:
                        dump.add_newline()
                dump.add_newline()

        if hasattr(self, "DIRECTORY_ENTRY_RESOURCE"):
            dump.add_header("Resource directory")
            dump.add_lines(self.DIRECTORY_ENTRY_RESOURCE.struct.dump())
            for res_type in self.DIRECTORY_ENTRY_RESOURCE.entries:
                # First level: resource type (named or numeric id).
                if res_type.name is not None:
                    name = res_type.name.decode(encoding, "backslashreplace_")
                    dump.add_line(
                        f"Name: [{name}]",
                        2,
                    )
                else:
                    res_type_id = RESOURCE_TYPE.get(res_type.struct.Id, "-")
                    dump.add_line(
                        f"Id: [0x{res_type.struct.Id:X}] ({res_type_id})",
                        2,
                    )
                dump.add_lines(res_type.struct.dump(), 2)
                if hasattr(res_type, "directory"):
                    dump.add_lines(res_type.directory.struct.dump(), 4)
                    for resource_id in res_type.directory.entries:
                        # Second level: individual resource id.
                        if resource_id.name is not None:
                            name = resource_id.name.decode("utf-8", "backslashreplace_")
                            dump.add_line(
                                f"Name: [{name}]",
                                6,
                            )
                        else:
                            dump.add_line(f"Id: [0x{resource_id.struct.Id:X}]", 6)
                        dump.add_lines(resource_id.struct.dump(), 6)
                        if hasattr(resource_id, "directory"):
                            dump.add_lines(resource_id.directory.struct.dump(), 8)
                            # Third level: language-specific data entries.
                            for resource_lang in resource_id.directory.entries:
                                if hasattr(resource_lang, "data"):
                                    dump.add_line(
                                        "\\--- LANG [%d,%d][%s,%s]"
                                        % (
                                            resource_lang.data.lang,
                                            resource_lang.data.sublang,
                                            LANG.get(
                                                resource_lang.data.lang, "*unknown*"
                                            ),
                                            get_sublang_name_for_lang(
                                                resource_lang.data.lang,
                                                resource_lang.data.sublang,
                                            ),
                                        ),
                                        8,
                                    )
                                    dump.add_lines(resource_lang.struct.dump(), 10)
                                    dump.add_lines(resource_lang.data.struct.dump(), 12)
                            if (
                                hasattr(resource_id.directory, "strings")
                                and resource_id.directory.strings
                            ):
                                dump.add_line("[STRINGS]", 10)
                                for idx, res_string in list(
                                    sorted(resource_id.directory.strings.items())
                                ):
                                    dump.add_line(
                                        "{0:6d}: {1}".format(
                                            idx,
                                            res_string.encode(
                                                "unicode-escape", "backslashreplace"
                                            ).decode("ascii"),
                                        ),
                                        12,
                                    )
                dump.add_newline()
            dump.add_newline()

        if (
            hasattr(self, "DIRECTORY_ENTRY_TLS")
            and self.DIRECTORY_ENTRY_TLS
            and self.DIRECTORY_ENTRY_TLS.struct
        ):
            dump.add_header("TLS")
            dump.add_lines(self.DIRECTORY_ENTRY_TLS.struct.dump())
            dump.add_newline()

        if (
            hasattr(self, "DIRECTORY_ENTRY_LOAD_CONFIG")
            and self.DIRECTORY_ENTRY_LOAD_CONFIG
            and self.DIRECTORY_ENTRY_LOAD_CONFIG.struct
        ):
            dump.add_header("LOAD_CONFIG")
            dump.add_lines(self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.dump())
            dump.add_newline()

        if hasattr(self, "DIRECTORY_ENTRY_DEBUG"):
            dump.add_header("Debug information")
            for dbg in self.DIRECTORY_ENTRY_DEBUG:
                dump.add_lines(dbg.struct.dump())
                try:
                    dump.add_line("Type: " + DEBUG_TYPE[dbg.struct.Type])
                except KeyError:
                    dump.add_line("Type: 0x{0:x}(Unknown)".format(dbg.struct.Type))
                dump.add_newline()
                if dbg.entry:
                    dump.add_lines(dbg.entry.dump(), 4)
                    dump.add_newline()

        if self.has_relocs():
            dump.add_header("Base relocations")
            for base_reloc in self.DIRECTORY_ENTRY_BASERELOC:
                dump.add_lines(base_reloc.struct.dump())
                for reloc in base_reloc.entries:
                    try:
                        dump.add_line(
                            "%08Xh %s" % (reloc.rva, RELOCATION_TYPE[reloc.type][16:]),
                            4,
                        )
                    except KeyError:
                        dump.add_line(
                            "0x%08X 0x%x(Unknown)" % (reloc.rva, reloc.type), 4
                        )
                dump.add_newline()

        if (
            hasattr(self, "DIRECTORY_ENTRY_EXCEPTION")
            and len(self.DIRECTORY_ENTRY_EXCEPTION) > 0
        ):
            dump.add_header("Unwind data for exception handling")
            for rf in self.DIRECTORY_ENTRY_EXCEPTION:
                dump.add_lines(rf.struct.dump())
                if hasattr(rf, "unwindinfo") and rf.unwindinfo is not None:
                    dump.add_lines(rf.unwindinfo.dump(), 4)

        return dump.get_text()
    def dump_dict(self):
        """Dump all the PE header information into a dictionary.

        The layout mirrors dump_info(): one key per header/directory, with
        lists of per-entry dictionaries for repeated structures.
        """
        dump_dict = {}

        warnings = self.get_warnings()
        if warnings:
            dump_dict["Parsing Warnings"] = warnings

        # Fixed headers.
        dump_dict["DOS_HEADER"] = self.DOS_HEADER.dump_dict()
        dump_dict["NT_HEADERS"] = self.NT_HEADERS.dump_dict()
        dump_dict["FILE_HEADER"] = self.FILE_HEADER.dump_dict()

        image_flags = retrieve_flags(IMAGE_CHARACTERISTICS, "IMAGE_FILE_")

        dump_dict["Flags"] = []
        for flag in image_flags:
            if getattr(self.FILE_HEADER, flag[0]):
                dump_dict["Flags"].append(flag[0])

        if hasattr(self, "OPTIONAL_HEADER") and self.OPTIONAL_HEADER is not None:
            dump_dict["OPTIONAL_HEADER"] = self.OPTIONAL_HEADER.dump_dict()

        dll_characteristics_flags = retrieve_flags(
            DLL_CHARACTERISTICS, "IMAGE_DLLCHARACTERISTICS_"
        )

        dump_dict["DllCharacteristics"] = []
        for flag in dll_characteristics_flags:
            if getattr(self.OPTIONAL_HEADER, flag[0]):
                dump_dict["DllCharacteristics"].append(flag[0])

        dump_dict["PE Sections"] = []

        section_flags = retrieve_flags(SECTION_CHARACTERISTICS, "IMAGE_SCN_")
        for section in self.sections:
            section_dict = section.dump_dict()
            dump_dict["PE Sections"].append(section_dict)
            section_dict["Flags"] = []
            for flag in section_flags:
                if getattr(section, flag[0]):
                    section_dict["Flags"].append(flag[0])

            section_dict["Entropy"] = section.get_entropy()
            # md5/sha* are module globals -- presumably None when the given
            # hash algorithm is unavailable (set elsewhere in the module).
            if md5 is not None:
                section_dict["MD5"] = section.get_hash_md5()
            if sha1 is not None:
                section_dict["SHA1"] = section.get_hash_sha1()
            if sha256 is not None:
                section_dict["SHA256"] = section.get_hash_sha256()
            if sha512 is not None:
                section_dict["SHA512"] = section.get_hash_sha512()

        if hasattr(self, "OPTIONAL_HEADER") and hasattr(
            self.OPTIONAL_HEADER, "DATA_DIRECTORY"
        ):
            dump_dict["Directories"] = []
            for idx, directory in enumerate(self.OPTIONAL_HEADER.DATA_DIRECTORY):
                if directory is not None:
                    dump_dict["Directories"].append(directory.dump_dict())

        if hasattr(self, "VS_VERSIONINFO"):
            dump_dict["Version Information"] = []
            for idx, vs_vinfo in enumerate(self.VS_VERSIONINFO):
                version_info_list = []
                version_info_list.append(vs_vinfo.dump_dict())
                if hasattr(self, "VS_FIXEDFILEINFO"):
                    version_info_list.append(self.VS_FIXEDFILEINFO[idx].dump_dict())

                if hasattr(self, "FileInfo") and len(self.FileInfo) > idx:
                    fileinfo_list = []
                    version_info_list.append(fileinfo_list)
                    for entry in self.FileInfo[idx]:
                        fileinfo_list.append(entry.dump_dict())

                        if hasattr(entry, "StringTable"):
                            stringtable_dict = {}
                            for st_entry in entry.StringTable:
                                fileinfo_list.extend(st_entry.dump_dict())
                                stringtable_dict["LangID"] = st_entry.LangID
                                for str_entry in list(st_entry.entries.items()):
                                    stringtable_dict[str_entry[0]] = str_entry[1]
                            fileinfo_list.append(stringtable_dict)

                        elif hasattr(entry, "Var"):
                            for var_entry in entry.Var:
                                var_dict = {}
                                if hasattr(var_entry, "entry"):
                                    fileinfo_list.extend(var_entry.dump_dict())
                                    var_dict[list(var_entry.entry.keys())[0]] = list(
                                        var_entry.entry.values()
                                    )[0]
                                    fileinfo_list.append(var_dict)

                dump_dict["Version Information"].append(version_info_list)

        if hasattr(self, "DIRECTORY_ENTRY_EXPORT"):
            dump_dict["Exported symbols"] = []
            dump_dict["Exported symbols"].append(
                self.DIRECTORY_ENTRY_EXPORT.struct.dump_dict()
            )
            for export in self.DIRECTORY_ENTRY_EXPORT.symbols:
                export_dict = {}
                if export.address is not None:
                    export_dict.update(
                        {
                            "Ordinal": export.ordinal,
                            "RVA": export.address,
                            "Name": export.name,
                        }
                    )
                    if export.forwarder:
                        export_dict["forwarder"] = export.forwarder
                dump_dict["Exported symbols"].append(export_dict)

        if hasattr(self, "DIRECTORY_ENTRY_IMPORT"):
            dump_dict["Imported symbols"] = []
            for module in self.DIRECTORY_ENTRY_IMPORT:
                import_list = []
                dump_dict["Imported symbols"].append(import_list)
                import_list.append(module.struct.dump_dict())
                for symbol in module.imports:
                    symbol_dict = {}
                    if symbol.import_by_ordinal is True:
                        symbol_dict["DLL"] = module.dll
                        symbol_dict["Ordinal"] = symbol.ordinal
                    else:
                        symbol_dict["DLL"] = module.dll
                        symbol_dict["Name"] = symbol.name
                        symbol_dict["Hint"] = symbol.hint
                    if symbol.bound:
                        symbol_dict["Bound"] = symbol.bound
                    import_list.append(symbol_dict)

        if hasattr(self, "DIRECTORY_ENTRY_BOUND_IMPORT"):
            dump_dict["Bound imports"] = []
            for bound_imp_desc in self.DIRECTORY_ENTRY_BOUND_IMPORT:
                bound_imp_desc_dict = {}
                dump_dict["Bound imports"].append(bound_imp_desc_dict)
                bound_imp_desc_dict.update(bound_imp_desc.struct.dump_dict())
                bound_imp_desc_dict["DLL"] = bound_imp_desc.name
                # NOTE(review): each bound_imp_ref_dict is built but never
                # attached to the output, so forwarder-reference entries are
                # silently dropped -- confirm whether this is intentional.
                for bound_imp_ref in bound_imp_desc.entries:
                    bound_imp_ref_dict = {}
                    bound_imp_ref_dict.update(bound_imp_ref.struct.dump_dict())
                    bound_imp_ref_dict["DLL"] = bound_imp_ref.name

        if hasattr(self, "DIRECTORY_ENTRY_DELAY_IMPORT"):
            dump_dict["Delay Imported symbols"] = []
            for module in self.DIRECTORY_ENTRY_DELAY_IMPORT:
                module_list = []
                dump_dict["Delay Imported symbols"].append(module_list)
                module_list.append(module.struct.dump_dict())
                for symbol in module.imports:
                    symbol_dict = {}
                    if symbol.import_by_ordinal is True:
                        symbol_dict["DLL"] = module.dll
                        symbol_dict["Ordinal"] = symbol.ordinal
                    else:
                        symbol_dict["DLL"] = module.dll
                        symbol_dict["Name"] = symbol.name
                        symbol_dict["Hint"] = symbol.hint
                    if symbol.bound:
                        symbol_dict["Bound"] = symbol.bound
                    module_list.append(symbol_dict)

        if hasattr(self, "DIRECTORY_ENTRY_RESOURCE"):
            dump_dict["Resource directory"] = []
            dump_dict["Resource directory"].append(
                self.DIRECTORY_ENTRY_RESOURCE.struct.dump_dict()
            )
            for res_type in self.DIRECTORY_ENTRY_RESOURCE.entries:
                resource_type_dict = {}

                # First level: resource type (named or numeric id).
                if res_type.name is not None:
                    resource_type_dict["Name"] = res_type.name
                else:
                    resource_type_dict["Id"] = (
                        res_type.struct.Id,
                        RESOURCE_TYPE.get(res_type.struct.Id, "-"),
                    )

                resource_type_dict.update(res_type.struct.dump_dict())
                dump_dict["Resource directory"].append(resource_type_dict)

                if hasattr(res_type, "directory"):
                    directory_list = []
                    directory_list.append(res_type.directory.struct.dump_dict())
                    dump_dict["Resource directory"].append(directory_list)

                    for resource_id in res_type.directory.entries:
                        resource_id_dict = {}

                        # Second level: individual resource id.
                        if resource_id.name is not None:
                            resource_id_dict["Name"] = resource_id.name
                        else:
                            resource_id_dict["Id"] = resource_id.struct.Id

                        resource_id_dict.update(resource_id.struct.dump_dict())
                        directory_list.append(resource_id_dict)

                        if hasattr(resource_id, "directory"):
                            resource_id_list = []
                            resource_id_list.append(
                                resource_id.directory.struct.dump_dict()
                            )
                            directory_list.append(resource_id_list)

                            # Third level: language-specific data entries.
                            for resource_lang in resource_id.directory.entries:
                                if hasattr(resource_lang, "data"):
                                    resource_lang_dict = {}
                                    resource_lang_dict["LANG"] = resource_lang.data.lang
                                    resource_lang_dict[
                                        "SUBLANG"
                                    ] = resource_lang.data.sublang
                                    resource_lang_dict["LANG_NAME"] = LANG.get(
                                        resource_lang.data.lang, "*unknown*"
                                    )
                                    resource_lang_dict[
                                        "SUBLANG_NAME"
                                    ] = get_sublang_name_for_lang(
                                        resource_lang.data.lang,
                                        resource_lang.data.sublang,
                                    )
                                    resource_lang_dict.update(
                                        resource_lang.struct.dump_dict()
                                    )
                                    resource_lang_dict.update(
                                        resource_lang.data.struct.dump_dict()
                                    )
                                    resource_id_list.append(resource_lang_dict)
                            if (
                                hasattr(resource_id.directory, "strings")
                                and resource_id.directory.strings
                            ):
                                for idx, res_string in list(
                                    resource_id.directory.strings.items()
                                ):
                                    resource_id_list.append(
                                        res_string.encode(
                                            "unicode-escape", "backslashreplace"
                                        ).decode("ascii")
                                    )

        if (
            hasattr(self, "DIRECTORY_ENTRY_TLS")
            and self.DIRECTORY_ENTRY_TLS
            and self.DIRECTORY_ENTRY_TLS.struct
        ):
            dump_dict["TLS"] = self.DIRECTORY_ENTRY_TLS.struct.dump_dict()

        if (
            hasattr(self, "DIRECTORY_ENTRY_LOAD_CONFIG")
            and self.DIRECTORY_ENTRY_LOAD_CONFIG
            and self.DIRECTORY_ENTRY_LOAD_CONFIG.struct
        ):
            dump_dict[
                "LOAD_CONFIG"
            ] = self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.dump_dict()

        if hasattr(self, "DIRECTORY_ENTRY_DEBUG"):
            dump_dict["Debug information"] = []
            for dbg in self.DIRECTORY_ENTRY_DEBUG:
                dbg_dict = {}
                dump_dict["Debug information"].append(dbg_dict)
                dbg_dict.update(dbg.struct.dump_dict())
                dbg_dict["Type"] = DEBUG_TYPE.get(dbg.struct.Type, dbg.struct.Type)

        if self.has_relocs():
            dump_dict["Base relocations"] = []
            for base_reloc in self.DIRECTORY_ENTRY_BASERELOC:
                base_reloc_list = []
                dump_dict["Base relocations"].append(base_reloc_list)
                base_reloc_list.append(base_reloc.struct.dump_dict())
                for reloc in base_reloc.entries:
                    reloc_dict = {}
                    base_reloc_list.append(reloc_dict)
                    reloc_dict["RVA"] = reloc.rva
                    try:
                        reloc_dict["Type"] = RELOCATION_TYPE[reloc.type][16:]
                    except KeyError:
                        reloc_dict["Type"] = reloc.type

        return dump_dict
- # OC Patch
- def get_physical_by_rva(self, rva):
- """Gets the physical address in the PE file from an RVA value."""
- try:
- return self.get_offset_from_rva(rva)
- except Exception:
- return None
- ##
- # Double-Word get / set
- ##
- def get_data_from_dword(self, dword):
- """Return a four byte string representing the double word value (little endian)."""
- return struct.pack("<L", dword & 0xFFFFFFFF)
- def get_dword_from_data(self, data, offset):
- """Convert four bytes of data to a double word (little endian)
- 'offset' is assumed to index into a dword array. So setting it to
- N will return a dword out of the data starting at offset N*4.
- Returns None if the data can't be turned into a double word.
- """
- if (offset + 1) * 4 > len(data):
- return None
- return struct.unpack("<I", data[offset * 4 : (offset + 1) * 4])[0]
- def get_dword_at_rva(self, rva):
- """Return the double word value at the given RVA.
- Returns None if the value can't be read, i.e. the RVA can't be mapped
- to a file offset.
- """
- try:
- return self.get_dword_from_data(self.get_data(rva, 4), 0)
- except PEFormatError:
- return None
- def get_dword_from_offset(self, offset):
- """Return the double word value at the given file offset. (little endian)"""
- if offset + 4 > len(self.__data__):
- return None
- return self.get_dword_from_data(self.__data__[offset : offset + 4], 0)
- def set_dword_at_rva(self, rva, dword):
- """Set the double word value at the file offset corresponding to the given RVA."""
- return self.set_bytes_at_rva(rva, self.get_data_from_dword(dword))
- def set_dword_at_offset(self, offset, dword):
- """Set the double word value at the given file offset."""
- return self.set_bytes_at_offset(offset, self.get_data_from_dword(dword))
- ##
- # Word get / set
- ##
- def get_data_from_word(self, word):
- """Return a two byte string representing the word value. (little endian)."""
- return struct.pack("<H", word)
- def get_word_from_data(self, data, offset):
- """Convert two bytes of data to a word (little endian)
- 'offset' is assumed to index into a word array. So setting it to
- N will return a dword out of the data starting at offset N*2.
- Returns None if the data can't be turned into a word.
- """
- if (offset + 1) * 2 > len(data):
- return None
- return struct.unpack("<H", data[offset * 2 : (offset + 1) * 2])[0]
- def get_word_at_rva(self, rva):
- """Return the word value at the given RVA.
- Returns None if the value can't be read, i.e. the RVA can't be mapped
- to a file offset.
- """
- try:
- return self.get_word_from_data(self.get_data(rva)[:2], 0)
- except PEFormatError:
- return None
- def get_word_from_offset(self, offset):
- """Return the word value at the given file offset. (little endian)"""
- if offset + 2 > len(self.__data__):
- return None
- return self.get_word_from_data(self.__data__[offset : offset + 2], 0)
- def set_word_at_rva(self, rva, word):
- """Set the word value at the file offset corresponding to the given RVA."""
- return self.set_bytes_at_rva(rva, self.get_data_from_word(word))
- def set_word_at_offset(self, offset, word):
- """Set the word value at the given file offset."""
- return self.set_bytes_at_offset(offset, self.get_data_from_word(word))
- ##
- # Quad-Word get / set
- ##
- def get_data_from_qword(self, word):
- """Return an eight byte string representing the quad-word value (little endian)."""
- return struct.pack("<Q", word)
- def get_qword_from_data(self, data, offset):
- """Convert eight bytes of data to a word (little endian)
- 'offset' is assumed to index into a word array. So setting it to
- N will return a dword out of the data starting at offset N*8.
- Returns None if the data can't be turned into a quad word.
- """
- if (offset + 1) * 8 > len(data):
- return None
- return struct.unpack("<Q", data[offset * 8 : (offset + 1) * 8])[0]
- def get_qword_at_rva(self, rva):
- """Return the quad-word value at the given RVA.
- Returns None if the value can't be read, i.e. the RVA can't be mapped
- to a file offset.
- """
- try:
- return self.get_qword_from_data(self.get_data(rva)[:8], 0)
- except PEFormatError:
- return None
- def get_qword_from_offset(self, offset):
- """Return the quad-word value at the given file offset. (little endian)"""
- if offset + 8 > len(self.__data__):
- return None
- return self.get_qword_from_data(self.__data__[offset : offset + 8], 0)
- def set_qword_at_rva(self, rva, qword):
- """Set the quad-word value at the file offset corresponding to the given RVA."""
- return self.set_bytes_at_rva(rva, self.get_data_from_qword(qword))
- def set_qword_at_offset(self, offset, qword):
- """Set the quad-word value at the given file offset."""
- return self.set_bytes_at_offset(offset, self.get_data_from_qword(qword))
- ##
- # Set bytes
- ##
- def set_bytes_at_rva(self, rva, data):
- """Overwrite, with the given string, the bytes at the file offset corresponding
- to the given RVA.
- Return True if successful, False otherwise. It can fail if the
- offset is outside the file's boundaries.
- """
- if not isinstance(data, bytes):
- raise TypeError("data should be of type: bytes")
- offset = self.get_physical_by_rva(rva)
- if not offset:
- return False
- return self.set_bytes_at_offset(offset, data)
- def set_bytes_at_offset(self, offset, data):
- """Overwrite the bytes at the given file offset with the given string.
- Return True if successful, False otherwise. It can fail if the
- offset is outside the file's boundaries.
- """
- if not isinstance(data, bytes):
- raise TypeError("data should be of type: bytes")
- if 0 <= offset < len(self.__data__):
- self.__data__ = (
- self.__data__[:offset] + data + self.__data__[offset + len(data) :]
- )
- else:
- return False
- return True
- def merge_modified_section_data(self):
- """Update the PE image content with any individual section data that has been
- modified.
- """
- for section in self.sections:
- section_data_start = self.adjust_FileAlignment(
- section.PointerToRawData, self.OPTIONAL_HEADER.FileAlignment
- )
- section_data_end = section_data_start + section.SizeOfRawData
- if section_data_start < len(self.__data__) and section_data_end < len(
- self.__data__
- ):
- self.__data__ = (
- self.__data__[:section_data_start]
- + section.get_data()
- + self.__data__[section_data_end:]
- )
- def relocate_image(self, new_ImageBase):
- """Apply the relocation information to the image using the provided image base.
- This method will apply the relocation information to the image. Given the new
- base, all the relocations will be processed and both the raw data and the
- section's data will be fixed accordingly.
- The resulting image can be retrieved as well through the method:
- get_memory_mapped_image()
- In order to get something that would more closely match what could be found in
- memory once the Windows loader finished its work.
- """
- relocation_difference = new_ImageBase - self.OPTIONAL_HEADER.ImageBase
- if (
- len(self.OPTIONAL_HEADER.DATA_DIRECTORY) >= 6
- and self.OPTIONAL_HEADER.DATA_DIRECTORY[5].Size
- ):
- if not hasattr(self, "DIRECTORY_ENTRY_BASERELOC"):
- self.parse_data_directories(
- directories=[DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_BASERELOC"]]
- )
- if not hasattr(self, "DIRECTORY_ENTRY_BASERELOC"):
- self.__warnings.append(
- "Relocating image but PE does not have (or pefile cannot "
- "parse) a DIRECTORY_ENTRY_BASERELOC"
- )
- else:
- for reloc in self.DIRECTORY_ENTRY_BASERELOC:
- # We iterate with an index because if the relocation is of type
- # IMAGE_REL_BASED_HIGHADJ we need to also process the next entry
- # at once and skip it for the next iteration
- #
- entry_idx = 0
- while entry_idx < len(reloc.entries):
- entry = reloc.entries[entry_idx]
- entry_idx += 1
- if entry.type == RELOCATION_TYPE["IMAGE_REL_BASED_ABSOLUTE"]:
- # Nothing to do for this type of relocation
- pass
- elif entry.type == RELOCATION_TYPE["IMAGE_REL_BASED_HIGH"]:
- # Fix the high 16-bits of a relocation
- #
- # Add high 16-bits of relocation_difference to the
- # 16-bit value at RVA=entry.rva
- self.set_word_at_rva(
- entry.rva,
- (
- self.get_word_at_rva(entry.rva)
- + relocation_difference
- >> 16
- )
- & 0xFFFF,
- )
- elif entry.type == RELOCATION_TYPE["IMAGE_REL_BASED_LOW"]:
- # Fix the low 16-bits of a relocation
- #
- # Add low 16 bits of relocation_difference to the 16-bit
- # value at RVA=entry.rva
- self.set_word_at_rva(
- entry.rva,
- (
- self.get_word_at_rva(entry.rva)
- + relocation_difference
- )
- & 0xFFFF,
- )
- elif entry.type == RELOCATION_TYPE["IMAGE_REL_BASED_HIGHLOW"]:
- # Handle all high and low parts of a 32-bit relocation
- #
- # Add relocation_difference to the value at RVA=entry.rva
- self.set_dword_at_rva(
- entry.rva,
- self.get_dword_at_rva(entry.rva)
- + relocation_difference,
- )
- elif entry.type == RELOCATION_TYPE["IMAGE_REL_BASED_HIGHADJ"]:
- # Fix the high 16-bits of a relocation and adjust
- #
- # Add high 16-bits of relocation_difference to the 32-bit
- # value composed from the (16-bit value at
- # RVA=entry.rva)<<16 plus the 16-bit value at the next
- # relocation entry.
- # If the next entry is beyond the array's limits,
- # abort... the table is corrupt
- if entry_idx == len(reloc.entries):
- break
- next_entry = reloc.entries[entry_idx]
- entry_idx += 1
- self.set_word_at_rva(
- entry.rva,
- (
- (self.get_word_at_rva(entry.rva) << 16)
- + next_entry.rva
- + relocation_difference
- & 0xFFFF0000
- )
- >> 16,
- )
- elif entry.type == RELOCATION_TYPE["IMAGE_REL_BASED_DIR64"]:
- # Apply the difference to the 64-bit value at the offset
- # RVA=entry.rva
- self.set_qword_at_rva(
- entry.rva,
- self.get_qword_at_rva(entry.rva)
- + relocation_difference,
- )
- self.OPTIONAL_HEADER.ImageBase = new_ImageBase
- # correct VAs(virtual addresses) occurrences in directory information
- if hasattr(self, "DIRECTORY_ENTRY_IMPORT"):
- for dll in self.DIRECTORY_ENTRY_IMPORT:
- for func in dll.imports:
- func.address += relocation_difference
- if hasattr(self, "DIRECTORY_ENTRY_TLS"):
- self.DIRECTORY_ENTRY_TLS.struct.StartAddressOfRawData += (
- relocation_difference
- )
- self.DIRECTORY_ENTRY_TLS.struct.EndAddressOfRawData += (
- relocation_difference
- )
- self.DIRECTORY_ENTRY_TLS.struct.AddressOfIndex += relocation_difference
- self.DIRECTORY_ENTRY_TLS.struct.AddressOfCallBacks += (
- relocation_difference
- )
- if hasattr(self, "DIRECTORY_ENTRY_LOAD_CONFIG"):
- if self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.LockPrefixTable:
- self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.LockPrefixTable += (
- relocation_difference
- )
- if self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.EditList:
- self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.EditList += (
- relocation_difference
- )
- if self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.SecurityCookie:
- self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.SecurityCookie += (
- relocation_difference
- )
- if self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.SEHandlerTable:
- self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.SEHandlerTable += (
- relocation_difference
- )
- if self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.GuardCFCheckFunctionPointer:
- self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.GuardCFCheckFunctionPointer += (
- relocation_difference
- )
- if self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.GuardCFFunctionTable:
- self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.GuardCFFunctionTable += (
- relocation_difference
- )
def verify_checksum(self):
    """Return True when the CheckSum field in the Optional Header matches
    the checksum computed from the current image data."""
    computed = self.generate_checksum()
    return computed == self.OPTIONAL_HEADER.CheckSum
def generate_checksum(self):
    """Compute the PE header checksum of the image.

    Re-serializes the PE first (header fields assigned through
    attributes are not written back automatically), then applies the
    standard Windows algorithm: sum every little-endian dword of the
    file while skipping the CheckSum field itself, fold the carries
    down to 16 bits, and add the file length.

    Returns:
        int: the value that belongs in OPTIONAL_HEADER.CheckSum.
    """
    # Make sure the raw data reflects any header-field assignments.
    self.__data__ = self.write()

    # Offset of the CheckSum field in the Optional Header
    # (the offset is the same in PE32 and PE32+).
    checksum_offset = self.OPTIONAL_HEADER.get_file_offset() + 0x40  # 64

    checksum = 0
    # The algorithm works on dwords; the last dword may need
    # zero-padding if the data length is not a multiple of 4.
    remainder = len(self.__data__) % 4
    data_len = len(self.__data__) + ((4 - remainder) * (remainder != 0))

    for i in range(int(data_len / 4)):
        # Skip the dword holding the checksum field itself.
        if i == int(checksum_offset / 4):
            continue
        if i + 1 == (int(data_len / 4)) and remainder:
            # Last, incomplete dword: pad with zero bytes.
            # "<I": the PE checksum is defined over little-endian
            # dwords; the previous native-order "I" format produced
            # wrong checksums on big-endian hosts.
            dword = struct.unpack(
                "<I", self.__data__[i * 4 :] + (b"\0" * (4 - remainder))
            )[0]
        else:
            dword = struct.unpack("<I", self.__data__[i * 4 : i * 4 + 4])[0]
        # Optimized the calculation (thanks to Emmanuel Bourg for pointing it out!)
        checksum += dword
        if checksum >= 2 ** 32:
            checksum = (checksum & 0xFFFFFFFF) + (checksum >> 32)

    # Fold the running sum into 16 bits.
    checksum = (checksum & 0xFFFF) + (checksum >> 16)
    checksum = (checksum) + (checksum >> 16)
    checksum = checksum & 0xFFFF

    # The length added is the one of the original data, not the padded one.
    return checksum + len(self.__data__)
def is_exe(self):
    """Check whether the file is a standard executable.

    This returns True only when the IMAGE_FILE_EXECUTABLE_IMAGE flag is
    set in the file characteristics, the IMAGE_FILE_DLL flag is not set,
    and the file does not appear to be a driver either.
    """
    exe_flag = IMAGE_CHARACTERISTICS["IMAGE_FILE_EXECUTABLE_IMAGE"]
    # DLLs and drivers are excluded outright.
    if self.is_dll() or self.is_driver():
        return False
    return (self.FILE_HEADER.Characteristics & exe_flag) == exe_flag
def is_dll(self):
    """Check whether the file is a standard DLL.

    This returns True only when the image has the IMAGE_FILE_DLL flag set.
    """
    dll_flag = IMAGE_CHARACTERISTICS["IMAGE_FILE_DLL"]
    return (self.FILE_HEADER.Characteristics & dll_flag) == dll_flag
def is_driver(self):
    """Check whether the file is a Windows driver.

    This returns True only when reliable indicators are present: the
    image imports from core kernel components, or it has driver-like
    section names combined with a native subsystem.
    """
    # Checking ImageBase >= 0x80000000 (the upper 2GB normally belonging
    # to the kernel) is not reliable enough: PEs playing the invalid
    # ImageBase trick to get relocated would be misclassified as drivers.
    # Checking for sections with IMAGE_SCN_MEM_NOT_PAGED set is not
    # reliable either.

    # If the import directory was not parsed (fast_load = True), do it now.
    if not hasattr(self, "DIRECTORY_ENTRY_IMPORT"):
        self.parse_data_directories(
            directories=[DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_IMPORT"]]
        )
    # Still no import directory (the PE has none or it's malformed): give up.
    if not hasattr(self, "DIRECTORY_ENTRY_IMPORT"):
        return False

    # DIRECTORY_ENTRY_IMPORT now exists, although it may be empty.
    # Importing from ntoskrnl.exe or other kernel components marks a driver.
    kernel_dlls = {
        b"ntoskrnl.exe",
        b"hal.dll",
        b"ndis.sys",
        b"bootvid.dll",
        b"kdcom.dll",
    }
    imported = [entry.dll.lower() for entry in self.DIRECTORY_ENTRY_IMPORT]
    if kernel_dlls.intersection(imported):
        return True

    # Driver-like section names are only meaningful together with a
    # native subsystem (subsystem checked second to preserve the
    # original short-circuit behavior).
    driver_like_names = {b"page", b"paged"}
    section_names = [
        section.Name.lower().rstrip(b"\x00") for section in self.sections
    ]
    if driver_like_names.intersection(section_names):
        if self.OPTIONAL_HEADER.Subsystem in (
            SUBSYSTEM_TYPE["IMAGE_SUBSYSTEM_NATIVE"],
            SUBSYSTEM_TYPE["IMAGE_SUBSYSTEM_NATIVE_WINDOWS"],
        ):
            return True
    return False
def get_overlay_data_start_offset(self):
    """Get the offset of data appended to the file and not contained
    within the area described in the headers.

    Returns None when no such overlay data exists.
    """
    file_size = len(self.__data__)
    # Candidate (offset, size) pairs; the overlay begins after the pair
    # whose end lies furthest into the file without exceeding it.
    candidates = [(0, 0)]

    if hasattr(self, "OPTIONAL_HEADER"):
        candidates.append(
            (
                self.OPTIONAL_HEADER.get_file_offset(),
                self.FILE_HEADER.SizeOfOptionalHeader,
            )
        )

    for section in self.sections:
        candidates.append((section.PointerToRawData, section.SizeOfRawData))

    # The security directory is deliberately excluded from the scan.
    skip_directories = [DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_SECURITY"]]
    for idx, directory in enumerate(self.OPTIONAL_HEADER.DATA_DIRECTORY):
        if idx in skip_directories:
            continue
        try:
            candidates.append(
                (
                    self.get_offset_from_rva(directory.VirtualAddress),
                    directory.Size,
                )
            )
        except PEFormatError:
            # Ignore directories whose RVA falls outside the file.
            continue

    data_end = max(
        offset + size for offset, size in candidates if offset + size <= file_size
    )
    if file_size > data_end:
        return data_end
    return None
def get_overlay(self):
    """Return the data appended past the area described by the headers,
    or None when there is no overlay."""
    start = self.get_overlay_data_start_offset()
    if start is None:
        return None
    return self.__data__[start:]
def trim(self):
    """Return only the data defined by the PE headers, with any
    overlaid data removed."""
    overlay_start = self.get_overlay_data_start_offset()
    if overlay_start is None:
        return self.__data__[:]
    return self.__data__[:overlay_start]
- # According to http://corkami.blogspot.com/2010/01/parce-que-la-planche-aura-brule.html
# if PointerToRawData is less than 0x200 it's rounded to zero. Loading the test file
- # in a debugger it's easy to verify that the PointerToRawData value of 1 is rounded
- # to zero. Hence we reproduce the behavior
- #
- # According to the document:
- # [ Microsoft Portable Executable and Common Object File Format Specification ]
- # "The alignment factor (in bytes) that is used to align the raw data of sections in
- # the image file. The value should be a power of 2 between 512 and 64 K, inclusive.
- # The default is 512. If the SectionAlignment is less than the architecture's page
- # size, then FileAlignment must match SectionAlignment."
- #
# The following is a hard-coded constant of the Windows loader
def adjust_FileAlignment(self, val, file_alignment):
    """Adjust an offset according to the FileAlignment header value.

    FileAlignment values above 0x200 should be a power of two; a
    one-time warning is recorded when they are not. The actual
    adjustment is delegated to the cached module helper.
    """
    should_warn = (
        file_alignment > FILE_ALIGNMENT_HARDCODED_VALUE
        and self.FileAlignment_Warning is False
        and not power_of_two(file_alignment)
    )
    if should_warn:
        self.__warnings.append(
            "If FileAlignment > 0x200 it should be a power of 2. Value: %x"
            % (file_alignment)
        )
        self.FileAlignment_Warning = True
    return cache_adjust_FileAlignment(val, file_alignment)
- # According to the document:
- # [ Microsoft Portable Executable and Common Object File Format Specification ]
- # "The alignment (in bytes) of sections when they are loaded into memory. It must be
- # greater than or equal to FileAlignment. The default is the page size for the
- # architecture."
- #
def adjust_SectionAlignment(self, val, section_alignment, file_alignment):
    """Adjust an address according to the SectionAlignment header value.

    When FileAlignment is below 0x200 it should equal SectionAlignment;
    a one-time warning is recorded otherwise. The actual adjustment is
    delegated to the cached module helper.
    """
    should_warn = (
        file_alignment < FILE_ALIGNMENT_HARDCODED_VALUE
        and file_alignment != section_alignment
        and self.SectionAlignment_Warning is False
    )
    if should_warn:
        self.__warnings.append(
            "If FileAlignment(%x) < 0x200 it should equal SectionAlignment(%x)"
            % (file_alignment, section_alignment)
        )
        self.SectionAlignment_Warning = True
    return cache_adjust_SectionAlignment(val, section_alignment, file_alignment)
def main():
    """Command-line entry point: dump PE info or list exports."""
    import sys

    usage = """\
pefile.py <filename>
pefile.py exports <filename>"""

    args = sys.argv[1:]
    if not args:
        print(usage)
    elif args[0] == "exports":
        if len(args) < 2:
            sys.exit("error: <filename> required")
        pe = PE(args[1])
        for exported in pe.DIRECTORY_ENTRY_EXPORT.symbols:
            print(
                hex(pe.OPTIONAL_HEADER.ImageBase + exported.address),
                exported.name,
                exported.ordinal,
            )
    else:
        print(PE(args[0]).dump_info())
- if __name__ == "__main__":
- main()
|