#!/usr/bin/python
# -*- coding: utf-8 -*-
"""pefile, Portable Executable reader module

All the PE file basic structures are available with their default names as
attributes of the instance returned.

Processed elements such as the import table are made available with lowercase
names, to differentiate them from the upper case basic structure names.

pefile has been tested against many edge cases such as corrupted and malformed
PEs as well as malware, which often attempts to abuse the format way beyond its
standard use. To the best of my knowledge most of the abuse is handled
gracefully.

Copyright (c) 2005-2021 Ero Carrera <ero.carrera@gmail.com>
"""
__author__ = "Ero Carrera"
__version__ = "2021.9.3"
__contact__ = "ero.carrera@gmail.com"

import collections
import os
import struct
import codecs
import time
import math
import string
import mmap

from collections import Counter
from hashlib import sha1
from hashlib import sha256
from hashlib import sha512
from hashlib import md5
import functools
import copy as copymod

import ordlookup

codecs.register_error("backslashreplace_", codecs.lookup_error("backslashreplace"))

long = int
# lru_cache that returns a shallow copy of the cached objects (lists, dicts, ...).
# We don't use deepcopy as it's _really_ slow and, for the data retrieved through
# this cache, copy.copy is enough. Taken from
# https://stackoverflow.com/questions/54909357
def lru_cache(maxsize=128, typed=False, copy=False):
    if not copy:
        return functools.lru_cache(maxsize, typed)

    def decorator(f):
        cached_func = functools.lru_cache(maxsize, typed)(f)

        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            # return copymod.deepcopy(cached_func(*args, **kwargs))
            return copymod.copy(cached_func(*args, **kwargs))

        return wrapper

    return decorator
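
# Illustrative sketch (not in the original source) of why the copy=True variant
# matters: with a plain functools.lru_cache every caller would receive the same
# cached list object, so mutating it would corrupt the cache for later callers.
#
#   >>> @lru_cache(maxsize=16, copy=True)
#   ... def first_letters(word):
#   ...     return [c for c in word[:3]]
#   >>> letters = first_letters("pefile")
#   >>> letters.append("!")          # mutates only the caller's shallow copy
#   >>> first_letters("pefile")
#   ['p', 'e', 'f']
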
@lru_cache(maxsize=2048)
def cache_adjust_FileAlignment(val, file_alignment):
    if file_alignment < FILE_ALIGNMENT_HARDCODED_VALUE:
        return val
    return (int(val / 0x200)) * 0x200


@lru_cache(maxsize=2048)
def cache_adjust_SectionAlignment(val, section_alignment, file_alignment):
    if section_alignment < 0x1000:  # page size
        section_alignment = file_alignment

    # 0x200 is the minimum valid FileAlignment according to the documentation
    # although ntoskrnl.exe has an alignment of 0x80 in some Windows versions
    #
    # elif section_alignment < 0x80:
    #     section_alignment = 0x80

    if section_alignment and val % section_alignment:
        return section_alignment * (int(val / section_alignment))
    return val
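
# Illustrative sketch (not in the original source) of how the adjustment
# helpers behave, using the hardcoded 0x200 file alignment and a standard
# 0x1000 section alignment:
#
#   >>> cache_adjust_FileAlignment(0x3F0, 0x200)                # rounded down to 0x200
#   512
#   >>> cache_adjust_SectionAlignment(0x1234, 0x1000, 0x200)    # rounded down to a page
#   4096
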
def count_zeroes(data):
    try:
        # newbytes' count() takes a str in Python 2
        count = data.count("\0")
    except TypeError:
        # bytes' count() takes an int in Python 3
        count = data.count(0)
    return count


fast_load = False

# This will set a maximum length of a string to be retrieved from the file.
# It's there to prevent loading massive amounts of data from memory mapped
# files. Strings longer than 1MB should be rather rare.
MAX_STRING_LENGTH = 0x100000  # 2^20

# Maximum number of imports to parse.
MAX_IMPORT_SYMBOLS = 0x2000

# Limit maximum length for specific string types separately
MAX_IMPORT_NAME_LENGTH = 0x200
MAX_DLL_LENGTH = 0x200
MAX_SYMBOL_NAME_LENGTH = 0x200
# Limit the maximum number of sections before section processing stops
MAX_SECTIONS = 0x800

# The global maximum number of resource entries to parse per file
MAX_RESOURCE_ENTRIES = 0x8000

# The maximum depth of nested resource tables
MAX_RESOURCE_DEPTH = 32

# Limit number of exported symbols
MAX_SYMBOL_EXPORT_COUNT = 0x2000

IMAGE_DOS_SIGNATURE = 0x5A4D
IMAGE_DOSZM_SIGNATURE = 0x4D5A
IMAGE_NE_SIGNATURE = 0x454E
IMAGE_LE_SIGNATURE = 0x454C
IMAGE_LX_SIGNATURE = 0x584C
IMAGE_TE_SIGNATURE = 0x5A56  # Terse Executables have a 'VZ' signature

IMAGE_NT_SIGNATURE = 0x00004550
IMAGE_NUMBEROF_DIRECTORY_ENTRIES = 16
IMAGE_ORDINAL_FLAG = 0x80000000
IMAGE_ORDINAL_FLAG64 = 0x8000000000000000
OPTIONAL_HEADER_MAGIC_PE = 0x10B
OPTIONAL_HEADER_MAGIC_PE_PLUS = 0x20B


def two_way_dict(pairs):
    return dict([(e[1], e[0]) for e in pairs] + pairs)
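
# Illustrative sketch (not in the original source): two_way_dict builds a single
# dictionary usable in both directions, mapping value -> name as well as
# name -> value. It is used for all the constant tables defined below, e.g.:
#
#   >>> two_way_dict([("A", 0), ("B", 1)])
#   {0: 'A', 1: 'B', 'A': 0, 'B': 1}
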
directory_entry_types = [
    ("IMAGE_DIRECTORY_ENTRY_EXPORT", 0),
    ("IMAGE_DIRECTORY_ENTRY_IMPORT", 1),
    ("IMAGE_DIRECTORY_ENTRY_RESOURCE", 2),
    ("IMAGE_DIRECTORY_ENTRY_EXCEPTION", 3),
    ("IMAGE_DIRECTORY_ENTRY_SECURITY", 4),
    ("IMAGE_DIRECTORY_ENTRY_BASERELOC", 5),
    ("IMAGE_DIRECTORY_ENTRY_DEBUG", 6),
    # Architecture on non-x86 platforms
    ("IMAGE_DIRECTORY_ENTRY_COPYRIGHT", 7),
    ("IMAGE_DIRECTORY_ENTRY_GLOBALPTR", 8),
    ("IMAGE_DIRECTORY_ENTRY_TLS", 9),
    ("IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG", 10),
    ("IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT", 11),
    ("IMAGE_DIRECTORY_ENTRY_IAT", 12),
    ("IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT", 13),
    ("IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR", 14),
    ("IMAGE_DIRECTORY_ENTRY_RESERVED", 15),
]

DIRECTORY_ENTRY = two_way_dict(directory_entry_types)

image_characteristics = [
    ("IMAGE_FILE_RELOCS_STRIPPED", 0x0001),
    ("IMAGE_FILE_EXECUTABLE_IMAGE", 0x0002),
    ("IMAGE_FILE_LINE_NUMS_STRIPPED", 0x0004),
    ("IMAGE_FILE_LOCAL_SYMS_STRIPPED", 0x0008),
    ("IMAGE_FILE_AGGRESIVE_WS_TRIM", 0x0010),
    ("IMAGE_FILE_LARGE_ADDRESS_AWARE", 0x0020),
    ("IMAGE_FILE_16BIT_MACHINE", 0x0040),
    ("IMAGE_FILE_BYTES_REVERSED_LO", 0x0080),
    ("IMAGE_FILE_32BIT_MACHINE", 0x0100),
    ("IMAGE_FILE_DEBUG_STRIPPED", 0x0200),
    ("IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP", 0x0400),
    ("IMAGE_FILE_NET_RUN_FROM_SWAP", 0x0800),
    ("IMAGE_FILE_SYSTEM", 0x1000),
    ("IMAGE_FILE_DLL", 0x2000),
    ("IMAGE_FILE_UP_SYSTEM_ONLY", 0x4000),
    ("IMAGE_FILE_BYTES_REVERSED_HI", 0x8000),
]

IMAGE_CHARACTERISTICS = two_way_dict(image_characteristics)

section_characteristics = [
    ("IMAGE_SCN_TYPE_REG", 0x00000000),  # reserved
    ("IMAGE_SCN_TYPE_DSECT", 0x00000001),  # reserved
    ("IMAGE_SCN_TYPE_NOLOAD", 0x00000002),  # reserved
    ("IMAGE_SCN_TYPE_GROUP", 0x00000004),  # reserved
    ("IMAGE_SCN_TYPE_NO_PAD", 0x00000008),  # reserved
    ("IMAGE_SCN_TYPE_COPY", 0x00000010),  # reserved
    ("IMAGE_SCN_CNT_CODE", 0x00000020),
    ("IMAGE_SCN_CNT_INITIALIZED_DATA", 0x00000040),
    ("IMAGE_SCN_CNT_UNINITIALIZED_DATA", 0x00000080),
    ("IMAGE_SCN_LNK_OTHER", 0x00000100),
    ("IMAGE_SCN_LNK_INFO", 0x00000200),
    ("IMAGE_SCN_LNK_OVER", 0x00000400),  # reserved
    ("IMAGE_SCN_LNK_REMOVE", 0x00000800),
    ("IMAGE_SCN_LNK_COMDAT", 0x00001000),
    ("IMAGE_SCN_MEM_PROTECTED", 0x00004000),  # obsolete
    ("IMAGE_SCN_NO_DEFER_SPEC_EXC", 0x00004000),
    ("IMAGE_SCN_GPREL", 0x00008000),
    ("IMAGE_SCN_MEM_FARDATA", 0x00008000),
    ("IMAGE_SCN_MEM_SYSHEAP", 0x00010000),  # obsolete
    ("IMAGE_SCN_MEM_PURGEABLE", 0x00020000),
    ("IMAGE_SCN_MEM_16BIT", 0x00020000),
    ("IMAGE_SCN_MEM_LOCKED", 0x00040000),
    ("IMAGE_SCN_MEM_PRELOAD", 0x00080000),
    ("IMAGE_SCN_ALIGN_1BYTES", 0x00100000),
    ("IMAGE_SCN_ALIGN_2BYTES", 0x00200000),
    ("IMAGE_SCN_ALIGN_4BYTES", 0x00300000),
    ("IMAGE_SCN_ALIGN_8BYTES", 0x00400000),
    ("IMAGE_SCN_ALIGN_16BYTES", 0x00500000),  # default alignment
    ("IMAGE_SCN_ALIGN_32BYTES", 0x00600000),
    ("IMAGE_SCN_ALIGN_64BYTES", 0x00700000),
    ("IMAGE_SCN_ALIGN_128BYTES", 0x00800000),
    ("IMAGE_SCN_ALIGN_256BYTES", 0x00900000),
    ("IMAGE_SCN_ALIGN_512BYTES", 0x00A00000),
    ("IMAGE_SCN_ALIGN_1024BYTES", 0x00B00000),
    ("IMAGE_SCN_ALIGN_2048BYTES", 0x00C00000),
    ("IMAGE_SCN_ALIGN_4096BYTES", 0x00D00000),
    ("IMAGE_SCN_ALIGN_8192BYTES", 0x00E00000),
    ("IMAGE_SCN_ALIGN_MASK", 0x00F00000),
    ("IMAGE_SCN_LNK_NRELOC_OVFL", 0x01000000),
    ("IMAGE_SCN_MEM_DISCARDABLE", 0x02000000),
    ("IMAGE_SCN_MEM_NOT_CACHED", 0x04000000),
    ("IMAGE_SCN_MEM_NOT_PAGED", 0x08000000),
    ("IMAGE_SCN_MEM_SHARED", 0x10000000),
    ("IMAGE_SCN_MEM_EXECUTE", 0x20000000),
    ("IMAGE_SCN_MEM_READ", 0x40000000),
    ("IMAGE_SCN_MEM_WRITE", 0x80000000),
]

SECTION_CHARACTERISTICS = two_way_dict(section_characteristics)

debug_types = [
    ("IMAGE_DEBUG_TYPE_UNKNOWN", 0),
    ("IMAGE_DEBUG_TYPE_COFF", 1),
    ("IMAGE_DEBUG_TYPE_CODEVIEW", 2),
    ("IMAGE_DEBUG_TYPE_FPO", 3),
    ("IMAGE_DEBUG_TYPE_MISC", 4),
    ("IMAGE_DEBUG_TYPE_EXCEPTION", 5),
    ("IMAGE_DEBUG_TYPE_FIXUP", 6),
    ("IMAGE_DEBUG_TYPE_OMAP_TO_SRC", 7),
    ("IMAGE_DEBUG_TYPE_OMAP_FROM_SRC", 8),
    ("IMAGE_DEBUG_TYPE_BORLAND", 9),
    ("IMAGE_DEBUG_TYPE_RESERVED10", 10),
    ("IMAGE_DEBUG_TYPE_CLSID", 11),
    ("IMAGE_DEBUG_TYPE_VC_FEATURE", 12),
    ("IMAGE_DEBUG_TYPE_POGO", 13),
    ("IMAGE_DEBUG_TYPE_ILTCG", 14),
    ("IMAGE_DEBUG_TYPE_MPX", 15),
    ("IMAGE_DEBUG_TYPE_REPRO", 16),
    ("IMAGE_DEBUG_TYPE_EX_DLLCHARACTERISTICS", 20),
]

DEBUG_TYPE = two_way_dict(debug_types)

subsystem_types = [
    ("IMAGE_SUBSYSTEM_UNKNOWN", 0),
    ("IMAGE_SUBSYSTEM_NATIVE", 1),
    ("IMAGE_SUBSYSTEM_WINDOWS_GUI", 2),
    ("IMAGE_SUBSYSTEM_WINDOWS_CUI", 3),
    ("IMAGE_SUBSYSTEM_OS2_CUI", 5),
    ("IMAGE_SUBSYSTEM_POSIX_CUI", 7),
    ("IMAGE_SUBSYSTEM_NATIVE_WINDOWS", 8),
    ("IMAGE_SUBSYSTEM_WINDOWS_CE_GUI", 9),
    ("IMAGE_SUBSYSTEM_EFI_APPLICATION", 10),
    ("IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER", 11),
    ("IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER", 12),
    ("IMAGE_SUBSYSTEM_EFI_ROM", 13),
    ("IMAGE_SUBSYSTEM_XBOX", 14),
    ("IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION", 16),
]

SUBSYSTEM_TYPE = two_way_dict(subsystem_types)

machine_types = [
    ("IMAGE_FILE_MACHINE_UNKNOWN", 0),
    ("IMAGE_FILE_MACHINE_I386", 0x014C),
    ("IMAGE_FILE_MACHINE_R3000", 0x0162),
    ("IMAGE_FILE_MACHINE_R4000", 0x0166),
    ("IMAGE_FILE_MACHINE_R10000", 0x0168),
    ("IMAGE_FILE_MACHINE_WCEMIPSV2", 0x0169),
    ("IMAGE_FILE_MACHINE_ALPHA", 0x0184),
    ("IMAGE_FILE_MACHINE_SH3", 0x01A2),
    ("IMAGE_FILE_MACHINE_SH3DSP", 0x01A3),
    ("IMAGE_FILE_MACHINE_SH3E", 0x01A4),
    ("IMAGE_FILE_MACHINE_SH4", 0x01A6),
    ("IMAGE_FILE_MACHINE_SH5", 0x01A8),
    ("IMAGE_FILE_MACHINE_ARM", 0x01C0),
    ("IMAGE_FILE_MACHINE_THUMB", 0x01C2),
    ("IMAGE_FILE_MACHINE_ARMNT", 0x01C4),
    ("IMAGE_FILE_MACHINE_AM33", 0x01D3),
    ("IMAGE_FILE_MACHINE_POWERPC", 0x01F0),
    ("IMAGE_FILE_MACHINE_POWERPCFP", 0x01F1),
    ("IMAGE_FILE_MACHINE_IA64", 0x0200),
    ("IMAGE_FILE_MACHINE_MIPS16", 0x0266),
    ("IMAGE_FILE_MACHINE_ALPHA64", 0x0284),
    ("IMAGE_FILE_MACHINE_AXP64", 0x0284),  # same
    ("IMAGE_FILE_MACHINE_MIPSFPU", 0x0366),
    ("IMAGE_FILE_MACHINE_MIPSFPU16", 0x0466),
    ("IMAGE_FILE_MACHINE_TRICORE", 0x0520),
    ("IMAGE_FILE_MACHINE_CEF", 0x0CEF),
    ("IMAGE_FILE_MACHINE_EBC", 0x0EBC),
    ("IMAGE_FILE_MACHINE_AMD64", 0x8664),
    ("IMAGE_FILE_MACHINE_M32R", 0x9041),
    ("IMAGE_FILE_MACHINE_ARM64", 0xAA64),
    ("IMAGE_FILE_MACHINE_CEE", 0xC0EE),
]

MACHINE_TYPE = two_way_dict(machine_types)

relocation_types = [
    ("IMAGE_REL_BASED_ABSOLUTE", 0),
    ("IMAGE_REL_BASED_HIGH", 1),
    ("IMAGE_REL_BASED_LOW", 2),
    ("IMAGE_REL_BASED_HIGHLOW", 3),
    ("IMAGE_REL_BASED_HIGHADJ", 4),
    ("IMAGE_REL_BASED_MIPS_JMPADDR", 5),
    ("IMAGE_REL_BASED_SECTION", 6),
    ("IMAGE_REL_BASED_REL", 7),
    ("IMAGE_REL_BASED_MIPS_JMPADDR16", 9),
    ("IMAGE_REL_BASED_IA64_IMM64", 9),
    ("IMAGE_REL_BASED_DIR64", 10),
    ("IMAGE_REL_BASED_HIGH3ADJ", 11),
]

RELOCATION_TYPE = two_way_dict(relocation_types)

dll_characteristics = [
    ("IMAGE_LIBRARY_PROCESS_INIT", 0x0001),  # reserved
    ("IMAGE_LIBRARY_PROCESS_TERM", 0x0002),  # reserved
    ("IMAGE_LIBRARY_THREAD_INIT", 0x0004),  # reserved
    ("IMAGE_LIBRARY_THREAD_TERM", 0x0008),  # reserved
    ("IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA", 0x0020),
    ("IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE", 0x0040),
    ("IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY", 0x0080),
    ("IMAGE_DLLCHARACTERISTICS_NX_COMPAT", 0x0100),
    ("IMAGE_DLLCHARACTERISTICS_NO_ISOLATION", 0x0200),
    ("IMAGE_DLLCHARACTERISTICS_NO_SEH", 0x0400),
    ("IMAGE_DLLCHARACTERISTICS_NO_BIND", 0x0800),
    ("IMAGE_DLLCHARACTERISTICS_APPCONTAINER", 0x1000),
    ("IMAGE_DLLCHARACTERISTICS_WDM_DRIVER", 0x2000),
    ("IMAGE_DLLCHARACTERISTICS_GUARD_CF", 0x4000),
    ("IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE", 0x8000),
]

DLL_CHARACTERISTICS = two_way_dict(dll_characteristics)

FILE_ALIGNMENT_HARDCODED_VALUE = 0x200

# Unwind info-related enums
unwind_info_flags = [
    ("UNW_FLAG_EHANDLER", 0x01),
    ("UNW_FLAG_UHANDLER", 0x02),
    ("UNW_FLAG_CHAININFO", 0x04),
]

UNWIND_INFO_FLAGS = two_way_dict(unwind_info_flags)

registers = [
    ("RAX", 0),
    ("RCX", 1),
    ("RDX", 2),
    ("RBX", 3),
    ("RSP", 4),
    ("RBP", 5),
    ("RSI", 6),
    ("RDI", 7),
    ("R8", 8),
    ("R9", 9),
    ("R10", 10),
    ("R11", 11),
    ("R12", 12),
    ("R13", 13),
    ("R14", 14),
    ("R15", 15),
]

REGISTERS = two_way_dict(registers)

# enum _UNWIND_OP_CODES
UWOP_PUSH_NONVOL = 0
UWOP_ALLOC_LARGE = 1
UWOP_ALLOC_SMALL = 2
UWOP_SET_FPREG = 3
UWOP_SAVE_NONVOL = 4
UWOP_SAVE_NONVOL_FAR = 5
UWOP_EPILOG = 6
UWOP_SAVE_XMM128 = 8
UWOP_SAVE_XMM128_FAR = 9
UWOP_PUSH_MACHFRAME = 10

# Resource types
resource_type = [
    ("RT_CURSOR", 1),
    ("RT_BITMAP", 2),
    ("RT_ICON", 3),
    ("RT_MENU", 4),
    ("RT_DIALOG", 5),
    ("RT_STRING", 6),
    ("RT_FONTDIR", 7),
    ("RT_FONT", 8),
    ("RT_ACCELERATOR", 9),
    ("RT_RCDATA", 10),
    ("RT_MESSAGETABLE", 11),
    ("RT_GROUP_CURSOR", 12),
    ("RT_GROUP_ICON", 14),
    ("RT_VERSION", 16),
    ("RT_DLGINCLUDE", 17),
    ("RT_PLUGPLAY", 19),
    ("RT_VXD", 20),
    ("RT_ANICURSOR", 21),
    ("RT_ANIICON", 22),
    ("RT_HTML", 23),
    ("RT_MANIFEST", 24),
]

RESOURCE_TYPE = two_way_dict(resource_type)

# Language definitions
lang = [
    ("LANG_NEUTRAL", 0x00),
    ("LANG_INVARIANT", 0x7F),
    ("LANG_AFRIKAANS", 0x36),
    ("LANG_ALBANIAN", 0x1C),
    ("LANG_ARABIC", 0x01),
    ("LANG_ARMENIAN", 0x2B),
    ("LANG_ASSAMESE", 0x4D),
    ("LANG_AZERI", 0x2C),
    ("LANG_BASQUE", 0x2D),
    ("LANG_BELARUSIAN", 0x23),
    ("LANG_BENGALI", 0x45),
    ("LANG_BULGARIAN", 0x02),
    ("LANG_CATALAN", 0x03),
    ("LANG_CHINESE", 0x04),
    ("LANG_CROATIAN", 0x1A),
    ("LANG_CZECH", 0x05),
    ("LANG_DANISH", 0x06),
    ("LANG_DIVEHI", 0x65),
    ("LANG_DUTCH", 0x13),
    ("LANG_ENGLISH", 0x09),
    ("LANG_ESTONIAN", 0x25),
    ("LANG_FAEROESE", 0x38),
    ("LANG_FARSI", 0x29),
    ("LANG_FINNISH", 0x0B),
    ("LANG_FRENCH", 0x0C),
    ("LANG_GALICIAN", 0x56),
    ("LANG_GEORGIAN", 0x37),
    ("LANG_GERMAN", 0x07),
    ("LANG_GREEK", 0x08),
    ("LANG_GUJARATI", 0x47),
    ("LANG_HEBREW", 0x0D),
    ("LANG_HINDI", 0x39),
    ("LANG_HUNGARIAN", 0x0E),
    ("LANG_ICELANDIC", 0x0F),
    ("LANG_INDONESIAN", 0x21),
    ("LANG_ITALIAN", 0x10),
    ("LANG_JAPANESE", 0x11),
    ("LANG_KANNADA", 0x4B),
    ("LANG_KASHMIRI", 0x60),
    ("LANG_KAZAK", 0x3F),
    ("LANG_KONKANI", 0x57),
    ("LANG_KOREAN", 0x12),
    ("LANG_KYRGYZ", 0x40),
    ("LANG_LATVIAN", 0x26),
    ("LANG_LITHUANIAN", 0x27),
    ("LANG_MACEDONIAN", 0x2F),
    ("LANG_MALAY", 0x3E),
    ("LANG_MALAYALAM", 0x4C),
    ("LANG_MANIPURI", 0x58),
    ("LANG_MARATHI", 0x4E),
    ("LANG_MONGOLIAN", 0x50),
    ("LANG_NEPALI", 0x61),
    ("LANG_NORWEGIAN", 0x14),
    ("LANG_ORIYA", 0x48),
    ("LANG_POLISH", 0x15),
    ("LANG_PORTUGUESE", 0x16),
    ("LANG_PUNJABI", 0x46),
    ("LANG_ROMANIAN", 0x18),
    ("LANG_RUSSIAN", 0x19),
    ("LANG_SANSKRIT", 0x4F),
    ("LANG_SERBIAN", 0x1A),
    ("LANG_SINDHI", 0x59),
    ("LANG_SLOVAK", 0x1B),
    ("LANG_SLOVENIAN", 0x24),
    ("LANG_SPANISH", 0x0A),
    ("LANG_SWAHILI", 0x41),
    ("LANG_SWEDISH", 0x1D),
    ("LANG_SYRIAC", 0x5A),
    ("LANG_TAMIL", 0x49),
    ("LANG_TATAR", 0x44),
    ("LANG_TELUGU", 0x4A),
    ("LANG_THAI", 0x1E),
    ("LANG_TURKISH", 0x1F),
    ("LANG_UKRAINIAN", 0x22),
    ("LANG_URDU", 0x20),
    ("LANG_UZBEK", 0x43),
    ("LANG_VIETNAMESE", 0x2A),
    ("LANG_GAELIC", 0x3C),
    ("LANG_MALTESE", 0x3A),
    ("LANG_MAORI", 0x28),
    ("LANG_RHAETO_ROMANCE", 0x17),
    ("LANG_SAAMI", 0x3B),
    ("LANG_SORBIAN", 0x2E),
    ("LANG_SUTU", 0x30),
    ("LANG_TSONGA", 0x31),
    ("LANG_TSWANA", 0x32),
    ("LANG_VENDA", 0x33),
    ("LANG_XHOSA", 0x34),
    ("LANG_ZULU", 0x35),
    ("LANG_ESPERANTO", 0x8F),
    ("LANG_WALON", 0x90),
    ("LANG_CORNISH", 0x91),
    ("LANG_WELSH", 0x92),
    ("LANG_BRETON", 0x93),
]

LANG = two_way_dict(lang)

# Sublanguage definitions
sublang = [
    ("SUBLANG_NEUTRAL", 0x00),
    ("SUBLANG_DEFAULT", 0x01),
    ("SUBLANG_SYS_DEFAULT", 0x02),
    ("SUBLANG_ARABIC_SAUDI_ARABIA", 0x01),
    ("SUBLANG_ARABIC_IRAQ", 0x02),
    ("SUBLANG_ARABIC_EGYPT", 0x03),
    ("SUBLANG_ARABIC_LIBYA", 0x04),
    ("SUBLANG_ARABIC_ALGERIA", 0x05),
    ("SUBLANG_ARABIC_MOROCCO", 0x06),
    ("SUBLANG_ARABIC_TUNISIA", 0x07),
    ("SUBLANG_ARABIC_OMAN", 0x08),
    ("SUBLANG_ARABIC_YEMEN", 0x09),
    ("SUBLANG_ARABIC_SYRIA", 0x0A),
    ("SUBLANG_ARABIC_JORDAN", 0x0B),
    ("SUBLANG_ARABIC_LEBANON", 0x0C),
    ("SUBLANG_ARABIC_KUWAIT", 0x0D),
    ("SUBLANG_ARABIC_UAE", 0x0E),
    ("SUBLANG_ARABIC_BAHRAIN", 0x0F),
    ("SUBLANG_ARABIC_QATAR", 0x10),
    ("SUBLANG_AZERI_LATIN", 0x01),
    ("SUBLANG_AZERI_CYRILLIC", 0x02),
    ("SUBLANG_CHINESE_TRADITIONAL", 0x01),
    ("SUBLANG_CHINESE_SIMPLIFIED", 0x02),
    ("SUBLANG_CHINESE_HONGKONG", 0x03),
    ("SUBLANG_CHINESE_SINGAPORE", 0x04),
    ("SUBLANG_CHINESE_MACAU", 0x05),
    ("SUBLANG_DUTCH", 0x01),
    ("SUBLANG_DUTCH_BELGIAN", 0x02),
    ("SUBLANG_ENGLISH_US", 0x01),
    ("SUBLANG_ENGLISH_UK", 0x02),
    ("SUBLANG_ENGLISH_AUS", 0x03),
    ("SUBLANG_ENGLISH_CAN", 0x04),
    ("SUBLANG_ENGLISH_NZ", 0x05),
    ("SUBLANG_ENGLISH_EIRE", 0x06),
    ("SUBLANG_ENGLISH_SOUTH_AFRICA", 0x07),
    ("SUBLANG_ENGLISH_JAMAICA", 0x08),
    ("SUBLANG_ENGLISH_CARIBBEAN", 0x09),
    ("SUBLANG_ENGLISH_BELIZE", 0x0A),
    ("SUBLANG_ENGLISH_TRINIDAD", 0x0B),
    ("SUBLANG_ENGLISH_ZIMBABWE", 0x0C),
    ("SUBLANG_ENGLISH_PHILIPPINES", 0x0D),
    ("SUBLANG_FRENCH", 0x01),
    ("SUBLANG_FRENCH_BELGIAN", 0x02),
    ("SUBLANG_FRENCH_CANADIAN", 0x03),
    ("SUBLANG_FRENCH_SWISS", 0x04),
    ("SUBLANG_FRENCH_LUXEMBOURG", 0x05),
    ("SUBLANG_FRENCH_MONACO", 0x06),
    ("SUBLANG_GERMAN", 0x01),
    ("SUBLANG_GERMAN_SWISS", 0x02),
    ("SUBLANG_GERMAN_AUSTRIAN", 0x03),
    ("SUBLANG_GERMAN_LUXEMBOURG", 0x04),
    ("SUBLANG_GERMAN_LIECHTENSTEIN", 0x05),
    ("SUBLANG_ITALIAN", 0x01),
    ("SUBLANG_ITALIAN_SWISS", 0x02),
    ("SUBLANG_KASHMIRI_SASIA", 0x02),
    ("SUBLANG_KASHMIRI_INDIA", 0x02),
    ("SUBLANG_KOREAN", 0x01),
    ("SUBLANG_LITHUANIAN", 0x01),
    ("SUBLANG_MALAY_MALAYSIA", 0x01),
    ("SUBLANG_MALAY_BRUNEI_DARUSSALAM", 0x02),
    ("SUBLANG_NEPALI_INDIA", 0x02),
    ("SUBLANG_NORWEGIAN_BOKMAL", 0x01),
    ("SUBLANG_NORWEGIAN_NYNORSK", 0x02),
    ("SUBLANG_PORTUGUESE", 0x02),
    ("SUBLANG_PORTUGUESE_BRAZILIAN", 0x01),
    ("SUBLANG_SERBIAN_LATIN", 0x02),
    ("SUBLANG_SERBIAN_CYRILLIC", 0x03),
    ("SUBLANG_SPANISH", 0x01),
    ("SUBLANG_SPANISH_MEXICAN", 0x02),
    ("SUBLANG_SPANISH_MODERN", 0x03),
    ("SUBLANG_SPANISH_GUATEMALA", 0x04),
    ("SUBLANG_SPANISH_COSTA_RICA", 0x05),
    ("SUBLANG_SPANISH_PANAMA", 0x06),
    ("SUBLANG_SPANISH_DOMINICAN_REPUBLIC", 0x07),
    ("SUBLANG_SPANISH_VENEZUELA", 0x08),
    ("SUBLANG_SPANISH_COLOMBIA", 0x09),
    ("SUBLANG_SPANISH_PERU", 0x0A),
    ("SUBLANG_SPANISH_ARGENTINA", 0x0B),
    ("SUBLANG_SPANISH_ECUADOR", 0x0C),
    ("SUBLANG_SPANISH_CHILE", 0x0D),
    ("SUBLANG_SPANISH_URUGUAY", 0x0E),
    ("SUBLANG_SPANISH_PARAGUAY", 0x0F),
    ("SUBLANG_SPANISH_BOLIVIA", 0x10),
    ("SUBLANG_SPANISH_EL_SALVADOR", 0x11),
    ("SUBLANG_SPANISH_HONDURAS", 0x12),
    ("SUBLANG_SPANISH_NICARAGUA", 0x13),
    ("SUBLANG_SPANISH_PUERTO_RICO", 0x14),
    ("SUBLANG_SWEDISH", 0x01),
    ("SUBLANG_SWEDISH_FINLAND", 0x02),
    ("SUBLANG_URDU_PAKISTAN", 0x01),
    ("SUBLANG_URDU_INDIA", 0x02),
    ("SUBLANG_UZBEK_LATIN", 0x01),
    ("SUBLANG_UZBEK_CYRILLIC", 0x02),
    ("SUBLANG_DUTCH_SURINAM", 0x03),
    ("SUBLANG_ROMANIAN", 0x01),
    ("SUBLANG_ROMANIAN_MOLDAVIA", 0x02),
    ("SUBLANG_RUSSIAN", 0x01),
    ("SUBLANG_RUSSIAN_MOLDAVIA", 0x02),
    ("SUBLANG_CROATIAN", 0x01),
    ("SUBLANG_LITHUANIAN_CLASSIC", 0x02),
    ("SUBLANG_GAELIC", 0x01),
    ("SUBLANG_GAELIC_SCOTTISH", 0x02),
    ("SUBLANG_GAELIC_MANX", 0x03),
]

SUBLANG = two_way_dict(sublang)

# Initialize the dictionary with all the name->value pairs
SUBLANG = dict(sublang)

# Now add all the value->name information, handling duplicates appropriately
for sublang_name, sublang_value in sublang:
    if sublang_value in SUBLANG:
        SUBLANG[sublang_value].append(sublang_name)
    else:
        SUBLANG[sublang_value] = [sublang_name]
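
# Illustrative note (not in the original source): after the loop above, SUBLANG
# maps each name to its numeric value and each numeric value to the list of all
# names sharing it, since sublanguage IDs are only unique within a language:
#
#   >>> SUBLANG["SUBLANG_ENGLISH_US"]
#   1
#   >>> SUBLANG[0x01][:3]
#   ['SUBLANG_DEFAULT', 'SUBLANG_ARABIC_SAUDI_ARABIA', 'SUBLANG_AZERI_LATIN']
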
# Resolve a sublang name given the main lang name
#
def get_sublang_name_for_lang(lang_value, sublang_value):
    lang_name = LANG.get(lang_value, "*unknown*")
    for sublang_name in SUBLANG.get(sublang_value, []):
        # if the main language is a substring of sublang's name, then
        # return that
        if lang_name in sublang_name:
            return sublang_name
    # otherwise return the first sublang name
    return SUBLANG.get(sublang_value, ["*unknown*"])[0]
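
# Illustrative sketch (not in the original source): the lookup prefers the
# sublanguage name that embeds the main language name, e.g. for English (0x09)
# with sublanguage 0x01 the generic 'SUBLANG_DEFAULT' is skipped in favour of:
#
#   >>> get_sublang_name_for_lang(LANG["LANG_ENGLISH"], 0x01)
#   'SUBLANG_ENGLISH_US'
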
# Ange Albertini's code to process resources' strings
#
def parse_strings(data, counter, l):
    i = 0
    error_count = 0
    while i < len(data):
        data_slice = data[i : i + 2]
        if len(data_slice) < 2:
            break

        len_ = struct.unpack("<h", data_slice)[0]
        i += 2
        if len_ != 0 and 0 <= len_ * 2 <= len(data):
            try:
                l[counter] = b(data[i : i + len_ * 2]).decode("utf-16le")
            except UnicodeDecodeError:
                error_count += 1
                pass
            if error_count >= 3:
                break
            i += len_ * 2
        counter += 1
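
# Illustrative sketch (not in the original source): parse_strings consumes a
# string-table blob of 16-bit length-prefixed UTF-16LE strings and stores the
# decoded entries into the supplied dictionary, keyed by a running counter:
#
#   >>> table = {}
#   >>> parse_strings(struct.pack("<H", 3) + "abc".encode("utf-16le"), 16, table)
#   >>> table
#   {16: 'abc'}
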
def retrieve_flags(flag_dict, flag_filter):
    """Read the flags from a dictionary and return them in a usable form.

    Will return a list of (flag, value) for all flags in "flag_dict"
    matching the filter "flag_filter".
    """
    return [
        (flag, flag_dict[flag])
        for flag in flag_dict.keys()
        if isinstance(flag, (str, bytes)) and flag.startswith(flag_filter)
    ]
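
# Illustrative sketch (not in the original source): applied to one of the
# two-way dictionaries defined above, retrieve_flags keeps only the string
# keys matching the prefix, so the reverse (value -> name) entries are ignored:
#
#   >>> retrieve_flags(UNWIND_INFO_FLAGS, "UNW_FLAG_")
#   [('UNW_FLAG_EHANDLER', 1), ('UNW_FLAG_UHANDLER', 2), ('UNW_FLAG_CHAININFO', 4)]
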
def set_flags(obj, flag_field, flags):
    """Will process the flags and set attributes in the object accordingly.

    The object "obj" will gain attributes named after the flags provided in
    "flags" and valued True/False, matching the results of applying each
    flag value from "flags" to flag_field.
    """
    for flag, value in flags:
        if value & flag_field:
            obj.__dict__[flag] = True
        else:
            obj.__dict__[flag] = False
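
# Illustrative sketch (not in the original source): combining the two helpers
# above turns a raw bitfield into named boolean attributes on any object, which
# is how pefile exposes e.g. file and section characteristics:
#
#   >>> class Holder:
#   ...     pass
#   >>> holder = Holder()
#   >>> flags = retrieve_flags(UNWIND_INFO_FLAGS, "UNW_FLAG_")
#   >>> set_flags(holder, 0x01 | 0x04, flags)
#   >>> holder.UNW_FLAG_EHANDLER, holder.UNW_FLAG_UHANDLER, holder.UNW_FLAG_CHAININFO
#   (True, False, True)
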
def power_of_two(val):
    return val != 0 and (val & (val - 1)) == 0


def b(x):
    if isinstance(x, (bytes, bytearray)):
        return bytes(x)
    return codecs.encode(x, "cp1252")
class UnicodeStringWrapperPostProcessor:
    """This class attempts to help the process of identifying strings
    that might be plain Unicode or Pascal. A list of strings will be
    wrapped by it in the hope that the overlaps will help make the
    decision about their type."""
    def __init__(self, pe, rva_ptr):
        self.pe = pe
        self.rva_ptr = rva_ptr
        self.string = None

    def get_rva(self):
        """Get the RVA of the string."""
        return self.rva_ptr

    def __str__(self):
        """Return the escaped UTF-8 representation of the string."""
        return self.decode("utf-8", "backslashreplace_")

    def decode(self, *args):
        if not self.string:
            return ""
        return self.string.decode(*args)
    def invalidate(self):
        """Make this instance None, to express it's not a known string type."""
        self = None
    def render_pascal_16(self):
        try:
            self.string = self.pe.get_string_u_at_rva(
                self.rva_ptr + 2, max_length=self.get_pascal_16_length()
            )
        except PEFormatError:
            self.pe.get_warnings().append(
                "Failed rendering pascal string, "
                "attempting to read from RVA 0x{0:x}".format(self.rva_ptr + 2)
            )

    def get_pascal_16_length(self):
        return self.__get_word_value_at_rva(self.rva_ptr)

    def __get_word_value_at_rva(self, rva):
        try:
            data = self.pe.get_data(rva, 2)
        except PEFormatError:
            return False

        if len(data) < 2:
            return False

        return struct.unpack("<H", data)[0]

    def ask_unicode_16(self, next_rva_ptr):
        """The next RVA is taken to be the one immediately following this one.

        Such RVA could indicate the natural end of the string and will be checked
        to see if there's a Unicode NULL character there.
        """
        if self.__get_word_value_at_rva(next_rva_ptr - 2) == 0:
            self.length = next_rva_ptr - self.rva_ptr
            return True

        return False

    def render_unicode_16(self):
        try:
            self.string = self.pe.get_string_u_at_rva(self.rva_ptr)
        except PEFormatError:
            self.pe.get_warnings().append(
                "Failed rendering unicode string, "
                "attempting to read from RVA 0x{0:x}".format(self.rva_ptr)
            )
class PEFormatError(Exception):
    """Generic PE format error exception."""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)


class Dump:
    """Convenience class for dumping the PE information."""

    def __init__(self):
        self.text = []

    def add_lines(self, txt, indent=0):
        """Adds a list of lines.

        The list can be indented with the optional argument 'indent'.
        """
        for line in txt:
            self.add_line(line, indent)

    def add_line(self, txt, indent=0):
        """Adds a line.

        The line can be indented with the optional argument 'indent'.
        """
        self.add(txt + "\n", indent)

    def add(self, txt, indent=0):
        """Adds some text, no newline will be appended.

        The text can be indented with the optional argument 'indent'.
        """
        self.text.append("{0}{1}".format(" " * indent, txt))

    def add_header(self, txt):
        """Adds a header element."""
        self.add_line("{0}{1}{0}\n".format("-" * 10, txt))

    def add_newline(self):
        """Adds a newline."""
        self.text.append("\n")

    def get_text(self):
        """Get the text in its current state."""
        return "".join("{0}".format(b) for b in self.text)
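
# Illustrative sketch (not in the original source): Dump is a small text
# accumulator used by pefile's dump_info()-style helpers; headers are framed
# with dashes and per-line indentation is applied when text is added:
#
#   >>> d = Dump()
#   >>> d.add_header("DOS_HEADER")
#   >>> d.add_line("e_magic: 0x5A4D", indent=2)
#   >>> d.get_text()
#   '----------DOS_HEADER----------\n\n  e_magic: 0x5A4D\n'
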
  730. STRUCT_SIZEOF_TYPES = {
  731. "x": 1,
  732. "c": 1,
  733. "b": 1,
  734. "B": 1,
  735. "h": 2,
  736. "H": 2,
  737. "i": 4,
  738. "I": 4,
  739. "l": 4,
  740. "L": 4,
  741. "f": 4,
  742. "q": 8,
  743. "Q": 8,
  744. "d": 8,
  745. "s": 1,
  746. }
  747. @lru_cache(maxsize=2048)
  748. def sizeof_type(t):
  749. count = 1
  750. _t = t
  751. if t[0] in string.digits:
  752. # extract the count
  753. count = int("".join([d for d in t if d in string.digits]))
  754. _t = "".join([d for d in t if d not in string.digits])
  755. return STRUCT_SIZEOF_TYPES[_t] * count
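# Illustrative usage sketch (not part of the original module): sizeof_type()
# maps a struct-style type code, optionally prefixed with a repeat count, to
# its size in bytes.
#
#     sizeof_type("I")    # -> 4 (unsigned 32-bit integer)
#     sizeof_type("8s")   # -> 8 (eight-byte string, as used for section names)
#     sizeof_type("2H")   # -> 4 (two unsigned 16-bit integers)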
  756. @lru_cache(maxsize=2048, copy=True)
  757. def set_format(format):
  758. __format__ = "<"
  759. __unpacked_data_elms__ = []
  760. __field_offsets__ = {}
  761. __keys__ = []
  762. __format_length__ = 0
  763. offset = 0
  764. for elm in format:
  765. if "," in elm:
  766. elm_type, elm_name = elm.split(",", 1)
  767. __format__ += elm_type
  768. __unpacked_data_elms__.append(None)
  769. elm_names = elm_name.split(",")
  770. names = []
  771. for elm_name in elm_names:
  772. if elm_name in __keys__:
  773. search_list = [x[: len(elm_name)] for x in __keys__]
  774. occ_count = search_list.count(elm_name)
  775. elm_name = "{0}_{1:d}".format(elm_name, occ_count)
  776. names.append(elm_name)
  777. __field_offsets__[elm_name] = offset
  778. offset += sizeof_type(elm_type)
779. # Some PE header structures have unions in them, so a certain
780. # value might have different names; hence each key holds a list of
  781. # all the possible members referring to the data.
  782. __keys__.append(names)
  783. __format_length__ = struct.calcsize(__format__)
  784. return (
  785. __format__,
  786. __unpacked_data_elms__,
  787. __field_offsets__,
  788. __keys__,
  789. __format_length__,
  790. )
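# Illustrative usage sketch (not part of the original module): set_format()
# turns a tuple of "type,name" element strings into the pieces Structure
# needs. Using the IMAGE_DATA_DIRECTORY layout as an example:
#
#     fmt, elms, offsets, keys, length = set_format(("I,VirtualAddress", "I,Size"))
#     # fmt     == "<II"
#     # offsets == {"VirtualAddress": 0, "Size": 4}
#     # keys    == [["VirtualAddress"], ["Size"]]
#     # length  == 8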
  791. class Structure:
  792. """Prepare structure object to extract members from data.
  793. Format is a list containing definitions for the elements
  794. of the structure.
  795. """
  796. def __init__(self, format, name=None, file_offset=None):
797. # Format is forced to little endian, for big-endian non-Intel platforms
  798. self.__format__ = "<"
  799. self.__keys__ = []
  800. self.__format_length__ = 0
  801. self.__field_offsets__ = {}
  802. self.__unpacked_data_elms__ = []
  803. d = format[1]
  804. # need a tuple to be hashable in set_format using lru cache
  805. if not isinstance(format[1], tuple):
  806. d = tuple(format[1])
  807. (
  808. self.__format__,
  809. self.__unpacked_data_elms__,
  810. self.__field_offsets__,
  811. self.__keys__,
  812. self.__format_length__,
  813. ) = set_format(d)
  814. self.__all_zeroes__ = False
  815. self.__file_offset__ = file_offset
  816. if name:
  817. self.name = name
  818. else:
  819. self.name = format[0]
  820. def __get_format__(self):
  821. return self.__format__
  822. def get_field_absolute_offset(self, field_name):
  823. """Return the offset within the field for the requested field in the structure."""
  824. return self.__file_offset__ + self.__field_offsets__[field_name]
  825. def get_field_relative_offset(self, field_name):
  826. """Return the offset within the structure for the requested field."""
  827. return self.__field_offsets__[field_name]
  828. def get_file_offset(self):
  829. return self.__file_offset__
  830. def set_file_offset(self, offset):
  831. self.__file_offset__ = offset
  832. def all_zeroes(self):
  833. """Returns true is the unpacked data is all zeros."""
  834. return self.__all_zeroes__
  835. def sizeof(self):
  836. """Return size of the structure."""
  837. return self.__format_length__
  838. def __unpack__(self, data):
  839. data = b(data)
  840. if len(data) > self.__format_length__:
  841. data = data[: self.__format_length__]
  842. # OC Patch:
  843. # Some malware have incorrect header lengths.
  844. # Fail gracefully if this occurs
  845. # Buggy malware: a29b0118af8b7408444df81701ad5a7f
  846. #
  847. elif len(data) < self.__format_length__:
  848. raise PEFormatError("Data length less than expected header length.")
  849. if count_zeroes(data) == len(data):
  850. self.__all_zeroes__ = True
  851. self.__unpacked_data_elms__ = struct.unpack(self.__format__, data)
  852. for idx, val in enumerate(self.__unpacked_data_elms__):
  853. for key in self.__keys__[idx]:
  854. setattr(self, key, val)
  855. def __pack__(self):
  856. new_values = []
  857. for idx, val in enumerate(self.__unpacked_data_elms__):
  858. for key in self.__keys__[idx]:
  859. new_val = getattr(self, key)
860. # In the case of unions, the first changed value
861. # is picked and the loop is exited
  862. if new_val != val:
  863. break
  864. new_values.append(new_val)
  865. return struct.pack(self.__format__, *new_values)
  866. def __str__(self):
  867. return "\n".join(self.dump())
  868. def __repr__(self):
  869. return "<Structure: %s>" % (
  870. " ".join([" ".join(s.split()) for s in self.dump()])
  871. )
  872. def dump(self, indentation=0):
  873. """Returns a string representation of the structure."""
  874. dump = []
  875. dump.append("[{0}]".format(self.name))
  876. printable_bytes = [
  877. ord(i) for i in string.printable if i not in string.whitespace
  878. ]
879. # Refer to the set_format function for an explanation
  880. # of the following construct.
  881. for keys in self.__keys__:
  882. for key in keys:
  883. val = getattr(self, key)
  884. if isinstance(val, (int, long)):
  885. if key.startswith("Signature_"):
  886. val_str = "{:<8X}".format(val)
  887. else:
  888. val_str = "0x{:<8X}".format(val)
  889. if key == "TimeDateStamp" or key == "dwTimeStamp":
  890. try:
  891. val_str += " [%s UTC]" % time.asctime(time.gmtime(val))
  892. except ValueError:
  893. val_str += " [INVALID TIME]"
  894. else:
  895. val_str = bytearray(val)
  896. if key.startswith("Signature"):
  897. val_str = "".join(
  898. ["{:02X}".format(i) for i in val_str.rstrip(b"\x00")]
  899. )
  900. else:
  901. val_str = "".join(
  902. [
  903. chr(i)
  904. if (i in printable_bytes)
  905. else "\\x{0:02x}".format(i)
  906. for i in val_str.rstrip(b"\x00")
  907. ]
  908. )
  909. dump.append(
  910. "0x%-8X 0x%-3X %-30s %s"
  911. % (
  912. self.__field_offsets__[key] + self.__file_offset__,
  913. self.__field_offsets__[key],
  914. key + ":",
  915. val_str,
  916. )
  917. )
  918. return dump
  919. def dump_dict(self):
  920. """Returns a dictionary representation of the structure."""
  921. dump_dict = {}
  922. dump_dict["Structure"] = self.name
923. # Refer to the set_format function for an explanation
  924. # of the following construct.
  925. for keys in self.__keys__:
  926. for key in keys:
  927. val = getattr(self, key)
  928. if isinstance(val, (int, long)):
  929. if key == "TimeDateStamp" or key == "dwTimeStamp":
  930. try:
  931. val = "0x%-8X [%s UTC]" % (
  932. val,
  933. time.asctime(time.gmtime(val)),
  934. )
  935. except ValueError:
  936. val = "0x%-8X [INVALID TIME]" % val
  937. else:
  938. val = "".join(
  939. chr(d) if chr(d) in string.printable else "\\x%02x" % d
  940. for d in [ord(c) if not isinstance(c, int) else c for c in val]
  941. )
  942. dump_dict[key] = {
  943. "FileOffset": self.__field_offsets__[key] + self.__file_offset__,
  944. "Offset": self.__field_offsets__[key],
  945. "Value": val,
  946. }
  947. return dump_dict
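# Illustrative usage sketch (not part of the original module): a hedged
# example of unpacking raw bytes into a Structure using the
# IMAGE_DATA_DIRECTORY layout; the byte values below are made up.
#
#     st = Structure(
#         ("IMAGE_DATA_DIRECTORY", ("I,VirtualAddress", "I,Size")), file_offset=0
#     )
#     st.__unpack__(b"\x00\x10\x00\x00\x00\x02\x00\x00")
#     st.VirtualAddress   # -> 0x1000
#     st.Size             # -> 0x200
#     print("\n".join(st.dump()))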
  948. class SectionStructure(Structure):
  949. """Convenience section handling class."""
  950. def __init__(self, *argl, **argd):
  951. if "pe" in argd:
  952. self.pe = argd["pe"]
  953. del argd["pe"]
  954. Structure.__init__(self, *argl, **argd)
  955. self.PointerToRawData_adj = None
  956. self.VirtualAddress_adj = None
  957. def get_PointerToRawData_adj(self):
  958. if self.PointerToRawData_adj is None:
  959. if self.PointerToRawData is not None:
  960. self.PointerToRawData_adj = self.pe.adjust_FileAlignment(
  961. self.PointerToRawData, self.pe.OPTIONAL_HEADER.FileAlignment
  962. )
  963. return self.PointerToRawData_adj
  964. def get_VirtualAddress_adj(self):
  965. if self.VirtualAddress_adj is None:
  966. if self.VirtualAddress is not None:
  967. self.VirtualAddress_adj = self.pe.adjust_SectionAlignment(
  968. self.VirtualAddress,
  969. self.pe.OPTIONAL_HEADER.SectionAlignment,
  970. self.pe.OPTIONAL_HEADER.FileAlignment,
  971. )
  972. return self.VirtualAddress_adj
  973. def get_data(self, start=None, length=None):
  974. """Get data chunk from a section.
975. Allows querying data from the section by passing the
  976. addresses where the PE file would be loaded by default.
  977. It is then possible to retrieve code and data by their real
  978. addresses as they would be if loaded.
979. Returns bytes() under Python 3.x and str() under Python 2.7
  980. """
  981. if start is None:
  982. offset = self.get_PointerToRawData_adj()
  983. else:
  984. offset = (
  985. start - self.get_VirtualAddress_adj()
  986. ) + self.get_PointerToRawData_adj()
  987. if length is not None:
  988. end = offset + length
  989. else:
  990. end = offset + self.SizeOfRawData
  991. # PointerToRawData is not adjusted here as we might want to read any possible
  992. # extra bytes that might get cut off by aligning the start (and hence cutting
  993. # something off the end)
  994. if end > self.PointerToRawData + self.SizeOfRawData:
  995. end = self.PointerToRawData + self.SizeOfRawData
  996. return self.pe.__data__[offset:end]
  997. def __setattr__(self, name, val):
  998. if name == "Characteristics":
  999. section_flags = retrieve_flags(SECTION_CHARACTERISTICS, "IMAGE_SCN_")
  1000. # Set the section's flags according to the Characteristics member
  1001. set_flags(self, val, section_flags)
  1002. elif "IMAGE_SCN_" in name and hasattr(self, name):
  1003. if val:
  1004. self.__dict__["Characteristics"] |= SECTION_CHARACTERISTICS[name]
  1005. else:
  1006. self.__dict__["Characteristics"] ^= SECTION_CHARACTERISTICS[name]
  1007. self.__dict__[name] = val
  1008. def get_rva_from_offset(self, offset):
  1009. return offset - self.get_PointerToRawData_adj() + self.get_VirtualAddress_adj()
  1010. def get_offset_from_rva(self, rva):
  1011. return rva - self.get_VirtualAddress_adj() + self.get_PointerToRawData_adj()
  1012. def contains_offset(self, offset):
  1013. """Check whether the section contains the file offset provided."""
  1014. if self.PointerToRawData is None:
1015. # .bss and other sections containing only uninitialized data must have a
1016. # PointerToRawData of 0 and do not take up space in the file
  1017. return False
  1018. PointerToRawData_adj = self.get_PointerToRawData_adj()
  1019. return (
  1020. PointerToRawData_adj <= offset < PointerToRawData_adj + self.SizeOfRawData
  1021. )
  1022. def contains_rva(self, rva):
  1023. """Check whether the section contains the address provided."""
  1024. VirtualAddress_adj = self.get_VirtualAddress_adj()
  1025. # Check if the SizeOfRawData is realistic. If it's bigger than the size of
  1026. # the whole PE file minus the start address of the section it could be
  1027. # either truncated or the SizeOfRawData contains a misleading value.
  1028. # In either of those cases we take the VirtualSize
  1029. #
  1030. if len(self.pe.__data__) - self.get_PointerToRawData_adj() < self.SizeOfRawData:
  1031. # PECOFF documentation v8 says:
  1032. # VirtualSize: The total size of the section when loaded into memory.
  1033. # If this value is greater than SizeOfRawData, the section is zero-padded.
  1034. # This field is valid only for executable images and should be set to zero
  1035. # for object files.
  1036. #
  1037. size = self.Misc_VirtualSize
  1038. else:
  1039. size = max(self.SizeOfRawData, self.Misc_VirtualSize)
  1040. # Check whether there's any section after the current one that starts before
  1041. # the calculated end for the current one. If so, cut the current section's size
  1042. # to fit in the range up to where the next section starts.
  1043. if (
  1044. self.next_section_virtual_address is not None
  1045. and self.next_section_virtual_address > self.VirtualAddress
  1046. and VirtualAddress_adj + size > self.next_section_virtual_address
  1047. ):
  1048. size = self.next_section_virtual_address - VirtualAddress_adj
  1049. return VirtualAddress_adj <= rva < VirtualAddress_adj + size
  1050. def contains(self, rva):
  1051. return self.contains_rva(rva)
  1052. def get_entropy(self):
  1053. """Calculate and return the entropy for the section."""
  1054. return self.entropy_H(self.get_data())
  1055. def get_hash_sha1(self):
  1056. """Get the SHA-1 hex-digest of the section's data."""
  1057. if sha1 is not None:
  1058. return sha1(self.get_data()).hexdigest()
  1059. def get_hash_sha256(self):
  1060. """Get the SHA-256 hex-digest of the section's data."""
  1061. if sha256 is not None:
  1062. return sha256(self.get_data()).hexdigest()
  1063. def get_hash_sha512(self):
  1064. """Get the SHA-512 hex-digest of the section's data."""
  1065. if sha512 is not None:
  1066. return sha512(self.get_data()).hexdigest()
  1067. def get_hash_md5(self):
  1068. """Get the MD5 hex-digest of the section's data."""
  1069. if md5 is not None:
  1070. return md5(self.get_data()).hexdigest()
  1071. def entropy_H(self, data):
  1072. """Calculate the entropy of a chunk of data."""
  1073. if not data:
  1074. return 0.0
  1075. occurences = Counter(bytearray(data))
  1076. entropy = 0
  1077. for x in occurences.values():
  1078. p_x = float(x) / len(data)
  1079. entropy -= p_x * math.log(p_x, 2)
  1080. return entropy
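# Illustrative usage sketch (not part of the original module): a hedged
# example of working with parsed sections; "sample.exe" is a hypothetical
# input file.
#
#     import pefile
#     pe = pefile.PE("sample.exe")
#     for section in pe.sections:
#         print(section.Name.rstrip(b"\x00"),
#               hex(section.VirtualAddress),
#               section.get_entropy(),
#               section.get_hash_md5())
#     # Raw bytes of the first section, addressed as if the image were loaded:
#     data = pe.sections[0].get_data(pe.sections[0].VirtualAddress, 16)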
  1081. @lru_cache(maxsize=2048, copy=False)
  1082. def set_bitfields_format(format):
  1083. class Accumulator:
  1084. def __init__(self, fmt, comp_fields):
  1085. self._subfields = []
1086. # add a prefix to distinguish the artificially created compound field
  1087. # from regular fields
  1088. self._name = "~"
  1089. self._type = None
  1090. self._bits_left = 0
  1091. self._comp_fields = comp_fields
  1092. self._format = fmt
  1093. def wrap_up(self):
  1094. if self._type == None:
  1095. return
  1096. self._format.append(self._type + "," + self._name)
  1097. self._comp_fields[len(self._format) - 1] = (self._type, self._subfields)
  1098. self._name = "~"
  1099. self._type = None
  1100. self._subfields = []
  1101. def new_type(self, tp):
  1102. self._bits_left = STRUCT_SIZEOF_TYPES[tp] * 8
  1103. self._type = tp
  1104. def add_subfield(self, name, bitcnt):
  1105. self._name += name
  1106. self._bits_left -= bitcnt
  1107. self._subfields.append((name, bitcnt))
  1108. def get_type(self):
  1109. return self._type
  1110. def get_name(self):
  1111. return self._name
  1112. def get_bits_left(self):
  1113. return self._bits_left
  1114. old_fmt = []
  1115. comp_fields = {}
  1116. ac = Accumulator(old_fmt, comp_fields)
  1117. for elm in format[1]:
  1118. if not ":" in elm:
  1119. ac.wrap_up()
  1120. old_fmt.append(elm)
  1121. continue
  1122. elm_type, elm_name = elm.split(",", 1)
  1123. if "," in elm_name:
  1124. raise NotImplementedError(
  1125. "Structures with bitfields do not support unions yet"
  1126. )
  1127. elm_type, elm_bits = elm_type.split(":", 1)
  1128. elm_bits = int(elm_bits)
  1129. if elm_type != ac.get_type() or elm_bits > ac.get_bits_left():
  1130. ac.wrap_up()
  1131. ac.new_type(elm_type)
  1132. ac.add_subfield(elm_name, elm_bits)
  1133. ac.wrap_up()
  1134. format_str, _, field_offsets, keys, format_length = set_format(tuple(old_fmt))
  1135. extended_keys = []
  1136. for idx, val in enumerate(keys):
  1137. if not idx in comp_fields:
  1138. extended_keys.append(val)
  1139. continue
  1140. _, sbf = comp_fields[idx]
  1141. bf_names = [[f[StructureWithBitfields.BTF_NAME_IDX]] for f in sbf]
  1142. extended_keys.extend(bf_names)
  1143. for n in bf_names:
  1144. field_offsets[n[0]] = field_offsets[val[0]]
  1145. return (format_str, format_length, field_offsets, keys, extended_keys, comp_fields)
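# Illustrative usage sketch (not part of the original module): for a format
# containing bitfields, set_bitfields_format() collapses adjacent sub-fields
# into one compound field per underlying type.
#
#     fmt = ("EXAMPLE", ("B:4,LowerHalf", "B:4,UpperHalf"))
#     format_str, length, offsets, keys, ext_keys, comp = set_bitfields_format(fmt)
#     # format_str == "<B"   (one compound byte holds both sub-fields)
#     # ext_keys   == [["LowerHalf"], ["UpperHalf"]]
#     # comp[0]    == ("B", [("LowerHalf", 4), ("UpperHalf", 4)])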
  1146. class StructureWithBitfields(Structure):
  1147. """
  1148. Extends Structure's functionality with support for bitfields such as:
  1149. ('B:4,LowerHalf', 'B:4,UpperHalf')
  1150. To this end, two lists are maintained:
  1151. * self.__keys__ that contains compound fields, for example
1152. ('B,~LowerHalfUpperHalf'), and is used during packing/unpacking
  1153. * self.__keys_ext__ containing a separate key for each field (ex., LowerHalf,
  1154. UpperHalf) to simplify implementation of dump()
  1155. This way the implementation of unpacking/packing and dump() from Structure can be
  1156. reused.
  1157. In addition, we create a dictionary:
1158. <compound_field_index_in_keys> -->
  1159. (data type, [ (subfield name, length in bits)+ ] )
1160. that facilitates bitfield packing and unpacking.
1161. With lru_cache() creating only one instance per format string, the memory
  1162. overhead is negligible.
  1163. """
  1164. BTF_NAME_IDX = 0
  1165. BTF_BITCNT_IDX = 1
  1166. CF_TYPE_IDX = 0
  1167. CF_SUBFLD_IDX = 1
  1168. def __init__(self, format, name=None, file_offset=None):
  1169. (
  1170. self.__format__,
  1171. self.__format_length__,
  1172. self.__field_offsets__,
  1173. self.__keys__,
  1174. self.__keys_ext__,
  1175. self.__compound_fields__,
  1176. ) = set_bitfields_format(format)
  1177. # create our own unpacked_data_elms to ensure they are not shared among
  1178. # StructureWithBitfields instances with the same format string
  1179. self.__unpacked_data_elms__ = [None for i in range(self.__format_length__)]
  1180. self.__all_zeroes__ = False
  1181. self.__file_offset__ = file_offset
  1182. self.name = name if name != None else format[0]
  1183. def __unpack__(self, data):
  1184. # calling the original routine to deal with special cases/spurious data
  1185. # structures
  1186. super(StructureWithBitfields, self).__unpack__(data)
  1187. self._unpack_bitfield_attributes()
  1188. def __pack__(self):
  1189. self._pack_bitfield_attributes()
  1190. try:
  1191. data = super(StructureWithBitfields, self).__pack__()
  1192. finally:
  1193. self._unpack_bitfield_attributes()
  1194. return data
  1195. def dump(self, indentation=0):
  1196. tk = self.__keys__
  1197. self.__keys__ = self.__keys_ext__
  1198. try:
  1199. ret = super(StructureWithBitfields, self).dump(indentation)
  1200. finally:
  1201. self.__keys__ = tk
  1202. return ret
  1203. def dump_dict(self):
  1204. tk = self.__keys__
  1205. self.__keys__ = self.__keys_ext__
  1206. try:
  1207. ret = super(StructureWithBitfields, self).dump_dict()
  1208. finally:
  1209. self.__keys__ = tk
  1210. return ret
  1211. def _unpack_bitfield_attributes(self):
  1212. """Replace compound attributes corresponding to bitfields with separate
  1213. sub-fields.
  1214. """
  1215. for i in self.__compound_fields__.keys():
  1216. cf_name = self.__keys__[i][0]
  1217. cval = getattr(self, cf_name)
  1218. delattr(self, cf_name)
  1219. offst = 0
  1220. for sf in self.__compound_fields__[i][StructureWithBitfields.CF_SUBFLD_IDX]:
  1221. mask = (1 << sf[StructureWithBitfields.BTF_BITCNT_IDX]) - 1
  1222. mask <<= offst
  1223. setattr(
  1224. self,
  1225. sf[StructureWithBitfields.BTF_NAME_IDX],
  1226. (cval & mask) >> offst,
  1227. )
  1228. offst += sf[StructureWithBitfields.BTF_BITCNT_IDX]
  1229. def _pack_bitfield_attributes(self):
  1230. """Pack attributes into a compound bitfield"""
  1231. for i in self.__compound_fields__.keys():
  1232. cf_name = self.__keys__[i][0]
  1233. offst, acc_val = 0, 0
  1234. for sf in self.__compound_fields__[i][StructureWithBitfields.CF_SUBFLD_IDX]:
  1235. mask = (1 << sf[StructureWithBitfields.BTF_BITCNT_IDX]) - 1
  1236. field_val = (
  1237. getattr(self, sf[StructureWithBitfields.BTF_NAME_IDX]) & mask
  1238. )
  1239. acc_val |= field_val << offst
  1240. offst += sf[StructureWithBitfields.BTF_BITCNT_IDX]
  1241. setattr(self, cf_name, acc_val)
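# Illustrative usage sketch (not part of the original module): unpacking a
# single byte into two 4-bit sub-fields; the value 0xAB is made up.
#
#     s = StructureWithBitfields(
#         ("EXAMPLE", ("B:4,LowerHalf", "B:4,UpperHalf")), file_offset=0
#     )
#     s.__unpack__(b"\xab")
#     s.LowerHalf   # -> 0xB
#     s.UpperHalf   # -> 0xA
#     s.__pack__()  # -> b"\xab"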
  1242. class DataContainer:
  1243. """Generic data container."""
  1244. def __init__(self, **args):
  1245. bare_setattr = super(DataContainer, self).__setattr__
  1246. for key, value in list(args.items()):
  1247. bare_setattr(key, value)
  1248. class ImportDescData(DataContainer):
  1249. """Holds import descriptor information.
  1250. dll: name of the imported DLL
  1251. imports: list of imported symbols (ImportData instances)
  1252. struct: IMAGE_IMPORT_DESCRIPTOR structure
  1253. """
  1254. class ImportData(DataContainer):
  1255. """Holds imported symbol's information.
  1256. ordinal: Ordinal of the symbol
  1257. name: Name of the symbol
  1258. bound: If the symbol is bound, this contains
  1259. the address.
  1260. """
  1261. def __setattr__(self, name, val):
  1262. # If the instance doesn't yet have an ordinal attribute
  1263. # it's not fully initialized so can't do any of the
  1264. # following
  1265. #
  1266. if (
  1267. hasattr(self, "ordinal")
  1268. and hasattr(self, "bound")
  1269. and hasattr(self, "name")
  1270. ):
  1271. if name == "ordinal":
  1272. if self.pe.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
  1273. ordinal_flag = IMAGE_ORDINAL_FLAG
  1274. elif self.pe.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
  1275. ordinal_flag = IMAGE_ORDINAL_FLAG64
  1276. # Set the ordinal and flag the entry as importing by ordinal
  1277. self.struct_table.Ordinal = ordinal_flag | (val & 0xFFFF)
  1278. self.struct_table.AddressOfData = self.struct_table.Ordinal
  1279. self.struct_table.Function = self.struct_table.Ordinal
  1280. self.struct_table.ForwarderString = self.struct_table.Ordinal
  1281. elif name == "bound":
  1282. if self.struct_iat is not None:
  1283. self.struct_iat.AddressOfData = val
  1284. self.struct_iat.AddressOfData = self.struct_iat.AddressOfData
  1285. self.struct_iat.Function = self.struct_iat.AddressOfData
  1286. self.struct_iat.ForwarderString = self.struct_iat.AddressOfData
  1287. elif name == "address":
  1288. self.struct_table.AddressOfData = val
  1289. self.struct_table.Ordinal = self.struct_table.AddressOfData
  1290. self.struct_table.Function = self.struct_table.AddressOfData
  1291. self.struct_table.ForwarderString = self.struct_table.AddressOfData
  1292. elif name == "name":
  1293. # Make sure we reset the entry in case the import had been set to
  1294. # import by ordinal
  1295. if self.name_offset:
  1296. name_rva = self.pe.get_rva_from_offset(self.name_offset)
  1297. self.pe.set_dword_at_offset(
  1298. self.ordinal_offset, (0 << 31) | name_rva
  1299. )
  1300. # Complain if the length of the new name is longer than the
  1301. # existing one
  1302. if len(val) > len(self.name):
  1303. raise PEFormatError(
  1304. "The export name provided is longer than the existing one."
  1305. )
  1306. pass
  1307. self.pe.set_bytes_at_offset(self.name_offset, val)
  1308. self.__dict__[name] = val
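# Illustrative usage sketch (not part of the original module): a hedged
# example of walking the import table; "sample.exe" is a hypothetical file.
# Assigning to an entry's 'ordinal', 'bound' or 'name' attribute rewrites
# the corresponding thunk data in the loaded image, as implemented above.
#
#     import pefile
#     pe = pefile.PE("sample.exe")
#     for desc in getattr(pe, "DIRECTORY_ENTRY_IMPORT", []):
#         print(desc.dll)
#         for imp in desc.imports:
#             print("  ", hex(imp.address), imp.name or imp.ordinal)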
  1309. class ExportDirData(DataContainer):
  1310. """Holds export directory information.
  1311. struct: IMAGE_EXPORT_DIRECTORY structure
  1312. symbols: list of exported symbols (ExportData instances)"""
  1313. class ExportData(DataContainer):
  1314. """Holds exported symbols' information.
  1315. ordinal: ordinal of the symbol
  1316. address: address of the symbol
  1317. name: name of the symbol (None if the symbol is
  1318. exported by ordinal only)
  1319. forwarder: if the symbol is forwarded it will
  1320. contain the name of the target symbol,
  1321. None otherwise.
  1322. """
  1323. def __setattr__(self, name, val):
  1324. # If the instance doesn't yet have an ordinal attribute
  1325. # it's not fully initialized so can't do any of the
  1326. # following
  1327. #
  1328. if (
  1329. hasattr(self, "ordinal")
  1330. and hasattr(self, "address")
  1331. and hasattr(self, "forwarder")
  1332. and hasattr(self, "name")
  1333. ):
  1334. if name == "ordinal":
  1335. self.pe.set_word_at_offset(self.ordinal_offset, val)
  1336. elif name == "address":
  1337. self.pe.set_dword_at_offset(self.address_offset, val)
  1338. elif name == "name":
  1339. # Complain if the length of the new name is longer than the
  1340. # existing one
  1341. if len(val) > len(self.name):
  1342. raise PEFormatError(
  1343. "The export name provided is longer than the existing one."
  1344. )
  1345. self.pe.set_bytes_at_offset(self.name_offset, val)
  1346. elif name == "forwarder":
  1347. # Complain if the length of the new name is longer than the
  1348. # existing one
  1349. if len(val) > len(self.forwarder):
  1350. raise PEFormatError(
  1351. "The forwarder name provided is longer than the existing one."
  1352. )
  1353. self.pe.set_bytes_at_offset(self.forwarder_offset, val)
  1354. self.__dict__[name] = val
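# Illustrative usage sketch (not part of the original module): a hedged
# example of enumerating exports; "sample.dll" is a hypothetical file.
#
#     import pefile
#     pe = pefile.PE("sample.dll")
#     exports = getattr(pe, "DIRECTORY_ENTRY_EXPORT", None)
#     if exports is not None:
#         for sym in exports.symbols:
#             print(sym.ordinal, sym.name, sym.address, sym.forwarder)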
  1355. class ResourceDirData(DataContainer):
  1356. """Holds resource directory information.
  1357. struct: IMAGE_RESOURCE_DIRECTORY structure
  1358. entries: list of entries (ResourceDirEntryData instances)
  1359. """
  1360. class ResourceDirEntryData(DataContainer):
  1361. """Holds resource directory entry data.
  1362. struct: IMAGE_RESOURCE_DIRECTORY_ENTRY structure
  1363. name: If the resource is identified by name this
  1364. attribute will contain the name string. None
  1365. otherwise. If identified by id, the id is
  1366. available at 'struct.Id'
  1367. id: the id, also in struct.Id
  1368. directory: If this entry has a lower level directory
  1369. this attribute will point to the
  1370. ResourceDirData instance representing it.
  1371. data: If this entry has no further lower directories
  1372. and points to the actual resource data, this
  1373. attribute will reference the corresponding
  1374. ResourceDataEntryData instance.
  1375. (Either of the 'directory' or 'data' attribute will exist,
  1376. but not both.)
  1377. """
  1378. class ResourceDataEntryData(DataContainer):
  1379. """Holds resource data entry information.
  1380. struct: IMAGE_RESOURCE_DATA_ENTRY structure
  1381. lang: Primary language ID
  1382. sublang: Sublanguage ID
  1383. """
  1384. class DebugData(DataContainer):
  1385. """Holds debug information.
  1386. struct: IMAGE_DEBUG_DIRECTORY structure
  1387. entries: list of entries (IMAGE_DEBUG_TYPE instances)
  1388. """
  1389. class BaseRelocationData(DataContainer):
  1390. """Holds base relocation information.
  1391. struct: IMAGE_BASE_RELOCATION structure
  1392. entries: list of relocation data (RelocationData instances)
  1393. """
  1394. class RelocationData(DataContainer):
  1395. """Holds relocation information.
  1396. type: Type of relocation
  1397. The type string can be obtained by
  1398. RELOCATION_TYPE[type]
  1399. rva: RVA of the relocation
  1400. """
  1401. def __setattr__(self, name, val):
  1402. # If the instance doesn't yet have a struct attribute
  1403. # it's not fully initialized so can't do any of the
  1404. # following
  1405. #
  1406. if hasattr(self, "struct"):
  1407. # Get the word containing the type and data
  1408. #
  1409. word = self.struct.Data
  1410. if name == "type":
  1411. word = (val << 12) | (word & 0xFFF)
  1412. elif name == "rva":
  1413. offset = max(val - self.base_rva, 0)
  1414. word = (word & 0xF000) | (offset & 0xFFF)
  1415. # Store the modified data
  1416. #
  1417. self.struct.Data = word
  1418. self.__dict__[name] = val
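# Illustrative usage sketch (not part of the original module): a hedged
# example of walking the base relocations; "sample.exe" is a hypothetical
# file assumed to contain a relocation directory.
#
#     import pefile
#     pe = pefile.PE("sample.exe")
#     for block in getattr(pe, "DIRECTORY_ENTRY_BASERELOC", []):
#         for reloc in block.entries:
#             print(hex(reloc.rva), pefile.RELOCATION_TYPE.get(reloc.type, reloc.type))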
  1419. class TlsData(DataContainer):
  1420. """Holds TLS information.
  1421. struct: IMAGE_TLS_DIRECTORY structure
  1422. """
  1423. class BoundImportDescData(DataContainer):
  1424. """Holds bound import descriptor data.
  1425. This directory entry will provide information on the
  1426. DLLs this PE file has been bound to (if bound at all).
  1427. The structure will contain the name and timestamp of the
  1428. DLL at the time of binding so that the loader can know
  1429. whether it differs from the one currently present in the
  1430. system and must, therefore, re-bind the PE's imports.
  1431. struct: IMAGE_BOUND_IMPORT_DESCRIPTOR structure
  1432. name: DLL name
  1433. entries: list of entries (BoundImportRefData instances)
  1434. the entries will exist if this DLL has forwarded
  1435. symbols. If so, the destination DLL will have an
  1436. entry in this list.
  1437. """
  1438. class LoadConfigData(DataContainer):
  1439. """Holds Load Config data.
  1440. struct: IMAGE_LOAD_CONFIG_DIRECTORY structure
  1441. name: dll name
  1442. """
  1443. class BoundImportRefData(DataContainer):
  1444. """Holds bound import forwarder reference data.
  1445. Contains the same information as the bound descriptor but
  1446. for forwarded DLLs, if any.
  1447. struct: IMAGE_BOUND_FORWARDER_REF structure
  1448. name: dll name
  1449. """
  1450. class ExceptionsDirEntryData(DataContainer):
  1451. """Holds the data related to SEH (and stack unwinding, in particular)
1452. struct an instance of RUNTIME_FUNCTION
  1453. unwindinfo an instance of UNWIND_INFO
  1454. """
  1455. class UnwindInfo(StructureWithBitfields):
  1456. """Handles the complexities of UNWIND_INFO structure:
1457. * variable number of UNWIND_CODEs
  1458. * optional ExceptionHandler and FunctionEntry fields
  1459. """
  1460. def __init__(self, file_offset=0):
  1461. super(UnwindInfo, self).__init__(
  1462. (
  1463. "UNWIND_INFO",
  1464. (
  1465. "B:3,Version",
  1466. "B:5,Flags",
  1467. "B,SizeOfProlog",
  1468. "B,CountOfCodes",
  1469. "B:4,FrameRegister",
  1470. "B:4,FrameOffset",
  1471. ),
  1472. ),
  1473. file_offset=file_offset,
  1474. )
  1475. self._full_size = super(UnwindInfo, self).sizeof()
  1476. self._opt_field_name = None
  1477. self._code_info = StructureWithBitfields(
  1478. ("UNWIND_CODE", ("B,CodeOffset", "B:4,UnwindOp", "B:4,OpInfo")),
  1479. file_offset=0,
  1480. )
  1481. self._chained_entry = None
  1482. self._finished_unpacking = False
  1483. def unpack_in_stages(self, data):
  1484. """Unpacks the UNWIND_INFO "in two calls", with the first call establishing
  1485. a full size of the structure and the second, performing the actual unpacking.
  1486. """
  1487. if self._finished_unpacking:
  1488. return None
  1489. super(UnwindInfo, self).__unpack__(data)
  1490. codes_cnt_max = (self.CountOfCodes + 1) & ~1
  1491. hdlr_offset = (
  1492. super(UnwindInfo, self).sizeof() + codes_cnt_max * self._code_info.sizeof()
  1493. )
  1494. self._full_size = hdlr_offset + (
  1495. 0 if self.Flags == 0 else STRUCT_SIZEOF_TYPES["I"]
  1496. )
  1497. if len(data) < self._full_size:
  1498. return None
  1499. if self.Version != 1 and self.Version != 2:
  1500. return "Unsupported version of UNWIND_INFO at " + hex(self.__file_offset__)
  1501. self.UnwindCodes = []
  1502. ro = super(UnwindInfo, self).sizeof()
  1503. codes_left = self.CountOfCodes
  1504. while codes_left > 0:
  1505. self._code_info.__unpack__(data[ro : ro + self._code_info.sizeof()])
  1506. ucode = PrologEpilogOpsFactory.create(self._code_info)
  1507. if ucode is None:
  1508. return "Unknown UNWIND_CODE at " + hex(self.__file_offset__ + ro)
  1509. len_in_codes = ucode.length_in_code_structures(self._code_info, self)
  1510. opc_size = self._code_info.sizeof() * len_in_codes
  1511. ucode.initialize(
  1512. self._code_info,
  1513. data[ro : ro + opc_size],
  1514. self,
  1515. self.__file_offset__ + ro,
  1516. )
  1517. ro += opc_size
  1518. codes_left -= len_in_codes
  1519. self.UnwindCodes.append(ucode)
  1520. if self.UNW_FLAG_EHANDLER or self.UNW_FLAG_UHANDLER:
  1521. self._opt_field_name = "ExceptionHandler"
  1522. if self.UNW_FLAG_CHAININFO:
  1523. self._opt_field_name = "FunctionEntry"
  1524. if self._opt_field_name != None:
  1525. setattr(
  1526. self,
  1527. self._opt_field_name,
  1528. struct.unpack(
  1529. "<I", data[hdlr_offset : hdlr_offset + STRUCT_SIZEOF_TYPES["I"]]
  1530. )[0],
  1531. )
  1532. self._finished_unpacking = True
  1533. return None
  1534. def dump(self, indentation=0):
  1535. # Because __keys_ext__ are shared among all the instances with the same
1536. # format string, we have to add and subsequently remove the optional field
  1537. # each time.
1538. # It saves space (as compared to keeping a copy of self.__keys_ext__ per
  1539. # UnwindInfo instance), but makes our dump() implementation thread-unsafe.
  1540. if self._opt_field_name != None:
  1541. self.__field_offsets__[self._opt_field_name] = (
  1542. self._full_size - STRUCT_SIZEOF_TYPES["I"]
  1543. )
  1544. self.__keys_ext__.append([self._opt_field_name])
  1545. try:
  1546. dump = super(UnwindInfo, self).dump(indentation)
  1547. finally:
  1548. if self._opt_field_name != None:
  1549. self.__keys_ext__.pop()
  1550. dump.append(
  1551. "Flags: "
  1552. + ", ".join([s[0] for s in unwind_info_flags if getattr(self, s[0])])
  1553. )
  1554. dump.append(
  1555. "Unwind codes: "
  1556. + "; ".join([str(c) for c in self.UnwindCodes if c.is_valid()])
  1557. )
  1558. return dump
  1559. def dump_dict(self):
  1560. if self._opt_field_name != None:
  1561. self.__field_offsets__[self._opt_field_name] = (
  1562. self._full_size - STRUCT_SIZEOF_TYPES["I"]
  1563. )
  1564. self.__keys_ext__.append([self._opt_field_name])
  1565. try:
  1566. ret = super(UnwindInfo, self).dump_dict()
  1567. finally:
  1568. if self._opt_field_name != None:
  1569. self.__keys_ext__.pop()
  1570. return ret
  1571. def __setattr__(self, name, val):
  1572. if name == "Flags":
  1573. set_flags(self, val, unwind_info_flags)
  1574. elif "UNW_FLAG_" in name and hasattr(self, name):
  1575. if val:
  1576. self.__dict__["Flags"] |= UNWIND_INFO_FLAGS[name]
  1577. else:
  1578. self.__dict__["Flags"] ^= UNWIND_INFO_FLAGS[name]
  1579. self.__dict__[name] = val
  1580. def sizeof(self):
  1581. return self._full_size
  1582. def __pack__(self):
  1583. data = bytearray(self._full_size)
  1584. data[0 : super(UnwindInfo, self).sizeof()] = super(UnwindInfo, self).__pack__()
  1585. cur_offset = super(UnwindInfo, self).sizeof()
  1586. for uc in self.UnwindCodes:
  1587. if cur_offset + uc.struct.sizeof() > self._full_size:
  1588. break
  1589. data[cur_offset : cur_offset + uc.struct.sizeof()] = uc.struct.__pack__()
  1590. cur_offset += uc.struct.sizeof()
  1591. if self._opt_field_name != None:
  1592. data[
  1593. self._full_size - STRUCT_SIZEOF_TYPES["I"] : self._full_size
  1594. ] = struct.pack("<I", getattr(self, self._opt_field_name))
  1595. return data
  1596. def get_chained_function_entry(self):
  1597. return self._chained_entry
  1598. def set_chained_function_entry(self, entry):
  1599. if self._chained_entry != None:
  1600. raise PEFormatError("Chained function entry cannot be changed")
  1601. self._chained_entry = entry
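# Illustrative usage sketch (not part of the original module): a hedged
# example of inspecting unwind information on a 64-bit image;
# "sample64.exe" is a hypothetical PE32+ file with an exception directory.
#
#     import pefile
#     pe = pefile.PE("sample64.exe")
#     for rf in getattr(pe, "DIRECTORY_ENTRY_EXCEPTION", []):
#         print(hex(rf.struct.BeginAddress), hex(rf.struct.EndAddress))
#         if getattr(rf, "unwindinfo", None) is not None:
#             print("\n".join(rf.unwindinfo.dump()))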
  1602. class PrologEpilogOp:
  1603. """Meant as an abstract class representing a generic unwind code.
1604. There is a subclass of PrologEpilogOp for each member of the UNWIND_OP_CODES enum.
  1605. """
  1606. def initialize(self, unw_code, data, unw_info, file_offset):
  1607. self.struct = StructureWithBitfields(
  1608. self._get_format(unw_code), file_offset=file_offset
  1609. )
  1610. self.struct.__unpack__(data)
  1611. def length_in_code_structures(self, unw_code, unw_info):
  1612. """Computes how many UNWIND_CODE structures UNWIND_CODE occupies.
  1613. May be called before initialize() and, for that reason, should not rely on
  1614. the values of intance attributes.
  1615. """
  1616. return 1
  1617. def is_valid(self):
  1618. return True
  1619. def _get_format(self, unw_code):
  1620. return ("UNWIND_CODE", ("B,CodeOffset", "B:4,UnwindOp", "B:4,OpInfo"))
  1621. class PrologEpilogOpPushReg(PrologEpilogOp):
  1622. """UWOP_PUSH_NONVOL"""
  1623. def _get_format(self, unw_code):
  1624. return ("UNWIND_CODE_PUSH_NONVOL", ("B,CodeOffset", "B:4,UnwindOp", "B:4,Reg"))
  1625. def __str__(self):
  1626. return ".PUSHREG " + REGISTERS[self.struct.Reg]
  1627. class PrologEpilogOpAllocLarge(PrologEpilogOp):
  1628. """UWOP_ALLOC_LARGE"""
  1629. def _get_format(self, unw_code):
  1630. return (
  1631. "UNWIND_CODE_ALLOC_LARGE",
  1632. (
  1633. "B,CodeOffset",
  1634. "B:4,UnwindOp",
  1635. "B:4,OpInfo",
  1636. "H,AllocSizeInQwords" if unw_code.OpInfo == 0 else "I,AllocSize",
  1637. ),
  1638. )
  1639. def length_in_code_structures(self, unw_code, unw_info):
  1640. return 2 if unw_code.OpInfo == 0 else 3
  1641. def get_alloc_size(self):
  1642. return (
  1643. self.struct.AllocSizeInQwords * 8
  1644. if self.struct.OpInfo == 0
  1645. else self.struct.AllocSize
  1646. )
  1647. def __str__(self):
  1648. return ".ALLOCSTACK " + hex(self.get_alloc_size())
  1649. class PrologEpilogOpAllocSmall(PrologEpilogOp):
  1650. """UWOP_ALLOC_SMALL"""
  1651. def _get_format(self, unw_code):
  1652. return (
  1653. "UNWIND_CODE_ALLOC_SMALL",
  1654. ("B,CodeOffset", "B:4,UnwindOp", "B:4,AllocSizeInQwordsMinus8"),
  1655. )
  1656. def get_alloc_size(self):
  1657. return self.struct.AllocSizeInQwordsMinus8 * 8 + 8
  1658. def __str__(self):
  1659. return ".ALLOCSTACK " + hex(self.get_alloc_size())
  1660. class PrologEpilogOpSetFP(PrologEpilogOp):
  1661. """UWOP_SET_FPREG"""
  1662. def initialize(self, unw_code, data, unw_info, file_offset):
  1663. super(PrologEpilogOpSetFP, self).initialize(
  1664. unw_code, data, unw_info, file_offset
  1665. )
  1666. self._frame_register = unw_info.FrameRegister
  1667. self._frame_offset = unw_info.FrameOffset * 16
  1668. def __str__(self):
  1669. return (
  1670. ".SETFRAME "
  1671. + REGISTERS[self._frame_register]
  1672. + ", "
  1673. + hex(self._frame_offset)
  1674. )
  1675. class PrologEpilogOpSaveReg(PrologEpilogOp):
  1676. """UWOP_SAVE_NONVOL"""
  1677. def length_in_code_structures(self, unwcode, unw_info):
  1678. return 2
  1679. def get_offset(self):
  1680. return self.struct.OffsetInQwords * 8
  1681. def _get_format(self, unw_code):
  1682. return (
  1683. "UNWIND_CODE_SAVE_NONVOL",
  1684. ("B,CodeOffset", "B:4,UnwindOp", "B:4,Reg", "H,OffsetInQwords"),
  1685. )
  1686. def __str__(self):
  1687. return ".SAVEREG " + REGISTERS[self.struct.Reg] + ", " + hex(self.get_offset())
  1688. class PrologEpilogOpSaveRegFar(PrologEpilogOp):
  1689. """UWOP_SAVE_NONVOL_FAR"""
  1690. def length_in_code_structures(self, unw_code, unw_info):
  1691. return 3
  1692. def get_offset(self):
  1693. return self.struct.Offset
  1694. def _get_format(self, unw_code):
  1695. return (
  1696. "UNWIND_CODE_SAVE_NONVOL_FAR",
  1697. ("B,CodeOffset", "B:4,UnwindOp", "B:4,Reg", "I,Offset"),
  1698. )
  1699. def __str__(self):
  1700. return ".SAVEREG " + REGISTERS[self.struct.Reg] + ", " + hex(self.struct.Offset)
  1701. class PrologEpilogOpSaveXMM(PrologEpilogOp):
  1702. """UWOP_SAVE_XMM128"""
  1703. def _get_format(self, unw_code):
  1704. return (
  1705. "UNWIND_CODE_SAVE_XMM128",
  1706. ("B,CodeOffset", "B:4,UnwindOp", "B:4,Reg", "H,OffsetIn2Qwords"),
  1707. )
  1708. def length_in_code_structures(self, unw_code, unw_info):
  1709. return 2
  1710. def get_offset(self):
  1711. return self.struct.OffsetIn2Qwords * 16
  1712. def __str__(self):
  1713. return ".SAVEXMM128 XMM" + str(self.struct.Reg) + ", " + hex(self.get_offset())
  1714. class PrologEpilogOpSaveXMMFar(PrologEpilogOp):
  1715. """UWOP_SAVE_XMM128_FAR"""
  1716. def _get_format(self, unw_code):
  1717. return (
  1718. "UNWIND_CODE_SAVE_XMM128_FAR",
  1719. ("B,CodeOffset", "B:4,UnwindOp", "B:4,Reg", "I,Offset"),
  1720. )
  1721. def length_in_code_structures(self, unw_code, unw_info):
  1722. return 3
  1723. def get_offset(self):
  1724. return self.struct.Offset
  1725. def __str__(self):
  1726. return ".SAVEXMM128 XMM" + str(self.struct.Reg) + ", " + hex(self.struct.Offset)
  1727. class PrologEpilogOpPushFrame(PrologEpilogOp):
  1728. """UWOP_PUSH_MACHFRAME"""
  1729. def __str__(self):
  1730. return ".PUSHFRAME" + (" <code>" if self.struct.OpInfo else "")
  1731. class PrologEpilogOpEpilogMarker(PrologEpilogOp):
  1732. """UWOP_EPILOG"""
  1733. def initialize(self, unw_code, data, unw_info, file_offset):
  1734. self._long_offst = True
  1735. self._first = not hasattr(unw_info, "SizeOfEpilog")
  1736. super(PrologEpilogOpEpilogMarker, self).initialize(
  1737. unw_code, data, unw_info, file_offset
  1738. )
  1739. if self._first:
  1740. setattr(unw_info, "SizeOfEpilog", self.struct.Size)
  1741. self._long_offst = unw_code.OpInfo & 1 == 0
  1742. self._epilog_size = unw_info.SizeOfEpilog
  1743. def _get_format(self, unw_code):
1744. # check if this is the first epilog code encountered; if so, its record
1745. # will contain the size of the epilog
  1746. if self._first:
  1747. return (
  1748. "UNWIND_CODE_EPILOG",
  1749. ("B,OffsetLow,Size", "B:4,UnwindOp", "B:4,Flags")
  1750. if unw_code.OpInfo & 1 == 1
  1751. else (
  1752. "B,Size",
  1753. "B:4,UnwindOp",
  1754. "B:4,Flags",
  1755. "B,OffsetLow",
  1756. "B:4,Unused",
  1757. "B:4,OffsetHigh",
  1758. ),
  1759. )
  1760. else:
  1761. return (
  1762. "UNWIND_CODE_EPILOG",
  1763. ("B,OffsetLow", "B:4,UnwindOp", "B:4,OffsetHigh"),
  1764. )
  1765. def length_in_code_structures(self, unw_code, unw_info):
  1766. return (
  1767. 2
  1768. if not hasattr(unw_info, "SizeOfEpilog") and (unw_code.OpInfo & 1) == 0
  1769. else 1
  1770. )
  1771. def get_offset(self):
  1772. return self.struct.OffsetLow | (
  1773. self.struct.OffsetHigh << 8 if self._long_offst else 0
  1774. )
  1775. def is_valid(self):
  1776. return self.get_offset() > 0
  1777. def __str__(self):
  1778. # the EPILOG sequence may have a terminating all-zeros entry
  1779. return (
  1780. "EPILOG: size="
  1781. + hex(self._epilog_size)
  1782. + ", offset from the end=-"
  1783. + hex(self.get_offset())
  1784. if self.get_offset() > 0
  1785. else ""
  1786. )
  1787. class PrologEpilogOpsFactory:
  1788. """A factory for creating unwind codes based on the value of UnwindOp"""
  1789. _class_dict = {
  1790. UWOP_PUSH_NONVOL: PrologEpilogOpPushReg,
  1791. UWOP_ALLOC_LARGE: PrologEpilogOpAllocLarge,
  1792. UWOP_ALLOC_SMALL: PrologEpilogOpAllocSmall,
  1793. UWOP_SET_FPREG: PrologEpilogOpSetFP,
  1794. UWOP_SAVE_NONVOL: PrologEpilogOpSaveReg,
  1795. UWOP_SAVE_NONVOL_FAR: PrologEpilogOpSaveRegFar,
  1796. UWOP_SAVE_XMM128: PrologEpilogOpSaveXMM,
  1797. UWOP_SAVE_XMM128_FAR: PrologEpilogOpSaveXMMFar,
  1798. UWOP_PUSH_MACHFRAME: PrologEpilogOpPushFrame,
  1799. UWOP_EPILOG: PrologEpilogOpEpilogMarker,
  1800. }
  1801. @staticmethod
  1802. def create(unwcode):
  1803. code = unwcode.UnwindOp
  1804. return (
  1805. PrologEpilogOpsFactory._class_dict[code]()
  1806. if code in PrologEpilogOpsFactory._class_dict
  1807. else None
  1808. )
  1809. # Valid FAT32 8.3 short filename characters according to:
  1810. # http://en.wikipedia.org/wiki/8.3_filename
  1811. # This will help decide whether DLL ASCII names are likely
  1812. # to be valid or otherwise corrupt data
  1813. #
1814. # The filename length is not checked because the DLL's filename
1815. # can be longer than the 8.3 format
  1816. allowed_filename = b(
  1817. string.ascii_lowercase
  1818. + string.ascii_uppercase
  1819. + string.digits
  1820. + "!#$%&'()-@^_`{}~+,.;=[]"
  1821. )
  1822. def is_valid_dos_filename(s):
  1823. if s is None or not isinstance(s, (str, bytes, bytearray)):
  1824. return False
  1825. # Allow path separators as import names can contain directories.
  1826. allowed = allowed_filename + b"\\/"
  1827. return all(c in allowed for c in set(s))
  1828. # Check if an imported name uses the valid accepted characters expected in
  1829. # mangled function names. If the symbol's characters don't fall within this
  1830. # charset we will assume the name is invalid.
  1831. allowed_function_name = b(
  1832. string.ascii_lowercase + string.ascii_uppercase + string.digits + "_?@$()<>"
  1833. )
  1834. @lru_cache(maxsize=2048)
  1835. def is_valid_function_name(s):
  1836. return (
  1837. s is not None
  1838. and isinstance(s, (str, bytes, bytearray))
  1839. and all(c in allowed_function_name for c in set(s))
  1840. )
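# Illustrative usage sketch (not part of the original module): both helpers
# simply check whether every character of a bytes name falls within the
# allowed set; the names below are made up for illustration.
#
#     is_valid_dos_filename(b"KERNEL32.dll")      # -> True
#     is_valid_dos_filename(b"bad\x00name.dll")   # -> False
#     is_valid_function_name(b"GetProcAddress")   # -> True
#     is_valid_function_name(b"not a symbol!")    # -> False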
  1841. class PE:
  1842. """A Portable Executable representation.
  1843. This class provides access to most of the information in a PE file.
  1844. It expects to be supplied the name of the file to load or PE data
  1845. to process and an optional argument 'fast_load' (False by default)
1846. which controls whether to load all the directory information,
  1847. which can be quite time consuming.
  1848. pe = pefile.PE('module.dll')
  1849. pe = pefile.PE(name='module.dll')
  1850. would load 'module.dll' and process it. If the data is already
  1851. available in a buffer the same can be achieved with:
  1852. pe = pefile.PE(data=module_dll_data)
  1853. The "fast_load" can be set to a default by setting its value in the
  1854. module itself by means, for instance, of a "pefile.fast_load = True".
  1855. That will make all the subsequent instances not to load the
  1856. whole PE structure. The "full_load" method can be used to parse
  1857. the missing data at a later stage.
  1858. Basic headers information will be available in the attributes:
  1859. DOS_HEADER
  1860. NT_HEADERS
  1861. FILE_HEADER
  1862. OPTIONAL_HEADER
  1863. All of them will contain among their attributes the members of the
  1864. corresponding structures as defined in WINNT.H
  1865. The raw data corresponding to the header (from the beginning of the
  1866. file up to the start of the first section) will be available in the
  1867. instance's attribute 'header' as a string.
  1868. The sections will be available as a list in the 'sections' attribute.
  1869. Each entry will contain as attributes all the structure's members.
  1870. Directory entries will be available as attributes (if they exist):
  1871. (no other entries are processed at this point)
  1872. DIRECTORY_ENTRY_IMPORT (list of ImportDescData instances)
  1873. DIRECTORY_ENTRY_EXPORT (ExportDirData instance)
  1874. DIRECTORY_ENTRY_RESOURCE (ResourceDirData instance)
  1875. DIRECTORY_ENTRY_DEBUG (list of DebugData instances)
  1876. DIRECTORY_ENTRY_BASERELOC (list of BaseRelocationData instances)
  1877. DIRECTORY_ENTRY_TLS
1878. DIRECTORY_ENTRY_BOUND_IMPORT (list of BoundImportDescData instances)
  1879. The following dictionary attributes provide ways of mapping different
  1880. constants. They will accept the numeric value and return the string
  1881. representation and the opposite, feed in the string and get the
  1882. numeric constant:
  1883. DIRECTORY_ENTRY
  1884. IMAGE_CHARACTERISTICS
  1885. SECTION_CHARACTERISTICS
  1886. DEBUG_TYPE
  1887. SUBSYSTEM_TYPE
  1888. MACHINE_TYPE
  1889. RELOCATION_TYPE
  1890. RESOURCE_TYPE
  1891. LANG
  1892. SUBLANG
  1893. """
  1894. #
  1895. # Format specifications for PE structures.
  1896. #
  1897. __IMAGE_DOS_HEADER_format__ = (
  1898. "IMAGE_DOS_HEADER",
  1899. (
  1900. "H,e_magic",
  1901. "H,e_cblp",
  1902. "H,e_cp",
  1903. "H,e_crlc",
  1904. "H,e_cparhdr",
  1905. "H,e_minalloc",
  1906. "H,e_maxalloc",
  1907. "H,e_ss",
  1908. "H,e_sp",
  1909. "H,e_csum",
  1910. "H,e_ip",
  1911. "H,e_cs",
  1912. "H,e_lfarlc",
  1913. "H,e_ovno",
  1914. "8s,e_res",
  1915. "H,e_oemid",
  1916. "H,e_oeminfo",
  1917. "20s,e_res2",
  1918. "I,e_lfanew",
  1919. ),
  1920. )
  1921. __IMAGE_FILE_HEADER_format__ = (
  1922. "IMAGE_FILE_HEADER",
  1923. (
  1924. "H,Machine",
  1925. "H,NumberOfSections",
  1926. "I,TimeDateStamp",
  1927. "I,PointerToSymbolTable",
  1928. "I,NumberOfSymbols",
  1929. "H,SizeOfOptionalHeader",
  1930. "H,Characteristics",
  1931. ),
  1932. )
  1933. __IMAGE_DATA_DIRECTORY_format__ = (
  1934. "IMAGE_DATA_DIRECTORY",
  1935. ("I,VirtualAddress", "I,Size"),
  1936. )
  1937. __IMAGE_OPTIONAL_HEADER_format__ = (
  1938. "IMAGE_OPTIONAL_HEADER",
  1939. (
  1940. "H,Magic",
  1941. "B,MajorLinkerVersion",
  1942. "B,MinorLinkerVersion",
  1943. "I,SizeOfCode",
  1944. "I,SizeOfInitializedData",
  1945. "I,SizeOfUninitializedData",
  1946. "I,AddressOfEntryPoint",
  1947. "I,BaseOfCode",
  1948. "I,BaseOfData",
  1949. "I,ImageBase",
  1950. "I,SectionAlignment",
  1951. "I,FileAlignment",
  1952. "H,MajorOperatingSystemVersion",
  1953. "H,MinorOperatingSystemVersion",
  1954. "H,MajorImageVersion",
  1955. "H,MinorImageVersion",
  1956. "H,MajorSubsystemVersion",
  1957. "H,MinorSubsystemVersion",
  1958. "I,Reserved1",
  1959. "I,SizeOfImage",
  1960. "I,SizeOfHeaders",
  1961. "I,CheckSum",
  1962. "H,Subsystem",
  1963. "H,DllCharacteristics",
  1964. "I,SizeOfStackReserve",
  1965. "I,SizeOfStackCommit",
  1966. "I,SizeOfHeapReserve",
  1967. "I,SizeOfHeapCommit",
  1968. "I,LoaderFlags",
  1969. "I,NumberOfRvaAndSizes",
  1970. ),
  1971. )
  1972. __IMAGE_OPTIONAL_HEADER64_format__ = (
  1973. "IMAGE_OPTIONAL_HEADER64",
  1974. (
  1975. "H,Magic",
  1976. "B,MajorLinkerVersion",
  1977. "B,MinorLinkerVersion",
  1978. "I,SizeOfCode",
  1979. "I,SizeOfInitializedData",
  1980. "I,SizeOfUninitializedData",
  1981. "I,AddressOfEntryPoint",
  1982. "I,BaseOfCode",
  1983. "Q,ImageBase",
  1984. "I,SectionAlignment",
  1985. "I,FileAlignment",
  1986. "H,MajorOperatingSystemVersion",
  1987. "H,MinorOperatingSystemVersion",
  1988. "H,MajorImageVersion",
  1989. "H,MinorImageVersion",
  1990. "H,MajorSubsystemVersion",
  1991. "H,MinorSubsystemVersion",
  1992. "I,Reserved1",
  1993. "I,SizeOfImage",
  1994. "I,SizeOfHeaders",
  1995. "I,CheckSum",
  1996. "H,Subsystem",
  1997. "H,DllCharacteristics",
  1998. "Q,SizeOfStackReserve",
  1999. "Q,SizeOfStackCommit",
  2000. "Q,SizeOfHeapReserve",
  2001. "Q,SizeOfHeapCommit",
  2002. "I,LoaderFlags",
  2003. "I,NumberOfRvaAndSizes",
  2004. ),
  2005. )
  2006. __IMAGE_NT_HEADERS_format__ = ("IMAGE_NT_HEADERS", ("I,Signature",))
  2007. __IMAGE_SECTION_HEADER_format__ = (
  2008. "IMAGE_SECTION_HEADER",
  2009. (
  2010. "8s,Name",
  2011. "I,Misc,Misc_PhysicalAddress,Misc_VirtualSize",
  2012. "I,VirtualAddress",
  2013. "I,SizeOfRawData",
  2014. "I,PointerToRawData",
  2015. "I,PointerToRelocations",
  2016. "I,PointerToLinenumbers",
  2017. "H,NumberOfRelocations",
  2018. "H,NumberOfLinenumbers",
  2019. "I,Characteristics",
  2020. ),
  2021. )
  2022. __IMAGE_DELAY_IMPORT_DESCRIPTOR_format__ = (
  2023. "IMAGE_DELAY_IMPORT_DESCRIPTOR",
  2024. (
  2025. "I,grAttrs",
  2026. "I,szName",
  2027. "I,phmod",
  2028. "I,pIAT",
  2029. "I,pINT",
  2030. "I,pBoundIAT",
  2031. "I,pUnloadIAT",
  2032. "I,dwTimeStamp",
  2033. ),
  2034. )
  2035. __IMAGE_IMPORT_DESCRIPTOR_format__ = (
  2036. "IMAGE_IMPORT_DESCRIPTOR",
  2037. (
  2038. "I,OriginalFirstThunk,Characteristics",
  2039. "I,TimeDateStamp",
  2040. "I,ForwarderChain",
  2041. "I,Name",
  2042. "I,FirstThunk",
  2043. ),
  2044. )
  2045. __IMAGE_EXPORT_DIRECTORY_format__ = (
  2046. "IMAGE_EXPORT_DIRECTORY",
  2047. (
  2048. "I,Characteristics",
  2049. "I,TimeDateStamp",
  2050. "H,MajorVersion",
  2051. "H,MinorVersion",
  2052. "I,Name",
  2053. "I,Base",
  2054. "I,NumberOfFunctions",
  2055. "I,NumberOfNames",
  2056. "I,AddressOfFunctions",
  2057. "I,AddressOfNames",
  2058. "I,AddressOfNameOrdinals",
  2059. ),
  2060. )
  2061. __IMAGE_RESOURCE_DIRECTORY_format__ = (
  2062. "IMAGE_RESOURCE_DIRECTORY",
  2063. (
  2064. "I,Characteristics",
  2065. "I,TimeDateStamp",
  2066. "H,MajorVersion",
  2067. "H,MinorVersion",
  2068. "H,NumberOfNamedEntries",
  2069. "H,NumberOfIdEntries",
  2070. ),
  2071. )
  2072. __IMAGE_RESOURCE_DIRECTORY_ENTRY_format__ = (
  2073. "IMAGE_RESOURCE_DIRECTORY_ENTRY",
  2074. ("I,Name", "I,OffsetToData"),
  2075. )
  2076. __IMAGE_RESOURCE_DATA_ENTRY_format__ = (
  2077. "IMAGE_RESOURCE_DATA_ENTRY",
  2078. ("I,OffsetToData", "I,Size", "I,CodePage", "I,Reserved"),
  2079. )
  2080. __VS_VERSIONINFO_format__ = (
  2081. "VS_VERSIONINFO",
  2082. ("H,Length", "H,ValueLength", "H,Type"),
  2083. )
  2084. __VS_FIXEDFILEINFO_format__ = (
  2085. "VS_FIXEDFILEINFO",
  2086. (
  2087. "I,Signature",
  2088. "I,StrucVersion",
  2089. "I,FileVersionMS",
  2090. "I,FileVersionLS",
  2091. "I,ProductVersionMS",
  2092. "I,ProductVersionLS",
  2093. "I,FileFlagsMask",
  2094. "I,FileFlags",
  2095. "I,FileOS",
  2096. "I,FileType",
  2097. "I,FileSubtype",
  2098. "I,FileDateMS",
  2099. "I,FileDateLS",
  2100. ),
  2101. )
  2102. __StringFileInfo_format__ = (
  2103. "StringFileInfo",
  2104. ("H,Length", "H,ValueLength", "H,Type"),
  2105. )
  2106. __StringTable_format__ = ("StringTable", ("H,Length", "H,ValueLength", "H,Type"))
  2107. __String_format__ = ("String", ("H,Length", "H,ValueLength", "H,Type"))
  2108. __Var_format__ = ("Var", ("H,Length", "H,ValueLength", "H,Type"))
  2109. __IMAGE_THUNK_DATA_format__ = (
  2110. "IMAGE_THUNK_DATA",
  2111. ("I,ForwarderString,Function,Ordinal,AddressOfData",),
  2112. )
  2113. __IMAGE_THUNK_DATA64_format__ = (
  2114. "IMAGE_THUNK_DATA",
  2115. ("Q,ForwarderString,Function,Ordinal,AddressOfData",),
  2116. )
  2117. __IMAGE_DEBUG_DIRECTORY_format__ = (
  2118. "IMAGE_DEBUG_DIRECTORY",
  2119. (
  2120. "I,Characteristics",
  2121. "I,TimeDateStamp",
  2122. "H,MajorVersion",
  2123. "H,MinorVersion",
  2124. "I,Type",
  2125. "I,SizeOfData",
  2126. "I,AddressOfRawData",
  2127. "I,PointerToRawData",
  2128. ),
  2129. )
  2130. __IMAGE_BASE_RELOCATION_format__ = (
  2131. "IMAGE_BASE_RELOCATION",
  2132. ("I,VirtualAddress", "I,SizeOfBlock"),
  2133. )
  2134. __IMAGE_BASE_RELOCATION_ENTRY_format__ = (
  2135. "IMAGE_BASE_RELOCATION_ENTRY",
  2136. ("H,Data",),
  2137. )
  2138. __IMAGE_TLS_DIRECTORY_format__ = (
  2139. "IMAGE_TLS_DIRECTORY",
  2140. (
  2141. "I,StartAddressOfRawData",
  2142. "I,EndAddressOfRawData",
  2143. "I,AddressOfIndex",
  2144. "I,AddressOfCallBacks",
  2145. "I,SizeOfZeroFill",
  2146. "I,Characteristics",
  2147. ),
  2148. )
  2149. __IMAGE_TLS_DIRECTORY64_format__ = (
  2150. "IMAGE_TLS_DIRECTORY",
  2151. (
  2152. "Q,StartAddressOfRawData",
  2153. "Q,EndAddressOfRawData",
  2154. "Q,AddressOfIndex",
  2155. "Q,AddressOfCallBacks",
  2156. "I,SizeOfZeroFill",
  2157. "I,Characteristics",
  2158. ),
  2159. )
  2160. __IMAGE_LOAD_CONFIG_DIRECTORY_format__ = (
  2161. "IMAGE_LOAD_CONFIG_DIRECTORY",
  2162. (
  2163. "I,Size",
  2164. "I,TimeDateStamp",
  2165. "H,MajorVersion",
  2166. "H,MinorVersion",
  2167. "I,GlobalFlagsClear",
  2168. "I,GlobalFlagsSet",
  2169. "I,CriticalSectionDefaultTimeout",
  2170. "I,DeCommitFreeBlockThreshold",
  2171. "I,DeCommitTotalFreeThreshold",
  2172. "I,LockPrefixTable",
  2173. "I,MaximumAllocationSize",
  2174. "I,VirtualMemoryThreshold",
  2175. "I,ProcessHeapFlags",
  2176. "I,ProcessAffinityMask",
  2177. "H,CSDVersion",
  2178. "H,Reserved1",
  2179. "I,EditList",
  2180. "I,SecurityCookie",
  2181. "I,SEHandlerTable",
  2182. "I,SEHandlerCount",
  2183. "I,GuardCFCheckFunctionPointer",
  2184. "I,Reserved2",
  2185. "I,GuardCFFunctionTable",
  2186. "I,GuardCFFunctionCount",
  2187. "I,GuardFlags",
  2188. ),
  2189. )
  2190. __IMAGE_LOAD_CONFIG_DIRECTORY64_format__ = (
  2191. "IMAGE_LOAD_CONFIG_DIRECTORY",
  2192. (
  2193. "I,Size",
  2194. "I,TimeDateStamp",
  2195. "H,MajorVersion",
  2196. "H,MinorVersion",
  2197. "I,GlobalFlagsClear",
  2198. "I,GlobalFlagsSet",
  2199. "I,CriticalSectionDefaultTimeout",
  2200. "Q,DeCommitFreeBlockThreshold",
  2201. "Q,DeCommitTotalFreeThreshold",
  2202. "Q,LockPrefixTable",
  2203. "Q,MaximumAllocationSize",
  2204. "Q,VirtualMemoryThreshold",
  2205. "Q,ProcessAffinityMask",
  2206. "I,ProcessHeapFlags",
  2207. "H,CSDVersion",
  2208. "H,Reserved1",
  2209. "Q,EditList",
  2210. "Q,SecurityCookie",
  2211. "Q,SEHandlerTable",
  2212. "Q,SEHandlerCount",
  2213. "Q,GuardCFCheckFunctionPointer",
  2214. "Q,Reserved2",
  2215. "Q,GuardCFFunctionTable",
  2216. "Q,GuardCFFunctionCount",
  2217. "I,GuardFlags",
  2218. ),
  2219. )
  2220. __IMAGE_BOUND_IMPORT_DESCRIPTOR_format__ = (
  2221. "IMAGE_BOUND_IMPORT_DESCRIPTOR",
  2222. ("I,TimeDateStamp", "H,OffsetModuleName", "H,NumberOfModuleForwarderRefs"),
  2223. )
  2224. __IMAGE_BOUND_FORWARDER_REF_format__ = (
  2225. "IMAGE_BOUND_FORWARDER_REF",
  2226. ("I,TimeDateStamp", "H,OffsetModuleName", "H,Reserved"),
  2227. )
  2228. __RUNTIME_FUNCTION_format__ = (
  2229. "RUNTIME_FUNCTION",
  2230. ("I,BeginAddress", "I,EndAddress", "I,UnwindData"),
  2231. )
    def __init__(
        self,
        name=None,
        data=None,
        fast_load=None,
        max_symbol_exports=MAX_SYMBOL_EXPORT_COUNT,
        max_repeated_symbol=120,
    ):
        self.max_symbol_exports = max_symbol_exports
        self.max_repeated_symbol = max_repeated_symbol
        self.sections = []
        self.__warnings = []
        self.PE_TYPE = None

        if name is None and data is None:
            raise ValueError("Must supply either name or data")

        # This list will keep track of all the structures created.
        # That will allow for an easy iteration through the list
        # in order to save the modifications made
        self.__structures__ = []
        self.__from_file = None

        # We only want to print these warnings once
        self.FileAlignment_Warning = False
        self.SectionAlignment_Warning = False

        # Count of total resource entries across nested tables
        self.__total_resource_entries_count = 0

        # Sum of the size of all resource entries parsed, which should not
        # exceed the file size.
        self.__total_resource_bytes = 0

        # The number of imports parsed in this file
        self.__total_import_symbols = 0

        fast_load = fast_load or globals()["fast_load"]
        try:
            self.__parse__(name, data, fast_load)
        except:
            self.close()
            raise

    def close(self):
        if (
            self.__from_file is True
            and hasattr(self, "__data__")
            and (
                (isinstance(mmap.mmap, type) and isinstance(self.__data__, mmap.mmap))
                or "mmap.mmap" in repr(type(self.__data__))
            )
        ):
            self.__data__.close()
            del self.__data__

    def __unpack_data__(self, format, data, file_offset):
        """Apply structure format to raw data.

        Returns an unpacked structure object if successful, None otherwise.
        """
        structure = Structure(format, file_offset=file_offset)
        try:
            structure.__unpack__(data)
        except PEFormatError as err:
            self.__warnings.append(
                'Corrupt header "{0}" at file offset {1}. Exception: {2}'.format(
                    format[0], file_offset, err
                )
            )
            return None

        self.__structures__.append(structure)
        return structure
  2295. def __parse__(self, fname, data, fast_load):
  2296. """Parse a Portable Executable file.
  2297. Loads a PE file, parsing all its structures and making them available
  2298. through the instance's attributes.
  2299. """
  2300. if fname is not None:
  2301. stat = os.stat(fname)
  2302. if stat.st_size == 0:
  2303. raise PEFormatError("The file is empty")
  2304. fd = None
  2305. try:
  2306. fd = open(fname, "rb")
  2307. self.fileno = fd.fileno()
  2308. if hasattr(mmap, "MAP_PRIVATE"):
  2309. # Unix
  2310. self.__data__ = mmap.mmap(self.fileno, 0, mmap.MAP_PRIVATE)
  2311. else:
  2312. # Windows
  2313. self.__data__ = mmap.mmap(self.fileno, 0, access=mmap.ACCESS_READ)
  2314. self.__from_file = True
  2315. except IOError as excp:
  2316. exception_msg = "{0}".format(excp)
  2317. exception_msg = exception_msg and (": %s" % exception_msg)
  2318. raise Exception(
  2319. "Unable to access file '{0}'{1}".format(fname, exception_msg)
  2320. )
  2321. finally:
  2322. if fd is not None:
  2323. fd.close()
  2324. elif data is not None:
  2325. self.__data__ = data
  2326. self.__from_file = False
  2327. # Resources should not overlap each other, so they should not exceed the
  2328. # file size.
  2329. self.__resource_size_limit_upperbounds = len(self.__data__)
  2330. self.__resource_size_limit_reached = False
  2331. if not fast_load:
  2332. for byte, byte_count in Counter(bytearray(self.__data__)).items():
  2333. # Only report the cases where a byte makes up for more than 50% (if
  2334. # zero) or 15% (if non-zero) of the file's contents. There are
  2335. # legitimate PEs where 0x00 bytes are close to 50% of the whole
  2336. # file's contents.
  2337. if (byte == 0 and 1.0 * byte_count / len(self.__data__) > 0.5) or (
  2338. byte != 0 and 1.0 * byte_count / len(self.__data__) > 0.15
  2339. ):
  2340. self.__warnings.append(
  2341. (
  2342. "Byte 0x{0:02x} makes up {1:.4f}% of the file's contents."
  2343. " This may indicate truncation / malformation."
  2344. ).format(byte, 100.0 * byte_count / len(self.__data__))
  2345. )
  2346. dos_header_data = self.__data__[:64]
  2347. if len(dos_header_data) != 64:
  2348. raise PEFormatError(
  2349. "Unable to read the DOS Header, possibly a truncated file."
  2350. )
  2351. self.DOS_HEADER = self.__unpack_data__(
  2352. self.__IMAGE_DOS_HEADER_format__, dos_header_data, file_offset=0
  2353. )
        if self.DOS_HEADER and self.DOS_HEADER.e_magic == IMAGE_DOSZM_SIGNATURE:
            raise PEFormatError("Probably a ZM Executable (not a PE file).")
        if not self.DOS_HEADER or self.DOS_HEADER.e_magic != IMAGE_DOS_SIGNATURE:
            raise PEFormatError("DOS Header magic not found.")
  2358. # OC Patch:
  2359. # Check for sane value in e_lfanew
  2360. #
  2361. if self.DOS_HEADER.e_lfanew > len(self.__data__):
  2362. raise PEFormatError("Invalid e_lfanew value, probably not a PE file")
  2363. nt_headers_offset = self.DOS_HEADER.e_lfanew
  2364. self.NT_HEADERS = self.__unpack_data__(
  2365. self.__IMAGE_NT_HEADERS_format__,
  2366. self.__data__[nt_headers_offset : nt_headers_offset + 8],
  2367. file_offset=nt_headers_offset,
  2368. )
  2369. # We better check the signature right here, before the file screws
  2370. # around with sections:
  2371. # OC Patch:
  2372. # Some malware will cause the Signature value to not exist at all
  2373. if not self.NT_HEADERS or not self.NT_HEADERS.Signature:
  2374. raise PEFormatError("NT Headers not found.")
  2375. if (0xFFFF & self.NT_HEADERS.Signature) == IMAGE_NE_SIGNATURE:
  2376. raise PEFormatError("Invalid NT Headers signature. Probably a NE file")
  2377. if (0xFFFF & self.NT_HEADERS.Signature) == IMAGE_LE_SIGNATURE:
  2378. raise PEFormatError("Invalid NT Headers signature. Probably a LE file")
  2379. if (0xFFFF & self.NT_HEADERS.Signature) == IMAGE_LX_SIGNATURE:
  2380. raise PEFormatError("Invalid NT Headers signature. Probably a LX file")
  2381. if (0xFFFF & self.NT_HEADERS.Signature) == IMAGE_TE_SIGNATURE:
  2382. raise PEFormatError("Invalid NT Headers signature. Probably a TE file")
  2383. if self.NT_HEADERS.Signature != IMAGE_NT_SIGNATURE:
  2384. raise PEFormatError("Invalid NT Headers signature.")
  2385. self.FILE_HEADER = self.__unpack_data__(
  2386. self.__IMAGE_FILE_HEADER_format__,
  2387. self.__data__[nt_headers_offset + 4 : nt_headers_offset + 4 + 32],
  2388. file_offset=nt_headers_offset + 4,
  2389. )
  2390. image_flags = retrieve_flags(IMAGE_CHARACTERISTICS, "IMAGE_FILE_")
  2391. if not self.FILE_HEADER:
  2392. raise PEFormatError("File Header missing")
        # Set the image's flags according to the Characteristics member
        set_flags(self.FILE_HEADER, self.FILE_HEADER.Characteristics, image_flags)
  2395. optional_header_offset = nt_headers_offset + 4 + self.FILE_HEADER.sizeof()
  2396. # Note: location of sections can be controlled from PE header:
  2397. sections_offset = optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader
  2398. self.OPTIONAL_HEADER = self.__unpack_data__(
  2399. self.__IMAGE_OPTIONAL_HEADER_format__,
  2400. # Read up to 256 bytes to allow creating a copy of too much data
  2401. self.__data__[optional_header_offset : optional_header_offset + 256],
  2402. file_offset=optional_header_offset,
  2403. )
  2404. # According to solardesigner's findings for his
  2405. # Tiny PE project, the optional header does not
  2406. # need fields beyond "Subsystem" in order to be
  2407. # loadable by the Windows loader (given that zeros
  2408. # are acceptable values and the header is loaded
  2409. # in a zeroed memory page)
  2410. # If trying to parse a full Optional Header fails
  2411. # we try to parse it again with some 0 padding
  2412. #
  2413. MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69
  2414. if (
  2415. self.OPTIONAL_HEADER is None
  2416. and len(
  2417. self.__data__[optional_header_offset : optional_header_offset + 0x200]
  2418. )
  2419. >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE
  2420. ):
  2421. # Add enough zeros to make up for the unused fields
  2422. #
  2423. padding_length = 128
  2424. # Create padding
  2425. #
  2426. padded_data = self.__data__[
  2427. optional_header_offset : optional_header_offset + 0x200
  2428. ] + (b"\0" * padding_length)
  2429. self.OPTIONAL_HEADER = self.__unpack_data__(
  2430. self.__IMAGE_OPTIONAL_HEADER_format__,
  2431. padded_data,
  2432. file_offset=optional_header_offset,
  2433. )
  2434. # Check the Magic in the OPTIONAL_HEADER and set the PE file
  2435. # type accordingly
  2436. #
  2437. if self.OPTIONAL_HEADER is not None:
  2438. if self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE:
  2439. self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE
  2440. elif self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE_PLUS:
  2441. self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE_PLUS
  2442. self.OPTIONAL_HEADER = self.__unpack_data__(
  2443. self.__IMAGE_OPTIONAL_HEADER64_format__,
  2444. self.__data__[
  2445. optional_header_offset : optional_header_offset + 0x200
  2446. ],
  2447. file_offset=optional_header_offset,
  2448. )
  2449. # Again, as explained above, we try to parse
  2450. # a reduced form of the Optional Header which
  2451. # is still valid despite not including all
  2452. # structure members
  2453. #
  2454. MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69 + 4
  2455. if (
  2456. self.OPTIONAL_HEADER is None
  2457. and len(
  2458. self.__data__[
  2459. optional_header_offset : optional_header_offset + 0x200
  2460. ]
  2461. )
  2462. >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE
  2463. ):
  2464. padding_length = 128
  2465. padded_data = self.__data__[
  2466. optional_header_offset : optional_header_offset + 0x200
  2467. ] + (b"\0" * padding_length)
  2468. self.OPTIONAL_HEADER = self.__unpack_data__(
  2469. self.__IMAGE_OPTIONAL_HEADER64_format__,
  2470. padded_data,
  2471. file_offset=optional_header_offset,
  2472. )
  2473. if not self.FILE_HEADER:
  2474. raise PEFormatError("File Header missing")
  2475. # OC Patch:
  2476. # Die gracefully if there is no OPTIONAL_HEADER field
  2477. # 975440f5ad5e2e4a92c4d9a5f22f75c1
  2478. if self.OPTIONAL_HEADER is None:
  2479. raise PEFormatError("No Optional Header found, invalid PE32 or PE32+ file.")
  2480. if self.PE_TYPE is None:
  2481. self.__warnings.append(
  2482. "Invalid type 0x{0:04x} in Optional Header.".format(
  2483. self.OPTIONAL_HEADER.Magic
  2484. )
  2485. )
  2486. dll_characteristics_flags = retrieve_flags(
  2487. DLL_CHARACTERISTICS, "IMAGE_DLLCHARACTERISTICS_"
  2488. )
        # Set the Dll Characteristics flags according to the DllCharacteristics member
  2490. set_flags(
  2491. self.OPTIONAL_HEADER,
  2492. self.OPTIONAL_HEADER.DllCharacteristics,
  2493. dll_characteristics_flags,
  2494. )
  2495. self.OPTIONAL_HEADER.DATA_DIRECTORY = []
  2496. # offset = (optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader)
  2497. offset = optional_header_offset + self.OPTIONAL_HEADER.sizeof()
  2498. self.NT_HEADERS.FILE_HEADER = self.FILE_HEADER
  2499. self.NT_HEADERS.OPTIONAL_HEADER = self.OPTIONAL_HEADER
        # Windows 8 specific check
        #
        if (
            self.OPTIONAL_HEADER.AddressOfEntryPoint
            < self.OPTIONAL_HEADER.SizeOfHeaders
        ):
            self.__warnings.append(
                "AddressOfEntryPoint is smaller than SizeOfHeaders: this file "
                "cannot run under Windows 8."
            )
        # The NumberOfRvaAndSizes is sanitized to stay within
        # reasonable limits so it can be cast to an int
        #
  2513. if self.OPTIONAL_HEADER.NumberOfRvaAndSizes > 0x10:
  2514. self.__warnings.append(
  2515. "Suspicious NumberOfRvaAndSizes in the Optional Header. "
  2516. "Normal values are never larger than 0x10, the value is: 0x%x"
  2517. % self.OPTIONAL_HEADER.NumberOfRvaAndSizes
  2518. )
  2519. MAX_ASSUMED_VALID_NUMBER_OF_RVA_AND_SIZES = 0x100
  2520. for i in range(int(0x7FFFFFFF & self.OPTIONAL_HEADER.NumberOfRvaAndSizes)):
  2521. if len(self.__data__) - offset == 0:
  2522. break
  2523. if len(self.__data__) - offset < 8:
  2524. data = self.__data__[offset:] + b"\0" * 8
  2525. else:
  2526. data = self.__data__[
  2527. offset : offset + MAX_ASSUMED_VALID_NUMBER_OF_RVA_AND_SIZES
  2528. ]
  2529. dir_entry = self.__unpack_data__(
  2530. self.__IMAGE_DATA_DIRECTORY_format__, data, file_offset=offset
  2531. )
  2532. if dir_entry is None:
  2533. break
  2534. # Would fail if missing an entry
  2535. # 1d4937b2fa4d84ad1bce0309857e70ca offending sample
  2536. try:
  2537. dir_entry.name = DIRECTORY_ENTRY[i]
  2538. except (KeyError, AttributeError):
  2539. break
  2540. offset += dir_entry.sizeof()
  2541. self.OPTIONAL_HEADER.DATA_DIRECTORY.append(dir_entry)
  2542. # If the offset goes outside the optional header,
  2543. # the loop is broken, regardless of how many directories
  2544. # NumberOfRvaAndSizes says there are
  2545. #
            # We assume a normally sized optional header, which is why we use
            # sizeof() instead of reading SizeOfOptionalHeader.
            # Then we add a default number of directories times their size;
            # if we go beyond that, we assume the number of directories
            # is wrong and stop processing
  2551. if offset >= (
  2552. optional_header_offset + self.OPTIONAL_HEADER.sizeof() + 8 * 16
  2553. ):
  2554. break
  2555. offset = self.parse_sections(sections_offset)
        # OC Patch:
        # There could be a problem if there are no sections with
        # PointerToRawData greater than 0
        # fc91013eb72529da005110a3403541b6 example
        # Should this throw an exception if the minimum header offset
        # can't be found?
        #
  2563. rawDataPointers = [
  2564. self.adjust_FileAlignment(
  2565. s.PointerToRawData, self.OPTIONAL_HEADER.FileAlignment
  2566. )
  2567. for s in self.sections
  2568. if s.PointerToRawData > 0
  2569. ]
  2570. if len(rawDataPointers) > 0:
  2571. lowest_section_offset = min(rawDataPointers)
  2572. else:
  2573. lowest_section_offset = None
  2574. if not lowest_section_offset or lowest_section_offset < offset:
  2575. self.header = self.__data__[:offset]
  2576. else:
  2577. self.header = self.__data__[:lowest_section_offset]
  2578. # Check whether the entry point lies within a section
  2579. #
  2580. if (
  2581. self.get_section_by_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint)
  2582. is not None
  2583. ):
  2584. # Check whether the entry point lies within the file
  2585. #
  2586. ep_offset = self.get_offset_from_rva(
  2587. self.OPTIONAL_HEADER.AddressOfEntryPoint
  2588. )
  2589. if ep_offset > len(self.__data__):
  2590. self.__warnings.append(
  2591. "Possibly corrupt file. AddressOfEntryPoint lies outside the"
  2592. " file. AddressOfEntryPoint: 0x%x"
  2593. % self.OPTIONAL_HEADER.AddressOfEntryPoint
  2594. )
  2595. else:
  2596. self.__warnings.append(
  2597. "AddressOfEntryPoint lies outside the sections' boundaries. "
  2598. "AddressOfEntryPoint: 0x%x" % self.OPTIONAL_HEADER.AddressOfEntryPoint
  2599. )
  2600. if not fast_load:
  2601. self.full_load()
  2602. def parse_rich_header(self):
  2603. """Parses the rich header
  2604. see http://www.ntcore.com/files/richsign.htm for more information
  2605. Structure:
  2606. 00 DanS ^ checksum, checksum, checksum, checksum
  2607. 10 Symbol RVA ^ checksum, Symbol size ^ checksum...
  2608. ...
  2609. XX Rich, checksum, 0, 0,...
  2610. """
  2611. # Rich Header constants
  2612. #
  2613. DANS = 0x536E6144 # 'DanS' as dword
  2614. RICH = 0x68636952 # 'Rich' as dword
  2615. rich_index = self.__data__.find(
  2616. b"Rich", 0x80, self.OPTIONAL_HEADER.get_file_offset()
  2617. )
  2618. if rich_index == -1:
  2619. return None
  2620. # Read a block of data
  2621. try:
  2622. # The end of the structure is 8 bytes after the start of the Rich
  2623. # string.
  2624. rich_data = self.__data__[0x80 : rich_index + 8]
            # Make the data have a length that is a multiple of 4, otherwise the
            # subsequent parsing will fail. It's not impossible that we retrieve
            # truncated data that is not a multiple of 4.
            rich_data = rich_data[: 4 * int(len(rich_data) / 4)]
  2629. data = list(
  2630. struct.unpack("<{0}I".format(int(len(rich_data) / 4)), rich_data)
  2631. )
  2632. if RICH not in data:
  2633. return None
  2634. except PEFormatError:
  2635. return None
  2636. # get key, raw_data and clear_data
  2637. key = struct.pack("<L", data[data.index(RICH) + 1])
  2638. result = {"key": key}
  2639. raw_data = rich_data[: rich_data.find(b"Rich")]
  2640. result["raw_data"] = raw_data
  2641. ord_ = lambda c: ord(c) if not isinstance(c, int) else c
  2642. clear_data = bytearray()
  2643. for idx, val in enumerate(raw_data):
  2644. clear_data.append((ord_(val) ^ ord_(key[idx % len(key)])))
  2645. result["clear_data"] = bytes(clear_data)
  2646. # the checksum should be present 3 times after the DanS signature
  2647. #
  2648. checksum = data[1]
  2649. if data[0] ^ checksum != DANS or data[2] != checksum or data[3] != checksum:
  2650. return None
  2651. result["checksum"] = checksum
  2652. headervalues = []
  2653. result["values"] = headervalues
  2654. data = data[4:]
  2655. for i in range(int(len(data) / 2)):
            # Stop once the Rich footer signature is found
            #
  2658. if data[2 * i] == RICH:
  2659. # it should be followed by the checksum
  2660. #
  2661. if data[2 * i + 1] != checksum:
  2662. self.__warnings.append("Rich Header is malformed")
  2663. break
  2664. # header values come by pairs
  2665. #
  2666. headervalues += [data[2 * i] ^ checksum, data[2 * i + 1] ^ checksum]
  2667. return result
  2668. def get_warnings(self):
  2669. """Return the list of warnings.
  2670. Non-critical problems found when parsing the PE file are
  2671. appended to a list of warnings. This method returns the
  2672. full list.
  2673. """
  2674. return self.__warnings
  2675. def show_warnings(self):
  2676. """Print the list of warnings.
  2677. Non-critical problems found when parsing the PE file are
  2678. appended to a list of warnings. This method prints the
  2679. full list to standard output.
  2680. """
  2681. for warning in self.__warnings:
  2682. print(">", warning)
  2683. def full_load(self):
  2684. """Process the data directories.
  2685. This method will load the data directories which might not have
  2686. been loaded if the "fast_load" option was used.
  2687. """
  2688. self.parse_data_directories()
  2689. class RichHeader:
  2690. pass
  2691. rich_header = self.parse_rich_header()
  2692. if rich_header:
  2693. self.RICH_HEADER = RichHeader()
  2694. self.RICH_HEADER.checksum = rich_header.get("checksum", None)
  2695. self.RICH_HEADER.values = rich_header.get("values", None)
  2696. self.RICH_HEADER.key = rich_header.get("key", None)
  2697. self.RICH_HEADER.raw_data = rich_header.get("raw_data", None)
  2698. self.RICH_HEADER.clear_data = rich_header.get("clear_data", None)
  2699. else:
  2700. self.RICH_HEADER = None
  2701. def write(self, filename=None):
  2702. """Write the PE file.
  2703. This function will process all headers and components
  2704. of the PE file and include all changes made (by just
  2705. assigning to attributes in the PE objects) and write
        the changes back to a file whose name is provided as
        an argument. The filename is optional; if not
        provided, the data will be returned as a bytearray.
  2709. """
  2710. file_data = bytearray(self.__data__)
  2711. for structure in self.__structures__:
  2712. struct_data = bytearray(structure.__pack__())
  2713. offset = structure.get_file_offset()
  2714. file_data[offset : offset + len(struct_data)] = struct_data
  2715. if hasattr(self, "VS_VERSIONINFO"):
  2716. if hasattr(self, "FileInfo"):
  2717. for finfo in self.FileInfo:
  2718. for entry in finfo:
  2719. if hasattr(entry, "StringTable"):
  2720. for st_entry in entry.StringTable:
  2721. for key, entry in list(st_entry.entries.items()):
  2722. # Offsets and lengths of the keys and values.
  2723. # Each value in the dictionary is a tuple:
  2724. # (key length, value length)
  2725. # The lengths are in characters, not in bytes.
  2726. offsets = st_entry.entries_offsets[key]
  2727. lengths = st_entry.entries_lengths[key]
  2728. if len(entry) > lengths[1]:
  2729. l = entry.decode("utf-8").encode("utf-16le")
  2730. file_data[
  2731. offsets[1] : offsets[1] + lengths[1] * 2
  2732. ] = l[: lengths[1] * 2]
  2733. else:
  2734. encoded_data = entry.decode("utf-8").encode(
  2735. "utf-16le"
  2736. )
  2737. file_data[
  2738. offsets[1] : offsets[1] + len(encoded_data)
  2739. ] = encoded_data
  2740. new_file_data = file_data
  2741. if not filename:
  2742. return new_file_data
  2743. f = open(filename, "wb+")
  2744. f.write(new_file_data)
  2745. f.close()
  2746. return
  2747. def parse_sections(self, offset):
  2748. """Fetch the PE file sections.
  2749. The sections will be readily available in the "sections" attribute.
  2750. Its attributes will contain all the section information plus "data"
  2751. a buffer containing the section's data.
  2752. The "Characteristics" member will be processed and attributes
  2753. representing the section characteristics (with the 'IMAGE_SCN_'
  2754. string trimmed from the constant's names) will be added to the
  2755. section instance.
  2756. Refer to the SectionStructure class for additional info.
  2757. """
  2758. self.sections = []
  2759. MAX_SIMULTANEOUS_ERRORS = 3
  2760. for i in range(self.FILE_HEADER.NumberOfSections):
  2761. if i >= MAX_SECTIONS:
  2762. self.__warnings.append(
  2763. "Too many sections {0} (>={1})".format(
  2764. self.FILE_HEADER.NumberOfSections, MAX_SECTIONS
  2765. )
  2766. )
  2767. break
  2768. simultaneous_errors = 0
  2769. section = SectionStructure(self.__IMAGE_SECTION_HEADER_format__, pe=self)
  2770. if not section:
  2771. break
  2772. section_offset = offset + section.sizeof() * i
  2773. section.set_file_offset(section_offset)
  2774. section_data = self.__data__[
  2775. section_offset : section_offset + section.sizeof()
  2776. ]
  2777. # Check if the section is all nulls and stop if so.
  2778. if count_zeroes(section_data) == section.sizeof():
  2779. self.__warnings.append(f"Invalid section {i}. Contents are null-bytes.")
  2780. break
  2781. if not section_data:
  2782. self.__warnings.append(
  2783. f"Invalid section {i}. No data in the file (is this corkami's "
  2784. "virtsectblXP?)."
  2785. )
  2786. break
  2787. section.__unpack__(section_data)
  2788. self.__structures__.append(section)
  2789. if section.SizeOfRawData + section.PointerToRawData > len(self.__data__):
  2790. simultaneous_errors += 1
  2791. self.__warnings.append(
  2792. f"Error parsing section {i}. SizeOfRawData is larger than file."
  2793. )
  2794. if self.adjust_FileAlignment(
  2795. section.PointerToRawData, self.OPTIONAL_HEADER.FileAlignment
  2796. ) > len(self.__data__):
  2797. simultaneous_errors += 1
  2798. self.__warnings.append(
  2799. f"Error parsing section {i}. PointerToRawData points beyond "
  2800. "the end of the file."
  2801. )
  2802. if section.Misc_VirtualSize > 0x10000000:
  2803. simultaneous_errors += 1
  2804. self.__warnings.append(
  2805. f"Suspicious value found parsing section {i}. VirtualSize is "
  2806. "extremely large > 256MiB."
  2807. )
  2808. if (
  2809. self.adjust_SectionAlignment(
  2810. section.VirtualAddress,
  2811. self.OPTIONAL_HEADER.SectionAlignment,
  2812. self.OPTIONAL_HEADER.FileAlignment,
  2813. )
  2814. > 0x10000000
  2815. ):
  2816. simultaneous_errors += 1
  2817. self.__warnings.append(
  2818. f"Suspicious value found parsing section {i}. VirtualAddress is "
  2819. "beyond 0x10000000."
  2820. )
  2821. if (
  2822. self.OPTIONAL_HEADER.FileAlignment != 0
  2823. and (section.PointerToRawData % self.OPTIONAL_HEADER.FileAlignment) != 0
  2824. ):
  2825. simultaneous_errors += 1
  2826. self.__warnings.append(
  2827. (
  2828. f"Error parsing section {i}. "
  2829. "PointerToRawData should normally be "
  2830. "a multiple of FileAlignment, this might imply the file "
  2831. "is trying to confuse tools which parse this incorrectly."
  2832. )
  2833. )
  2834. if simultaneous_errors >= MAX_SIMULTANEOUS_ERRORS:
  2835. self.__warnings.append("Too many warnings parsing section. Aborting.")
  2836. break
  2837. section_flags = retrieve_flags(SECTION_CHARACTERISTICS, "IMAGE_SCN_")
            # Set the section's flags according to the Characteristics member
            set_flags(section, section.Characteristics, section_flags)
  2840. if section.__dict__.get(
  2841. "IMAGE_SCN_MEM_WRITE", False
  2842. ) and section.__dict__.get("IMAGE_SCN_MEM_EXECUTE", False):
  2843. if section.Name.rstrip(b"\x00") == b"PAGE" and self.is_driver():
  2844. # Drivers can have a PAGE section with those flags set without
  2845. # implying that it is malicious
  2846. pass
  2847. else:
  2848. self.__warnings.append(
  2849. f"Suspicious flags set for section {i}. "
  2850. "Both IMAGE_SCN_MEM_WRITE and IMAGE_SCN_MEM_EXECUTE are set. "
  2851. "This might indicate a packed executable."
  2852. )
  2853. self.sections.append(section)
        # Sort the sections by their VirtualAddress and add a field to each of them
        # with the VirtualAddress of the next section. This allows checking
        # for potentially overlapping sections in badly constructed PEs.
  2857. self.sections.sort(key=lambda a: a.VirtualAddress)
  2858. for idx, section in enumerate(self.sections):
  2859. if idx == len(self.sections) - 1:
  2860. section.next_section_virtual_address = None
  2861. else:
  2862. section.next_section_virtual_address = self.sections[
  2863. idx + 1
  2864. ].VirtualAddress
  2865. if self.FILE_HEADER.NumberOfSections > 0 and self.sections:
  2866. return (
  2867. offset + self.sections[0].sizeof() * self.FILE_HEADER.NumberOfSections
  2868. )
  2869. else:
  2870. return offset
  2871. def parse_data_directories(
  2872. self, directories=None, forwarded_exports_only=False, import_dllnames_only=False
  2873. ):
  2874. """Parse and process the PE file's data directories.
  2875. If the optional argument 'directories' is given, only
  2876. the directories at the specified indexes will be parsed.
  2877. Such functionality allows parsing of areas of interest
  2878. without the burden of having to parse all others.
  2879. The directories can then be specified as:
  2880. For export / import only:
  2881. directories = [ 0, 1 ]
  2882. or (more verbosely):
  2883. directories = [ DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT'],
  2884. DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT'] ]
  2885. If 'directories' is a list, the ones that are processed will be removed,
  2886. leaving only the ones that are not present in the image.
  2887. If `forwarded_exports_only` is True, the IMAGE_DIRECTORY_ENTRY_EXPORT
  2888. attribute will only contain exports that are forwarded to another DLL.
  2889. If `import_dllnames_only` is True, symbols will not be parsed from
  2890. the import table and the entries in the IMAGE_DIRECTORY_ENTRY_IMPORT
  2891. attribute will not have a `symbols` attribute.
  2892. """
  2893. directory_parsing = (
  2894. ("IMAGE_DIRECTORY_ENTRY_IMPORT", self.parse_import_directory),
  2895. ("IMAGE_DIRECTORY_ENTRY_EXPORT", self.parse_export_directory),
  2896. ("IMAGE_DIRECTORY_ENTRY_RESOURCE", self.parse_resources_directory),
  2897. ("IMAGE_DIRECTORY_ENTRY_DEBUG", self.parse_debug_directory),
  2898. ("IMAGE_DIRECTORY_ENTRY_BASERELOC", self.parse_relocations_directory),
  2899. ("IMAGE_DIRECTORY_ENTRY_TLS", self.parse_directory_tls),
  2900. ("IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG", self.parse_directory_load_config),
  2901. ("IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT", self.parse_delay_import_directory),
  2902. ("IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT", self.parse_directory_bound_imports),
  2903. ("IMAGE_DIRECTORY_ENTRY_EXCEPTION", self.parse_exceptions_directory),
  2904. )
  2905. if directories is not None:
  2906. if not isinstance(directories, (tuple, list)):
  2907. directories = [directories]
  2908. for entry in directory_parsing:
  2909. # OC Patch:
  2910. #
  2911. try:
  2912. directory_index = DIRECTORY_ENTRY[entry[0]]
  2913. dir_entry = self.OPTIONAL_HEADER.DATA_DIRECTORY[directory_index]
  2914. except IndexError:
  2915. break
  2916. # Only process all the directories if no individual ones have
  2917. # been chosen
  2918. #
  2919. if directories is None or directory_index in directories:
  2920. if dir_entry.VirtualAddress:
  2921. if (
  2922. forwarded_exports_only
  2923. and entry[0] == "IMAGE_DIRECTORY_ENTRY_EXPORT"
  2924. ):
  2925. value = entry[1](
  2926. dir_entry.VirtualAddress,
  2927. dir_entry.Size,
  2928. forwarded_only=True,
  2929. )
  2930. elif (
  2931. import_dllnames_only
  2932. and entry[0] == "IMAGE_DIRECTORY_ENTRY_IMPORT"
  2933. ):
  2934. value = entry[1](
  2935. dir_entry.VirtualAddress, dir_entry.Size, dllnames_only=True
  2936. )
  2937. else:
  2938. try:
  2939. value = entry[1](dir_entry.VirtualAddress, dir_entry.Size)
  2940. except PEFormatError as excp:
                            self.__warnings.append(
                                f'Failed to process directory "{entry[0]}": {excp}'
                            )
  2944. if value:
  2945. setattr(self, entry[0][6:], value)
  2946. if (
  2947. (directories is not None)
  2948. and isinstance(directories, list)
  2949. and (entry[0] in directories)
  2950. ):
  2951. directories.remove(directory_index)
  2952. def parse_exceptions_directory(self, rva, size):
  2953. """Parses exception directory
  2954. All the code related to handling exception directories is documented in
  2955. https://auscitte.github.io/systems%20blog/Exception-Directory-pefile#implementation-details
  2956. """
  2957. # "For x64 and Itanium platforms; the format is different for other platforms"
  2958. if (
  2959. self.FILE_HEADER.Machine != MACHINE_TYPE["IMAGE_FILE_MACHINE_AMD64"]
  2960. and self.FILE_HEADER.Machine != MACHINE_TYPE["IMAGE_FILE_MACHINE_IA64"]
  2961. ):
  2962. return None
  2963. rf = Structure(self.__RUNTIME_FUNCTION_format__)
  2964. rf_size = rf.sizeof()
  2965. rva2rt = {}
  2966. rt_funcs = []
  2967. rva2infos = {}
  2968. for _ in range(size // rf_size):
  2969. rf = self.__unpack_data__(
  2970. self.__RUNTIME_FUNCTION_format__,
  2971. self.get_data(rva, rf_size),
  2972. file_offset=self.get_offset_from_rva(rva),
  2973. )
  2974. if rf is None:
  2975. break
  2976. ui = None
  2977. if (rf.UnwindData & 0x1) == 0:
  2978. # according to "Improving Automated Analysis of Windows x64 Binaries",
  2979. # if the lowest bit is set, (UnwindData & ~0x1) should point to the
  2980. # chained RUNTIME_FUNCTION instead of UNWIND_INFO
  2981. if (
  2982. rf.UnwindData in rva2infos
  2983. ): # unwind info data structures can be shared among functions
  2984. ui = rva2infos[rf.UnwindData]
  2985. else:
  2986. ui = UnwindInfo(file_offset=self.get_offset_from_rva(rf.UnwindData))
  2987. rva2infos[rf.UnwindData] = ui
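                # unpack_in_stages() appears to be called twice on purpose: the
                # first pass reads the fixed-size part of UNWIND_INFO (after
                # which sizeof() accounts for the variable number of unwind
                # codes), and the second pass re-reads the fully sized structure.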
  2988. ws = ui.unpack_in_stages(self.get_data(rf.UnwindData, ui.sizeof()))
  2989. if ws != None:
  2990. self.__warnings.append(ws)
  2991. break
  2992. ws = ui.unpack_in_stages(self.get_data(rf.UnwindData, ui.sizeof()))
  2993. if ws != None:
  2994. self.__warnings.append(ws)
  2995. break
  2996. self.__structures__.append(ui)
  2997. entry = ExceptionsDirEntryData(struct=rf, unwindinfo=ui)
  2998. rt_funcs.append(entry)
  2999. rva2rt[rf.BeginAddress] = entry
  3000. rva += rf_size
  3001. # each chained function entry holds a reference to the function first in chain
  3002. for rf in rt_funcs:
  3003. if rf.unwindinfo == None:
  3004. # TODO: have not encountered such a binary yet;
  3005. # in theory, (UnwindData & ~0x1) should point to the chained
  3006. # RUNTIME_FUNCTION which could be used to locate the corresponding
  3007. # ExceptionsDirEntryData and set_chained_function_entry()
  3008. continue
  3009. if not hasattr(rf.unwindinfo, "FunctionEntry"):
  3010. continue
            if rf.unwindinfo.FunctionEntry not in rva2rt:
  3012. self.__warnings.append(
  3013. f"FunctionEntry of UNWIND_INFO at {rf.struct.get_file_offset():x}"
  3014. " points to an entry that does not exist"
  3015. )
  3016. continue
  3017. try:
  3018. rf.unwindinfo.set_chained_function_entry(
  3019. rva2rt[rf.unwindinfo.FunctionEntry]
  3020. )
  3021. except PEFormatError as excp:
  3022. self.__warnings.append(
  3023. "Failed parsing FunctionEntry of UNWIND_INFO at "
  3024. f"{rf.struct.get_file_offset():x}: {excp}"
  3025. )
  3026. continue
  3027. return rt_funcs
  3028. def parse_directory_bound_imports(self, rva, size):
  3029. """"""
  3030. bnd_descr = Structure(self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__)
  3031. bnd_descr_size = bnd_descr.sizeof()
  3032. start = rva
  3033. bound_imports = []
  3034. while True:
  3035. bnd_descr = self.__unpack_data__(
  3036. self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__,
  3037. self.__data__[rva : rva + bnd_descr_size],
  3038. file_offset=rva,
  3039. )
  3040. if bnd_descr is None:
                # If the descriptor can't be parsed, just warn and return.
                # This directory does not necessarily have to be valid to
                # still have a valid PE file
  3044. self.__warnings.append(
  3045. "The Bound Imports directory exists but can't be parsed."
  3046. )
  3047. return
  3048. if bnd_descr.all_zeroes():
  3049. break
  3050. rva += bnd_descr.sizeof()
  3051. section = self.get_section_by_offset(rva)
  3052. file_offset = self.get_offset_from_rva(rva)
  3053. if section is None:
  3054. safety_boundary = len(self.__data__) - file_offset
  3055. sections_after_offset = [
  3056. s.PointerToRawData
  3057. for s in self.sections
  3058. if s.PointerToRawData > file_offset
  3059. ]
  3060. if sections_after_offset:
  3061. # Find the first section starting at a later offset than that
  3062. # specified by 'rva'
  3063. first_section_after_offset = min(sections_after_offset)
  3064. section = self.get_section_by_offset(first_section_after_offset)
  3065. if section is not None:
  3066. safety_boundary = section.PointerToRawData - file_offset
  3067. else:
  3068. safety_boundary = (
  3069. section.PointerToRawData + len(section.get_data()) - file_offset
  3070. )
  3071. if not section:
  3072. self.__warnings.append(
  3073. (
  3074. "RVA of IMAGE_BOUND_IMPORT_DESCRIPTOR points "
  3075. "to an invalid address: {0:x}"
  3076. ).format(rva)
  3077. )
  3078. return
  3079. forwarder_refs = []
  3080. # 8 is the size of __IMAGE_BOUND_IMPORT_DESCRIPTOR_format__
  3081. for _ in range(
  3082. min(bnd_descr.NumberOfModuleForwarderRefs, int(safety_boundary / 8))
  3083. ):
  3084. # Both structures IMAGE_BOUND_IMPORT_DESCRIPTOR and
  3085. # IMAGE_BOUND_FORWARDER_REF have the same size.
  3086. bnd_frwd_ref = self.__unpack_data__(
  3087. self.__IMAGE_BOUND_FORWARDER_REF_format__,
  3088. self.__data__[rva : rva + bnd_descr_size],
  3089. file_offset=rva,
  3090. )
  3091. # OC Patch:
  3092. if not bnd_frwd_ref:
  3093. raise PEFormatError("IMAGE_BOUND_FORWARDER_REF cannot be read")
  3094. rva += bnd_frwd_ref.sizeof()
  3095. offset = start + bnd_frwd_ref.OffsetModuleName
  3096. name_str = self.get_string_from_data(
  3097. 0, self.__data__[offset : offset + MAX_STRING_LENGTH]
  3098. )
                # OffsetModuleName points to a DLL name. These shouldn't be too long.
                # Anything longer than a safety length of 256 will be taken to indicate
                # a corrupt entry and abort the processing of these entries.
                # Names shorter than 4 characters will be taken as invalid as well.
  3103. if name_str:
  3104. invalid_chars = [
  3105. c for c in bytearray(name_str) if chr(c) not in string.printable
  3106. ]
  3107. if len(name_str) > 256 or invalid_chars:
  3108. break
  3109. forwarder_refs.append(
  3110. BoundImportRefData(struct=bnd_frwd_ref, name=name_str)
  3111. )
  3112. offset = start + bnd_descr.OffsetModuleName
  3113. name_str = self.get_string_from_data(
  3114. 0, self.__data__[offset : offset + MAX_STRING_LENGTH]
  3115. )
  3116. if name_str:
  3117. invalid_chars = [
  3118. c for c in bytearray(name_str) if chr(c) not in string.printable
  3119. ]
  3120. if len(name_str) > 256 or invalid_chars:
  3121. break
  3122. if not name_str:
  3123. break
  3124. bound_imports.append(
  3125. BoundImportDescData(
  3126. struct=bnd_descr, name=name_str, entries=forwarder_refs
  3127. )
  3128. )
  3129. return bound_imports
  3130. def parse_directory_tls(self, rva, size):
  3131. """"""
  3132. # By default let's pretend the format is a 32-bit PE. It may help
  3133. # produce some output for files where the Magic in the Optional Header
  3134. # is incorrect.
  3135. format = self.__IMAGE_TLS_DIRECTORY_format__
  3136. if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
  3137. format = self.__IMAGE_TLS_DIRECTORY64_format__
  3138. try:
  3139. tls_struct = self.__unpack_data__(
  3140. format,
  3141. self.get_data(rva, Structure(format).sizeof()),
  3142. file_offset=self.get_offset_from_rva(rva),
  3143. )
  3144. except PEFormatError:
  3145. self.__warnings.append(
  3146. "Invalid TLS information. Can't read " "data at RVA: 0x%x" % rva
  3147. )
  3148. tls_struct = None
  3149. if not tls_struct:
  3150. return None
  3151. return TlsData(struct=tls_struct)
  3152. def parse_directory_load_config(self, rva, size):
  3153. """"""
  3154. if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
  3155. format = self.__IMAGE_LOAD_CONFIG_DIRECTORY_format__
  3156. elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
  3157. format = self.__IMAGE_LOAD_CONFIG_DIRECTORY64_format__
  3158. else:
  3159. self.__warnings.append(
  3160. "Don't know how to parse LOAD_CONFIG information for non-PE32/"
  3161. "PE32+ file"
  3162. )
  3163. return None
  3164. load_config_struct = None
  3165. try:
  3166. load_config_struct = self.__unpack_data__(
  3167. format,
  3168. self.get_data(rva, Structure(format).sizeof()),
  3169. file_offset=self.get_offset_from_rva(rva),
  3170. )
  3171. except PEFormatError:
  3172. self.__warnings.append(
  3173. "Invalid LOAD_CONFIG information. Can't read " "data at RVA: 0x%x" % rva
  3174. )
  3175. if not load_config_struct:
  3176. return None
  3177. return LoadConfigData(struct=load_config_struct)
  3178. def parse_relocations_directory(self, rva, size):
  3179. """"""
  3180. rlc_size = Structure(self.__IMAGE_BASE_RELOCATION_format__).sizeof()
  3181. end = rva + size
  3182. relocations = []
  3183. while rva < end:
  3184. # OC Patch:
  3185. # Malware that has bad RVA entries will cause an error.
  3186. # Just continue on after an exception
  3187. #
  3188. try:
  3189. rlc = self.__unpack_data__(
  3190. self.__IMAGE_BASE_RELOCATION_format__,
  3191. self.get_data(rva, rlc_size),
  3192. file_offset=self.get_offset_from_rva(rva),
  3193. )
  3194. except PEFormatError:
  3195. self.__warnings.append(
  3196. "Invalid relocation information. Can't read "
  3197. "data at RVA: 0x%x" % rva
  3198. )
  3199. rlc = None
  3200. if not rlc:
  3201. break
  3202. # rlc.VirtualAddress must lie within the Image
  3203. if rlc.VirtualAddress > self.OPTIONAL_HEADER.SizeOfImage:
  3204. self.__warnings.append(
  3205. "Invalid relocation information. VirtualAddress outside"
  3206. " of Image: 0x%x" % rlc.VirtualAddress
  3207. )
  3208. break
  3209. # rlc.SizeOfBlock must be less or equal than the size of the image
  3210. # (It's a rather loose sanity test)
  3211. if rlc.SizeOfBlock > self.OPTIONAL_HEADER.SizeOfImage:
  3212. self.__warnings.append(
  3213. "Invalid relocation information. SizeOfBlock too large"
  3214. ": %d" % rlc.SizeOfBlock
  3215. )
  3216. break
  3217. reloc_entries = self.parse_relocations(
  3218. rva + rlc_size, rlc.VirtualAddress, rlc.SizeOfBlock - rlc_size
  3219. )
  3220. relocations.append(BaseRelocationData(struct=rlc, entries=reloc_entries))
  3221. if not rlc.SizeOfBlock:
  3222. break
  3223. rva += rlc.SizeOfBlock
  3224. return relocations
  3225. def parse_relocations(self, data_rva, rva, size):
  3226. """"""
  3227. try:
  3228. data = self.get_data(data_rva, size)
  3229. file_offset = self.get_offset_from_rva(data_rva)
  3230. except PEFormatError:
  3231. self.__warnings.append(f"Bad RVA in relocation data: 0x{data_rva:x}")
  3232. return []
  3233. entries = []
  3234. offsets_and_type = []
  3235. for idx in range(int(len(data) / 2)):
  3236. entry = self.__unpack_data__(
  3237. self.__IMAGE_BASE_RELOCATION_ENTRY_format__,
  3238. data[idx * 2 : (idx + 1) * 2],
  3239. file_offset=file_offset,
  3240. )
  3241. if not entry:
  3242. break
  3243. word = entry.Data
  3244. reloc_type = word >> 12
  3245. reloc_offset = word & 0x0FFF
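            # Each 16-bit entry packs the relocation as: the high 4 bits hold
            # the type (e.g. IMAGE_REL_BASED_HIGHLOW) and the low 12 bits the
            # offset from the owning block's VirtualAddress.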
  3246. if (reloc_offset, reloc_type) in offsets_and_type:
  3247. self.__warnings.append(
  3248. "Overlapping offsets in relocation data "
  3249. "at RVA: 0x%x" % (reloc_offset + rva)
  3250. )
  3251. break
  3252. if len(offsets_and_type) >= 1000:
  3253. offsets_and_type.pop()
  3254. offsets_and_type.insert(0, (reloc_offset, reloc_type))
  3255. entries.append(
  3256. RelocationData(
  3257. struct=entry, type=reloc_type, base_rva=rva, rva=reloc_offset + rva
  3258. )
  3259. )
  3260. file_offset += entry.sizeof()
  3261. return entries
  3262. def parse_debug_directory(self, rva, size):
  3263. """"""
  3264. dbg_size = Structure(self.__IMAGE_DEBUG_DIRECTORY_format__).sizeof()
  3265. debug = []
  3266. for idx in range(int(size / dbg_size)):
  3267. try:
  3268. data = self.get_data(rva + dbg_size * idx, dbg_size)
  3269. except PEFormatError:
  3270. self.__warnings.append(
  3271. "Invalid debug information. Can't read " "data at RVA: 0x%x" % rva
  3272. )
  3273. return None
  3274. dbg = self.__unpack_data__(
  3275. self.__IMAGE_DEBUG_DIRECTORY_format__,
  3276. data,
  3277. file_offset=self.get_offset_from_rva(rva + dbg_size * idx),
  3278. )
  3279. if not dbg:
  3280. return None
  3281. # apply structure according to DEBUG_TYPE
  3282. # http://www.debuginfo.com/articles/debuginfomatch.html
  3283. #
  3284. dbg_type = None
  3285. if dbg.Type == 1:
  3286. # IMAGE_DEBUG_TYPE_COFF
  3287. pass
  3288. elif dbg.Type == 2:
  3289. # if IMAGE_DEBUG_TYPE_CODEVIEW
  3290. dbg_type_offset = dbg.PointerToRawData
  3291. dbg_type_size = dbg.SizeOfData
  3292. dbg_type_data = self.__data__[
  3293. dbg_type_offset : dbg_type_offset + dbg_type_size
  3294. ]
  3295. if dbg_type_data[:4] == b"RSDS":
  3296. # pdb7.0
  3297. __CV_INFO_PDB70_format__ = [
  3298. "CV_INFO_PDB70",
  3299. [
  3300. "I,CvSignature",
  3301. "I,Signature_Data1", # Signature is of GUID type
  3302. "H,Signature_Data2",
  3303. "H,Signature_Data3",
  3304. "8s,Signature_Data4",
  3305. # 'H,Signature_Data5',
  3306. # 'I,Signature_Data6',
  3307. "I,Age",
  3308. ],
  3309. ]
  3310. pdbFileName_size = (
  3311. dbg_type_size - Structure(__CV_INFO_PDB70_format__).sizeof()
  3312. )
  3313. # pdbFileName_size can be negative here, as seen in the malware
  3314. # sample with hash
  3315. # MD5: 7c297600870d026c014d42596bb9b5fd
  3316. # SHA256:
  3317. # 83f4e63681fcba8a9d7bbb1688c71981b1837446514a1773597e0192bba9fac3
  3318. # Checking for positive size here to ensure proper parsing.
  3319. if pdbFileName_size > 0:
  3320. __CV_INFO_PDB70_format__[1].append(
  3321. "{0}s,PdbFileName".format(pdbFileName_size)
  3322. )
  3323. dbg_type = self.__unpack_data__(
  3324. __CV_INFO_PDB70_format__, dbg_type_data, dbg_type_offset
  3325. )
  3326. elif dbg_type_data[:4] == b"NB10":
  3327. # pdb2.0
  3328. __CV_INFO_PDB20_format__ = [
  3329. "CV_INFO_PDB20",
  3330. [
  3331. "I,CvHeaderSignature",
  3332. "I,CvHeaderOffset",
  3333. "I,Signature",
  3334. "I,Age",
  3335. ],
  3336. ]
  3337. pdbFileName_size = (
  3338. dbg_type_size - Structure(__CV_INFO_PDB20_format__).sizeof()
  3339. )
  3340. # As with the PDB 7.0 case, ensuring a positive size for
  3341. # pdbFileName_size to ensure proper parsing.
  3342. if pdbFileName_size > 0:
  3343. # Add the last variable-length string field.
  3344. __CV_INFO_PDB20_format__[1].append(
  3345. "{0}s,PdbFileName".format(pdbFileName_size)
  3346. )
  3347. dbg_type = self.__unpack_data__(
  3348. __CV_INFO_PDB20_format__, dbg_type_data, dbg_type_offset
  3349. )
  3350. elif dbg.Type == 4:
  3351. # IMAGE_DEBUG_TYPE_MISC
  3352. dbg_type_offset = dbg.PointerToRawData
  3353. dbg_type_size = dbg.SizeOfData
  3354. dbg_type_data = self.__data__[
  3355. dbg_type_offset : dbg_type_offset + dbg_type_size
  3356. ]
  3357. ___IMAGE_DEBUG_MISC_format__ = [
  3358. "IMAGE_DEBUG_MISC",
  3359. [
  3360. "I,DataType",
  3361. "I,Length",
  3362. "B,Unicode",
  3363. "B,Reserved1",
  3364. "H,Reserved2",
  3365. ],
  3366. ]
  3367. dbg_type_partial = self.__unpack_data__(
  3368. ___IMAGE_DEBUG_MISC_format__, dbg_type_data, dbg_type_offset
  3369. )
  3370. # Need to check that dbg_type_partial contains a correctly unpacked data
  3371. # structure, as the malware sample with the following hash
  3372. # MD5: 5e7d6707d693108de5a303045c17d95b
  3373. # SHA256:
  3374. # 5dd94a95025f3b6e3dd440d52f7c6d2964fdd1aa119e0ee92e38c7bf83829e5c
  3375. # contains a value of None for dbg_type_partial after unpacking,
  3376. # presumably due to a malformed DEBUG entry.
  3377. if dbg_type_partial:
  3378. # The Unicode bool should be set to 0 or 1.
  3379. if dbg_type_partial.Unicode in (0, 1):
  3380. data_size = (
  3381. dbg_type_size
  3382. - Structure(___IMAGE_DEBUG_MISC_format__).sizeof()
  3383. )
  3384. # As with the PDB case, ensuring a positive size for data_size
  3385. # here to ensure proper parsing.
  3386. if data_size > 0:
  3387. ___IMAGE_DEBUG_MISC_format__[1].append(
  3388. "{0}s,Data".format(data_size)
  3389. )
  3390. dbg_type = self.__unpack_data__(
  3391. ___IMAGE_DEBUG_MISC_format__, dbg_type_data, dbg_type_offset
  3392. )
  3393. debug.append(DebugData(struct=dbg, entry=dbg_type))
  3394. return debug
  3395. def parse_resources_directory(self, rva, size=0, base_rva=None, level=0, dirs=None):
  3396. """Parse the resources directory.
  3397. Given the RVA of the resources directory, it will process all
  3398. its entries.
  3399. The root will have the corresponding member of its structure,
  3400. IMAGE_RESOURCE_DIRECTORY plus 'entries', a list of all the
  3401. entries in the directory.
  3402. Those entries will have, correspondingly, all the structure's
  3403. members (IMAGE_RESOURCE_DIRECTORY_ENTRY) and an additional one,
  3404. "directory", pointing to the IMAGE_RESOURCE_DIRECTORY structure
  3405. representing upper layers of the tree. This one will also have
  3406. an 'entries' attribute, pointing to the 3rd, and last, level.
  3407. Another directory with more entries. Those last entries will
  3408. have a new attribute (both 'leaf' or 'data_entry' can be used to
  3409. access it). This structure finally points to the resource data.
  3410. All the members of this structure, IMAGE_RESOURCE_DATA_ENTRY,
  3411. are available as its attributes.
  3412. """
  3413. # OC Patch:
  3414. if dirs is None:
  3415. dirs = [rva]
  3416. if base_rva is None:
  3417. base_rva = rva
  3418. if level > MAX_RESOURCE_DEPTH:
  3419. self.__warnings.append(
  3420. "Error parsing the resources directory. "
  3421. "Excessively nested table depth %d (>%s)" % (level, MAX_RESOURCE_DEPTH)
  3422. )
  3423. return None
  3424. try:
  3425. # If the RVA is invalid all would blow up. Some EXEs seem to be
  3426. # specially nasty and have an invalid RVA.
  3427. data = self.get_data(
  3428. rva, Structure(self.__IMAGE_RESOURCE_DIRECTORY_format__).sizeof()
  3429. )
  3430. except PEFormatError:
  3431. self.__warnings.append(
  3432. "Invalid resources directory. Can't read "
  3433. "directory data at RVA: 0x%x" % rva
  3434. )
  3435. return None
  3436. # Get the resource directory structure, that is, the header
  3437. # of the table preceding the actual entries
  3438. #
  3439. resource_dir = self.__unpack_data__(
  3440. self.__IMAGE_RESOURCE_DIRECTORY_format__,
  3441. data,
  3442. file_offset=self.get_offset_from_rva(rva),
  3443. )
  3444. if resource_dir is None:
  3445. # If we can't parse resources directory then silently return.
  3446. # This directory does not necessarily have to be valid to
  3447. # still have a valid PE file
  3448. self.__warnings.append(
  3449. "Invalid resources directory. Can't parse "
  3450. "directory data at RVA: 0x%x" % rva
  3451. )
  3452. return None
  3453. dir_entries = []
  3454. # Advance the RVA to the position immediately following the directory
  3455. # table header and pointing to the first entry in the table
  3456. #
  3457. rva += resource_dir.sizeof()
  3458. number_of_entries = (
  3459. resource_dir.NumberOfNamedEntries + resource_dir.NumberOfIdEntries
  3460. )
  3461. # Set a hard limit on the maximum reasonable number of entries
  3462. MAX_ALLOWED_ENTRIES = 4096
  3463. if number_of_entries > MAX_ALLOWED_ENTRIES:
  3464. self.__warnings.append(
  3465. "Error parsing the resources directory. "
  3466. "The directory contains %d entries (>%s)"
  3467. % (number_of_entries, MAX_ALLOWED_ENTRIES)
  3468. )
  3469. return None
  3470. self.__total_resource_entries_count += number_of_entries
  3471. if self.__total_resource_entries_count > MAX_RESOURCE_ENTRIES:
  3472. self.__warnings.append(
  3473. "Error parsing the resources directory. "
  3474. "The file contains at least %d entries (>%d)"
  3475. % (self.__total_resource_entries_count, MAX_RESOURCE_ENTRIES)
  3476. )
  3477. return None
  3478. strings_to_postprocess = []
        # Keep track of the last name's start and end offsets in order
        # to be able to detect overlapping entries that might suggest
        # an invalid or corrupt directory.
  3482. last_name_begin_end = None
  3483. for idx in range(number_of_entries):
  3484. if (
  3485. not self.__resource_size_limit_reached
  3486. and self.__total_resource_bytes > self.__resource_size_limit_upperbounds
  3487. ):
  3488. self.__resource_size_limit_reached = True
  3489. self.__warnings.append(
  3490. "Resource size 0x%x exceeds file size 0x%x, overlapping "
  3491. "resources found."
  3492. % (
  3493. self.__total_resource_bytes,
  3494. self.__resource_size_limit_upperbounds,
  3495. )
  3496. )
  3497. res = self.parse_resource_entry(rva)
  3498. if res is None:
  3499. self.__warnings.append(
  3500. "Error parsing the resources directory, "
  3501. "Entry %d is invalid, RVA = 0x%x. " % (idx, rva)
  3502. )
  3503. break
  3504. entry_name = None
  3505. entry_id = None
  3506. name_is_string = (res.Name & 0x80000000) >> 31
  3507. if not name_is_string:
  3508. entry_id = res.Name
  3509. else:
  3510. ustr_offset = base_rva + res.NameOffset
  3511. try:
  3512. entry_name = UnicodeStringWrapperPostProcessor(self, ustr_offset)
  3513. self.__total_resource_bytes += entry_name.get_pascal_16_length()
  3514. # If the last entry's offset points before the current's but its end
  3515. # is past the current's beginning, assume the overlap indicates a
  3516. # corrupt name.
  3517. if last_name_begin_end and (
  3518. last_name_begin_end[0] < ustr_offset
  3519. and last_name_begin_end[1] >= ustr_offset
  3520. ):
  3521. # Remove the previous overlapping entry as it's likely to be
  3522. # already corrupt data.
  3523. strings_to_postprocess.pop()
  3524. self.__warnings.append(
  3525. "Error parsing the resources directory, "
  3526. "attempting to read entry name. "
  3527. "Entry names overlap 0x%x" % (ustr_offset)
  3528. )
  3529. break
  3530. last_name_begin_end = (
  3531. ustr_offset,
  3532. ustr_offset + entry_name.get_pascal_16_length(),
  3533. )
  3534. strings_to_postprocess.append(entry_name)
  3535. except PEFormatError:
  3536. self.__warnings.append(
  3537. "Error parsing the resources directory, "
  3538. "attempting to read entry name. "
  3539. "Can't read unicode string at offset 0x%x" % (ustr_offset)
  3540. )
  3541. if res.DataIsDirectory:
  3542. # OC Patch:
  3543. #
  3544. # One trick malware can do is to recursively reference
  3545. # the next directory. This causes hilarity to ensue when
  3546. # trying to parse everything correctly.
  3547. # If the original RVA given to this function is equal to
  3548. # the next one to parse, we assume that it's a trick.
  3549. # Instead of raising a PEFormatError this would skip some
  3550. # reasonable data so we just break.
  3551. #
  3552. # 9ee4d0a0caf095314fd7041a3e4404dc is the offending sample
  3553. if base_rva + res.OffsetToDirectory in dirs:
  3554. break
  3555. entry_directory = self.parse_resources_directory(
  3556. base_rva + res.OffsetToDirectory,
  3557. size - (rva - base_rva), # size
  3558. base_rva=base_rva,
  3559. level=level + 1,
  3560. dirs=dirs + [base_rva + res.OffsetToDirectory],
  3561. )
  3562. if not entry_directory:
  3563. break
  3564. # Ange Albertini's code to process resources' strings
  3565. #
  3566. strings = None
  3567. if entry_id == RESOURCE_TYPE["RT_STRING"]:
  3568. strings = {}
  3569. for resource_id in entry_directory.entries:
  3570. if hasattr(resource_id, "directory"):
  3571. resource_strings = {}
  3572. for resource_lang in resource_id.directory.entries:
  3573. if (
  3574. resource_lang is None
  3575. or not hasattr(resource_lang, "data")
  3576. or resource_lang.data.struct.Size is None
  3577. or resource_id.id is None
  3578. ):
  3579. continue
  3580. string_entry_rva = (
  3581. resource_lang.data.struct.OffsetToData
  3582. )
  3583. string_entry_size = resource_lang.data.struct.Size
  3584. string_entry_id = resource_id.id
  3585. # XXX: has been raising exceptions preventing parsing
  3586. try:
  3587. string_entry_data = self.get_data(
  3588. string_entry_rva, string_entry_size
  3589. )
  3590. except:
  3591. self.__warnings.append(
  3592. f"Error parsing resource of type RT_STRING at "
  3593. f"RVA 0x{string_entry_rva:x} with "
  3594. f"size {string_entry_size}"
  3595. )
  3596. continue
  3597. parse_strings(
  3598. string_entry_data,
  3599. (int(string_entry_id) - 1) * 16,
  3600. resource_strings,
  3601. )
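                                # RT_STRING data is stored in blocks of 16 strings;
                                # block id N covers string ids (N - 1) * 16 through
                                # (N - 1) * 16 + 15, hence the base index above.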
  3602. strings.update(resource_strings)
  3603. resource_id.directory.strings = resource_strings
  3604. dir_entries.append(
  3605. ResourceDirEntryData(
  3606. struct=res,
  3607. name=entry_name,
  3608. id=entry_id,
  3609. directory=entry_directory,
  3610. )
  3611. )
  3612. else:
  3613. struct = self.parse_resource_data_entry(
  3614. base_rva + res.OffsetToDirectory
  3615. )
  3616. if struct:
  3617. self.__total_resource_bytes += struct.Size
  3618. entry_data = ResourceDataEntryData(
  3619. struct=struct, lang=res.Name & 0x3FF, sublang=res.Name >> 10
  3620. )
  3621. dir_entries.append(
  3622. ResourceDirEntryData(
  3623. struct=res, name=entry_name, id=entry_id, data=entry_data
  3624. )
  3625. )
  3626. else:
  3627. break
  3628. # Check if this entry contains version information
  3629. #
  3630. if level == 0 and res.Id == RESOURCE_TYPE["RT_VERSION"]:
  3631. if dir_entries:
  3632. last_entry = dir_entries[-1]
  3633. try:
  3634. version_entries = last_entry.directory.entries[0].directory.entries
  3635. except:
  3636. # Maybe a malformed directory structure...?
  3637. # Let's ignore it
  3638. pass
  3639. else:
  3640. for version_entry in version_entries:
  3641. rt_version_struct = None
  3642. try:
  3643. rt_version_struct = version_entry.data.struct
  3644. except:
  3645. # Maybe a malformed directory structure...?
  3646. # Let's ignore it
  3647. pass
  3648. if rt_version_struct is not None:
  3649. self.parse_version_information(rt_version_struct)
  3650. rva += res.sizeof()
  3651. string_rvas = [s.get_rva() for s in strings_to_postprocess]
  3652. string_rvas.sort()
  3653. for idx, s in enumerate(strings_to_postprocess):
  3654. s.render_pascal_16()
  3655. resource_directory_data = ResourceDirData(
  3656. struct=resource_dir, entries=dir_entries
  3657. )
  3658. return resource_directory_data
  3659. def parse_resource_data_entry(self, rva):
  3660. """Parse a data entry from the resources directory."""
  3661. try:
  3662. # If the RVA is invalid all would blow up. Some EXEs seem to be
  3663. # specially nasty and have an invalid RVA.
  3664. data = self.get_data(
  3665. rva, Structure(self.__IMAGE_RESOURCE_DATA_ENTRY_format__).sizeof()
  3666. )
  3667. except PEFormatError:
  3668. self.__warnings.append(
  3669. "Error parsing a resource directory data entry, "
  3670. "the RVA is invalid: 0x%x" % (rva)
  3671. )
  3672. return None
  3673. data_entry = self.__unpack_data__(
  3674. self.__IMAGE_RESOURCE_DATA_ENTRY_format__,
  3675. data,
  3676. file_offset=self.get_offset_from_rva(rva),
  3677. )
  3678. return data_entry
  3679. def parse_resource_entry(self, rva):
  3680. """Parse a directory entry from the resources directory."""
  3681. try:
  3682. data = self.get_data(
  3683. rva, Structure(self.__IMAGE_RESOURCE_DIRECTORY_ENTRY_format__).sizeof()
  3684. )
  3685. except PEFormatError:
  3686. # A warning will be added by the caller if this method returns None
  3687. return None
  3688. resource = self.__unpack_data__(
  3689. self.__IMAGE_RESOURCE_DIRECTORY_ENTRY_format__,
  3690. data,
  3691. file_offset=self.get_offset_from_rva(rva),
  3692. )
  3693. if resource is None:
  3694. return None
  3695. # resource.NameIsString = (resource.Name & 0x80000000L) >> 31
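# Name and OffsetToData are unions: when the high bit of Name is set, the
# remaining bits are an offset to a unicode string (NameOffset), otherwise
# they hold a numeric Id; when the high bit of OffsetToData is set, the
# entry points to another resource directory (OffsetToDirectory), otherwise
# to a data entry.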
  3696. resource.NameOffset = resource.Name & 0x7FFFFFFF
  3697. resource.__pad = resource.Name & 0xFFFF0000
  3698. resource.Id = resource.Name & 0x0000FFFF
  3699. resource.DataIsDirectory = (resource.OffsetToData & 0x80000000) >> 31
  3700. resource.OffsetToDirectory = resource.OffsetToData & 0x7FFFFFFF
  3701. return resource
  3702. def parse_version_information(self, version_struct):
  3703. """Parse version information structure.
  3704. The date will be made available in three attributes of the PE object.
  3705. VS_VERSIONINFO will contain the first three fields of the main structure:
  3706. 'Length', 'ValueLength', and 'Type'
  3707. VS_FIXEDFILEINFO will hold the rest of the fields, accessible as sub-attributes:
  3708. 'Signature', 'StrucVersion', 'FileVersionMS', 'FileVersionLS',
  3709. 'ProductVersionMS', 'ProductVersionLS', 'FileFlagsMask', 'FileFlags',
  3710. 'FileOS', 'FileType', 'FileSubtype', 'FileDateMS', 'FileDateLS'
  3711. FileInfo is a list of all StringFileInfo and VarFileInfo structures.
  3712. StringFileInfo structures will have a list as an attribute named 'StringTable'
  3713. containing all the StringTable structures. Each of those structures contains a
  3714. dictionary 'entries' with all the key / value version information string pairs.
  3715. VarFileInfo structures will have a list as an attribute named 'Var' containing
  3716. all Var structures. Each Var structure will have a dictionary as an attribute
  3717. named 'entry' which will contain the name and value of the Var.
  3718. """
  3719. # Retrieve the data for the version info resource
  3720. #
  3721. try:
  3722. start_offset = self.get_offset_from_rva(version_struct.OffsetToData)
  3723. except PEFormatError:
  3724. self.__warnings.append(
  3725. "Error parsing the version information, "
  3726. "attempting to read OffsetToData with RVA: 0x{:x}".format(
  3727. version_struct.OffsetToData
  3728. )
  3729. )
  3730. return
  3731. raw_data = self.__data__[start_offset : start_offset + version_struct.Size]
  3732. # Map the main structure and the subsequent string
  3733. #
  3734. versioninfo_struct = self.__unpack_data__(
  3735. self.__VS_VERSIONINFO_format__, raw_data, file_offset=start_offset
  3736. )
  3737. if versioninfo_struct is None:
  3738. return
  3739. ustr_offset = version_struct.OffsetToData + versioninfo_struct.sizeof()
  3740. section = self.get_section_by_rva(ustr_offset)
  3741. section_end = None
  3742. if section:
  3743. section_end = section.VirtualAddress + max(
  3744. section.SizeOfRawData, section.Misc_VirtualSize
  3745. )
  3746. versioninfo_string = None
3747. # These should return 'ascii'-decoded data. When the data is garbled,
3748. # the ascii string will retain the raw byte values, whereas decoding it
3749. # as something else may yield values that don't match the
3750. # file's contents.
  3751. try:
  3752. if section_end is None:
  3753. versioninfo_string = self.get_string_u_at_rva(
  3754. ustr_offset, encoding="ascii"
  3755. )
  3756. else:
  3757. versioninfo_string = self.get_string_u_at_rva(
  3758. ustr_offset, (section_end - ustr_offset) >> 1, encoding="ascii"
  3759. )
  3760. except PEFormatError:
  3761. self.__warnings.append(
  3762. "Error parsing the version information, "
  3763. "attempting to read VS_VERSION_INFO string. Can't "
  3764. "read unicode string at offset 0x%x" % (ustr_offset)
  3765. )
3766. if versioninfo_string is None:
  3767. self.__warnings.append(
  3768. "Invalid VS_VERSION_INFO block: {0}".format(versioninfo_string)
  3769. )
  3770. return
  3771. # If the structure does not contain the expected name, it's assumed to
  3772. # be invalid
  3773. if versioninfo_string is not None and versioninfo_string != b"VS_VERSION_INFO":
  3774. if len(versioninfo_string) > 128:
  3775. excerpt = versioninfo_string[:128].decode("ascii")
  3776. # Don't leave any half-escaped characters
  3777. excerpt = excerpt[: excerpt.rfind("\\u")]
  3778. versioninfo_string = b(
  3779. "{0} ... ({1} bytes, too long to display)".format(
  3780. excerpt, len(versioninfo_string)
  3781. )
  3782. )
  3783. self.__warnings.append(
  3784. "Invalid VS_VERSION_INFO block: {0}".format(
  3785. versioninfo_string.decode("ascii").replace("\00", "\\00")
  3786. )
  3787. )
  3788. return
  3789. if not hasattr(self, "VS_VERSIONINFO"):
  3790. self.VS_VERSIONINFO = []
  3791. # Set the PE object's VS_VERSIONINFO to this one
  3792. vinfo = versioninfo_struct
  3793. # Set the Key attribute to point to the unicode string identifying the structure
  3794. vinfo.Key = versioninfo_string
  3795. self.VS_VERSIONINFO.append(vinfo)
  3796. if versioninfo_string is None:
  3797. versioninfo_string = ""
  3798. # Process the fixed version information, get the offset and structure
  3799. fixedfileinfo_offset = self.dword_align(
  3800. versioninfo_struct.sizeof() + 2 * (len(versioninfo_string) + 1),
  3801. version_struct.OffsetToData,
  3802. )
  3803. fixedfileinfo_struct = self.__unpack_data__(
  3804. self.__VS_FIXEDFILEINFO_format__,
  3805. raw_data[fixedfileinfo_offset:],
  3806. file_offset=start_offset + fixedfileinfo_offset,
  3807. )
  3808. if not fixedfileinfo_struct:
  3809. return
  3810. if not hasattr(self, "VS_FIXEDFILEINFO"):
  3811. self.VS_FIXEDFILEINFO = []
  3812. # Set the PE object's VS_FIXEDFILEINFO to this one
  3813. self.VS_FIXEDFILEINFO.append(fixedfileinfo_struct)
  3814. # Start parsing all the StringFileInfo and VarFileInfo structures
  3815. # Get the first one
  3816. stringfileinfo_offset = self.dword_align(
  3817. fixedfileinfo_offset + fixedfileinfo_struct.sizeof(),
  3818. version_struct.OffsetToData,
  3819. )
  3820. # Set the PE object's attribute that will contain them all.
  3821. if not hasattr(self, "FileInfo"):
  3822. self.FileInfo = []
  3823. finfo = []
  3824. while True:
  3825. # Process the StringFileInfo/VarFileInfo structure
  3826. stringfileinfo_struct = self.__unpack_data__(
  3827. self.__StringFileInfo_format__,
  3828. raw_data[stringfileinfo_offset:],
  3829. file_offset=start_offset + stringfileinfo_offset,
  3830. )
  3831. if stringfileinfo_struct is None:
  3832. self.__warnings.append(
  3833. "Error parsing StringFileInfo/VarFileInfo struct"
  3834. )
  3835. return None
  3836. # Get the subsequent string defining the structure.
  3837. ustr_offset = (
  3838. version_struct.OffsetToData
  3839. + stringfileinfo_offset
  3840. + versioninfo_struct.sizeof()
  3841. )
  3842. try:
  3843. stringfileinfo_string = self.get_string_u_at_rva(ustr_offset)
  3844. except PEFormatError:
  3845. self.__warnings.append(
  3846. "Error parsing the version information, "
  3847. "attempting to read StringFileInfo string. Can't "
  3848. "read unicode string at offset 0x{0:x}".format(ustr_offset)
  3849. )
  3850. break
  3851. # Set such string as the Key attribute
  3852. stringfileinfo_struct.Key = stringfileinfo_string
  3853. # Append the structure to the PE object's list
  3854. finfo.append(stringfileinfo_struct)
  3855. # Parse a StringFileInfo entry
  3856. if stringfileinfo_string and stringfileinfo_string.startswith(
  3857. b"StringFileInfo"
  3858. ):
  3859. if (
  3860. stringfileinfo_struct.Type in (0, 1)
  3861. and stringfileinfo_struct.ValueLength == 0
  3862. ):
  3863. stringtable_offset = self.dword_align(
  3864. stringfileinfo_offset
  3865. + stringfileinfo_struct.sizeof()
  3866. + 2 * (len(stringfileinfo_string) + 1),
  3867. version_struct.OffsetToData,
  3868. )
  3869. stringfileinfo_struct.StringTable = []
  3870. # Process the String Table entries
  3871. while True:
  3872. stringtable_struct = self.__unpack_data__(
  3873. self.__StringTable_format__,
  3874. raw_data[stringtable_offset:],
  3875. file_offset=start_offset + stringtable_offset,
  3876. )
  3877. if not stringtable_struct:
  3878. break
  3879. ustr_offset = (
  3880. version_struct.OffsetToData
  3881. + stringtable_offset
  3882. + stringtable_struct.sizeof()
  3883. )
  3884. try:
  3885. stringtable_string = self.get_string_u_at_rva(ustr_offset)
  3886. except PEFormatError:
  3887. self.__warnings.append(
  3888. "Error parsing the version information, "
  3889. "attempting to read StringTable string. Can't "
  3890. "read unicode string at offset 0x{0:x}".format(
  3891. ustr_offset
  3892. )
  3893. )
  3894. break
  3895. stringtable_struct.LangID = stringtable_string
  3896. stringtable_struct.entries = {}
  3897. stringtable_struct.entries_offsets = {}
  3898. stringtable_struct.entries_lengths = {}
  3899. stringfileinfo_struct.StringTable.append(stringtable_struct)
  3900. entry_offset = self.dword_align(
  3901. stringtable_offset
  3902. + stringtable_struct.sizeof()
  3903. + 2 * (len(stringtable_string) + 1),
  3904. version_struct.OffsetToData,
  3905. )
  3906. # Process all entries in the string table
  3907. while (
  3908. entry_offset
  3909. < stringtable_offset + stringtable_struct.Length
  3910. ):
  3911. string_struct = self.__unpack_data__(
  3912. self.__String_format__,
  3913. raw_data[entry_offset:],
  3914. file_offset=start_offset + entry_offset,
  3915. )
  3916. if not string_struct:
  3917. break
  3918. ustr_offset = (
  3919. version_struct.OffsetToData
  3920. + entry_offset
  3921. + string_struct.sizeof()
  3922. )
  3923. try:
  3924. key = self.get_string_u_at_rva(ustr_offset)
  3925. key_offset = self.get_offset_from_rva(ustr_offset)
  3926. except PEFormatError:
  3927. self.__warnings.append(
  3928. "Error parsing the version information, "
  3929. "attempting to read StringTable Key string. Can't "
  3930. "read unicode string at offset 0x{0:x}".format(
  3931. ustr_offset
  3932. )
  3933. )
  3934. break
  3935. value_offset = self.dword_align(
  3936. 2 * (len(key) + 1)
  3937. + entry_offset
  3938. + string_struct.sizeof(),
  3939. version_struct.OffsetToData,
  3940. )
  3941. ustr_offset = version_struct.OffsetToData + value_offset
  3942. try:
  3943. value = self.get_string_u_at_rva(
  3944. ustr_offset, max_length=string_struct.ValueLength
  3945. )
  3946. value_offset = self.get_offset_from_rva(ustr_offset)
  3947. except PEFormatError:
  3948. self.__warnings.append(
  3949. "Error parsing the version information, attempting "
  3950. "to read StringTable Value string. Can't read "
  3951. f"unicode string at offset 0x{ustr_offset:x}"
  3952. )
  3953. break
  3954. if string_struct.Length == 0:
  3955. entry_offset = (
  3956. stringtable_offset + stringtable_struct.Length
  3957. )
  3958. else:
  3959. entry_offset = self.dword_align(
  3960. string_struct.Length + entry_offset,
  3961. version_struct.OffsetToData,
  3962. )
  3963. stringtable_struct.entries[key] = value
  3964. stringtable_struct.entries_offsets[key] = (
  3965. key_offset,
  3966. value_offset,
  3967. )
  3968. stringtable_struct.entries_lengths[key] = (
  3969. len(key),
  3970. len(value),
  3971. )
  3972. new_stringtable_offset = self.dword_align(
  3973. stringtable_struct.Length + stringtable_offset,
  3974. version_struct.OffsetToData,
  3975. )
  3976. # Check if the entry is crafted in a way that would lead
  3977. # to an infinite loop and break if so.
  3978. if new_stringtable_offset == stringtable_offset:
  3979. break
  3980. stringtable_offset = new_stringtable_offset
  3981. if stringtable_offset >= stringfileinfo_struct.Length:
  3982. break
  3983. # Parse a VarFileInfo entry
  3984. elif stringfileinfo_string and stringfileinfo_string.startswith(
  3985. b"VarFileInfo"
  3986. ):
  3987. varfileinfo_struct = stringfileinfo_struct
  3988. varfileinfo_struct.name = "VarFileInfo"
  3989. if (
  3990. varfileinfo_struct.Type in (0, 1)
  3991. and varfileinfo_struct.ValueLength == 0
  3992. ):
  3993. var_offset = self.dword_align(
  3994. stringfileinfo_offset
  3995. + varfileinfo_struct.sizeof()
  3996. + 2 * (len(stringfileinfo_string) + 1),
  3997. version_struct.OffsetToData,
  3998. )
  3999. varfileinfo_struct.Var = []
  4000. # Process all entries
  4001. while True:
  4002. var_struct = self.__unpack_data__(
  4003. self.__Var_format__,
  4004. raw_data[var_offset:],
  4005. file_offset=start_offset + var_offset,
  4006. )
  4007. if not var_struct:
  4008. break
  4009. ustr_offset = (
  4010. version_struct.OffsetToData
  4011. + var_offset
  4012. + var_struct.sizeof()
  4013. )
  4014. try:
  4015. var_string = self.get_string_u_at_rva(ustr_offset)
  4016. except PEFormatError:
  4017. self.__warnings.append(
  4018. "Error parsing the version information, "
  4019. "attempting to read VarFileInfo Var string. "
  4020. "Can't read unicode string at offset 0x{0:x}".format(
  4021. ustr_offset
  4022. )
  4023. )
  4024. break
  4025. if var_string is None:
  4026. break
  4027. varfileinfo_struct.Var.append(var_struct)
  4028. varword_offset = self.dword_align(
  4029. 2 * (len(var_string) + 1)
  4030. + var_offset
  4031. + var_struct.sizeof(),
  4032. version_struct.OffsetToData,
  4033. )
  4034. orig_varword_offset = varword_offset
  4035. while (
  4036. varword_offset
  4037. < orig_varword_offset + var_struct.ValueLength
  4038. ):
  4039. word1 = self.get_word_from_data(
  4040. raw_data[varword_offset : varword_offset + 2], 0
  4041. )
  4042. word2 = self.get_word_from_data(
  4043. raw_data[varword_offset + 2 : varword_offset + 4], 0
  4044. )
  4045. varword_offset += 4
  4046. if isinstance(word1, int) and isinstance(word2, int):
  4047. var_struct.entry = {
  4048. var_string: "0x%04x 0x%04x" % (word1, word2)
  4049. }
  4050. var_offset = self.dword_align(
  4051. var_offset + var_struct.Length, version_struct.OffsetToData
  4052. )
  4053. if var_offset <= var_offset + var_struct.Length:
  4054. break
  4055. # Increment and align the offset
  4056. stringfileinfo_offset = self.dword_align(
  4057. stringfileinfo_struct.Length + stringfileinfo_offset,
  4058. version_struct.OffsetToData,
  4059. )
  4060. # Check if all the StringFileInfo and VarFileInfo items have been processed
  4061. if (
  4062. stringfileinfo_struct.Length == 0
  4063. or stringfileinfo_offset >= versioninfo_struct.Length
  4064. ):
  4065. break
  4066. self.FileInfo.append(finfo)
  4067. def parse_export_directory(self, rva, size, forwarded_only=False):
  4068. """Parse the export directory.
  4069. Given the RVA of the export directory, it will process all
  4070. its entries.
  4071. The exports will be made available as a list of ExportData
  4072. instances in the 'IMAGE_DIRECTORY_ENTRY_EXPORT' PE attribute.
  4073. """
  4074. try:
  4075. export_dir = self.__unpack_data__(
  4076. self.__IMAGE_EXPORT_DIRECTORY_format__,
  4077. self.get_data(
  4078. rva, Structure(self.__IMAGE_EXPORT_DIRECTORY_format__).sizeof()
  4079. ),
  4080. file_offset=self.get_offset_from_rva(rva),
  4081. )
  4082. except PEFormatError:
  4083. self.__warnings.append(
  4084. "Error parsing export directory at RVA: 0x%x" % (rva)
  4085. )
  4086. return
  4087. if not export_dir:
  4088. return
4089. # We keep track of the bytes left in the file and use it to set an upper
4090. # bound on the number of items that can be read from the different
4091. # arrays.
  4092. def length_until_eof(rva):
  4093. return len(self.__data__) - self.get_offset_from_rva(rva)
  4094. try:
  4095. address_of_names = self.get_data(
  4096. export_dir.AddressOfNames,
  4097. min(
  4098. length_until_eof(export_dir.AddressOfNames),
  4099. export_dir.NumberOfNames * 4,
  4100. ),
  4101. )
  4102. address_of_name_ordinals = self.get_data(
  4103. export_dir.AddressOfNameOrdinals,
  4104. min(
  4105. length_until_eof(export_dir.AddressOfNameOrdinals),
  4106. export_dir.NumberOfNames * 4,
  4107. ),
  4108. )
  4109. address_of_functions = self.get_data(
  4110. export_dir.AddressOfFunctions,
  4111. min(
  4112. length_until_eof(export_dir.AddressOfFunctions),
  4113. export_dir.NumberOfFunctions * 4,
  4114. ),
  4115. )
  4116. except PEFormatError:
  4117. self.__warnings.append(
  4118. "Error parsing export directory at RVA: 0x%x" % (rva)
  4119. )
  4120. return
  4121. exports = []
  4122. max_failed_entries_before_giving_up = 10
  4123. section = self.get_section_by_rva(export_dir.AddressOfNames)
  4124. # Overly generous upper bound
  4125. safety_boundary = len(self.__data__)
  4126. if section:
  4127. safety_boundary = (
  4128. section.VirtualAddress
  4129. + len(section.get_data())
  4130. - export_dir.AddressOfNames
  4131. )
  4132. symbol_counts = collections.defaultdict(int)
  4133. export_parsing_loop_completed_normally = True
  4134. for i in range(min(export_dir.NumberOfNames, int(safety_boundary / 4))):
  4135. symbol_ordinal = self.get_word_from_data(address_of_name_ordinals, i)
  4136. if symbol_ordinal is not None and symbol_ordinal * 4 < len(
  4137. address_of_functions
  4138. ):
  4139. symbol_address = self.get_dword_from_data(
  4140. address_of_functions, symbol_ordinal
  4141. )
  4142. else:
  4143. # Corrupt? a bad pointer... we assume it's all
  4144. # useless, no exports
  4145. return None
  4146. if symbol_address is None or symbol_address == 0:
  4147. continue
  4148. # If the function's RVA points within the export directory
4149. # it will point to the forwarded symbol's name string
4150. # instead of pointing to the function's start address.
  4151. if symbol_address >= rva and symbol_address < rva + size:
  4152. forwarder_str = self.get_string_at_rva(symbol_address)
  4153. try:
  4154. forwarder_offset = self.get_offset_from_rva(symbol_address)
  4155. except PEFormatError:
  4156. continue
  4157. else:
  4158. if forwarded_only:
  4159. continue
  4160. forwarder_str = None
  4161. forwarder_offset = None
  4162. symbol_name_address = self.get_dword_from_data(address_of_names, i)
  4163. if symbol_name_address is None:
  4164. max_failed_entries_before_giving_up -= 1
  4165. if max_failed_entries_before_giving_up <= 0:
  4166. export_parsing_loop_completed_normally = False
  4167. break
  4168. symbol_name = self.get_string_at_rva(
  4169. symbol_name_address, MAX_SYMBOL_NAME_LENGTH
  4170. )
  4171. if not is_valid_function_name(symbol_name):
  4172. export_parsing_loop_completed_normally = False
  4173. break
  4174. try:
  4175. symbol_name_offset = self.get_offset_from_rva(symbol_name_address)
  4176. except PEFormatError:
  4177. max_failed_entries_before_giving_up -= 1
  4178. if max_failed_entries_before_giving_up <= 0:
  4179. export_parsing_loop_completed_normally = False
  4180. break
  4181. try:
  4182. symbol_name_offset = self.get_offset_from_rva(symbol_name_address)
  4183. except PEFormatError:
  4184. max_failed_entries_before_giving_up -= 1
  4185. if max_failed_entries_before_giving_up <= 0:
  4186. export_parsing_loop_completed_normally = False
  4187. break
  4188. continue
  4189. # File 0b1d3d3664915577ab9a32188d29bbf3542b86c7b9ce333e245496c3018819f1
  4190. # was being parsed as potentially containing millions of exports.
  4191. # Checking for duplicates addresses the issue.
  4192. symbol_counts[(symbol_name, symbol_address)] += 1
  4193. if symbol_counts[(symbol_name, symbol_address)] > 10:
  4194. self.__warnings.append(
  4195. f"Export directory contains more than 10 repeated entries "
  4196. f"({symbol_name}, {symbol_address:#02x}). Assuming corrupt."
  4197. )
  4198. break
  4199. elif len(symbol_counts) > self.max_symbol_exports:
  4200. self.__warnings.append(
  4201. "Export directory contains more than {} symbol entries. "
  4202. "Assuming corrupt.".format(self.max_symbol_exports)
  4203. )
  4204. break
  4205. exports.append(
  4206. ExportData(
  4207. pe=self,
  4208. ordinal=export_dir.Base + symbol_ordinal,
  4209. ordinal_offset=self.get_offset_from_rva(
  4210. export_dir.AddressOfNameOrdinals + 2 * i
  4211. ),
  4212. address=symbol_address,
  4213. address_offset=self.get_offset_from_rva(
  4214. export_dir.AddressOfFunctions + 4 * symbol_ordinal
  4215. ),
  4216. name=symbol_name,
  4217. name_offset=symbol_name_offset,
  4218. forwarder=forwarder_str,
  4219. forwarder_offset=forwarder_offset,
  4220. )
  4221. )
  4222. if not export_parsing_loop_completed_normally:
  4223. self.__warnings.append(
  4224. f"RVA AddressOfNames in the export directory points to an invalid "
  4225. f"address: {export_dir.AddressOfNames:x}"
  4226. )
  4227. ordinals = {exp.ordinal for exp in exports}
  4228. max_failed_entries_before_giving_up = 10
  4229. section = self.get_section_by_rva(export_dir.AddressOfFunctions)
  4230. # Overly generous upper bound
  4231. safety_boundary = len(self.__data__)
  4232. if section:
  4233. safety_boundary = (
  4234. section.VirtualAddress
  4235. + len(section.get_data())
  4236. - export_dir.AddressOfFunctions
  4237. )
  4238. symbol_counts = collections.defaultdict(int)
  4239. export_parsing_loop_completed_normally = True
  4240. for idx in range(min(export_dir.NumberOfFunctions, int(safety_boundary / 4))):
4241. if idx + export_dir.Base not in ordinals:
  4242. try:
  4243. symbol_address = self.get_dword_from_data(address_of_functions, idx)
  4244. except PEFormatError:
  4245. symbol_address = None
  4246. if symbol_address is None:
  4247. max_failed_entries_before_giving_up -= 1
  4248. if max_failed_entries_before_giving_up <= 0:
  4249. export_parsing_loop_completed_normally = False
  4250. break
  4251. if symbol_address == 0:
  4252. continue
  4253. # Checking for forwarder again.
  4254. if (
  4255. symbol_address is not None
  4256. and symbol_address >= rva
  4257. and symbol_address < rva + size
  4258. ):
  4259. forwarder_str = self.get_string_at_rva(symbol_address)
  4260. else:
  4261. forwarder_str = None
  4262. # File 0b1d3d3664915577ab9a32188d29bbf3542b86c7b9ce333e245496c3018819f1
  4263. # was being parsed as potentially containing millions of exports.
  4264. # Checking for duplicates addresses the issue.
  4265. symbol_counts[symbol_address] += 1
  4266. if symbol_counts[symbol_address] > self.max_repeated_symbol:
  4267. # if most_common and most_common[0][1] > 10:
  4268. self.__warnings.append(
  4269. "Export directory contains more than {} repeated "
  4270. "ordinal entries (0x{:x}). Assuming corrupt.".format(
  4271. self.max_repeated_symbol, symbol_address
  4272. )
  4273. )
  4274. break
  4275. elif len(symbol_counts) > self.max_symbol_exports:
  4276. self.__warnings.append(
  4277. "Export directory contains more than "
  4278. f"{self.max_symbol_exports} ordinal entries. Assuming corrupt."
  4279. )
  4280. break
  4281. exports.append(
  4282. ExportData(
  4283. ordinal=export_dir.Base + idx,
  4284. address=symbol_address,
  4285. name=None,
  4286. forwarder=forwarder_str,
  4287. )
  4288. )
  4289. if not export_parsing_loop_completed_normally:
  4290. self.__warnings.append(
  4291. "RVA AddressOfFunctions in the export directory points to an invalid "
  4292. f"address: {export_dir.AddressOfFunctions:x}"
  4293. )
  4294. return
  4295. if not exports and export_dir.all_zeroes():
  4296. return None
  4297. return ExportDirData(
  4298. struct=export_dir,
  4299. symbols=exports,
  4300. name=self.get_string_at_rva(export_dir.Name),
  4301. )
  4302. def dword_align(self, offset, base):
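# When 'base' is itself DWORD-aligned this simply rounds 'offset' up to the
# next multiple of 4, e.g. dword_align(5, 0x1000) == 8 == dword_align(8, 0x1000).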
  4303. return ((offset + base + 3) & 0xFFFFFFFC) - (base & 0xFFFFFFFC)
  4304. def normalize_import_va(self, va):
  4305. # Setup image range
  4306. begin_of_image = self.OPTIONAL_HEADER.ImageBase
  4307. end_of_image = self.OPTIONAL_HEADER.ImageBase + self.OPTIONAL_HEADER.SizeOfImage
  4308. # Try to avoid bogus VAs, which are out of the image.
  4309. # This also filters out entries that are zero
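# e.g. with ImageBase 0x400000 and SizeOfImage 0x10000, a VA of 0x401000
# becomes RVA 0x1000; values outside the image range are returned unchanged.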
  4310. if begin_of_image <= va and va < end_of_image:
  4311. va -= begin_of_image
  4312. return va
  4313. def parse_delay_import_directory(self, rva, size):
  4314. """Walk and parse the delay import directory."""
  4315. import_descs = []
  4316. error_count = 0
  4317. while True:
  4318. try:
  4319. # If the RVA is invalid all would blow up. Some PEs seem to be
  4320. # specially nasty and have an invalid RVA.
  4321. data = self.get_data(
  4322. rva,
  4323. Structure(self.__IMAGE_DELAY_IMPORT_DESCRIPTOR_format__).sizeof(),
  4324. )
  4325. except PEFormatError:
  4326. self.__warnings.append(
  4327. "Error parsing the Delay import directory at RVA: 0x%x" % (rva)
  4328. )
  4329. break
  4330. file_offset = self.get_offset_from_rva(rva)
  4331. import_desc = self.__unpack_data__(
  4332. self.__IMAGE_DELAY_IMPORT_DESCRIPTOR_format__,
  4333. data,
  4334. file_offset=file_offset,
  4335. )
  4336. # If the structure is all zeros, we reached the end of the list
  4337. if not import_desc or import_desc.all_zeroes():
  4338. break
  4339. contains_addresses = False
  4340. # Handle old import descriptor that has Virtual Addresses instead of RVAs
  4341. # This version of import descriptor is created by old Visual Studio versions
  4342. # (pre 6.0)
  4343. # Can only be present in 32-bit binaries (no 64-bit compiler existed at the
  4344. # time)
  4345. # Sample: e8d3bff0c1a9a6955993f7a441121a2692261421e82fdfadaaded45d3bea9980
  4346. if (
  4347. import_desc.grAttrs == 0
  4348. and self.FILE_HEADER.Machine == MACHINE_TYPE["IMAGE_FILE_MACHINE_I386"]
  4349. ):
  4350. import_desc.pBoundIAT = self.normalize_import_va(import_desc.pBoundIAT)
  4351. import_desc.pIAT = self.normalize_import_va(import_desc.pIAT)
  4352. import_desc.pINT = self.normalize_import_va(import_desc.pINT)
  4353. import_desc.pUnloadIAT = self.normalize_import_va(
  4354. import_desc.pUnloadIAT
  4355. )
4356. import_desc.phmod = self.normalize_import_va(import_desc.phmod)
  4357. import_desc.szName = self.normalize_import_va(import_desc.szName)
  4358. contains_addresses = True
  4359. rva += import_desc.sizeof()
  4360. # If the array of thunks is somewhere earlier than the import
  4361. # descriptor we can set a maximum length for the array. Otherwise
  4362. # just set a maximum length of the size of the file
  4363. max_len = len(self.__data__) - file_offset
  4364. if rva > import_desc.pINT or rva > import_desc.pIAT:
  4365. max_len = max(rva - import_desc.pINT, rva - import_desc.pIAT)
  4366. import_data = []
  4367. try:
  4368. import_data = self.parse_imports(
  4369. import_desc.pINT,
  4370. import_desc.pIAT,
  4371. None,
  4372. max_len,
  4373. contains_addresses,
  4374. )
  4375. except PEFormatError as excp:
  4376. self.__warnings.append(
  4377. "Error parsing the Delay import directory. "
  4378. "Invalid import data at RVA: 0x{0:x} ({1})".format(rva, excp.value)
  4379. )
  4380. if error_count > 5:
  4381. self.__warnings.append(
  4382. "Too many errors parsing the Delay import directory. "
  4383. "Invalid import data at RVA: 0x{0:x}".format(rva)
  4384. )
  4385. break
  4386. if not import_data:
  4387. error_count += 1
  4388. continue
  4389. if self.__total_import_symbols > MAX_IMPORT_SYMBOLS:
  4390. self.__warnings.append(
  4391. "Error, too many imported symbols %d (>%s)"
  4392. % (self.__total_import_symbols, MAX_IMPORT_SYMBOLS)
  4393. )
  4394. break
  4395. dll = self.get_string_at_rva(import_desc.szName, MAX_DLL_LENGTH)
  4396. if not is_valid_dos_filename(dll):
  4397. dll = b("*invalid*")
  4398. if dll:
  4399. for symbol in import_data:
  4400. if symbol.name is None:
  4401. funcname = ordlookup.ordLookup(dll.lower(), symbol.ordinal)
  4402. if funcname:
  4403. symbol.name = funcname
  4404. import_descs.append(
  4405. ImportDescData(struct=import_desc, imports=import_data, dll=dll)
  4406. )
  4407. return import_descs
  4408. def get_rich_header_hash(self, algorithm="md5"):
  4409. if not hasattr(self, "RICH_HEADER") or self.RICH_HEADER is None:
  4410. return ""
  4411. if algorithm == "md5":
  4412. return md5(self.RICH_HEADER.clear_data).hexdigest()
  4413. elif algorithm == "sha1":
  4414. return sha1(self.RICH_HEADER.clear_data).hexdigest()
  4415. elif algorithm == "sha256":
  4416. return sha256(self.RICH_HEADER.clear_data).hexdigest()
  4417. elif algorithm == "sha512":
  4418. return sha512(self.RICH_HEADER.clear_data).hexdigest()
  4419. raise Exception("Invalid hashing algorithm specified")
  4420. def get_imphash(self):
  4421. impstrs = []
  4422. exts = ["ocx", "sys", "dll"]
  4423. if not hasattr(self, "DIRECTORY_ENTRY_IMPORT"):
  4424. return ""
  4425. for entry in self.DIRECTORY_ENTRY_IMPORT:
  4426. if isinstance(entry.dll, bytes):
  4427. libname = entry.dll.decode().lower()
  4428. else:
  4429. libname = entry.dll.lower()
  4430. parts = libname.rsplit(".", 1)
  4431. if len(parts) > 1 and parts[1] in exts:
  4432. libname = parts[0]
  4433. for imp in entry.imports:
  4434. funcname = None
  4435. if not imp.name:
  4436. funcname = ordlookup.ordLookup(
  4437. entry.dll.lower(), imp.ordinal, make_name=True
  4438. )
  4439. if not funcname:
  4440. raise PEFormatError(
  4441. f"Unable to look up ordinal {entry.dll}:{imp.ordinal:04x}"
  4442. )
  4443. else:
  4444. funcname = imp.name
  4445. if not funcname:
  4446. continue
  4447. if isinstance(funcname, bytes):
  4448. funcname = funcname.decode()
  4449. impstrs.append("%s.%s" % (libname.lower(), funcname.lower()))
  4450. return md5(",".join(impstrs).encode()).hexdigest()
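# Usage sketch ('sample.exe' is a hypothetical path): the resulting import
# hash can be compared across samples to group binaries importing the same
# API set.
#
#   import pefile
#   print(pefile.PE("sample.exe").get_imphash())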
  4451. def parse_import_directory(self, rva, size, dllnames_only=False):
  4452. """Walk and parse the import directory."""
  4453. import_descs = []
  4454. error_count = 0
  4455. while True:
  4456. try:
  4457. # If the RVA is invalid all would blow up. Some EXEs seem to be
  4458. # specially nasty and have an invalid RVA.
  4459. data = self.get_data(
  4460. rva, Structure(self.__IMAGE_IMPORT_DESCRIPTOR_format__).sizeof()
  4461. )
  4462. except PEFormatError:
  4463. self.__warnings.append(
  4464. f"Error parsing the import directory at RVA: 0x{rva:x}"
  4465. )
  4466. break
  4467. file_offset = self.get_offset_from_rva(rva)
  4468. import_desc = self.__unpack_data__(
  4469. self.__IMAGE_IMPORT_DESCRIPTOR_format__, data, file_offset=file_offset
  4470. )
  4471. # If the structure is all zeros, we reached the end of the list
  4472. if not import_desc or import_desc.all_zeroes():
  4473. break
  4474. rva += import_desc.sizeof()
  4475. # If the array of thunks is somewhere earlier than the import
  4476. # descriptor we can set a maximum length for the array. Otherwise
  4477. # just set a maximum length of the size of the file
  4478. max_len = len(self.__data__) - file_offset
  4479. if rva > import_desc.OriginalFirstThunk or rva > import_desc.FirstThunk:
  4480. max_len = max(
  4481. rva - import_desc.OriginalFirstThunk, rva - import_desc.FirstThunk
  4482. )
  4483. import_data = []
  4484. if not dllnames_only:
  4485. try:
  4486. import_data = self.parse_imports(
  4487. import_desc.OriginalFirstThunk,
  4488. import_desc.FirstThunk,
  4489. import_desc.ForwarderChain,
  4490. max_length=max_len,
  4491. )
  4492. except PEFormatError as e:
  4493. self.__warnings.append(
  4494. "Error parsing the import directory. "
  4495. f"Invalid Import data at RVA: 0x{rva:x} ({e.value})"
  4496. )
  4497. if error_count > 5:
  4498. self.__warnings.append(
  4499. "Too many errors parsing the import directory. "
  4500. f"Invalid import data at RVA: 0x{rva:x}"
  4501. )
  4502. break
  4503. if not import_data:
  4504. error_count += 1
  4505. # TODO: do not continue here
  4506. continue
  4507. dll = self.get_string_at_rva(import_desc.Name, MAX_DLL_LENGTH)
  4508. if not is_valid_dos_filename(dll):
  4509. dll = b("*invalid*")
  4510. if dll:
  4511. for symbol in import_data:
  4512. if symbol.name is None:
  4513. funcname = ordlookup.ordLookup(dll.lower(), symbol.ordinal)
  4514. if funcname:
  4515. symbol.name = funcname
  4516. import_descs.append(
  4517. ImportDescData(struct=import_desc, imports=import_data, dll=dll)
  4518. )
  4519. if not dllnames_only:
  4520. suspicious_imports = set(["LoadLibrary", "GetProcAddress"])
  4521. suspicious_imports_count = 0
  4522. total_symbols = 0
  4523. for imp_dll in import_descs:
  4524. for symbol in imp_dll.imports:
  4525. for suspicious_symbol in suspicious_imports:
  4526. if not symbol or not symbol.name:
  4527. continue
  4528. name = symbol.name
4529. if isinstance(symbol.name, bytes):
  4530. name = symbol.name.decode("utf-8")
  4531. if name.startswith(suspicious_symbol):
  4532. suspicious_imports_count += 1
  4533. break
  4534. total_symbols += 1
  4535. if (
  4536. suspicious_imports_count == len(suspicious_imports)
  4537. and total_symbols < 20
  4538. ):
  4539. self.__warnings.append(
  4540. "Imported symbols contain entries typical of packed executables."
  4541. )
  4542. return import_descs
  4543. def parse_imports(
  4544. self,
  4545. original_first_thunk,
  4546. first_thunk,
  4547. forwarder_chain,
  4548. max_length=None,
  4549. contains_addresses=False,
  4550. ):
  4551. """Parse the imported symbols.
4552. It will fill and return a list of ImportData instances describing the
4553. symbols imported through the given lookup/address tables, resolving
4554. names, hints, ordinals and bound addresses where available.
  4555. """
  4556. imported_symbols = []
  4557. # Import Lookup Table. Contains ordinals or pointers to strings.
  4558. ilt = self.get_import_table(
  4559. original_first_thunk, max_length, contains_addresses
  4560. )
  4561. # Import Address Table. May have identical content to ILT if
  4562. # PE file is not bound. It will contain the address of the
  4563. # imported symbols once the binary is loaded or if it is already
  4564. # bound.
  4565. iat = self.get_import_table(first_thunk, max_length, contains_addresses)
  4566. # OC Patch:
  4567. # Would crash if IAT or ILT had None type
  4568. if (not iat or len(iat) == 0) and (not ilt or len(ilt) == 0):
  4569. self.__warnings.append(
  4570. "Damaged Import Table information. "
  4571. "ILT and/or IAT appear to be broken. "
  4572. f"OriginalFirstThunk: 0x{original_first_thunk:x} "
  4573. f"FirstThunk: 0x{first_thunk:x}"
  4574. )
  4575. return []
  4576. table = None
  4577. if ilt:
  4578. table = ilt
  4579. elif iat:
  4580. table = iat
  4581. else:
  4582. return None
  4583. imp_offset = 4
  4584. address_mask = 0x7FFFFFFF
  4585. if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
  4586. ordinal_flag = IMAGE_ORDINAL_FLAG
  4587. elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
  4588. ordinal_flag = IMAGE_ORDINAL_FLAG64
  4589. imp_offset = 8
  4590. address_mask = 0x7FFFFFFFFFFFFFFF
  4591. else:
  4592. # Some PEs may have an invalid value in the Magic field of the
  4593. # Optional Header. Just in case the remaining file is parseable
  4594. # let's pretend it's a 32bit PE32 by default.
  4595. ordinal_flag = IMAGE_ORDINAL_FLAG
  4596. num_invalid = 0
  4597. for idx, tbl_entry in enumerate(table):
  4598. imp_ord = None
  4599. imp_hint = None
  4600. imp_name = None
  4601. name_offset = None
  4602. hint_name_table_rva = None
  4603. if tbl_entry.AddressOfData:
  4604. # If imported by ordinal, we will append the ordinal number
  4605. #
  4606. if tbl_entry.AddressOfData & ordinal_flag:
  4607. import_by_ordinal = True
  4608. imp_ord = tbl_entry.AddressOfData & 0xFFFF
  4609. imp_name = None
  4610. name_offset = None
  4611. else:
  4612. import_by_ordinal = False
  4613. try:
  4614. hint_name_table_rva = tbl_entry.AddressOfData & address_mask
  4615. data = self.get_data(hint_name_table_rva, 2)
  4616. # Get the Hint
  4617. imp_hint = self.get_word_from_data(data, 0)
  4618. imp_name = self.get_string_at_rva(
  4619. tbl_entry.AddressOfData + 2, MAX_IMPORT_NAME_LENGTH
  4620. )
  4621. if not is_valid_function_name(imp_name):
  4622. imp_name = b("*invalid*")
  4623. name_offset = self.get_offset_from_rva(
  4624. tbl_entry.AddressOfData + 2
  4625. )
  4626. except PEFormatError:
  4627. pass
  4628. # by nriva: we want the ThunkRVA and ThunkOffset
  4629. thunk_offset = tbl_entry.get_file_offset()
  4630. thunk_rva = self.get_rva_from_offset(thunk_offset)
  4631. imp_address = (
  4632. first_thunk + self.OPTIONAL_HEADER.ImageBase + idx * imp_offset
  4633. )
  4634. struct_iat = None
  4635. try:
  4636. if iat and ilt and ilt[idx].AddressOfData != iat[idx].AddressOfData:
  4637. imp_bound = iat[idx].AddressOfData
  4638. struct_iat = iat[idx]
  4639. else:
  4640. imp_bound = None
  4641. except IndexError:
  4642. imp_bound = None
  4643. # The file with hashes:
  4644. #
  4645. # MD5: bfe97192e8107d52dd7b4010d12b2924
  4646. # SHA256: 3d22f8b001423cb460811ab4f4789f277b35838d45c62ec0454c877e7c82c7f5
  4647. #
  4648. # has an invalid table built in a way that it's parseable but contains
  4649. # invalid entries that lead pefile to take extremely long amounts of time to
  4650. # parse. It also leads to extreme memory consumption.
  4651. # To prevent similar cases, if invalid entries are found in the middle of a
  4652. # table the parsing will be aborted
  4653. #
4654. if imp_ord is None and imp_name is None:
  4655. raise PEFormatError("Invalid entries, aborting parsing.")
  4656. # Some PEs appear to interleave valid and invalid imports. Instead of
  4657. # aborting the parsing altogether we will simply skip the invalid entries.
  4658. # Although if we see 1000 invalid entries and no legit ones, we abort.
  4659. if imp_name == b("*invalid*"):
  4660. if num_invalid > 1000 and num_invalid == idx:
  4661. raise PEFormatError("Too many invalid names, aborting parsing.")
  4662. num_invalid += 1
  4663. continue
  4664. if imp_ord or imp_name:
  4665. imported_symbols.append(
  4666. ImportData(
  4667. pe=self,
  4668. struct_table=tbl_entry,
  4669. struct_iat=struct_iat, # for bound imports if any
  4670. import_by_ordinal=import_by_ordinal,
  4671. ordinal=imp_ord,
  4672. ordinal_offset=tbl_entry.get_file_offset(),
  4673. hint=imp_hint,
  4674. name=imp_name,
  4675. name_offset=name_offset,
  4676. bound=imp_bound,
  4677. address=imp_address,
  4678. hint_name_table_rva=hint_name_table_rva,
  4679. thunk_offset=thunk_offset,
  4680. thunk_rva=thunk_rva,
  4681. )
  4682. )
  4683. return imported_symbols
  4684. def get_import_table(self, rva, max_length=None, contains_addresses=False):
  4685. table = []
  4686. # We need the ordinal flag for a simple heuristic
  4687. # we're implementing within the loop
  4688. #
  4689. if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
  4690. ordinal_flag = IMAGE_ORDINAL_FLAG
  4691. format = self.__IMAGE_THUNK_DATA_format__
  4692. elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
  4693. ordinal_flag = IMAGE_ORDINAL_FLAG64
  4694. format = self.__IMAGE_THUNK_DATA64_format__
  4695. else:
  4696. # Some PEs may have an invalid value in the Magic field of the
  4697. # Optional Header. Just in case the remaining file is parseable
  4698. # let's pretend it's a 32bit PE32 by default.
  4699. ordinal_flag = IMAGE_ORDINAL_FLAG
  4700. format = self.__IMAGE_THUNK_DATA_format__
4701. MAX_ADDRESS_SPREAD = 128 * 2 ** 20  # 128 MB
  4702. MAX_REPEATED_ADDRESSES = 15
  4703. repeated_address = 0
  4704. addresses_of_data_set_64 = set()
  4705. addresses_of_data_set_32 = set()
  4706. start_rva = rva
  4707. while rva:
  4708. if max_length is not None and rva >= start_rva + max_length:
  4709. self.__warnings.append(
  4710. "Error parsing the import table. Entries go beyond bounds."
  4711. )
  4712. break
  4713. # Enforce an upper bounds on import symbols.
  4714. if self.__total_import_symbols > MAX_IMPORT_SYMBOLS:
  4715. self.__warnings.append(
  4716. "Excessive number of imports %d (>%s)"
  4717. % (self.__total_import_symbols, MAX_IMPORT_SYMBOLS)
  4718. )
  4719. break
  4720. self.__total_import_symbols += 1
  4721. # if we see too many times the same entry we assume it could be
  4722. # a table containing bogus data (with malicious intent or otherwise)
  4723. if repeated_address >= MAX_REPEATED_ADDRESSES:
  4724. return []
  4725. # if the addresses point somewhere but the difference between the highest
  4726. # and lowest address is larger than MAX_ADDRESS_SPREAD we assume a bogus
  4727. # table as the addresses should be contained within a module
  4728. if (
  4729. addresses_of_data_set_32
  4730. and max(addresses_of_data_set_32) - min(addresses_of_data_set_32)
  4731. > MAX_ADDRESS_SPREAD
  4732. ):
  4733. return []
  4734. if (
  4735. addresses_of_data_set_64
  4736. and max(addresses_of_data_set_64) - min(addresses_of_data_set_64)
  4737. > MAX_ADDRESS_SPREAD
  4738. ):
  4739. return []
  4740. failed = False
  4741. try:
  4742. data = self.get_data(rva, Structure(format).sizeof())
  4743. except PEFormatError:
  4744. failed = True
  4745. if failed or len(data) != Structure(format).sizeof():
  4746. self.__warnings.append(
  4747. "Error parsing the import table. " "Invalid data at RVA: 0x%x" % rva
  4748. )
  4749. return None
  4750. thunk_data = self.__unpack_data__(
  4751. format, data, file_offset=self.get_offset_from_rva(rva)
  4752. )
  4753. # If the thunk data contains VAs instead of RVAs, we need to normalize them
  4754. if contains_addresses:
  4755. thunk_data.AddressOfData = self.normalize_import_va(
  4756. thunk_data.AddressOfData
  4757. )
  4758. thunk_data.ForwarderString = self.normalize_import_va(
  4759. thunk_data.ForwarderString
  4760. )
  4761. thunk_data.Function = self.normalize_import_va(thunk_data.Function)
  4762. thunk_data.Ordinal = self.normalize_import_va(thunk_data.Ordinal)
  4763. # Check if the AddressOfData lies within the range of RVAs that it's
  4764. # being scanned, abort if that is the case, as it is very unlikely
  4765. # to be legitimate data.
  4766. # Seen in PE with SHA256:
  4767. # 5945bb6f0ac879ddf61b1c284f3b8d20c06b228e75ae4f571fa87f5b9512902c
  4768. if (
  4769. thunk_data
  4770. and thunk_data.AddressOfData >= start_rva
  4771. and thunk_data.AddressOfData <= rva
  4772. ):
  4773. self.__warnings.append(
  4774. "Error parsing the import table. "
  4775. "AddressOfData overlaps with THUNK_DATA for "
  4776. "THUNK at RVA 0x%x" % (rva)
  4777. )
  4778. break
  4779. if thunk_data and thunk_data.AddressOfData:
  4780. # If the entry looks like could be an ordinal...
  4781. if thunk_data.AddressOfData & ordinal_flag:
4782. # but its value is beyond 2^16, we will assume it's
4783. # corrupted and ignore it altogether
  4784. if thunk_data.AddressOfData & 0x7FFFFFFF > 0xFFFF:
  4785. return []
  4786. # and if it looks like it should be an RVA
  4787. else:
  4788. # keep track of the RVAs seen and store them to study their
  4789. # properties. When certain non-standard features are detected
  4790. # the parsing will be aborted
  4791. if (
  4792. thunk_data.AddressOfData in addresses_of_data_set_32
  4793. or thunk_data.AddressOfData in addresses_of_data_set_64
  4794. ):
  4795. repeated_address += 1
  4796. if thunk_data.AddressOfData >= 2 ** 32:
  4797. addresses_of_data_set_64.add(thunk_data.AddressOfData)
  4798. else:
  4799. addresses_of_data_set_32.add(thunk_data.AddressOfData)
  4800. if not thunk_data or thunk_data.all_zeroes():
  4801. break
  4802. rva += thunk_data.sizeof()
  4803. table.append(thunk_data)
  4804. return table
  4805. def get_memory_mapped_image(self, max_virtual_address=0x10000000, ImageBase=None):
  4806. """Returns the data corresponding to the memory layout of the PE file.
  4807. The data includes the PE header and the sections loaded at offsets
  4808. corresponding to their relative virtual addresses. (the VirtualAddress
  4809. section header member).
  4810. Any offset in this data corresponds to the absolute memory address
  4811. ImageBase+offset.
4812. The optional argument 'max_virtual_address' provides a means of limiting
4813. which sections are processed.
4814. Any section with a VirtualAddress beyond this value will be skipped.
  4815. Normally, sections with values beyond this range are just there to confuse
  4816. tools. It's a common trick to see in packed executables.
  4817. If the 'ImageBase' optional argument is supplied, the file's relocations
  4818. will be applied to the image by calling the 'relocate_image()' method. Beware
  4819. that the relocation information is applied permanently.
  4820. """
  4821. # Rebase if requested
  4822. #
  4823. if ImageBase is not None:
  4824. # Keep a copy of the image's data before modifying it by rebasing it
  4825. #
  4826. original_data = self.__data__
  4827. self.relocate_image(ImageBase)
  4828. # Collect all sections in one code block
  4829. mapped_data = self.__data__[:]
  4830. for section in self.sections:
  4831. # Miscellaneous integrity tests.
  4832. # Some packer will set these to bogus values to make tools go nuts.
  4833. if section.Misc_VirtualSize == 0 and section.SizeOfRawData == 0:
  4834. continue
  4835. srd = section.SizeOfRawData
  4836. prd = self.adjust_FileAlignment(
  4837. section.PointerToRawData, self.OPTIONAL_HEADER.FileAlignment
  4838. )
  4839. VirtualAddress_adj = self.adjust_SectionAlignment(
  4840. section.VirtualAddress,
  4841. self.OPTIONAL_HEADER.SectionAlignment,
  4842. self.OPTIONAL_HEADER.FileAlignment,
  4843. )
  4844. if (
  4845. srd > len(self.__data__)
  4846. or prd > len(self.__data__)
  4847. or srd + prd > len(self.__data__)
  4848. or VirtualAddress_adj >= max_virtual_address
  4849. ):
  4850. continue
  4851. padding_length = VirtualAddress_adj - len(mapped_data)
  4852. if padding_length > 0:
  4853. mapped_data += b"\0" * padding_length
  4854. elif padding_length < 0:
  4855. mapped_data = mapped_data[:padding_length]
  4856. mapped_data += section.get_data()
  4857. # If the image was rebased, restore it to its original form
  4858. #
  4859. if ImageBase is not None:
  4860. self.__data__ = original_data
  4861. return mapped_data
  4862. def get_resources_strings(self):
  4863. """Returns a list of all the strings found withing the resources (if any).
  4864. This method will scan all entries in the resources directory of the PE, if
  4865. there is one, and will return a [] with the strings.
  4866. An empty list will be returned otherwise.
  4867. """
  4868. resources_strings = []
  4869. if hasattr(self, "DIRECTORY_ENTRY_RESOURCE"):
  4870. for res_type in self.DIRECTORY_ENTRY_RESOURCE.entries:
  4871. if hasattr(res_type, "directory"):
  4872. for resource_id in res_type.directory.entries:
  4873. if hasattr(resource_id, "directory"):
  4874. if (
  4875. hasattr(resource_id.directory, "strings")
  4876. and resource_id.directory.strings
  4877. ):
  4878. for res_string in list(
  4879. resource_id.directory.strings.values()
  4880. ):
  4881. resources_strings.append(res_string)
  4882. return resources_strings
  4883. def get_data(self, rva=0, length=None):
  4884. """Get data regardless of the section where it lies on.
  4885. Given a RVA and the size of the chunk to retrieve, this method
  4886. will find the section where the data lies and return the data.
  4887. """
  4888. s = self.get_section_by_rva(rva)
  4889. if length:
  4890. end = rva + length
  4891. else:
  4892. end = None
  4893. if not s:
  4894. if rva < len(self.header):
  4895. return self.header[rva:end]
  4896. # Before we give up we check whether the file might
  4897. # contain the data anyway. There are cases of PE files
  4898. # without sections that rely on windows loading the first
  4899. # 8291 bytes into memory and assume the data will be
  4900. # there
  4901. # A functional file with these characteristics is:
  4902. # MD5: 0008892cdfbc3bda5ce047c565e52295
  4903. # SHA-1: c7116b9ff950f86af256defb95b5d4859d4752a9
  4904. #
  4905. if rva < len(self.__data__):
  4906. return self.__data__[rva:end]
  4907. raise PEFormatError("data at RVA can't be fetched. Corrupt header?")
  4908. return s.get_data(rva, length)
  4909. def get_rva_from_offset(self, offset):
  4910. """Get the RVA corresponding to this file offset."""
  4911. s = self.get_section_by_offset(offset)
  4912. if not s:
  4913. if self.sections:
  4914. lowest_rva = min(
  4915. [
  4916. self.adjust_SectionAlignment(
  4917. s.VirtualAddress,
  4918. self.OPTIONAL_HEADER.SectionAlignment,
  4919. self.OPTIONAL_HEADER.FileAlignment,
  4920. )
  4921. for s in self.sections
  4922. ]
  4923. )
  4924. if offset < lowest_rva:
  4925. # We will assume that the offset lies within the headers, or
  4926. # at least points before where the earliest section starts
  4927. # and we will simply return the offset as the RVA
  4928. #
  4929. # The case illustrating this behavior can be found at:
  4930. # http://corkami.blogspot.com/2010/01/hey-hey-hey-whats-in-your-head.html
  4931. # where the import table is not contained by any section
  4932. # hence the RVA needs to be resolved to a raw offset
  4933. return offset
  4934. return None
  4935. else:
  4936. return offset
  4937. return s.get_rva_from_offset(offset)
  4938. def get_offset_from_rva(self, rva):
  4939. """Get the file offset corresponding to this RVA.
  4940. Given a RVA , this method will find the section where the
  4941. data lies and return the offset within the file.
  4942. """
  4943. s = self.get_section_by_rva(rva)
  4944. if not s:
  4945. # If not found within a section assume it might
  4946. # point to overlay data or otherwise data present
  4947. # but not contained in any section. In those
  4948. # cases the RVA should equal the offset
  4949. if rva < len(self.__data__):
  4950. return rva
  4951. raise PEFormatError(f"data at RVA 0x{rva:x} can't be fetched")
  4952. return s.get_offset_from_rva(rva)
  4953. def get_string_at_rva(self, rva, max_length=MAX_STRING_LENGTH):
  4954. """Get an ASCII string located at the given address."""
  4955. if rva is None:
  4956. return None
  4957. s = self.get_section_by_rva(rva)
  4958. if not s:
  4959. return self.get_string_from_data(0, self.__data__[rva : rva + max_length])
  4960. return self.get_string_from_data(0, s.get_data(rva, length=max_length))
  4961. def get_bytes_from_data(self, offset, data):
  4962. """."""
  4963. if offset > len(data):
  4964. return b""
  4965. d = data[offset:]
  4966. if isinstance(d, bytearray):
  4967. return bytes(d)
  4968. return d
  4969. def get_string_from_data(self, offset, data):
  4970. """Get an ASCII string from data."""
  4971. s = self.get_bytes_from_data(offset, data)
  4972. end = s.find(b"\0")
  4973. if end >= 0:
  4974. s = s[:end]
  4975. return s
  4976. def get_string_u_at_rva(self, rva, max_length=2 ** 16, encoding=None):
  4977. """Get an Unicode string located at the given address."""
  4978. if max_length == 0:
  4979. return b""
  4980. # If the RVA is invalid let the exception reach the callers. All
  4981. # call-sites of get_string_u_at_rva() will handle it.
  4982. data = self.get_data(rva, 2)
4983. # max_length is the maximum count of 16-bit characters; it needs to be
4984. # doubled to get the size in bytes
  4985. max_length <<= 1
  4986. requested = min(max_length, 256)
  4987. data = self.get_data(rva, requested)
  4988. # try to find null-termination
  4989. null_index = -1
  4990. while True:
  4991. null_index = data.find(b"\x00\x00", null_index + 1)
  4992. if null_index == -1:
  4993. data_length = len(data)
  4994. if data_length < requested or data_length == max_length:
  4995. null_index = len(data) >> 1
  4996. break
  4997. # Request remaining part of data limited by max_length
  4998. data += self.get_data(rva + data_length, max_length - data_length)
  4999. null_index = requested - 1
  5000. requested = max_length
  5001. elif null_index % 2 == 0:
  5002. null_index >>= 1
  5003. break
  5004. # convert selected part of the string to unicode
  5005. uchrs = struct.unpack("<{:d}H".format(null_index), data[: null_index * 2])
  5006. s = "".join(map(chr, uchrs))
  5007. if encoding:
  5008. return b(s.encode(encoding, "backslashreplace_"))
  5009. return b(s.encode("utf-8", "backslashreplace_"))
  5010. def get_section_by_offset(self, offset):
  5011. """Get the section containing the given file offset."""
  5012. for section in self.sections:
  5013. if section.contains_offset(offset):
  5014. return section
  5015. return None
  5016. def get_section_by_rva(self, rva):
  5017. """Get the section containing the given address."""
  5018. for section in self.sections:
  5019. if section.contains_rva(rva):
  5020. return section
  5021. return None
  5022. def __str__(self):
  5023. return self.dump_info()
  5024. def has_relocs(self):
  5025. """Checks if the PE file has relocation directory"""
  5026. return hasattr(self, "DIRECTORY_ENTRY_BASERELOC")
  5027. def print_info(self, encoding="utf-8"):
  5028. """Print all the PE header information in a human readable from."""
  5029. print(self.dump_info(encoding=encoding))
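# Usage sketch ('sample.exe' is a hypothetical path):
#
#   import pefile
#   pefile.PE("sample.exe").print_info()        # prints the report to stdout
#   text = pefile.PE("sample.exe").dump_info()  # same report as a string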
  5030. def dump_info(self, dump=None, encoding="ascii"):
  5031. """Dump all the PE header information into human readable string."""
  5032. if dump is None:
  5033. dump = Dump()
  5034. warnings = self.get_warnings()
  5035. if warnings:
  5036. dump.add_header("Parsing Warnings")
  5037. for warning in warnings:
  5038. dump.add_line(warning)
  5039. dump.add_newline()
  5040. dump.add_header("DOS_HEADER")
  5041. dump.add_lines(self.DOS_HEADER.dump())
  5042. dump.add_newline()
  5043. dump.add_header("NT_HEADERS")
  5044. dump.add_lines(self.NT_HEADERS.dump())
  5045. dump.add_newline()
  5046. dump.add_header("FILE_HEADER")
  5047. dump.add_lines(self.FILE_HEADER.dump())
  5048. image_flags = retrieve_flags(IMAGE_CHARACTERISTICS, "IMAGE_FILE_")
  5049. dump.add("Flags: ")
  5050. flags = []
  5051. for flag in sorted(image_flags):
  5052. if getattr(self.FILE_HEADER, flag[0]):
  5053. flags.append(flag[0])
  5054. dump.add_line(", ".join(flags))
  5055. dump.add_newline()
  5056. if hasattr(self, "OPTIONAL_HEADER") and self.OPTIONAL_HEADER is not None:
  5057. dump.add_header("OPTIONAL_HEADER")
  5058. dump.add_lines(self.OPTIONAL_HEADER.dump())
  5059. dll_characteristics_flags = retrieve_flags(
  5060. DLL_CHARACTERISTICS, "IMAGE_DLLCHARACTERISTICS_"
  5061. )
  5062. dump.add("DllCharacteristics: ")
  5063. flags = []
  5064. for flag in sorted(dll_characteristics_flags):
  5065. if getattr(self.OPTIONAL_HEADER, flag[0]):
  5066. flags.append(flag[0])
  5067. dump.add_line(", ".join(flags))
  5068. dump.add_newline()
  5069. dump.add_header("PE Sections")
  5070. section_flags = retrieve_flags(SECTION_CHARACTERISTICS, "IMAGE_SCN_")
  5071. for section in self.sections:
  5072. dump.add_lines(section.dump())
  5073. dump.add("Flags: ")
  5074. flags = []
  5075. for flag in sorted(section_flags):
  5076. if getattr(section, flag[0]):
  5077. flags.append(flag[0])
  5078. dump.add_line(", ".join(flags))
  5079. dump.add_line(
  5080. "Entropy: {0:f} (Min=0.0, Max=8.0)".format(section.get_entropy())
  5081. )
  5082. if md5 is not None:
  5083. dump.add_line("MD5 hash: {0}".format(section.get_hash_md5()))
  5084. if sha1 is not None:
  5085. dump.add_line("SHA-1 hash: %s" % section.get_hash_sha1())
  5086. if sha256 is not None:
  5087. dump.add_line("SHA-256 hash: %s" % section.get_hash_sha256())
  5088. if sha512 is not None:
  5089. dump.add_line("SHA-512 hash: %s" % section.get_hash_sha512())
  5090. dump.add_newline()
  5091. if hasattr(self, "OPTIONAL_HEADER") and hasattr(
  5092. self.OPTIONAL_HEADER, "DATA_DIRECTORY"
  5093. ):
  5094. dump.add_header("Directories")
  5095. for directory in self.OPTIONAL_HEADER.DATA_DIRECTORY:
  5096. if directory is not None:
  5097. dump.add_lines(directory.dump())
  5098. dump.add_newline()
  5099. if hasattr(self, "VS_VERSIONINFO"):
  5100. for idx, vinfo_entry in enumerate(self.VS_VERSIONINFO):
  5101. if len(self.VS_VERSIONINFO) > 1:
  5102. dump.add_header(f"Version Information {idx + 1}")
  5103. else:
  5104. dump.add_header("Version Information")
  5105. if vinfo_entry is not None:
  5106. dump.add_lines(vinfo_entry.dump())
  5107. dump.add_newline()
  5108. if hasattr(self, "VS_FIXEDFILEINFO"):
  5109. dump.add_lines(self.VS_FIXEDFILEINFO[idx].dump())
  5110. dump.add_newline()
  5111. if hasattr(self, "FileInfo") and len(self.FileInfo) > idx:
  5112. for entry in self.FileInfo[idx]:
  5113. dump.add_lines(entry.dump())
  5114. dump.add_newline()
  5115. if hasattr(entry, "StringTable"):
  5116. for st_entry in entry.StringTable:
  5117. [dump.add_line(" " + line) for line in st_entry.dump()]
  5118. dump.add_line(
  5119. " LangID: {0}".format(
  5120. st_entry.LangID.decode(
  5121. encoding, "backslashreplace_"
  5122. )
  5123. )
  5124. )
  5125. dump.add_newline()
  5126. for str_entry in sorted(list(st_entry.entries.items())):
  5128. dump.add_line(
  5129. " {0}: {1}".format(
  5130. str_entry[0].decode(
  5131. encoding, "backslashreplace_"
  5132. ),
  5133. str_entry[1].decode(
  5134. encoding, "backslashreplace_"
  5135. ),
  5136. )
  5137. )
  5138. dump.add_newline()
  5139. elif hasattr(entry, "Var"):
  5140. for var_entry in entry.Var:
  5141. if hasattr(var_entry, "entry"):
  5142. [
  5143. dump.add_line(" " + line)
  5144. for line in var_entry.dump()
  5145. ]
  5146. dump.add_line(
  5147. " {0}: {1}".format(
  5148. list(var_entry.entry.keys())[0].decode(
  5149. "utf-8", "backslashreplace_"
  5150. ),
  5151. list(var_entry.entry.values())[0],
  5152. )
  5153. )
  5154. dump.add_newline()
  5155. if hasattr(self, "DIRECTORY_ENTRY_EXPORT"):
  5156. dump.add_header("Exported symbols")
  5157. dump.add_lines(self.DIRECTORY_ENTRY_EXPORT.struct.dump())
  5158. dump.add_newline()
  5159. dump.add_line("%-10s %-10s %s" % ("Ordinal", "RVA", "Name"))
  5160. for export in self.DIRECTORY_ENTRY_EXPORT.symbols:
  5161. if export.address is not None:
  5162. name = b("None")
  5163. if export.name:
  5164. name = export.name
  5165. dump.add(
  5166. "%-10d 0x%08X %s"
  5167. % (export.ordinal, export.address, name.decode(encoding))
  5168. )
  5169. if export.forwarder:
  5170. dump.add_line(
  5171. " forwarder: {0}".format(
  5172. export.forwarder.decode(encoding, "backslashreplace_")
  5173. )
  5174. )
  5175. else:
  5176. dump.add_newline()
  5177. dump.add_newline()
  5178. if hasattr(self, "DIRECTORY_ENTRY_IMPORT"):
  5179. dump.add_header("Imported symbols")
  5180. for module in self.DIRECTORY_ENTRY_IMPORT:
  5181. dump.add_lines(module.struct.dump())
  5182. # Print the name of the DLL if there are no imports.
  5183. if not module.imports:
  5184. dump.add(
  5185. " Name -> {0}".format(
  5186. self.get_string_at_rva(module.struct.Name).decode(
  5187. encoding, "backslashreplace_"
  5188. )
  5189. )
  5190. )
  5191. dump.add_newline()
  5192. dump.add_newline()
  5193. for symbol in module.imports:
  5194. if symbol.import_by_ordinal is True:
  5195. if symbol.name is not None:
  5196. dump.add(
  5197. "{0}.{1} Ordinal[{2}] (Imported by Ordinal)".format(
  5198. module.dll.decode("utf-8"),
  5199. symbol.name.decode("utf-8"),
  5200. symbol.ordinal,
  5201. )
  5202. )
  5203. else:
  5204. dump.add(
  5205. "{0} Ordinal[{1}] (Imported by Ordinal)".format(
  5206. module.dll.decode("utf-8"), symbol.ordinal
  5207. )
  5208. )
  5209. else:
  5210. dump.add(
  5211. "{0}.{1} Hint[{2:d}]".format(
  5212. module.dll.decode(encoding, "backslashreplace_"),
  5213. symbol.name.decode(encoding, "backslashreplace_"),
  5214. symbol.hint,
  5215. )
  5216. )
  5217. if symbol.bound:
  5218. dump.add_line(" Bound: 0x{0:08X}".format(symbol.bound))
  5219. else:
  5220. dump.add_newline()
  5221. dump.add_newline()
  5222. if hasattr(self, "DIRECTORY_ENTRY_BOUND_IMPORT"):
  5223. dump.add_header("Bound imports")
  5224. for bound_imp_desc in self.DIRECTORY_ENTRY_BOUND_IMPORT:
  5225. dump.add_lines(bound_imp_desc.struct.dump())
  5226. dump.add_line(
  5227. "DLL: {0}".format(
  5228. bound_imp_desc.name.decode(encoding, "backslashreplace_")
  5229. )
  5230. )
  5231. dump.add_newline()
  5232. for bound_imp_ref in bound_imp_desc.entries:
  5233. dump.add_lines(bound_imp_ref.struct.dump(), 4)
  5234. dump.add_line(
  5235. "DLL: {0}".format(
  5236. bound_imp_ref.name.decode(encoding, "backslashreplace_")
  5237. ),
  5238. 4,
  5239. )
  5240. dump.add_newline()
  5241. if hasattr(self, "DIRECTORY_ENTRY_DELAY_IMPORT"):
  5242. dump.add_header("Delay Imported symbols")
  5243. for module in self.DIRECTORY_ENTRY_DELAY_IMPORT:
  5244. dump.add_lines(module.struct.dump())
  5245. dump.add_newline()
  5246. for symbol in module.imports:
  5247. if symbol.import_by_ordinal is True:
  5248. dump.add(
  5249. "{0} Ordinal[{1:d}] (Imported by Ordinal)".format(
  5250. module.dll.decode(encoding, "backslashreplace_"),
  5251. symbol.ordinal,
  5252. )
  5253. )
  5254. else:
  5255. dump.add(
  5256. "{0}.{1} Hint[{2}]".format(
  5257. module.dll.decode(encoding, "backslashreplace_"),
  5258. symbol.name.decode(encoding, "backslashreplace_"),
  5259. symbol.hint,
  5260. )
  5261. )
  5262. if symbol.bound:
  5263. dump.add_line(" Bound: 0x{0:08X}".format(symbol.bound))
  5264. else:
  5265. dump.add_newline()
  5266. dump.add_newline()
  5267. if hasattr(self, "DIRECTORY_ENTRY_RESOURCE"):
  5268. dump.add_header("Resource directory")
  5269. dump.add_lines(self.DIRECTORY_ENTRY_RESOURCE.struct.dump())
  5270. for res_type in self.DIRECTORY_ENTRY_RESOURCE.entries:
  5271. if res_type.name is not None:
  5272. name = res_type.name.decode(encoding, "backslashreplace_")
  5273. dump.add_line(
  5274. f"Name: [{name}]",
  5275. 2,
  5276. )
  5277. else:
  5278. res_type_id = RESOURCE_TYPE.get(res_type.struct.Id, "-")
  5279. dump.add_line(
  5280. f"Id: [0x{res_type.struct.Id:X}] ({res_type_id})",
  5281. 2,
  5282. )
  5283. dump.add_lines(res_type.struct.dump(), 2)
  5284. if hasattr(res_type, "directory"):
  5285. dump.add_lines(res_type.directory.struct.dump(), 4)
  5286. for resource_id in res_type.directory.entries:
  5287. if resource_id.name is not None:
  5288. name = resource_id.name.decode("utf-8", "backslashreplace_")
  5289. dump.add_line(
  5290. f"Name: [{name}]",
  5291. 6,
  5292. )
  5293. else:
  5294. dump.add_line(f"Id: [0x{resource_id.struct.Id:X}]", 6)
  5295. dump.add_lines(resource_id.struct.dump(), 6)
  5296. if hasattr(resource_id, "directory"):
  5297. dump.add_lines(resource_id.directory.struct.dump(), 8)
  5298. for resource_lang in resource_id.directory.entries:
  5299. if hasattr(resource_lang, "data"):
  5300. dump.add_line(
  5301. "\\--- LANG [%d,%d][%s,%s]"
  5302. % (
  5303. resource_lang.data.lang,
  5304. resource_lang.data.sublang,
  5305. LANG.get(
  5306. resource_lang.data.lang, "*unknown*"
  5307. ),
  5308. get_sublang_name_for_lang(
  5309. resource_lang.data.lang,
  5310. resource_lang.data.sublang,
  5311. ),
  5312. ),
  5313. 8,
  5314. )
  5315. dump.add_lines(resource_lang.struct.dump(), 10)
  5316. dump.add_lines(resource_lang.data.struct.dump(), 12)
  5317. if (
  5318. hasattr(resource_id.directory, "strings")
  5319. and resource_id.directory.strings
  5320. ):
  5321. dump.add_line("[STRINGS]", 10)
  5322. for idx, res_string in list(
  5323. sorted(resource_id.directory.strings.items())
  5324. ):
  5325. dump.add_line(
  5326. "{0:6d}: {1}".format(
  5327. idx,
  5328. res_string.encode(
  5329. "unicode-escape", "backslashreplace"
  5330. ).decode("ascii"),
  5331. ),
  5332. 12,
  5333. )
  5334. dump.add_newline()
  5335. dump.add_newline()
  5336. if (
  5337. hasattr(self, "DIRECTORY_ENTRY_TLS")
  5338. and self.DIRECTORY_ENTRY_TLS
  5339. and self.DIRECTORY_ENTRY_TLS.struct
  5340. ):
  5341. dump.add_header("TLS")
  5342. dump.add_lines(self.DIRECTORY_ENTRY_TLS.struct.dump())
  5343. dump.add_newline()
  5344. if (
  5345. hasattr(self, "DIRECTORY_ENTRY_LOAD_CONFIG")
  5346. and self.DIRECTORY_ENTRY_LOAD_CONFIG
  5347. and self.DIRECTORY_ENTRY_LOAD_CONFIG.struct
  5348. ):
  5349. dump.add_header("LOAD_CONFIG")
  5350. dump.add_lines(self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.dump())
  5351. dump.add_newline()
  5352. if hasattr(self, "DIRECTORY_ENTRY_DEBUG"):
  5353. dump.add_header("Debug information")
  5354. for dbg in self.DIRECTORY_ENTRY_DEBUG:
  5355. dump.add_lines(dbg.struct.dump())
  5356. try:
  5357. dump.add_line("Type: " + DEBUG_TYPE[dbg.struct.Type])
  5358. except KeyError:
  5359. dump.add_line("Type: 0x{0:x}(Unknown)".format(dbg.struct.Type))
  5360. dump.add_newline()
  5361. if dbg.entry:
  5362. dump.add_lines(dbg.entry.dump(), 4)
  5363. dump.add_newline()
  5364. if self.has_relocs():
  5365. dump.add_header("Base relocations")
  5366. for base_reloc in self.DIRECTORY_ENTRY_BASERELOC:
  5367. dump.add_lines(base_reloc.struct.dump())
  5368. for reloc in base_reloc.entries:
  5369. try:
  5370. dump.add_line(
  5371. "%08Xh %s" % (reloc.rva, RELOCATION_TYPE[reloc.type][16:]),
  5372. 4,
  5373. )
  5374. except KeyError:
  5375. dump.add_line(
  5376. "0x%08X 0x%x(Unknown)" % (reloc.rva, reloc.type), 4
  5377. )
  5378. dump.add_newline()
  5379. if (
  5380. hasattr(self, "DIRECTORY_ENTRY_EXCEPTION")
  5381. and len(self.DIRECTORY_ENTRY_EXCEPTION) > 0
  5382. ):
  5383. dump.add_header("Unwind data for exception handling")
  5384. for rf in self.DIRECTORY_ENTRY_EXCEPTION:
  5385. dump.add_lines(rf.struct.dump())
  5386. if hasattr(rf, "unwindinfo") and rf.unwindinfo is not None:
  5387. dump.add_lines(rf.unwindinfo.dump(), 4)
  5388. return dump.get_text()
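# A minimal usage sketch (not part of the library): dump_info() builds the
# text report assembled above. The file name is only an example.
#
#   pe = PE("sample.exe")
#   print(pe.dump_info())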
  5389. def dump_dict(self):
  5390. """Dump all the PE header information into a dictionary."""
  5391. dump_dict = {}
  5392. warnings = self.get_warnings()
  5393. if warnings:
  5394. dump_dict["Parsing Warnings"] = warnings
  5395. dump_dict["DOS_HEADER"] = self.DOS_HEADER.dump_dict()
  5396. dump_dict["NT_HEADERS"] = self.NT_HEADERS.dump_dict()
  5397. dump_dict["FILE_HEADER"] = self.FILE_HEADER.dump_dict()
  5398. image_flags = retrieve_flags(IMAGE_CHARACTERISTICS, "IMAGE_FILE_")
  5399. dump_dict["Flags"] = []
  5400. for flag in image_flags:
  5401. if getattr(self.FILE_HEADER, flag[0]):
  5402. dump_dict["Flags"].append(flag[0])
  5403. if hasattr(self, "OPTIONAL_HEADER") and self.OPTIONAL_HEADER is not None:
  5404. dump_dict["OPTIONAL_HEADER"] = self.OPTIONAL_HEADER.dump_dict()
  5405. dll_characteristics_flags = retrieve_flags(
  5406. DLL_CHARACTERISTICS, "IMAGE_DLLCHARACTERISTICS_"
  5407. )
  5408. dump_dict["DllCharacteristics"] = []
  5409. for flag in dll_characteristics_flags:
  5410. if getattr(self.OPTIONAL_HEADER, flag[0]):
  5411. dump_dict["DllCharacteristics"].append(flag[0])
  5412. dump_dict["PE Sections"] = []
  5413. section_flags = retrieve_flags(SECTION_CHARACTERISTICS, "IMAGE_SCN_")
  5414. for section in self.sections:
  5415. section_dict = section.dump_dict()
  5416. dump_dict["PE Sections"].append(section_dict)
  5417. section_dict["Flags"] = []
  5418. for flag in section_flags:
  5419. if getattr(section, flag[0]):
  5420. section_dict["Flags"].append(flag[0])
  5421. section_dict["Entropy"] = section.get_entropy()
  5422. if md5 is not None:
  5423. section_dict["MD5"] = section.get_hash_md5()
  5424. if sha1 is not None:
  5425. section_dict["SHA1"] = section.get_hash_sha1()
  5426. if sha256 is not None:
  5427. section_dict["SHA256"] = section.get_hash_sha256()
  5428. if sha512 is not None:
  5429. section_dict["SHA512"] = section.get_hash_sha512()
  5430. if hasattr(self, "OPTIONAL_HEADER") and hasattr(
  5431. self.OPTIONAL_HEADER, "DATA_DIRECTORY"
  5432. ):
  5433. dump_dict["Directories"] = []
  5434. for idx, directory in enumerate(self.OPTIONAL_HEADER.DATA_DIRECTORY):
  5435. if directory is not None:
  5436. dump_dict["Directories"].append(directory.dump_dict())
  5437. if hasattr(self, "VS_VERSIONINFO"):
  5438. dump_dict["Version Information"] = []
  5439. for idx, vs_vinfo in enumerate(self.VS_VERSIONINFO):
  5440. version_info_list = []
  5441. version_info_list.append(vs_vinfo.dump_dict())
  5442. if hasattr(self, "VS_FIXEDFILEINFO"):
  5443. version_info_list.append(self.VS_FIXEDFILEINFO[idx].dump_dict())
  5444. if hasattr(self, "FileInfo") and len(self.FileInfo) > idx:
  5445. fileinfo_list = []
  5446. version_info_list.append(fileinfo_list)
  5447. for entry in self.FileInfo[idx]:
  5448. fileinfo_list.append(entry.dump_dict())
  5449. if hasattr(entry, "StringTable"):
  5450. stringtable_dict = {}
  5451. for st_entry in entry.StringTable:
  5452. fileinfo_list.extend(st_entry.dump_dict())
  5453. stringtable_dict["LangID"] = st_entry.LangID
  5454. for str_entry in list(st_entry.entries.items()):
  5455. stringtable_dict[str_entry[0]] = str_entry[1]
  5456. fileinfo_list.append(stringtable_dict)
  5457. elif hasattr(entry, "Var"):
  5458. for var_entry in entry.Var:
  5459. var_dict = {}
  5460. if hasattr(var_entry, "entry"):
  5461. fileinfo_list.extend(var_entry.dump_dict())
  5462. var_dict[list(var_entry.entry.keys())[0]] = list(
  5463. var_entry.entry.values()
  5464. )[0]
  5465. fileinfo_list.append(var_dict)
  5466. dump_dict["Version Information"].append(version_info_list)
  5467. if hasattr(self, "DIRECTORY_ENTRY_EXPORT"):
  5468. dump_dict["Exported symbols"] = []
  5469. dump_dict["Exported symbols"].append(
  5470. self.DIRECTORY_ENTRY_EXPORT.struct.dump_dict()
  5471. )
  5472. for export in self.DIRECTORY_ENTRY_EXPORT.symbols:
  5473. export_dict = {}
  5474. if export.address is not None:
  5475. export_dict.update(
  5476. {
  5477. "Ordinal": export.ordinal,
  5478. "RVA": export.address,
  5479. "Name": export.name,
  5480. }
  5481. )
  5482. if export.forwarder:
  5483. export_dict["forwarder"] = export.forwarder
  5484. dump_dict["Exported symbols"].append(export_dict)
  5485. if hasattr(self, "DIRECTORY_ENTRY_IMPORT"):
  5486. dump_dict["Imported symbols"] = []
  5487. for module in self.DIRECTORY_ENTRY_IMPORT:
  5488. import_list = []
  5489. dump_dict["Imported symbols"].append(import_list)
  5490. import_list.append(module.struct.dump_dict())
  5491. for symbol in module.imports:
  5492. symbol_dict = {}
  5493. if symbol.import_by_ordinal is True:
  5494. symbol_dict["DLL"] = module.dll
  5495. symbol_dict["Ordinal"] = symbol.ordinal
  5496. else:
  5497. symbol_dict["DLL"] = module.dll
  5498. symbol_dict["Name"] = symbol.name
  5499. symbol_dict["Hint"] = symbol.hint
  5500. if symbol.bound:
  5501. symbol_dict["Bound"] = symbol.bound
  5502. import_list.append(symbol_dict)
  5503. if hasattr(self, "DIRECTORY_ENTRY_BOUND_IMPORT"):
  5504. dump_dict["Bound imports"] = []
  5505. for bound_imp_desc in self.DIRECTORY_ENTRY_BOUND_IMPORT:
  5506. bound_imp_desc_dict = {}
  5507. dump_dict["Bound imports"].append(bound_imp_desc_dict)
  5508. bound_imp_desc_dict.update(bound_imp_desc.struct.dump_dict())
  5509. bound_imp_desc_dict["DLL"] = bound_imp_desc.name
  5510. for bound_imp_ref in bound_imp_desc.entries:
  5511. bound_imp_ref_dict = {}
  5512. bound_imp_ref_dict.update(bound_imp_ref.struct.dump_dict())
  5513. bound_imp_ref_dict["DLL"] = bound_imp_ref.name
  5514. if hasattr(self, "DIRECTORY_ENTRY_DELAY_IMPORT"):
  5515. dump_dict["Delay Imported symbols"] = []
  5516. for module in self.DIRECTORY_ENTRY_DELAY_IMPORT:
  5517. module_list = []
  5518. dump_dict["Delay Imported symbols"].append(module_list)
  5519. module_list.append(module.struct.dump_dict())
  5520. for symbol in module.imports:
  5521. symbol_dict = {}
  5522. if symbol.import_by_ordinal is True:
  5523. symbol_dict["DLL"] = module.dll
  5524. symbol_dict["Ordinal"] = symbol.ordinal
  5525. else:
  5526. symbol_dict["DLL"] = module.dll
  5527. symbol_dict["Name"] = symbol.name
  5528. symbol_dict["Hint"] = symbol.hint
  5529. if symbol.bound:
  5530. symbol_dict["Bound"] = symbol.bound
  5531. module_list.append(symbol_dict)
  5532. if hasattr(self, "DIRECTORY_ENTRY_RESOURCE"):
  5533. dump_dict["Resource directory"] = []
  5534. dump_dict["Resource directory"].append(
  5535. self.DIRECTORY_ENTRY_RESOURCE.struct.dump_dict()
  5536. )
  5537. for res_type in self.DIRECTORY_ENTRY_RESOURCE.entries:
  5538. resource_type_dict = {}
  5539. if res_type.name is not None:
  5540. resource_type_dict["Name"] = res_type.name
  5541. else:
  5542. resource_type_dict["Id"] = (
  5543. res_type.struct.Id,
  5544. RESOURCE_TYPE.get(res_type.struct.Id, "-"),
  5545. )
  5546. resource_type_dict.update(res_type.struct.dump_dict())
  5547. dump_dict["Resource directory"].append(resource_type_dict)
  5548. if hasattr(res_type, "directory"):
  5549. directory_list = []
  5550. directory_list.append(res_type.directory.struct.dump_dict())
  5551. dump_dict["Resource directory"].append(directory_list)
  5552. for resource_id in res_type.directory.entries:
  5553. resource_id_dict = {}
  5554. if resource_id.name is not None:
  5555. resource_id_dict["Name"] = resource_id.name
  5556. else:
  5557. resource_id_dict["Id"] = resource_id.struct.Id
  5558. resource_id_dict.update(resource_id.struct.dump_dict())
  5559. directory_list.append(resource_id_dict)
  5560. if hasattr(resource_id, "directory"):
  5561. resource_id_list = []
  5562. resource_id_list.append(
  5563. resource_id.directory.struct.dump_dict()
  5564. )
  5565. directory_list.append(resource_id_list)
  5566. for resource_lang in resource_id.directory.entries:
  5567. if hasattr(resource_lang, "data"):
  5568. resource_lang_dict = {}
  5569. resource_lang_dict["LANG"] = resource_lang.data.lang
  5570. resource_lang_dict[
  5571. "SUBLANG"
  5572. ] = resource_lang.data.sublang
  5573. resource_lang_dict["LANG_NAME"] = LANG.get(
  5574. resource_lang.data.lang, "*unknown*"
  5575. )
  5576. resource_lang_dict[
  5577. "SUBLANG_NAME"
  5578. ] = get_sublang_name_for_lang(
  5579. resource_lang.data.lang,
  5580. resource_lang.data.sublang,
  5581. )
  5582. resource_lang_dict.update(
  5583. resource_lang.struct.dump_dict()
  5584. )
  5585. resource_lang_dict.update(
  5586. resource_lang.data.struct.dump_dict()
  5587. )
  5588. resource_id_list.append(resource_lang_dict)
  5589. if (
  5590. hasattr(resource_id.directory, "strings")
  5591. and resource_id.directory.strings
  5592. ):
  5593. for idx, res_string in list(
  5594. resource_id.directory.strings.items()
  5595. ):
  5596. resource_id_list.append(
  5597. res_string.encode(
  5598. "unicode-escape", "backslashreplace"
  5599. ).decode("ascii")
  5600. )
  5601. if (
  5602. hasattr(self, "DIRECTORY_ENTRY_TLS")
  5603. and self.DIRECTORY_ENTRY_TLS
  5604. and self.DIRECTORY_ENTRY_TLS.struct
  5605. ):
  5606. dump_dict["TLS"] = self.DIRECTORY_ENTRY_TLS.struct.dump_dict()
  5607. if (
  5608. hasattr(self, "DIRECTORY_ENTRY_LOAD_CONFIG")
  5609. and self.DIRECTORY_ENTRY_LOAD_CONFIG
  5610. and self.DIRECTORY_ENTRY_LOAD_CONFIG.struct
  5611. ):
  5612. dump_dict[
  5613. "LOAD_CONFIG"
  5614. ] = self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.dump_dict()
  5615. if hasattr(self, "DIRECTORY_ENTRY_DEBUG"):
  5616. dump_dict["Debug information"] = []
  5617. for dbg in self.DIRECTORY_ENTRY_DEBUG:
  5618. dbg_dict = {}
  5619. dump_dict["Debug information"].append(dbg_dict)
  5620. dbg_dict.update(dbg.struct.dump_dict())
  5621. dbg_dict["Type"] = DEBUG_TYPE.get(dbg.struct.Type, dbg.struct.Type)
  5622. if self.has_relocs():
  5623. dump_dict["Base relocations"] = []
  5624. for base_reloc in self.DIRECTORY_ENTRY_BASERELOC:
  5625. base_reloc_list = []
  5626. dump_dict["Base relocations"].append(base_reloc_list)
  5627. base_reloc_list.append(base_reloc.struct.dump_dict())
  5628. for reloc in base_reloc.entries:
  5629. reloc_dict = {}
  5630. base_reloc_list.append(reloc_dict)
  5631. reloc_dict["RVA"] = reloc.rva
  5632. try:
  5633. reloc_dict["Type"] = RELOCATION_TYPE[reloc.type][16:]
  5634. except KeyError:
  5635. reloc_dict["Type"] = reloc.type
  5636. return dump_dict
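# A minimal usage sketch: dump_dict() returns plain Python containers, but
# many values (names, version strings) remain bytes, so serializing to JSON
# needs a fallback encoder. The file name is only an example.
#
#   import json
#   pe = PE("sample.exe")
#   print(json.dumps(pe.dump_dict(), default=str, indent=2))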
  5637. # OC Patch
  5638. def get_physical_by_rva(self, rva):
  5639. """Gets the physical address in the PE file from an RVA value."""
  5640. try:
  5641. return self.get_offset_from_rva(rva)
  5642. except Exception:
  5643. return None
  5644. ##
  5645. # Double-Word get / set
  5646. ##
  5647. def get_data_from_dword(self, dword):
  5648. """Return a four byte string representing the double word value (little endian)."""
  5649. return struct.pack("<L", dword & 0xFFFFFFFF)
  5650. def get_dword_from_data(self, data, offset):
  5651. """Convert four bytes of data to a double word (little endian)
  5652. 'offset' is assumed to index into a dword array. So setting it to
  5653. N will return a dword out of the data starting at offset N*4.
  5654. Returns None if the data can't be turned into a double word.
  5655. """
  5656. if (offset + 1) * 4 > len(data):
  5657. return None
  5658. return struct.unpack("<I", data[offset * 4 : (offset + 1) * 4])[0]
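# Example of the 'offset' semantics documented above: the offset indexes
# dwords, not bytes.
#
#   data = b"\x01\x00\x00\x00\x02\x00\x00\x00"
#   pe.get_dword_from_data(data, 0)  # -> 1
#   pe.get_dword_from_data(data, 1)  # -> 2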
  5659. def get_dword_at_rva(self, rva):
  5660. """Return the double word value at the given RVA.
  5661. Returns None if the value can't be read, i.e. the RVA can't be mapped
  5662. to a file offset.
  5663. """
  5664. try:
  5665. return self.get_dword_from_data(self.get_data(rva, 4), 0)
  5666. except PEFormatError:
  5667. return None
  5668. def get_dword_from_offset(self, offset):
  5669. """Return the double word value at the given file offset. (little endian)"""
  5670. if offset + 4 > len(self.__data__):
  5671. return None
  5672. return self.get_dword_from_data(self.__data__[offset : offset + 4], 0)
  5673. def set_dword_at_rva(self, rva, dword):
  5674. """Set the double word value at the file offset corresponding to the given RVA."""
  5675. return self.set_bytes_at_rva(rva, self.get_data_from_dword(dword))
  5676. def set_dword_at_offset(self, offset, dword):
  5677. """Set the double word value at the given file offset."""
  5678. return self.set_bytes_at_offset(offset, self.get_data_from_dword(dword))
  5679. ##
  5680. # Word get / set
  5681. ##
  5682. def get_data_from_word(self, word):
  5683. """Return a two byte string representing the word value. (little endian)."""
  5684. return struct.pack("<H", word)
  5685. def get_word_from_data(self, data, offset):
  5686. """Convert two bytes of data to a word (little endian)
  5687. 'offset' is assumed to index into a word array. So setting it to
5688. N will return a word out of the data starting at offset N*2.
  5689. Returns None if the data can't be turned into a word.
  5690. """
  5691. if (offset + 1) * 2 > len(data):
  5692. return None
  5693. return struct.unpack("<H", data[offset * 2 : (offset + 1) * 2])[0]
  5694. def get_word_at_rva(self, rva):
  5695. """Return the word value at the given RVA.
  5696. Returns None if the value can't be read, i.e. the RVA can't be mapped
  5697. to a file offset.
  5698. """
  5699. try:
  5700. return self.get_word_from_data(self.get_data(rva)[:2], 0)
  5701. except PEFormatError:
  5702. return None
  5703. def get_word_from_offset(self, offset):
  5704. """Return the word value at the given file offset. (little endian)"""
  5705. if offset + 2 > len(self.__data__):
  5706. return None
  5707. return self.get_word_from_data(self.__data__[offset : offset + 2], 0)
  5708. def set_word_at_rva(self, rva, word):
  5709. """Set the word value at the file offset corresponding to the given RVA."""
  5710. return self.set_bytes_at_rva(rva, self.get_data_from_word(word))
  5711. def set_word_at_offset(self, offset, word):
  5712. """Set the word value at the given file offset."""
  5713. return self.set_bytes_at_offset(offset, self.get_data_from_word(word))
  5714. ##
  5715. # Quad-Word get / set
  5716. ##
  5717. def get_data_from_qword(self, word):
  5718. """Return an eight byte string representing the quad-word value (little endian)."""
  5719. return struct.pack("<Q", word)
  5720. def get_qword_from_data(self, data, offset):
  5721. """Convert eight bytes of data to a word (little endian)
  5722. 'offset' is assumed to index into a word array. So setting it to
  5723. N will return a dword out of the data starting at offset N*8.
  5724. Returns None if the data can't be turned into a quad word.
  5725. """
  5726. if (offset + 1) * 8 > len(data):
  5727. return None
  5728. return struct.unpack("<Q", data[offset * 8 : (offset + 1) * 8])[0]
  5729. def get_qword_at_rva(self, rva):
  5730. """Return the quad-word value at the given RVA.
  5731. Returns None if the value can't be read, i.e. the RVA can't be mapped
  5732. to a file offset.
  5733. """
  5734. try:
  5735. return self.get_qword_from_data(self.get_data(rva)[:8], 0)
  5736. except PEFormatError:
  5737. return None
  5738. def get_qword_from_offset(self, offset):
  5739. """Return the quad-word value at the given file offset. (little endian)"""
  5740. if offset + 8 > len(self.__data__):
  5741. return None
  5742. return self.get_qword_from_data(self.__data__[offset : offset + 8], 0)
  5743. def set_qword_at_rva(self, rva, qword):
  5744. """Set the quad-word value at the file offset corresponding to the given RVA."""
  5745. return self.set_bytes_at_rva(rva, self.get_data_from_qword(qword))
  5746. def set_qword_at_offset(self, offset, qword):
  5747. """Set the quad-word value at the given file offset."""
  5748. return self.set_bytes_at_offset(offset, self.get_data_from_qword(qword))
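# A usage sketch for the RVA-based accessors above (the RVA value is
# hypothetical). The getters return None when the RVA cannot be mapped to a
# file offset, and the setters return False in that case.
#
#   rva = 0x2000
#   value = pe.get_dword_at_rva(rva)
#   if value is not None:
#       pe.set_dword_at_rva(rva, (value + 1) & 0xFFFFFFFF)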
  5749. ##
  5750. # Set bytes
  5751. ##
  5752. def set_bytes_at_rva(self, rva, data):
  5753. """Overwrite, with the given string, the bytes at the file offset corresponding
  5754. to the given RVA.
  5755. Return True if successful, False otherwise. It can fail if the
  5756. offset is outside the file's boundaries.
  5757. """
  5758. if not isinstance(data, bytes):
  5759. raise TypeError("data should be of type: bytes")
  5760. offset = self.get_physical_by_rva(rva)
  5761. if not offset:
  5762. return False
  5763. return self.set_bytes_at_offset(offset, data)
  5764. def set_bytes_at_offset(self, offset, data):
  5765. """Overwrite the bytes at the given file offset with the given string.
  5766. Return True if successful, False otherwise. It can fail if the
  5767. offset is outside the file's boundaries.
  5768. """
  5769. if not isinstance(data, bytes):
  5770. raise TypeError("data should be of type: bytes")
  5771. if 0 <= offset < len(self.__data__):
  5772. self.__data__ = (
  5773. self.__data__[:offset] + data + self.__data__[offset + len(data) :]
  5774. )
  5775. else:
  5776. return False
  5777. return True
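# A usage sketch: patch raw bytes at a known file offset (offset and payload
# are hypothetical), then rebuild the image with write(), the same call that
# generate_checksum() below relies on.
#
#   if pe.set_bytes_at_offset(0x400, b"\x90\x90\x90\x90"):
#       with open("patched.exe", "wb") as f:
#           f.write(pe.write())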
  5778. def merge_modified_section_data(self):
  5779. """Update the PE image content with any individual section data that has been
  5780. modified.
  5781. """
  5782. for section in self.sections:
  5783. section_data_start = self.adjust_FileAlignment(
  5784. section.PointerToRawData, self.OPTIONAL_HEADER.FileAlignment
  5785. )
  5786. section_data_end = section_data_start + section.SizeOfRawData
  5787. if section_data_start < len(self.__data__) and section_data_end < len(
  5788. self.__data__
  5789. ):
  5790. self.__data__ = (
  5791. self.__data__[:section_data_start]
  5792. + section.get_data()
  5793. + self.__data__[section_data_end:]
  5794. )
  5795. def relocate_image(self, new_ImageBase):
  5796. """Apply the relocation information to the image using the provided image base.
  5797. This method will apply the relocation information to the image. Given the new
  5798. base, all the relocations will be processed and both the raw data and the
  5799. section's data will be fixed accordingly.
5800. The resulting image can also be retrieved through the method
5801. get_memory_mapped_image(), in order to get something that more closely
5802. matches what would be found in memory once the Windows loader has
5803. finished its work.
  5804. """
  5805. relocation_difference = new_ImageBase - self.OPTIONAL_HEADER.ImageBase
  5806. if (
  5807. len(self.OPTIONAL_HEADER.DATA_DIRECTORY) >= 6
  5808. and self.OPTIONAL_HEADER.DATA_DIRECTORY[5].Size
  5809. ):
  5810. if not hasattr(self, "DIRECTORY_ENTRY_BASERELOC"):
  5811. self.parse_data_directories(
  5812. directories=[DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_BASERELOC"]]
  5813. )
  5814. if not hasattr(self, "DIRECTORY_ENTRY_BASERELOC"):
  5815. self.__warnings.append(
  5816. "Relocating image but PE does not have (or pefile cannot "
  5817. "parse) a DIRECTORY_ENTRY_BASERELOC"
  5818. )
  5819. else:
  5820. for reloc in self.DIRECTORY_ENTRY_BASERELOC:
  5821. # We iterate with an index because if the relocation is of type
  5822. # IMAGE_REL_BASED_HIGHADJ we need to also process the next entry
  5823. # at once and skip it for the next iteration
  5824. #
  5825. entry_idx = 0
  5826. while entry_idx < len(reloc.entries):
  5827. entry = reloc.entries[entry_idx]
  5828. entry_idx += 1
  5829. if entry.type == RELOCATION_TYPE["IMAGE_REL_BASED_ABSOLUTE"]:
  5830. # Nothing to do for this type of relocation
  5831. pass
  5832. elif entry.type == RELOCATION_TYPE["IMAGE_REL_BASED_HIGH"]:
  5833. # Fix the high 16-bits of a relocation
  5834. #
  5835. # Add high 16-bits of relocation_difference to the
  5836. # 16-bit value at RVA=entry.rva
  5837. self.set_word_at_rva(
  5838. entry.rva,
  5839. (
  5840. self.get_word_at_rva(entry.rva)
  5841. + relocation_difference
  5842. >> 16
  5843. )
  5844. & 0xFFFF,
  5845. )
  5846. elif entry.type == RELOCATION_TYPE["IMAGE_REL_BASED_LOW"]:
  5847. # Fix the low 16-bits of a relocation
  5848. #
  5849. # Add low 16 bits of relocation_difference to the 16-bit
  5850. # value at RVA=entry.rva
  5851. self.set_word_at_rva(
  5852. entry.rva,
  5853. (
  5854. self.get_word_at_rva(entry.rva)
  5855. + relocation_difference
  5856. )
  5857. & 0xFFFF,
  5858. )
  5859. elif entry.type == RELOCATION_TYPE["IMAGE_REL_BASED_HIGHLOW"]:
  5860. # Handle all high and low parts of a 32-bit relocation
  5861. #
  5862. # Add relocation_difference to the value at RVA=entry.rva
  5863. self.set_dword_at_rva(
  5864. entry.rva,
  5865. self.get_dword_at_rva(entry.rva)
  5866. + relocation_difference,
  5867. )
  5868. elif entry.type == RELOCATION_TYPE["IMAGE_REL_BASED_HIGHADJ"]:
  5869. # Fix the high 16-bits of a relocation and adjust
  5870. #
  5871. # Add high 16-bits of relocation_difference to the 32-bit
  5872. # value composed from the (16-bit value at
  5873. # RVA=entry.rva)<<16 plus the 16-bit value at the next
  5874. # relocation entry.
  5875. # If the next entry is beyond the array's limits,
  5876. # abort... the table is corrupt
  5877. if entry_idx == len(reloc.entries):
  5878. break
  5879. next_entry = reloc.entries[entry_idx]
  5880. entry_idx += 1
  5881. self.set_word_at_rva(
  5882. entry.rva,
  5883. (
  5884. (self.get_word_at_rva(entry.rva) << 16)
  5885. + next_entry.rva
  5886. + relocation_difference
  5887. & 0xFFFF0000
  5888. )
  5889. >> 16,
  5890. )
  5891. elif entry.type == RELOCATION_TYPE["IMAGE_REL_BASED_DIR64"]:
  5892. # Apply the difference to the 64-bit value at the offset
  5893. # RVA=entry.rva
  5894. self.set_qword_at_rva(
  5895. entry.rva,
  5896. self.get_qword_at_rva(entry.rva)
  5897. + relocation_difference,
  5898. )
  5899. self.OPTIONAL_HEADER.ImageBase = new_ImageBase
5900. # Correct VA (virtual address) occurrences in the directory information.
  5901. if hasattr(self, "DIRECTORY_ENTRY_IMPORT"):
  5902. for dll in self.DIRECTORY_ENTRY_IMPORT:
  5903. for func in dll.imports:
  5904. func.address += relocation_difference
  5905. if hasattr(self, "DIRECTORY_ENTRY_TLS"):
  5906. self.DIRECTORY_ENTRY_TLS.struct.StartAddressOfRawData += (
  5907. relocation_difference
  5908. )
  5909. self.DIRECTORY_ENTRY_TLS.struct.EndAddressOfRawData += (
  5910. relocation_difference
  5911. )
  5912. self.DIRECTORY_ENTRY_TLS.struct.AddressOfIndex += relocation_difference
  5913. self.DIRECTORY_ENTRY_TLS.struct.AddressOfCallBacks += (
  5914. relocation_difference
  5915. )
  5916. if hasattr(self, "DIRECTORY_ENTRY_LOAD_CONFIG"):
  5917. if self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.LockPrefixTable:
  5918. self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.LockPrefixTable += (
  5919. relocation_difference
  5920. )
  5921. if self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.EditList:
  5922. self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.EditList += (
  5923. relocation_difference
  5924. )
  5925. if self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.SecurityCookie:
  5926. self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.SecurityCookie += (
  5927. relocation_difference
  5928. )
  5929. if self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.SEHandlerTable:
  5930. self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.SEHandlerTable += (
  5931. relocation_difference
  5932. )
  5933. if self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.GuardCFCheckFunctionPointer:
  5934. self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.GuardCFCheckFunctionPointer += (
  5935. relocation_difference
  5936. )
  5937. if self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.GuardCFFunctionTable:
  5938. self.DIRECTORY_ENTRY_LOAD_CONFIG.struct.GuardCFFunctionTable += (
  5939. relocation_difference
  5940. )
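# A usage sketch of the relocation flow described in the docstring above
# (the new image base is hypothetical):
#
#   pe = PE("sample.dll")
#   pe.relocate_image(0x10000000)
#   rebased = pe.get_memory_mapped_image()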
  5941. def verify_checksum(self):
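"""Return True if the CheckSum field of the optional header matches the value computed by generate_checksum()."""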
  5942. return self.OPTIONAL_HEADER.CheckSum == self.generate_checksum()
  5943. def generate_checksum(self):
  5944. # This will make sure that the data representing the PE image
  5945. # is updated with any changes that might have been made by
  5946. # assigning values to header fields as those are not automatically
  5947. # updated upon assignment.
  5948. #
  5949. # data = self.write()
  5950. # print('{0}'.format(len(data)))
  5951. # for idx, b in enumerate(data):
  5952. # if b != ord(self.__data__[idx]) or (idx > 1244440 and idx < 1244460):
  5953. # print('Idx: {0} G {1:02x} {3} B {2:02x}'.format(
  5954. # idx, ord(self.__data__[idx]), b,
  5955. # self.__data__[idx], chr(b)))
  5956. self.__data__ = self.write()
  5957. # Get the offset to the CheckSum field in the OptionalHeader
  5958. # (The offset is the same in PE32 and PE32+)
  5959. checksum_offset = self.OPTIONAL_HEADER.get_file_offset() + 0x40 # 64
  5960. checksum = 0
  5961. # Verify the data is dword-aligned. Add padding if needed
  5962. #
  5963. remainder = len(self.__data__) % 4
  5964. data_len = len(self.__data__) + ((4 - remainder) * (remainder != 0))
  5965. for i in range(int(data_len / 4)):
  5966. # Skip the checksum field
  5967. if i == int(checksum_offset / 4):
  5968. continue
  5969. if i + 1 == (int(data_len / 4)) and remainder:
  5970. dword = struct.unpack(
  5971. "I", self.__data__[i * 4 :] + (b"\0" * (4 - remainder))
  5972. )[0]
  5973. else:
  5974. dword = struct.unpack("I", self.__data__[i * 4 : i * 4 + 4])[0]
  5975. # Optimized the calculation (thanks to Emmanuel Bourg for pointing it out!)
  5976. checksum += dword
  5977. if checksum >= 2 ** 32:
  5978. checksum = (checksum & 0xFFFFFFFF) + (checksum >> 32)
  5979. checksum = (checksum & 0xFFFF) + (checksum >> 16)
  5980. checksum = (checksum) + (checksum >> 16)
  5981. checksum = checksum & 0xFFFF
  5982. # The length is the one of the original data, not the padded one
  5983. #
  5984. return checksum + len(self.__data__)
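# A usage sketch: recompute the checksum and store it back in the header.
# For a well-formed image, verify_checksum() is then expected to return True.
#
#   pe.OPTIONAL_HEADER.CheckSum = pe.generate_checksum()
#   print(pe.verify_checksum())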
  5985. def is_exe(self):
  5986. """Check whether the file is a standard executable.
5987. This will return true only if the IMAGE_FILE_EXECUTABLE_IMAGE flag is set,
5988. the IMAGE_FILE_DLL flag is not set, and the file does not appear to be
5989. a driver either.
  5990. """
  5991. EXE_flag = IMAGE_CHARACTERISTICS["IMAGE_FILE_EXECUTABLE_IMAGE"]
  5992. if (
  5993. (not self.is_dll())
  5994. and (not self.is_driver())
  5995. and (EXE_flag & self.FILE_HEADER.Characteristics) == EXE_flag
  5996. ):
  5997. return True
  5998. return False
  5999. def is_dll(self):
  6000. """Check whether the file is a standard DLL.
  6001. This will return true only if the image has the IMAGE_FILE_DLL flag set.
  6002. """
  6003. DLL_flag = IMAGE_CHARACTERISTICS["IMAGE_FILE_DLL"]
  6004. if (DLL_flag & self.FILE_HEADER.Characteristics) == DLL_flag:
  6005. return True
  6006. return False
  6007. def is_driver(self):
  6008. """Check whether the file is a Windows driver.
  6009. This will return true only if there are reliable indicators of the image
  6010. being a driver.
  6011. """
  6012. # Checking that the ImageBase field of the OptionalHeader is above or
  6013. # equal to 0x80000000 (that is, whether it lies in the upper 2GB of
  6014. # the address space, normally belonging to the kernel) is not a
  6015. # reliable enough indicator. For instance, PEs that play the invalid
  6016. # ImageBase trick to get relocated could be incorrectly assumed to be
  6017. # drivers.
  6018. # This is not reliable either...
  6019. #
  6020. # if any((section.Characteristics &
  6021. # SECTION_CHARACTERISTICS['IMAGE_SCN_MEM_NOT_PAGED']) for
  6022. # section in self.sections ):
  6023. # return True
  6024. # If the import directory was not parsed (fast_load = True); do it now.
  6025. if not hasattr(self, "DIRECTORY_ENTRY_IMPORT"):
  6026. self.parse_data_directories(
  6027. directories=[DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_IMPORT"]]
  6028. )
  6029. # If there's still no import directory (the PE doesn't have one or it's
  6030. # malformed), give up.
  6031. if not hasattr(self, "DIRECTORY_ENTRY_IMPORT"):
  6032. return False
  6033. # self.DIRECTORY_ENTRY_IMPORT will now exist, although it may be empty.
  6034. # If it imports from "ntoskrnl.exe" or other kernel components it should
  6035. # be a driver
  6036. #
  6037. system_DLLs = set(
  6038. (b"ntoskrnl.exe", b"hal.dll", b"ndis.sys", b"bootvid.dll", b"kdcom.dll")
  6039. )
  6040. if system_DLLs.intersection(
  6041. [imp.dll.lower() for imp in self.DIRECTORY_ENTRY_IMPORT]
  6042. ):
  6043. return True
  6044. driver_like_section_names = set((b"page", b"paged"))
  6045. if driver_like_section_names.intersection(
  6046. [section.Name.lower().rstrip(b"\x00") for section in self.sections]
  6047. ) and (
  6048. self.OPTIONAL_HEADER.Subsystem
  6049. in (
  6050. SUBSYSTEM_TYPE["IMAGE_SUBSYSTEM_NATIVE"],
  6051. SUBSYSTEM_TYPE["IMAGE_SUBSYSTEM_NATIVE_WINDOWS"],
  6052. )
  6053. ):
  6054. return True
  6055. return False
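# A usage sketch combining the predicates above (the path is hypothetical):
#
#   pe = PE("sample.bin")
#   if pe.is_driver():
#       kind = "driver"
#   elif pe.is_dll():
#       kind = "dll"
#   elif pe.is_exe():
#       kind = "exe"
#   else:
#       kind = "unknown"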
  6056. def get_overlay_data_start_offset(self):
  6057. """Get the offset of data appended to the file and not contained within
  6058. the area described in the headers."""
  6059. largest_offset_and_size = (0, 0)
  6060. def update_if_sum_is_larger_and_within_file(
  6061. offset_and_size, file_size=len(self.__data__)
  6062. ):
  6063. if sum(offset_and_size) <= file_size and sum(offset_and_size) > sum(
  6064. largest_offset_and_size
  6065. ):
  6066. return offset_and_size
  6067. return largest_offset_and_size
  6068. if hasattr(self, "OPTIONAL_HEADER"):
  6069. largest_offset_and_size = update_if_sum_is_larger_and_within_file(
  6070. (
  6071. self.OPTIONAL_HEADER.get_file_offset(),
  6072. self.FILE_HEADER.SizeOfOptionalHeader,
  6073. )
  6074. )
  6075. for section in self.sections:
  6076. largest_offset_and_size = update_if_sum_is_larger_and_within_file(
  6077. (section.PointerToRawData, section.SizeOfRawData)
  6078. )
  6079. skip_directories = [DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_SECURITY"]]
  6080. for idx, directory in enumerate(self.OPTIONAL_HEADER.DATA_DIRECTORY):
  6081. if idx in skip_directories:
  6082. continue
  6083. try:
  6084. largest_offset_and_size = update_if_sum_is_larger_and_within_file(
  6085. (self.get_offset_from_rva(directory.VirtualAddress), directory.Size)
  6086. )
  6087. # Ignore directories with RVA out of file
  6088. except PEFormatError:
  6089. continue
  6090. if len(self.__data__) > sum(largest_offset_and_size):
  6091. return sum(largest_offset_and_size)
  6092. return None
  6093. def get_overlay(self):
  6094. """Get the data appended to the file and not contained within the area described
  6095. in the headers."""
  6096. overlay_data_offset = self.get_overlay_data_start_offset()
  6097. if overlay_data_offset is not None:
  6098. return self.__data__[overlay_data_offset:]
  6099. return None
  6100. def trim(self):
  6101. """Return the just data defined by the PE headers, removing any overlaid data."""
  6102. overlay_data_offset = self.get_overlay_data_start_offset()
  6103. if overlay_data_offset is not None:
  6104. return self.__data__[:overlay_data_offset]
  6105. return self.__data__[:]
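# A usage sketch: split a file into the image described by the headers and
# any appended overlay (the output file name is hypothetical).
#
#   overlay = pe.get_overlay()   # None when nothing is appended
#   image_only = pe.trim()
#   if overlay is not None:
#       with open("overlay.bin", "wb") as f:
#           f.write(overlay)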
  6106. # According to http://corkami.blogspot.com/2010/01/parce-que-la-planche-aura-brule.html
6107. # if PointerToRawData is less than 0x200 it's rounded to zero. Loading the test file
6108. # in a debugger it's easy to verify that the PointerToRawData value of 1 is rounded
6109. # to zero. Hence we reproduce the behavior.
  6110. #
  6111. # According to the document:
  6112. # [ Microsoft Portable Executable and Common Object File Format Specification ]
  6113. # "The alignment factor (in bytes) that is used to align the raw data of sections in
  6114. # the image file. The value should be a power of 2 between 512 and 64 K, inclusive.
  6115. # The default is 512. If the SectionAlignment is less than the architecture's page
  6116. # size, then FileAlignment must match SectionAlignment."
  6117. #
6118. # The following is a hard-coded constant used by the Windows loader.
  6119. def adjust_FileAlignment(self, val, file_alignment):
  6120. if file_alignment > FILE_ALIGNMENT_HARDCODED_VALUE:
  6121. # If it's not a power of two, report it:
  6122. if self.FileAlignment_Warning is False and not power_of_two(file_alignment):
  6123. self.__warnings.append(
  6124. "If FileAlignment > 0x200 it should be a power of 2. Value: %x"
  6125. % (file_alignment)
  6126. )
  6127. self.FileAlignment_Warning = True
  6128. return cache_adjust_FileAlignment(val, file_alignment)
  6129. # According to the document:
  6130. # [ Microsoft Portable Executable and Common Object File Format Specification ]
  6131. # "The alignment (in bytes) of sections when they are loaded into memory. It must be
  6132. # greater than or equal to FileAlignment. The default is the page size for the
  6133. # architecture."
  6134. #
  6135. def adjust_SectionAlignment(self, val, section_alignment, file_alignment):
  6136. if file_alignment < FILE_ALIGNMENT_HARDCODED_VALUE:
  6137. if (
  6138. file_alignment != section_alignment
  6139. and self.SectionAlignment_Warning is False
  6140. ):
  6141. self.__warnings.append(
  6142. "If FileAlignment(%x) < 0x200 it should equal SectionAlignment(%x)"
  6143. % (file_alignment, section_alignment)
  6144. )
  6145. self.SectionAlignment_Warning = True
  6146. return cache_adjust_SectionAlignment(val, section_alignment, file_alignment)
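# Illustration of the rounding rule documented above: with the default
# FileAlignment of 0x200, a PointerToRawData below 0x200 (e.g. 1) is treated
# as 0 when section data is located, so the call below is expected to
# return 0.
#
#   pe.adjust_FileAlignment(0x1, 0x200)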
  6147. def main():
  6148. import sys
  6149. usage = """\
  6150. pefile.py <filename>
  6151. pefile.py exports <filename>"""
  6152. if not sys.argv[1:]:
  6153. print(usage)
  6154. elif sys.argv[1] == "exports":
  6155. if not sys.argv[2:]:
  6156. sys.exit("error: <filename> required")
  6157. pe = PE(sys.argv[2])
  6158. for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols:
  6159. print(
  6160. hex(pe.OPTIONAL_HEADER.ImageBase + exp.address), exp.name, exp.ordinal
  6161. )
  6162. else:
  6163. print(PE(sys.argv[1]).dump_info())
  6164. if __name__ == "__main__":
  6165. main()
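# Command-line usage corresponding to main() above (file names are examples):
#
#   python pefile.py some_file.exe
#   python pefile.py exports some_library.dll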