ucfunc.cc

Go to the documentation of this file.
00001 /*
00002  *  Copyright (C) 2001-2002  The Exult Team
00003  *
00004  *  This program is free software; you can redistribute it and/or modify
00005  *  it under the terms of the GNU General Public License as published by
00006  *  the Free Software Foundation; either version 2 of the License, or
00007  *  (at your option) any later version.
00008  *
00009  *  This program is distributed in the hope that it will be useful,
00010  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *  GNU General Public License for more details.
00013  *
00014  *  You should have received a copy of the GNU General Public License
00015  *  along with this program; if not, write to the Free Software
00016  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00017  */
00018 
00019 #ifdef HAVE_CONFIG_H
00020 #  include <config.h>
00021 #endif
00022 
00023 #include "ucdata.h"
00024 #include "ucfunc.h"
00025 #include <set>
00026 #ifdef HAVE_SSTREAM
00027 #include <sstream>
00028 #else
00029 #include <strstream>
00030 #endif
00031 #include <algorithm>
00032 #include <iomanip>
00033 #include "files/utils.h"
00034 
00035 #include "ops.h"
00036 
00037 #if 0
00038   #define DEBUG_INDENT
00039   #define DEBUG_PARSE
00040   #define DEBUG_PARSE2
00041   #define DEBUG_PARSE2a
00042   #define DEBUG_READ
00043   #define DEBUG_PRINT
00044   #define DEBUG_READ_PAIR(X, Y) cout << '\t' << X << '\t' << Y << endl;
00045 #else
00046   #undef DEBUG_INDENT
00047   #undef DEBUG_PARSE
00048   #undef DEBUG_PARSE2
00049   #undef DEBUG_READ
00050   #undef DEBUG_PRINT
00051   #define DEBUG_READ_PAIR(X, Y)
00052 #endif
00053 
00054 //#define DEBUG_PARSE2
00055 //#define DEBUG_PARSE2a
00056 //#define DEBUG_PRINT
00057 
00058 using std::ostream;
00059 using std::ifstream;
00060 using std::string;
00061 using std::vector;
00062 using std::map;
00063 using std::endl;
00064 using std::pair;
00065 using std::ios;
00066 using std::streampos;
00067 using std::cout;
00068 using std::setw;
00069 using std::less;
00070 
00071 const string VARNAME = "var";
00072 const string VARPREFIX = "var";
00073 
00074 string demunge_ocstring(UCFunc &ucf, const FuncMap &funcmap, const string &asmstr, const vector<unsigned int> &params, const map<unsigned int, string> &intrinsics, const UCc &op, bool ucs_output);
00075 
/* Assumption: the 'var's are in their 'zeroed' state on initialization,
   unless something else has been assigned to them. */
00078 
/* Writes 'indent' tab characters to the stream 'o' and returns 'o', so that
   further output can be chained onto the call. When DEBUG_INDENT is defined
   the numeric indent level is written in front of the tabs. */
inline ostream &tab_indent(const unsigned int indent, ostream &o)
{
  // the common indent depths, each emitted with a single insertion
  static const char *const ready_made[] =
    { "", "\t", "\t\t", "\t\t\t", "\t\t\t\t", "\t\t\t\t\t" };
  static const unsigned int deepest_ready = 5;

  #ifdef DEBUG_INDENT
  o << indent;
  #endif

  // nothing to write: leave the stream completely untouched
  if(indent==0)
    return o;

  if(indent<=deepest_ready)
    return o << ready_made[indent];

  // deeper than the table covers: one tab at a time
  for(unsigned int written=0; written<indent; ++written)
    o << '\t';

  return o;
}
00099 
00100 /* Outputs the short function data 'list' format, returns true upon success */
00101 bool UCFunc::output_list(ostream &o, unsigned int funcno, const UCOptions &options)
00102 {
00103   o << "#" << std::setbase(10) << std::setw(4) << funcno << std::setbase(16) << ": "
00104     << (return_var ? '&' : ' ')
00105     << std::setw(4) << _funcid    << "H  "
00106     << std::setw(8) << _offset    << "  "
00107     << std::setw(4) << _funcsize  << "  "
00108     << std::setw(4) << _datasize  << "  "
00109     << std::setw(4) << codesize() << "  ";
00110   
00111   if(options.ucdebug)
00112     o << _data.find(0)->second;
00113   
00114   o << endl;
00115   
00116   return true;
00117 }
00118 
00119 /* Outputs the usecode-script formatted usecode, returns true upon success */
00120 bool UCFunc::output_ucs(ostream &o, const FuncMap &funcmap, const map<unsigned int, string> &intrinsics, const UCOptions &options)
00121 {
00122   unsigned int indent=0;
00123   
00124   if(_externs.size()) tab_indent(indent, o) << "// externs" << endl;
00125   // output the 'externs'
00126   for(vector<unsigned int>::iterator e=_externs.begin(); e!=_externs.end(); e++)
00127   {
00128     FuncMap::const_iterator fmp = funcmap.find(*e);
00129     output_ucs_funcname(tab_indent(indent, o) << "extern ", funcmap, *e, fmp->second.num_args, fmp->second.return_var) << ';' << endl;
00130   }
00131   
00132   if(_externs.size()) o << endl;
00133   
00134   // output the function name
00135   output_ucs_funcname(tab_indent(indent, o), funcmap, _funcid, _num_args, return_var) << endl;
00136   // start of func
00137   tab_indent(indent++, o) << '{' << endl;
00138   
00139   for(unsigned int i=_num_args; i<_num_args+_num_locals; i++)
00140     tab_indent(indent, o) << VARNAME << ' ' << VARPREFIX << std::setw(4) << i << ';' << endl;
00141     
00142   if(return_var) tab_indent(indent, o) << VARNAME << ' ' << "rr" << ';' << endl;
00143   
00144   if(_num_locals>0) o << endl;
00145   
00146   output_ucs_data(o, funcmap, intrinsics, options, indent);
00147   
00148   tab_indent(--indent, o) << '}' << endl;
00149   
00150   return true;
00151 }
00152 
00153 /* outputs the general 'function name' in long format. For function
00154   declarations and externs */
00155 ostream &UCFunc::output_ucs_funcname(ostream &o, const FuncMap &funcmap,
00156                                      unsigned int funcid,
00157                                      unsigned int numargs, bool return_var)
00158 {
00159   // do we return a variable
00160   if(return_var) o << VARNAME << ' ';
00161   
00162   // output the "function name"
00163   // TODO: Probably want to grab this from a file in the future...
00164   //o << demunge_ocstring(*this, funcmap, "%f1", ucc._params_parsed, intrinsics, ucc, true)
00165   
00166   FuncMap::const_iterator fmp = funcmap.find(funcid);
00167   if(fmp->second.funcname.size())
00168   {
00169     if(fmp->second.funcname[0]=='&')
00170       o << fmp->second.funcname.substr(1, fmp->second.funcname.size()-1);
00171     else
00172       o << fmp->second.funcname;
00173   }
00174   else
00175     o << "Func" << std::setw(4) << funcid;
00176   
00177   // output the "function number"
00178   o << " 0x" << funcid
00179   // output ObCurly braces
00180     << " (";
00181   
00182   for(unsigned int i=0; i<numargs; i++)
00183     o << VARNAME << ' ' << VARPREFIX << std::setw(4) << i << ((i==numargs-1) ? "" : ", ");
00184   
00185   o << ")";
00186   
00187   return o;
00188 }
00189 
00190 ostream &UCFunc::output_ucs_funcname(ostream &o, const FuncMap &funcmap)
00191 {
00192     return output_ucs_funcname(o, funcmap, _funcid, _num_args, return_var);
00193 }
00194 
00195 void UCFunc::output_ucs_data(ostream &o, const FuncMap &funcmap, const map<unsigned int, string> &intrinsics, const UCOptions &options, unsigned int indent)
00196 {
00197   vector<unsigned int> labeltmp(1);
00198   for(vector<GotoSet>::iterator i=gotoset.begin(); i!=gotoset.end(); ++i)
00199   {
00200     // we don't want to output the first "jump" (the start of the function)
00201     if(i!=gotoset.begin())
00202     {
00203       labeltmp[0]=i->offset();
00204       tab_indent(indent++, o) << demunge_ocstring(*this, funcmap, "label%f*_%1:", labeltmp, intrinsics, UCc(), true) << endl;
00205     }
00206     
00207     for(GotoSet::iterator j=(*i)().begin(); j!=(*i)().end(); j++)
00208     {
00209       const UCc &ucc = *(j->first);
00210       
00211       if(options.uselesscomment)
00212         tab_indent(indent, o) << "// Offset: " << std::setw(4) << ucc._offset << endl;
00213 
00214       output_ucs_opcode(o, funcmap, opcode_table_data, ucc, intrinsics, indent);
00215     }
00216     if(i!=gotoset.begin()) --indent; //decrement it again to skip the label statement.
00217     
00218   }
00219 }
00220 
00221 void UCFunc::output_ucs_opcode(ostream &o, const FuncMap &funcmap, const vector<UCOpcodeData> &optab, const UCc &op, const map<unsigned int, string> &intrinsics, unsigned int indent)
00222 {
00223   tab_indent(indent, o) << demunge_ocstring(*this, funcmap, optab[op._id].ucs_nmo, op._params_parsed, intrinsics, op, true) << ';' << endl;
00224   
00225   #ifdef DEBUG_PRINT
00226   for(vector<UCc *>::const_iterator i=op._popped.begin(); i!=op._popped.end(); i++)
00227   {
00228     if((*i)->_popped.size())
00229       output_ucs_opcode(o, funcmap, opcode_table_data, **i, intrinsics, indent+1);
00230     else
00231 //      tab_indent(indent+1, o) << demunge_ocstring(*this, funcmap, optab[(*i)->_id].ucs_nmo, op._params_parsed, **i) << endl;
00232       tab_indent(indent+1, o) << optab[(*i)->_id].ucs_nmo << endl;
00233   }
00234   #endif
00235 }
00236 
00237 void UCFunc::output_ucs_node(ostream &o, const FuncMap &funcmap, UCNode* ucn, const map<unsigned int, string> &intrinsics, unsigned int indent, const UCOptions &options)
00238 {
00239   if(!ucn->nodelist.empty()) tab_indent(indent, o) << '{' << endl;
00240   
00241   if(ucn->ucc!=0)
00242     output_asm_opcode(tab_indent(indent, o), funcmap, opcode_table_data, intrinsics, *(ucn->ucc), options);
00243   
00244   if(ucn->nodelist.size())
00245     for(vector<UCNode *>::iterator i=ucn->nodelist.begin(); i!=ucn->nodelist.end(); i++)
00246     {
00247       //tab_indent(indent, o);
00248       output_ucs_node(o, funcmap, *i, intrinsics, indent+1, options);
00249     }
00250       
00251   // end of func
00252   if(!ucn->nodelist.empty()) tab_indent(indent, o) << '}' << endl;
00253 }
00254 
00255 /* Just a quick function to remove all the ucc structured flagged as removable */
00256 inline void gc_gotoset(vector<GotoSet> &gotoset)
00257 {
00258   for(vector<GotoSet>::iterator i=gotoset.begin(); i!=gotoset.end(); i++)
00259   {
00260     i->gc();
00261     #ifdef DEBUG_GOTOSET
00262     cout << "----" << endl;
00263     #endif
00264   }
00265 }
00266 
00267 void UCFunc::parse_ucs(const FuncMap &funcmap, const map<unsigned int, string> &intrinsics, const UCOptions &options)
00268 {
00269   for(vector<UCc>::iterator i=_opcodes.begin(); i!=_opcodes.end(); i++)
00270     node.nodelist.push_back(new UCNode(&(*i)));
00271   
00272   parse_ucs_pass1(node.nodelist);
00273   parse_ucs_pass2(gotoset, funcmap, intrinsics);
00274   gc_gotoset(gotoset);
00275   
00276   if(!options.basic)
00277   {
00278     parse_ucs_pass3(gotoset, intrinsics);
00279   }
00280   
00281   #ifdef DEBUG_PARSE2
00282   for(vector<GotoSet>::iterator i=gotoset.begin(); i!=gotoset.end(); i++)
00283   {
00284     cout << std::setw(4) << i->offset() << endl;
00285     
00286     for(GotoSet::iterator j=(*i)().begin(); j!=(*i)().end(); j++)
00287     {
00288       cout << '\t' << std::setw(4) << j->first->_offset << '\t' << j->first->_id << endl;
00289     }
00290   }
00291   #endif
00292 }
00293 
00294 /* Pass 1 turns the 1-dimentional vector of opcodes, into a 2-dimentional array
00295    consisting of all the opcodes within two 'goto target offsets'. */
00296 void UCFunc::parse_ucs_pass1(vector<UCNode *> &nodes)
00297 {
00298   vector<unsigned int> jumps;
00299 
00300   // collect jump references
00301   for(unsigned int i=0; i<nodes.size(); i++)
00302   {
00303     if(nodes[i]->ucc!=0)
00304     {
00305       unsigned int isjump=0;
00306       for(vector<pair<unsigned int, unsigned int> >::iterator op=opcode_jumps.begin(); op!=opcode_jumps.end(); op++)
00307         if(op->first==nodes[i]->ucc->_id)
00308         {
00309           isjump=op->second;
00310           break;
00311         }
00312       
00313       if(isjump!=0)
00314       {
00315         assert(nodes[i]->ucc->_params_parsed.size()>=isjump);
00316         jumps.push_back(nodes[i]->ucc->_params_parsed[isjump-1]);
00317       }
00318     }
00319   }
00320 
00321   gotoset.push_back(GotoSet());
00322 
00323   for(unsigned int i=0; i<nodes.size(); i++)
00324   {
00325     if(nodes[i]->ucc!=0)
00326     {
00327       if(count(jumps.begin(), jumps.end(), nodes[i]->ucc->_offset))
00328       {
00329         gotoset.push_back(nodes[i]->ucc);
00330       }
00331       else
00332         gotoset.back().add(nodes[i]->ucc);
00333     }
00334   }
00335 }
00336 
00337 /* In Pass 2 we convert our 2-dimensional 'GotoSet' array into an array with
00338    each UCc, having it's parameters sitting in it's UCc::_popped vector. Elements
00339    that are parameters are flagged for removal (Gotoset::()[i]->second=true) from
00340    the original GotoSet. */
00341 void UCFunc::parse_ucs_pass2(vector<GotoSet> &gotoset, const FuncMap &funcmap, const map<unsigned int, string> &intrinsics)
00342 {
00343   for(vector<GotoSet>::iterator i=gotoset.begin(); i!=gotoset.end(); ++i)
00344   {
00345     parse_ucs_pass2a((*i)().rbegin(), (*i)(), 0, funcmap, intrinsics);
00346   }
00347 }
00348 
00349 vector<UCc *> UCFunc::parse_ucs_pass2a(vector<pair<UCc *, bool> >::reverse_iterator current, vector<pair<UCc *, bool> > &vec, unsigned int opsneeded, const FuncMap &funcmap, const map<unsigned int, string> &intrinsics)
00350 {
00351   vector<UCc *> vucc;
00352   unsigned int opsfound=0;
00353   
00354   #ifdef DEBUG_PARSE2
00355   output_asm_opcode(tab_indent(4, cout), *this, funcmap, opcode_table_data, intrinsics, *(current->first));
00356   #endif
00357   
00358   for(;vec.rend()!=current; current++)
00359   {
00360     #ifdef DEBUG_PARSE2
00361     output_asm_opcode(tab_indent(3, cout), *this, funcmap, opcode_table_data, intrinsics, *(current->first));
00362     #endif
00363     
00364     if(current->second==false)
00365     {
00366       if((opcode_table_data[current->first->_id].num_pop!=0) || (opcode_table_data[current->first->_id].call_effect!=0))
00367       {
00368         //if(opcode_table_data[current->first->_id].num_pop<0x7F)
00369         {
00370           #ifdef DEBUG_PARSE2
00371           output_asm_opcode(tab_indent(3, cout << "0x" << std::setw(2) << current->first->_id << "-"), *this, funcmap, opcode_table_data, intrinsics, *(current->first));
00372           tab_indent(3, cout << "0x" << std::setw(2) << current->first->_id << "-") << opcode_table_data[current->first->_id].num_pop << endl;
00373           #endif
00374           
00375           unsigned int num_args=0;
00376           
00377           if(opcode_table_data[current->first->_id].num_pop>0x7F)
00378           {
00379             #ifdef DEBUG_PARSE2a
00380             cout << "CALL EFFECT: " << opcode_table_data[current->first->_id].num_pop << '\t';
00381             #endif
00382             
00383             unsigned int offset = 0x100 - opcode_table_data[current->first->_id].num_pop - 1;
00384             num_args = current->first->_params_parsed[offset];
00385             
00386             #ifdef DEBUG_PARSE2a
00387             cout << num_args << endl;
00388             #endif
00389           }
00390           else if(opcode_table_data[current->first->_id].call_effect!=0)
00391           {
00392             assert(current->first->_params_parsed.size()>=1);
00393             assert(_externs.size()>=current->first->_params_parsed[0]);
00394             FuncMap::const_iterator fmp = funcmap.find(_externs[current->first->_params_parsed[0]]);
00395             assert(fmp!=funcmap.end());
00396             #ifdef DEBUG_PARSE2
00397             cout << "CALL:     " << fmp->second.funcid << '\t' << fmp->second.num_args << endl;
00398             #endif
00399             
00400             num_args = fmp->second.num_args;
00401           }
00402           else
00403           {
00404             #ifdef DEBUG_PARSE2
00405             cout << "Non-CALL: \t" << opcode_table_data[current->first->_id].num_pop << endl;
00406             #endif
00407             num_args = opcode_table_data[current->first->_id].num_pop;
00408           }
00409           
00410           if(num_args>0)
00411           {
00412             /* save the 'current' value as the return value and increment it so it's
00413                pointing at the 'next' current value */
00414             vector<pair<UCc *, bool> >::reverse_iterator ret(current);
00415             
00416             ret->first->_popped = parse_ucs_pass2a(++current, vec, num_args, funcmap, intrinsics);
00417             
00418             assert(current!=ret);
00419             
00420             --current;
00421             
00422             assert(current==ret);
00423             #ifdef DEBUG_PARSE2a
00424             output_asm_opcode(tab_indent(1, cout), *this, funcmap, opcode_table_data, intrinsics, *(ret->first));
00425             
00426             for(vector<UCc *>::iterator i=ret->first->_popped.begin(); i!=ret->first->_popped.end(); i++)
00427               output_asm_opcode(tab_indent(2, cout), *this, funcmap, opcode_table_data, intrinsics, **i);
00428             #endif
00429           }
00430         }
00431       }
00432       if((opsneeded!=0) && (current->second==false))
00433       {
00434         // if it's a 'push' opcode and we need items to return that we've popped off the stack...
00435         if(opcode_table_data[current->first->_id].num_push!=0)
00436         {
00437           #ifdef DEBUG_PARSE2
00438           output_asm_opcode(tab_indent(4, cout << "P-"), *this, funcmap, opcode_table_data, intrinsics, *(current->first));
00439           #endif
00440           
00441           opsfound+=opcode_table_data[current->first->_id].num_push;
00442           vucc.push_back(current->first);
00443           current->second=true;
00444         }
00445         // if it's a call to a function that returns a variable...
00446         else if(opcode_table_data[current->first->_id].call_effect!=0)
00447         {
00448           FuncMap::const_iterator fmp = funcmap.find(_externs[current->first->_params_parsed[0]]);
00449           assert(fmp!=funcmap.end());
00450           
00451           if(fmp->second.return_var)
00452           {
00453             #ifdef DEBUG_PARSE2
00454             output_asm_opcode(tab_indent(4, cout << "C-"), *this, funcmap, opcode_table_data, intrinsics, *(current->first));
00455             #endif
00456           
00457             opsfound+=1;
00458             vucc.push_back(current->first);
00459             current->second=true;
00460           }
00461         }
00462         else
00463           current->second=true;
00464           
00465         // if we've found all the ops we were searching for, return them
00466         if(opsfound>=opsneeded)
00467         {
00468           return vucc;
00469         }
00470       }
00471     }
00472   }
00473   
00474   if(vucc.size()>0) cout << "DID NOT FIND ALL OPCODE PARAMETERS." << endl;
00475   return vucc;
00476 }
00477 
/* The 'optimisation' phase. Attempting to remove as many goto...labels as possible.
   Currently an empty placeholder: neither 'gotoset' nor 'intrinsics' is
   read or modified. */
void UCFunc::parse_ucs_pass3(vector<GotoSet> &gotoset, const map<unsigned int, string> &intrinsics)
{

}
00483 
00484 bool UCFunc::output_tt(std::ostream &o)
00485 {
00486   o << "\t<0x" << setw(4) << _funcid << ">" << endl;
00487   
00488   for(map<unsigned int, string, less<unsigned int> >::iterator i=_data.begin(); i!=_data.end(); i++)
00489   {
00490     o << "\t\t<0x" << setw(4) << i->first << ">" << endl
00491       << "\t\t`" << i->second << "`" << endl
00492       << "\t\t</>" << endl;
00493   }
00494   o << "\t</>" << endl;
00495   
00496   return true;
00497 }
00498 
00499 /* calculates the relative offset jump location, used in opcodes jmp && jne */
00500 inline int calc16reloffset(const UCc &op, unsigned int param)
00501 {
00502   /* forumla:
00503      real offset = offset of start of current opcode
00504                  + int of parameter (since you can jump backwards)
00505                  + 1 (size of "opcode")
00506                  + size of "opcode" parameter data
00507      NOTE: since param is unsigned, a twos-complimant is required:
00508            formula: 0xFFFF - (unsigned short)param + 1
00509                     ^^^^^^ max of unsighed short
00510   */
00511   return op._offset + ((param>>15) ? (-1 * (0xFFFF - static_cast<unsigned short>(param) + 1)) : static_cast<int>(param)) + 1 + op._params.size();
00512 }
00513 
00514 /* calculates the relative offset jump location, used in opcodes jmp && jne */
00515 inline int calc32reloffset(const UCc &op, unsigned int param) //FIXME: Test this!
00516 {
00517   /* forumla:
00518      real offset = offset of start of current opcode
00519                  + int of parameter (since you can jump backwards)
00520                  + 1 (size of "opcode")
00521                  + size of "opcode" parameter data
00522      NOTE: since param is unsigned, a twos-complimant is required:
00523            formula: 0xFFFFFFFF - (unsigned int)param + 1
00524                     ^^^^^^ max of unsighed int
00525   */
00526   return op._offset + ((param>>31) ? (-1 * (0xFFFFFFFF - static_cast<unsigned int>(param) + 1)) : static_cast<int>(param)) + 1 + op._params.size();
00527 }
00528 
00529 void ucc_parse_parambytes(UCc &ucop, const UCOpcodeData &otd)
00530 {
00531   unsigned int first=0;
00532   
00533   for(vector<pair<unsigned int, bool> >::const_iterator s=otd.param_sizes.begin(); s!=otd.param_sizes.end(); ++s)
00534   {
00535     //cout << ucop._id << '\t' << ucop._params.size() << endl;
00536     
00537     assert(first<ucop._params.size());
00538     
00539     unsigned int ssize=s->first;
00540     bool offset_munge=s->second;
00541     
00542     assert(ssize!=0);
00543 
00544     if(ssize==1)
00545       ucop._params_parsed.push_back(static_cast<unsigned int>(ucop._params[first++]));
00546     else if(ssize==2)
00547       if(offset_munge)
00548       {
00549         unsigned int calcvar = static_cast<unsigned int>(ucop._params[first++]);
00550         calcvar += ((static_cast<unsigned int>(ucop._params[first++])) << 8);
00551         unsigned int reloffset = calc16reloffset(ucop, calcvar);
00552         ucop._params_parsed.push_back(reloffset);
00553         ucop._jump_offsets.push_back(reloffset);
00554       }
00555       else
00556       {
00557         unsigned int calcvar = static_cast<unsigned int>(ucop._params[first++]);
00558         calcvar += ((static_cast<unsigned int>(ucop._params[first++])) << 8);
00559         ucop._params_parsed.push_back(calcvar);
00560       }
00561     else if(ssize==4)
00562       if(offset_munge)
00563       {
00564         unsigned int calcvar = static_cast<unsigned int>(ucop._params[first++]);
00565         calcvar += ((static_cast<unsigned int>(ucop._params[first++])) << 8);
00566         calcvar += ((static_cast<unsigned int>(ucop._params[first++])) << 16);
00567         calcvar += ((static_cast<unsigned int>(ucop._params[first++])) << 24);
00568         unsigned int reloffset = calc32reloffset(ucop, calcvar);
00569         ucop._params_parsed.push_back(reloffset);
00570         ucop._jump_offsets.push_back(reloffset);
00571       }
00572       else
00573       {
00574         unsigned int calcvar = static_cast<unsigned int>(ucop._params[first++]);
00575         calcvar += ((static_cast<unsigned int>(ucop._params[first++])) << 8);
00576         calcvar += ((static_cast<unsigned int>(ucop._params[first++])) << 16);
00577         calcvar += ((static_cast<unsigned int>(ucop._params[first++])) << 24);
00578         ucop._params_parsed.push_back(calcvar);
00579       }
00580     else
00581       assert(false); // just paranoia.
00582   }
00583 }
00584 
00585 /* prints the "assembler" output of the usecode, currently trying to duplicate
00586    the output of the original ucdump... returns true if successful*/
00587 bool UCFunc::output_asm(ostream &o, const FuncMap &funcmap, const map<unsigned int, string> &intrinsics, const UCOptions &options)
00588 {
00589   if(options.verbose) cout << "Printing function..." << endl;
00590   
00591   o << "Function at file offset " << std::setw(8) << _offset << "H" << endl;
00592   o << "\t.funcnumber  " << std::setw(4) << _funcid << "H" << endl;
00593   if(ext32) o << "\t.ext32" << endl;
00594   o << "\t.msize       " << ((ext32) ? setw(8) : setw(4)) << _funcsize << "H" << endl;
00595   o << "\t.dsize       " << ((ext32) ? setw(8) : setw(4)) << _datasize << "H" << endl;
00596   
00597   if(debugging_info)
00598     o << "\t  .dbgoffset " << std::setw(4) << debugging_offset << "H" << endl;
00599   
00600   if(_data.size())
00601     output_asm_data(o);
00602   
00603   o << "Code segment at file offset " << std::setw(8) << _codeoffset << "H" << endl;
00604   o << "\t.argc        " << std::setw(4) << _num_args << "H" << endl;
00605   o << "\t.localc      " << std::setw(4) << _num_locals << "H" << endl;
00606   o << "\t.externsize  " << std::setw(4) << _externs.size() << "H" << endl;
00607   
00608   for(typeof(_externs.begin()) i=_externs.begin(); i!=_externs.end(); i++)
00609     o << '\t' << "  .extern    " << std::setw(4) << *i << "H" << endl;
00610 /*  for(unsigned int i=0; i<_externs.size(); i++) //FIXME: ::iterators
00611     o << '\t' << "  .extern    " << std::setw(4) << _externs[i] << "H" << endl;*/
00612   
00613   for(vector<UCc>::iterator op=_opcodes.begin(); op!=_opcodes.end(); op++)
00614     output_asm_opcode(o, funcmap, opcode_table_data, intrinsics, *op, options);
00615   
00616   return true;
00617 }
00618 
00619 void UCFunc::output_asm_data(ostream &o)
00620 {
00621   static const unsigned int nochars=60;
00622   // limit of about 60 chars to a line, wrap to the next line if longer then this...
00623   for(map<unsigned int, string, less<unsigned int> >::iterator i=_data.begin(); i!=_data.end(); i++)
00624   {
00625     for(unsigned int j=0; j<i->second.size(); j++)
00626     {
00627       if(j==0)
00628         o << setw(4) << i->first;
00629       if((j!=0) && !(j%nochars))
00630         o << "'" << endl;
00631       if(!(j%nochars))
00632         o << "\tdb\t'";
00633 
00634       o << i->second[j];
00635     }
00636     o << "'" << endl;
00637     o << "\tdb\t00" << endl;
00638   }
00639 }
00640 
00641 void UCFunc::output_raw_opcodes(ostream &o, const UCc &op)
00642 {
00643   // chars in opcode
00644   o << ' ' << std::setw(2) << static_cast<unsigned int>(op._id);
00645   if(op._params.size()) cout << ' ';
00646 
00647   for(unsigned int i=0; i<op._params.size(); i++)
00648   {
00649     o << std::setw(2) << static_cast<unsigned int>(op._params[i]);
00650     if(i!=op._params.size())
00651       o << ' ';
00652   }
00653 
00654   // seperator
00655   unsigned int numsep = op._params.size();
00656   //cout << endl << numsep << endl;
00657   if(numsep>6)
00658     o << endl << "\t\t\t";
00659   else if (numsep>5)
00660     o << " ";
00661   else if (numsep>2)
00662     o << "\t";
00663   else
00664     o << "\t\t";
00665 }
00666 
00667 void UCFunc::output_asm_opcode(ostream &o, const FuncMap &funcmap, const vector<UCOpcodeData> &optab, const map<unsigned int, string> &intrinsics, const UCc &op, const UCOptions &options)
00668 {
00669   // offset
00670   o << std::setw(4) << op._offset << ':';
00671 
00672   if(options.rawops) output_raw_opcodes(o, op);
00673   else            o << '\t';
00674 
00675   o << demunge_ocstring(*this, funcmap, optab[op._id].asm_nmo, op._params_parsed, intrinsics, op, false);
00676 
00677   if(options.autocomment)
00678     o << demunge_ocstring(*this, funcmap, optab[op._id].asm_comment, op._params_parsed, intrinsics, op, false);
00679 
00680   o << endl;
00681 }
00682 
/* Converts the digit characters '1'..'9' to their numeric value. Every other
   character, '0' included, yields 0. */
inline unsigned int charnum2uint(const char c)
{
  // the decimal digit characters are guaranteed to be consecutive
  if((c>='1') && (c<='9'))
    return static_cast<unsigned int>(c-'0');

  return 0;
}
00700 
00701 // FIXME: Remove the passed &params value. Get it from op._params_parsed
00702 string demunge_ocstring(UCFunc &ucf, const FuncMap &funcmap, const string &asmstr, const vector<unsigned int> &params, const map<unsigned int, string> &intrinsics, const UCc &op, bool ucs_output)
00703 {
00704 #ifdef HAVE_SSTREAM
00705   std::stringstream str;
00706 #else
00707   std::strstream str;
00708 #endif
00709   str << std::setfill('0') << std::setbase(16);
00710   str.setf(ios::uppercase);
00711   size_t  len=asmstr.length();
00712 
00713   if(len==0) return string(); // for the degenerate case
00714 
00715   bool finished=false; // terminating details are at end-of-while
00716   unsigned int i=0; // istr index
00717   unsigned int width=0; // width value for setw()
00718 
00719   if(ucs_output && opcode_table_data[op._id].flag_paren) str << '(';
00720   
00721   while(!finished&&i<len)
00722   {
00723     bool special_call(false); // FIXME: <sigh> temporary exception handling for call (0x24)
00724     
00725     char c = asmstr[i];
00726     
00727     width = 4; // with defaults to 4
00728     
00729     switch(c)
00730     {
00731       case '\\':
00732         i++;
00733         c = asmstr[i];
00734         switch(c)
00735         {
00736           case '\\': str << '\\'; break;
00737           case 'n':  str << '\n'; break;
00738           case 't':  str << '\t'; break;
00739           case '\'': str << '\''; break;
00740           case '"':  str << '\"'; break;
00741           case 'b':  // bell is invalid
00742           default:   // we'll silently drop errors... it's the only "clean" way
00743             str << '\\' << c;
00744         }
00745         break;
00746       case '%':
00747         {
00748           i++;
00749           c = asmstr[i];
00750 
00751           // if it's a "byte" set width to 2, and get the next char
00752           if(c=='b')      { i++; c = asmstr[i]; width=2; }
00753           // if it's a "short" set width to 4, and get the next char
00754           else if(c=='s') { i++; c = asmstr[i]; width=4; }
00755           // if it's a "int" set width to 8, and get the next char
00756           else if(c=='n') { i++; c = asmstr[i]; width=8; }
00757           // if it's a "long" set width to 16, and get the next char
00758           else if(c=='l') { i++; c = asmstr[i]; width=16; }
00759           // if we want to output the 'decimal' value rather then the default hex
00760           else if(c=='d')
00761           {
00762             i++; c = asmstr[i];
00763             unsigned int t = charnum2uint(c);
00764             
00765             if(t!=0)
00766             {
00767               assert(params.size()>=t);
00768               str << std::setbase(10) << params[t-1] << std::setbase(16);
00769             }
00770             else if(c=='%')
00771               str << '%';
00772             break;
00773           }
00774           // if it's the character representation of a text data string we want
00775           else if(c=='t')
00776           {
00777             bool commentformat=false;
00778             i++; c = asmstr[i];
00779             
00780             // if we only want to output the 'short' format of the text (comment format)
00781             if(c=='c')
00782             {
00783               commentformat=true;
00784               i++; c = asmstr[i];
00785             }
00786             
00787             unsigned int t = charnum2uint(c);
00788             
00789             assert(params.size()>=t);
00790             assert(t!=0);
00791             string s = ucf._data.find(params[t-1])->second;
00792             
00793             if(commentformat)
00794               if(s.size()>17) s = s.substr(0, 17) + string("...");
00795             
00796             // escape the appropriate characters...
00797             // we'll only do it in the 'full' text output for the moment.
00798             if(!commentformat)
00799               for(string::size_type i=0; i<s.size(); i++)
00800                 if((s[i]=='\"') || (s[i]=='\\'))
00801                 {
00802                   s.insert(i, "\\");
00803                   ++i;
00804                 }
00805             
00806             str << s;
00807             break;
00808           }
00809           // if it's the intrinsic name we want
00810           else if(c=='i')
00811           {
00812             i++; c = asmstr[i];
00813             unsigned int t = charnum2uint(c);
00814             
00815             assert(params.size()>=t);
00816             assert(t!=0);
00817             string s = intrinsics.find(params[t-1])->second;
00818             str << s;
00819             break;
00820           }
00821           // if it's external function name we want
00822           else if(c=='f')
00823           {
00824             i++; c = asmstr[i];
00825             
00826             if(c=='*')
00827             {
00828               if(ucf.funcname.size())
00829               {
00830                 if(ucf.funcname[0]=='&')
00831                   str << ucf.funcname.substr(1, ucf.funcname.size()-1);
00832                 else
00833                   str << ucf.funcname;
00834               }
00835               else
00836                 str << "Func" << std::setw(4) << ucf._funcid;
00837             }
00838             else
00839             {
00840               unsigned int t = charnum2uint(c);
00841               
00842               assert(ucf._externs.size()>=t);
00843               assert(t!=0);
00844               assert(op._params_parsed.size()>=1);
00845               
00846               FuncMap::const_iterator fmp = funcmap.find(ucf._externs[op._params_parsed[t-1]]);
00847               if(fmp->second.funcname.size())
00848               {
00849                 if(fmp->second.funcname[0]=='&')
00850                   str << fmp->second.funcname.substr(1, fmp->second.funcname.size()-1);
00851                 else
00852                   str << fmp->second.funcname;
00853               }
00854               else
00855                 str << "Func" << std::setw(4) << ucf._externs[op._params_parsed[t-1]];
00856             }
00857             break;
00858           }
00859           // if it's the rendered expression of a popped parameter (op._popped) we want
00860           else if(c=='p')
00861           {
00862             i++; c = asmstr[i];
00863             unsigned int t = charnum2uint(c);
00864             
00865             // FIXME: this is the special 'call' case, it may be a good idea to make more general
00866             if((t==0) && (c==','))
00867             {
00868               special_call=true;
00869             
00870               for(vector<UCc *>::const_iterator i=op._popped.begin(); i!=op._popped.end();)
00871               {
00872                 str << demunge_ocstring(ucf, funcmap, opcode_table_data[(*i)->_id].ucs_nmo, (*i)->_params_parsed, intrinsics, **i, ucs_output);
00873                 if(++i!=op._popped.end())
00874                   str << ", ";
00875               }
00876             }
00877             
00878             if(t!=0)
00879             {
00880               if(t>op._popped.size())
00881                 str << "SOMETHING_GOES_HERE()";
00882               else
00883               {
00884                 UCc &ucc(*op._popped[t-1]);
00885                 str << demunge_ocstring(ucf, funcmap, opcode_table_data[ucc._id].ucs_nmo, ucc._params_parsed, intrinsics, ucc, ucs_output);
00886               }
00887             }
00888             break;
00889           }
00890           
00891           if(special_call!=true)
00892           {
00893             unsigned int t = charnum2uint(c);
00894             if(t!=0)
00895             {
00896               assert(params.size()>=t);
00897               str << std::setw(width) << params[t-1];
00898             }
00899             else if(c=='%')
00900               str << '%';
00901           }
00902         }
00903         break;
00904       default: // it's just a character, leave it be
00905         str << c;
00906     }
00907 
00908     i++;
00909     if(i==asmstr.size()) finished=true;
00910   }
00911   
00912   if(ucs_output && opcode_table_data[op._id].flag_paren) str << ')';
00913   
00914   return str.str();
00915 }
00916 
00917 void readbin_U7UCFunc(ifstream &f, UCFunc &ucf, const UCOptions &options)
00918 {
00919 //  #define DEBUG_READ_PAIR(X, Y) cout << '\t' << X << '\t' << Y << endl;
00920   // offset to start of function
00921   ucf._offset = f.tellg();
00922   DEBUG_READ_PAIR("  Offset: ", ucf._offset);
00923 
00924   // Read Function Header
00925   ucf._funcid = Read2(f);
00926   
00927   if(options.very_verbose)
00928     cout << "\tReading Function: " << setw(4) << ucf._funcid << endl;
00929   
00930   DEBUG_READ_PAIR("  FuncID: ", ucf._funcid);
00931   
00932   if(ucf._funcid!=0xFFFF)
00933   {
00934     
00935     // This is the original usecode function header
00936     ucf._funcsize = Read2(f);
00937     DEBUG_READ_PAIR("  FuncSize: ", ucf._funcsize);
00938     
00939     // save body offset in case we need it
00940     ucf._bodyoffset = f.tellg();
00941     
00942     ucf._datasize = Read2(f);
00943     DEBUG_READ_PAIR("  DataSize: ", ucf._datasize);
00944   }
00945   else
00946   {
00947     // This is the ext32 extended usecode function header
00948     ucf.ext32=true;
00949     ucf._funcid = Read2(f);
00950     
00951     if(options.very_verbose)
00952       cout << "\tReading Function: " << setw(4) << ucf._funcid << endl;
00953     
00954     DEBUG_READ_PAIR("  extFuncID: ", ucf._funcid);
00955     ucf._funcsize = Read4(f);
00956     DEBUG_READ_PAIR("  extFuncSize: ", ucf._funcsize);
00957     
00958     // save body offset in case we need it
00959     ucf._bodyoffset = f.tellg();
00960     
00961     ucf._datasize = Read4(f);
00962     DEBUG_READ_PAIR("  extDataSize: ", ucf._datasize);
00963   }
00964   
00965   // process ze data segment!
00966   {
00967     streampos pos = f.tellg(); // paranoia
00968   
00969     unsigned int off = 0;
00970     // Load all strings & their offsets
00971     while( off < ucf._datasize )
00972     {
00973       assert(!f.eof());
00974       
00975       string data;
00976       getline(f, data, static_cast<char>(0x00));
00977       ucf._data.insert(pair<unsigned int, string>(off, data));
00978       off+=data.size()+1;
00979     }
00980     f.seekg(pos, ios::beg); // paranoia
00981     f.seekg(ucf._datasize, ios::cur); // paranoia
00982   }
00983 
00984   #if 0 
00985   if(ucf._funcid==_search_func)
00986     for(map<unsigned int, string>::iterator i=ucf._data.begin(); i!=ucf._data.end(); i++)
00987       cout << i->first << "\t" << i->second << endl;
00988   #endif
00989   
00990   // process code segment
00991   {
00992     //streampos start_of_code_seg = f.tellg(); // what's this used for?
00993     ucf._codeoffset = f.tellg();
00994 
00995     // get the number of arguments to the function
00996     ucf._num_args = Read2(f);
00997 
00998     // get the number of local variables
00999     ucf._num_locals = Read2(f);
01000 
01001     // get the number of external function numbers
01002     ucf._num_externs = Read2(f);
01003     
01004     // load the external function numbers
01005     for(unsigned int i=0; i<ucf._num_externs; i++)
01006       ucf._externs.push_back(Read2(f));
01007     
01008     // ok, now to load the usecode
01009     unsigned int code_offset=0;
01010 
01011     /* Here the '3+' comes from the sizeof(_datasize) + sizeof(_num_args)
01012       + sizeof(_num_locals) + sizeof(_num_externs)
01013       which are stored in the file as 16bit shorts, with the exception of
01014       an ext32 function header, where the _datasize is a 32bit structure */
01015     unsigned int code_size = ucf._funcsize - ucf._datasize - ((4+ucf._num_externs) * SIZEOF_USHORT);
01016     if(ucf.ext32==true) code_size-=2;
01017     
01018     DEBUG_READ_PAIR("Code Size: ", code_size);
01019 
01020     while(code_offset<code_size)
01021     {
01022       assert(!f.eof());
01023       UCc ucop;
01024       
01025       ucop._offset = code_offset;
01026 
01027       ucop._id = Read1(f);
01028       code_offset++;
01029 
01030       const UCOpcodeData &otd = opcode_table_data[ucop._id];
01031 
01032       if(otd.opcode==0x00)
01033       {
01034         cout << ucop._id << ' ' << code_offset << endl;
01035         assert(otd.opcode!=0x00);
01036       }
01037       
01038       //assert(((otd.asm_nmo.size()!=0) && (otd.ucs_nmo.size()!=0)));
01039       for(unsigned int i=0; i<otd.num_bytes; i++)
01040         ucop._params.push_back(Read1(f));
01041 
01042       // parse the parameters
01043       ucc_parse_parambytes(ucop, otd);
01044 
01045       code_offset+=otd.num_bytes;
01046 
01047       /* if we're an opcode that sets a return value, we need to mark the
01048         function as one that returns a value */
01049       if(otd.flag_return==true)
01050         ucf.return_var=true;
01051       
01052       /* if we're a function debugging opcode, set the debuging flag, and
01053         assign the variable name string offset
01054         TODO: Add this to opcodes.txt */
01055       if((ucop._id==0x4D) && (options.game_bg() || options.game_si()))
01056       {
01057         ucf.debugging_info=true;
01058         assert(ucop._params_parsed.size()>=2);
01059         ucf.debugging_offset = ucop._params_parsed[1];
01060         ucf.funcname = ucf._data.find(0x0000)->second;
01061       }
01062       
01063       ucf._opcodes.push_back(ucop);
01064 
01065       #ifdef DEBUG_READ
01066       cout << std::setw(4) << code_size << "\t" << std::setw(4) << code_offset << "\t" << std::setw(4) << (unsigned int)ucop._offset << "\t" << std::setw(2) << (unsigned int)ucop._id << "\t";
01067       for(unsigned int i=0; i<ucop._params.size(); i++)
01068         cout << std::setw(2) << (unsigned int)ucop._params[i] << ',';
01069       cout << endl;
01070       #endif
01071     }
01072   }
01073 }
01074 
01075 void readbin_U8UCFunc(ifstream &f, UCFunc &ucf)
01076 {
01077   // Stub: the body is empty, so nothing is read from f and ucf is left unchanged (presumably a placeholder for a U8 reader -- TODO confirm).
01078 }
01079 

Generated on Mon Jul 9 14:42:52 2007 for ExultEngine by  doxygen 1.5.1