doc update
[www.jankratochvil.net.git] / project / captive / doc / Index.html.pl
1 #! /usr/bin/perl
2
3 # $Id$
4 # Captive project doc Index page Perl template.
5 # Copyright (C) 2003 Jan Kratochvil <project-www.jankratochvil.net@jankratochvil.net>
6
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; exactly version 2 of June 1991 is required
10
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 # GNU General Public License for more details.
15
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19
20
21 package project::captive::doc::Index;
22 require 5.6.0;  # at least 'use warnings;' but we need some 5.6.0+ modules anyway
23 our $VERSION=do { my @r=(q$Revision$=~/\d+/g); sprintf "%d.".("%03d"x$#r),@r; };  # digit runs of the Revision keyword string: first one as "%d.", every further one appended zero-padded to 3 digits
24 our $CVS_ID=q$Id$;  # the Id keyword string exactly as stored in this file
25 use strict;
26 use warnings;
27 # Compile time: take the last whitespace-separated field of the first "top_srcdir" line of ./Makefile and add it to @INC through a string-eval'ed "use lib". NOTE(review): the open() result is not checked.
28 BEGIN{ open F,"Makefile"; our $top_dir=pop @{[split /\s/,(grep /^top_srcdir/,<F>)[0]]}; eval "use lib '$top_dir'"; close F; }
29 use My::Web;
30 require "CGI";
31
32 # Set up the page through My::Web: package name, page title and the CSS rules for the span classes used in the text below.
33 My::Web->init(
34                 "__PACKAGE__"=>__PACKAGE__,
35                 "title"=>'Captive NTFS doc',
36                 "head_css"=>"
37 .productname { font-family: cursive; }
38 .fname       { font-family: monospace; }
39 .constant    { font-family: monospace; }
40 .author      { font-family: cursive; }
41 .stuff       { font-style: italic; font-size: larger; margin-left: 20%; margin-right: 10%; }
42 .function    { font-family: monospace; }
43 .type        { font-family: monospace; }
44 .command     { font-family: monospace; }
45 .instruction { font-style: italic; }
46 ",
47                 );
48 My::Web->heading();  # defined in My::Web (not visible here); presumably emits the page header -- confirm
49
50 # doc_img($img_base,$caption): return the HTML of a centered, borderless one-cell table holding img($img_base,$caption) plus a <caption>, followed by vskip "2ex".
51 sub doc_img ($$)  # prototype: exactly two scalar arguments
52 {
53 my($img_base,$caption)=@_;
54         # The markup is accumulated in $r and returned as one string; img() and vskip() are not defined in this file.
55         my $r="";
56         $r.='<table border="0" align="center">'."\n";
57                 $r.="\t<tr><td>".img($img_base,$caption)."</td></tr>\n";
58                 $r.="\t<caption>$caption</caption>\n";  # $caption is interpolated unescaped (callers pass HTML markup in it); NOTE(review): HTML 4 expects <caption> directly after the <table> start tag
59         $r.='</table>'."\n";
60         $r.=vskip "2ex";  # presumably vertical spacing after the figure -- confirm against My::Web
61         return $r;
62 }
63 # captive_srcfile($filename): return a_href() of the viewcvs "*checkout*" URL (rev=HEAD) of $filename below priv/captive/, passing $filename itself as the second argument.
64 sub captive_srcfile ($)  # prototype: exactly one scalar argument
65 {
66 my($filename)=@_;
67         # No explicit return: the a_href() call is the last statement, so its value is the sub's return value.
68         a_href 'http://cvs.jankratochvil.net/viewcvs/*checkout*/priv/captive/'.$filename.'?rev=HEAD',$filename;
69 }
70 # Two a_href() results for the same GNU free-software URL, differing only in the second argument ('Free' vs. 'free (as in beer)').
71 my $freespeech=a_href 'http://www.gnu.org/philosophy/free-sw.html','Free';
72 my $freebeer=a_href 'http://www.gnu.org/philosophy/free-sw.html','free (as in beer)';
73 # productname($url,$name): return a_href($url, HTML-escaped $name) wrapped in <span class="productname">.
74 sub productname
75 {
76 my($url,$name)=@_;
77         # Only $name goes through CGI::escapeHTML(); $url is handed to a_href() unchanged.
78         return '<span class="productname">'.a_href($url,CGI::escapeHTML($name)).'</span>';
79 }
80 my $Wine=productname 'http://www.winehq.com/','Wine';  # productname() results, interpolated into the page text below
81 my $ReactOS=productname 'http://www.reactos.com/','ReactOS';
82 my $LinuxNTFS=productname 'http://linux-ntfs.sourceforge.net/','Linux NTFS';
83 my $GnomeVFS=productname 'http://developer.gnome.org/doc/API/gnome-vfs/','Gnome-VFS';
84 my $GnomeVFSmodule=productname 'http://developer.gnome.org/doc/API/gnome-vfs/modules.html','Gnome-VFS-module';
85 my $gnulinux='GNU/Linux';  # plain string: no link and no productname span
86
87 # Print a centered "PRELIMINARY" <h1> banner with vskip("10ex") before and after it (vskip is not defined in this file).
88 print vskip("10ex")."<h1 align=\"center\">!!! PRELIMINARY - TO BE UPDATED !!!</h1>\n".vskip("10ex");
89
90
91 # FIXME:
92 # Compatibility with NT4 etc. - just legal reasons.
93
94
95 print <<"HERE";
96
97
98 <h1>Reasons for the Implementation</h1>
99
100         <p>Currently there is no possibility for any of the available $freespeech
101                 ($freespeech used in the following text in the meaning of
102                 &quot;<a href="http://www.gnu.org/philosophy/free-sw.html">free as in speech</a>&quot;)
103         operating systems to reliably write to the most common disk partition
104         filesystem type - <span class="productname">Microsoft NTFS</span>. It would
105         have been supported a long time ago but there is no proper documentation of
106         <span class="productname">NTFS</span> filesystem data structures available.
107         Since <span class="productname">Microsoft</span> corporation continues in its
108         propagation of <span class="productname">Microsoft Windows NT</span>
109                 (<span class="productname">NT</span> identifier used in the following text
110                 applies to all the products of <span class="productname">Microsoft</span>
111                 <span class="productname">NT</span> series such as
112                 <span class="productname">NT&nbsp;4.0</span>,
113                 <span class="productname">2000</span> as NT-5.0
114                 and
115                 <span class="productname">XP</span> as NT-5.1.)
116         based operating systems <span class="productname">NTFS</span> is the default
117         disk file system type for vendor preinstalled <span class="productname">Microsoft Windows</span>.</p>
118
119         <p>Unfortunately the <span class="productname">NTFS</span> filesystem has too
120         complex data structure to allow a complete reverse engineering process in
121         reasonable time. Currently available $freespeech solutions such as $LinuxNTFS
122         filesystem have already implemented reliable reverse
123         engineered read-only access. However <a name="reliability">reliable</a>
124         read-write part of the access would require much better
125         knowledge of the <span class="productname">NTFS</span> data structures.
126         Currently only rewriting of already existing file data blocks is supported
127         by $LinuxNTFS - no file creation, no file deletion, no directory operations etc.
128         Also any future versions of <span class="productname">NTFS</span> filesystem
129         would require another major reverse engineering effort.</p>
130
131
132 <h1>Challenges of the Project</h1>
133
134         <p>The <a name="NTFSgoal">ultimate goal</a> of this project is definitely the
135         free implementation of <a href="#reliability">reliable</a> read-write <span
136         class="productname">NTFS</span> filesystem driver. This project chose to
137         solve this problem in the style of $Wine project by using the original binary
138         <span class="fname">ntfs.sys</span> and emulating all the required layers of
139         <span class="productname">Microsoft Windows NT</span> for it.</p>
140
141         <p>Unfortunately this effort is tainted by only partial and generally
142         insufficient documentation of API between filesystem driver
143         (<span class="fname">ntfs.sys</span>) and the
144         <span class="productname">Microsoft Windows NT</span>
145         (&quot;<a href="http://mail.gnu.org/archive/html/libtool/2000-09/msg00000.html">W32</a>&quot;
146         in the following text) kernel <span class="fname">ntoskrnl.exe</span>. Note
147         that this API is different from the one being used in the $Wine project
148         since <span class="productname">Wine</span> implements only the user space
149         part of W32.</p>
150
151
152 <h1>Architecture</h1>
153
154         <p>The principle of the
155         project lies in the glue between
156         <span class="productname">Microsoft Windows NT</span> kernel space
157         environment and $gnulinux user space process environment:</p>
158
159         @{[ doc_img 'arch-W32','Microsoft Windows Subsystems Architecture' ]}
160         @{[ doc_img 'arch-captive','Captive Subsystems Architecture' ]}
161         
162         <a name="existing_emulation"><h2>Existing Emulation Projects</h2></a>
163
164                 <p>There were two well-known $freespeech projects emulating W32 subsystems
165                 to reach the compatibility with various W32 components:
166                 $Wine and $ReactOS. Sadly the goals of this project do not fit
167                 very well into the role of either of them. Therefore this project went
168                 its own way of emulation:</p>
169
170                 <table align="center" border="1">
171                         <tr>
172                                 <th><a href="#guestosnote">Guest-OS</a></th>
173                                 <th><a href="#hostosnote" >Host-OS</a ></th>
174                                 <th>Implements</th>
175                                 <th>W32 kernel library</th>
176                                 </tr>
177                         <tr>
178                                 <td>$Wine</td>
179                                 <td>$gnulinux</td>
180                                 <td>W32 user space</td>
181                                 <td><span class="fname">ntdll.dll</span></td>
182                                 </tr>
183                         <tr>
184                                 <td>$ReactOS</td>
185                                 <td><span class="constant">i386</span> hardware</td>
186                                 <td>W32 kernel and user space</td>
187                                 <td><span class="fname">ntoskrnl.exe</span></td>
188                                 </tr>
189                         <tr style="height: 1ex;"></tr>
190                         <tr>
191                                 <td>this project</td>
192                                 <td>$gnulinux</td>
193                                 <td>W32 kernel</td>
194                                 <td><span class="fname">ntoskrnl.exe</span></td>
195                                 </tr>
196                         <caption>Emulation Projects Characteristics</caption>
197                 </table>
198
199                 <dl>
200                         <a name="guestosnote"><dt>Guest-OS</dt></a>
201                         <dd><a href="http://www.vmware.com/support/reference/common/glossary/#guestos">Guest OS</a>:
202                                 An operating system that runs inside a&nbsp;virtual machine.</dd>
203                         <a name="hostosnote" ><dt>Host-OS</dt></a>
204                         <dd><a href="http://www.vmware.com/support/reference/common/glossary/#hostos" >Host  OS</a>:
205                                 An operating system that runs on the host machine.</dd>
206                 </dl>
207
208                 <p>While $ReactOS provides the necessary W32 kernel subsystem emulation
209                 code we also need to run such <a href="#guestosnote">Guest-OS</a> in the <a
210                 href="#hostosnote">Host-OS</a> $gnulinux. Initially it was planned to
211                 extend $Wine with the W32 kernel space emulation functionality but
212                 fortunately <span class="author">Steven Edwards</span> pointed to the $ReactOS
213                 which better suits the needs of this project by its already implemented W32
214                 kernel space emulation.</p>
215
216                 <p>The <a name="reactos_nocare">original reasons</a> for developing
217                 $ReactOS still make no sense to the author of this project. Free
218                 implementation of W32 platform standalone running on the machine hardware
219                 is no longer free as most of the W32 applications are usually closed source
220                 and the user still loses their freedom on the application level anyway. Even
221                 in the case of available free applications there still remains the
222                 disadvantage of losing the Host-OS platform availability if implemented in
223                 the $Wine style. For these ideological incompatibilities not much effort was
224                 made for acceptance of the fixes and improvements of $ReactOS by this project.
225                 Moreover new functionality is not being implemented to the $ReactOS part
226                 but it is coded in Gnome style in the project specific source files
227                 place.</p>
228
229                 <p>The most serious problem of $ReactOS is its dependence on the direct
230                 <span class="constant">i386</span> hardware instead of some
231                 <a href="#hostosnote">Host-OS</a> as required by the goals of this project.
232                 W32 is designed to be hardware-independent using its
233                 <span class="fname">hal.dll</span>. Unfortunately $ReactOS does not follow
234                 this design and thus various patches and replacements of its
235                 parts and its hardware-dependent code are needed. Despite that the $ReactOS code
236                 base is still a big asset for this project.</p>
237
238
239
240
241
242                 <p>Some API functions are provided both by
243                 <span class="fname">ntdll.dll</span> and
244                 <span class="fname">ntoskrnl.exe</span> in W32.
245                 <span class="author">Casper Hornstrup</span> explained that the calling
246                 conventions of such functions have to be differentiated as
247                 <span class="fname">ntdll.dll</span> lives in the user space (low address
248                 space -- below <span class="constant">0x80000000</span>) and
249                 <span class="fname">ntoskrnl.exe</span> in the kernel space (high address
250                 space -- above <span class="constant">0x80000000</span>). Although they
251                 contain slightly different set of symbols (functions)
252                 <span class="fname">ntdll.dll</span> still can be considered as a&nbsp;user
253                 space interface to the kernel space implementation by
254                 <span class="fname">ntoskrnl.exe</span>.</p>
255
256                 <p>Currently there are
257                 no plans to ever extend the project's portability beyond the
258                 <span class="constant">i386</span> processor
259                         (<span class="constant">i386</span> used here as
260                         <a href="http://www.intel.com/">Intel</a> architecture covering 32-bit
261                         processors compatible with <span class="constant">i386</span>,
262                         <span class="constant">i486</span>, ...).</p>
263
264         <h2>API Function Implementation Choices</h2>
265
266                 <p>During the initial point of the project development all the API
267                 functions were defined as unimplemented, of course. Any call of such
268                 unimplemented function is fatal and results in program termination. When we
269                 need to implement any required API function we have multiple choices to do
270                 so:
271                 <a href="#functype_pass">Direct pass to original
272                                 <span class="fname">ntoskrnl.exe</span></a>,
273                 <a href="#functype_wrap">Wrap of the original
274                                 <span class="fname">ntoskrnl.exe</span> function</a>,
275                 <a href="#functype_native_reactos">Native implementation</a> -- $ReactOS,
276                 <a href="#functype_native_wine">Native implementation</a> -- $Wine
277                 or
278                 <a href="#functype_native_libcaptive">Native implementation
279                                 -- project specific</a>.</p>
280                 <!-- a href="#functype_undef" Undefined function /a -->
281
282         <h2>&quot;patched&quot; vs. &quot;unpatched&quot; Libraries</h2>
283
284                 <p>Library is called <span class="constant">patched</span> if we require
285                 loading its original binary code file. Project needs to patch it to be able
286                 to trap all the function entry points. The only currently
287                 <span class="constant">patched</span> library of this project is
288                 <span class="fname">ntoskrnl.exe</span>.</p>
289
290                 <p>Library is called <span class="constant">unpatched</span> if no original
291                 binary code is needed since all of its functions are completely emulated by
292                 <a href="#functype_native">the native implementations</a> of this project.
293                 The typical <span class="constant">unpatched</span> representative is
294                 <span class="fname">hal.dll</span> as it specializes on the hardware
295                 dependent code and therefore it must be completely replaced by this project
296                 running in the $gnulinux operating system environment. Early versions of
297                 this project had also full <span class="constant">unpatched</span>
298                 <a href="#native_ntoskrnl">native implementation of
299                 <span class="fname">ntoskrnl.exe</span></a> but it no longer applies.</p>
300
301         <h2>Memory Management</h2>
302
303                 <p>Original <span class="productname">Microsoft Windows NT</span>
304                 architecture uses two address space areas - user space and kernel space.
305                 User space is mapped in the range <span class="constant">0x00000000</span>
306                 to <span class="constant">0x7FFFFFFF</span>, kernel space is mapped in the
307                 range <span class="constant">0x80000000</span>
308                 (<span class="constant">KERNEL_BASE</span> in $ReactOS sources) to
309                 <span class="constant">0xFFFFFFFF</span>. All these virtual memory ranges
310                 represent addresses after their MMU (Memory Management Unit) mapping, of
311                 course. More discussion can be found in the
312                 <a href="http://www.microsoft.com/hwdev/platform/server/PAE/PAEmem.asp">description 
313                 by <span class="productname">Microsoft</span></a>.</p>
314
315                 <p>This project runs in the virtual address space used both for the UNIX
316                 user space process part and for the W32 kernel space. Therefore this
317                 project defines that W32 kernel runs in the whole range
318                 <span class="constant">0x00000000</span> to
319                 <span class="constant">0xFFFFFFFF</span> since there are no special mapping
320                 assumptions about the UNIX user space process mapping. No W32 user space
321                 exists in this project. Such approach also nullifies any special memory
322                 moving operations between W32 kernel space and W32 user space memory areas
323                 (such as <span class="function">MmSafeCopyToUser()</span>).</p>
324
325         <h2>Unicode Strings and Characters</h2>
326
327                 <p>W32 platform uses 16-bit type <span class="type">wchar_t</span> while $gnulinux uses a
328                 32-bit one. This can be a problem during GCC (GNU C&nbsp;Compiler)
329                 compilation of combination of native UNIX C&nbsp;sources (assuming 32-bit
330                 GCC with 32-bit <span class="type">wchar_t</span>) and
331                 $ReactOS C sources (assuming W32 compiler with 16-bit
332                 <span class="type">wchar_t</span>) for literal wide strings
333                 (C source file syntax: <span class="command">L&quot;wstring&quot;</span>).
334                 Possible ways to solve this issue:</p>
335
336                 <ul>
337                         <li>
338                                 <p>Using <span class="constant">-fshort-wchar</span> GCC option and
339                                 strictly differentiate between compilation of
340                                 <span class="productname">ReactOS</span> code and UNIX code.</p>
341
342                                 <p>pros: No source modifications needed, no runtime performance hit.</p>
343
344                                 <p>cons: No type checking if some part of code has bad compilation
345                                 flags, complicated way to completely split
346                                 <span class="productname">ReactOS</span> and UNIX code.</p>
347                         </li>
348                         <li>
349                                 <p>Wrap all <span class="productname">ReactOS</span> literal constants
350                                 by some conversion function call (implemented as macro
351                                 <span class="function">REACTOS_UCS2()</span> by this project).</p>
352
353                                 <p>pros: Any forgotten/mistaken conversions are type-checked and warned
354                                 during the compilation by GCC.</p>
355
356                                 <p>cons: All compiled <span class="productname">ReactOS</span> source
357                                 files containing literal wide strings have to be wrapped/modified,
358                                 performance hit by runtime string conversions.</p>
359
360                                 <p>This solution was chosen to get the internal sanity checking
361                                 benefit.</p>
362                         </li>
363                 </ul>
364
365         <h2>Supported Binary Formats</h2>
366
367                 <p>The native W32 binary format is identified as
368                 <span class="constant">PE-32</span> (Portable Executable 32-bit), such
369                 files have all the usual extensions such as
370                 <span class="fname">.sys</span>, <span class="fname">.exe</span>,
371                 <span class="fname">.dll</span> etc. <span class="constant">PE-32</span>
372                 loading support was already implemented by $ReactOS, its memory mapping
373                 specifics just had to be ported to $gnulinux environment by this project.
374                 This loading support does not (yet) cover importing of debug symbols from
375                 W32 <span class="fname">.PDB</span> (Program DataBase) files in $gnulinux
376                 ABI (Application Binary Interface) compatible way.</p>
377
378                 <p>This project also supports transparent loading of UNIX
379                 <span class="fname">.so</span> (Shared Object file) binary format. If you
380                 have W32 source files for some W32 library you can try to compile it by GCC
381                 to get the shared library with $gnulinux ABI compatible debug information
382                 (GCC option <span class="constant">-ggdb3</span> recommended). Beware of
383                 possible compilation problems as <span class="productname">Microsoft</span>
384                 C&nbsp;code expects <span class="constant">exception</span> handling to be
385                 supported by the compiler (definitely not the case of the plain C compiler
386                 of GCC) --- all the exception catching code should be discarded as any
387                 <a href="#exception_fatal">generated exceptions are always fatal</a> when
388                 such driver is running in the scope of this project. You can use the
389                 following script of this project to compile W32 filesystem source files as
390                 UNIX <span class="fname">.so</span>:
391                 @{[ captive_srcfile 'src/w32-mod/ext2fsd.so-build.sh' ]}</p>
392                 
393                 <p>Be aware of some differences if you use
394                 <span class="constant">PE-32</span> binary format file vs.
395                 <span class="fname">.so</span> format file.
396                 <span class="constant">PE-32</span> use the appropriate W32 specific
397                 <a href="#calltype">cdecl/stdcall/fastcall call types</a>,
398                 <span class="fname">.so</span> must be completely compiled in the standard
399                 UNIX <a href="#calltype_cdecl">cdecl call type semantics</a>.
400                 <a href="#functype_native">Native function implementations</a> do not need
401                 to be explicitly exported by <span class="fname">captivesym</span> as they
402                 are resolved automatically by the UNIX dynamic system linker. It may be
403                 surprising you will have to fix all such missing symbol exports if you
404                 advance during the development from the debugging
405                 <span class="fname">.so</span> file to the production version of the
406                 original <span class="constant">PE-32</span> binary file.</p>
407
408         <h2>Reverse Engineering</h2>
409
410                 <p>This project has no intentions to reverse engineer and document the
411                 filesystem data structures themselves since they are being encapsulated by
412                 the filesystem driver. For these reasons the resources available in
413                 projects such as $LinuxNTFS get out of any possible use. This project goal
414                 is to provide fully compatible API interface to the rest of the W32 system
415                 to persuade the filesystem driver it is running in the native
416                 <span class="productname">Microsoft Windows XP</span> environment.</p>
417
418                 <p>All the W32 filesystem drivers are running in the W32 kernel address
419                 space and this area of W32 API is not much documented by
420                 <span class="productname">Microsoft</span>. Some API functions are not
421                 documented at all and the others are documented insufficiently for their
422                 possibly needed reimplementation from scratch. Documentation being
423                 consulted primarily consists of
424                 <span class="productname"><a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/kmarch/hh/kmarch/kmhdr_6enb.asp">MSDN (Microsoft Developer Network) Kernel-Mode Driver Architecture: Windows DDK</a></span>
425                 documentation and also various other 3rd party documentation resources such as
426                 <span class="productname"><a href="http://www.osr.com/ntinsider/1996/cacheman.htm">The NT Cache Manager Description</a></span>,
427                 <span class="productname"><a href="http://www.winntmag.com/Articles/Print.cfm?ArticleID=3864">Learn About NT's&nbsp;File-system Cache</a></span>,
428                 <span class="productname"><a href="http://www.ntfsd.org/archive/">NT File System Developers mailing list archives</a></span>
429                 including various
430                 <a href="http://www.google.com/search?q=site%3Amicrosoft.com">fulltext searches</a>
431                 through Internet from case to case.</p>
432
433                 <p>Sometimes no sufficient documentation was found and some code behaviour
434                 had to be reverse engineered directly from the binaries of
435                 <span class="fname">ntoskrnl.exe</span>,
436                 <span class="fname">cdfs.sys</span>,
437                 <span class="fname">fastfat.sys</span>
438                 and primarily
439                 <span class="fname">ntfs.sys</span>.
440                 Up to now the code was disassembled by
441                 <span class="productname"><a href="http://www.simtel.net/pub/pd/29498.html">IDA Freeware</a></span>
442                 and by
443                 <span class="productname">dumpbin.exe</span> of
444                 <span class="productname">Microsoft Visual Studio</span>.
445                 <span class="productname">dumpbin.exe</span> is fortunately able to
446                 interpret debug symbols from W32 <span class="fname">.PDB</span>
447                 (Program DataBase) debug information files.</p>
448
449                 <h3><span class="productname">dumpbin.exe</span>:</h3>
450
451                         <p>You should use the following options for
452                         <span class="productname">dumpbin.exe</span>:</p>
453
454                         <blockquote class="command">
455                                 <p>dumpbin.exe /all /rawdata:none /disasm /pdbpath:verbose FILENAME.SYS</p>
456                         </blockquote>
457
458                         <p>You should see the following line in the output:</p>
459
460                         <blockquote class="command">
461                                 <p>PDB file found at '.\\FILENAME.pdb'</p>
462                         </blockquote>
463
464                 <h3><span class="productname">WinDbg</span> Windows NT kernel debugging</h3>
465
466                         <p><span class="productname">WinDbg</span> is downloadable from:
467                         @{[ a_href 'http://www.microsoft.com/whdc/ddk/debugging/installx86.mspx' ]}</p>
468
469                         <p>This is (the only?) tool able to debug filesystem drivers incl.
470                         <span class="fname">ntfs.sys</span>. You will need two computers running
471                         <span class="productname">Microsoft Windows</span> - one computer will run
472                         <span class="productname">WinDbg</span> while the other one will be
473                         frozen in remote Windows NT kernel debug mode. It does not matter which
474                         <span class="productname">Microsoft Windows</span> version will be run
475                         on the <span class="productname">WinDbg</span> side.</p>
476
477                         <p>The easiest way to set up two computers is to use commercial
478                         <span class="productname"><a href="http://www.vmware.com/download/workstation.html">VMware Workstation</a></span>
479                         where you can run two virtual machines simultaneously on single PC
480                         hardware and you can connect them by a virtual serial port provided by
481                         <span class="productname">VMware</span>.</p>
482
483                         <h4><span class="productname">WinDbg</span> side setup</h4>
484
485                                 @{[ doc_img 'ntdebug-vmware-windbg',
486                                                 '<span class="productname">VMware</span> virtual serial port'
487                                                                 .' of <span class="productname">WinDbg</span> side' ]}
488
489                                 <p>You should set up <span class="productname">WinDbg</span> according
490                                 to:</p>
491
492                                 @{[ doc_img 'ntdebug-windbg-port','Port settings of <span class="productname">WinDbg</span>' ]}
493                                 @{[ doc_img 'ntdebug-windbg-sym','Symbols files location of <span class="productname">WinDbg</span>' ]}
494
495                                 <p><span class="constant">Symbols</span> should point to the directory
496                                 containing the files extracted from the symbol archive for your version of
497                                 <span class="productname">Microsoft Windows</span>. In the case of the
498                                 recommended <span class="productname">Microsoft Windows XP Service Pack 1 Checked Build</span>
499                                 you should use:
500                                 @{[ a_href 'http://msdl.microsoft.com/download/symbols/packages/windowsxp/xpsp1sym_x86_chk.exe' ]}</p>
501
502                                 <blockquote class="command">
503                                         <p># Rename xpsp1sym_x86_chk.exe contents .pdb files for WinDbg<br />
504                                         @{[ CGI::escapeHTML(q{for i in *.pdb*;do ext="`echo $i|sed 's/^.*\.pdb\.\(.*\)$/\1/'`";if [ "$i" = "$ext" ];then echo "BAD:$i";break;fi;base="`echo $i|sed 's/\(\.pdb\)\..*$/\1/'`";echo "md $ext";echo "move /-y $i $ext\\$base";done|sort -u|sed 's/$/'`echo -ne '\r'`'/g' >/tmp/rename.bat}) ]}</p>
505                                 </blockquote>
506
507                                 <p>The resulting <span class="command">rename.bat</span> for
508                                 <span class="command">xpsp1sym_x86_chk.exe</span> can be found at:
509                                 @{[ a_href 'xpsp1sym_x86_chk-rename.bat.zip' ]}</p>
510
511                                 <p>The resulting directory should contain at least
512                                 <span class="command">sys\\ntfs.pdb</span>
513                                 and
514                                 <span class="command">exe\\ntoskrnl.pdb</span>.</p>
515
516                                 <p>Your successfully connected target (after the steps described
517                                 below) should look like:</p>
518
519                                 @{[ doc_img 'ntdebug-windbg-boot','Successfully connected <span class="productname">WinDbg</span>' ]}
520
521                         <h4>Setup of the side being kernel-debugged</h4>
522
523                                 @{[ doc_img 'ntdebug-vmware-xpdebug',
524                                                 '<span class="productname">VMware</span> virtual serial port'
525                                                                 .' of the side being kernel-debugged' ]}
526
527                                 <p>You must use the following options in your
528                                 <span class="command">c:\\boot.ini</span> command-line:</p>
529
530                                 <blockquote class="command">
531                                         <p>/debug /debugport=COM1 /baudrate=115200</p>
532                                 </blockquote>
533
534                                 <p>After booting this <span class="command">boot.ini</span>-entry
535                                 should freeze at this point
536                                 (if no <span class="productname">WinDbg</span> is waiting in the other
537                                 virtual machine):</p>
538
539                                 @{[ doc_img 'ntdebug-wait','Side being kernel-debugged waiting for <span class="productname">WinDbg</span>' ]}
540
541
542         <a name="law"><h2>Laws and Licensing Conditions</h2></a>
543
544                 <p>If you are an <span class="productname">authorized user</span> of
545                 <span class="productname">Microsoft Windows NT</span> the laws in some
546                 countries give you the right to fully handle the product in any way you
547                 want. Therefore you can disassemble the product even in the case you had
548                 to agree with the product license forbidding such disassembly as the
549                 country laws override any such license agreement.</p>
550
551                 <h3>Microsoft Service Pack</h3>
552
553                         <p>Sometimes you may have the legal license for
554                         <span class="productname">Microsoft Windows NT</span>
555                         but for various technical reasons you do not have the media and/or
556                         installation ready at the place of intended use of this project.</p>
557
558                         <p>Fortunately <span class="productname">Microsoft</span> provides
559                         $freebeer update packages for its
560                         <span class="productname">Microsoft Windows</span> products called
561                         <span class="productname">Service Packs</span>; the latest one is
562                         <span class="productname"><a href="http://www.microsoft.com/WindowsXP/pro/downloads/servicepacks/sp1/checkedbuild.asp">Microsoft Windows XP Service Pack 1a</a></span>.</p>
563
564                         <p>This downloadable file contains the full versions of the essential
565                         files needed for the current stage of this product:
566                         <span class="fname">ntfs.sys</span>
567                         and
568                         <span class="fname">ntoskrnl.exe</span>.
569                         It even contains
570                         <span class="fname">cdfs.sys</span> and
571                         <span class="fname">fastfat.sys</span> for testing purposes.</p>
572
573                         <p><span class="productname">Service Pack</span> also contains
574                         EULA (End User License Agreement) paper disallowing any use of
575                         <span class="productname">Service Pack</span> outside its original
576                         intentions. According to the laws of some countries you need to be an
577                         <span class="productname">authorized user</span> of the
578                         <span class="productname">Microsoft Windows XP</span> product to be
579                         allowed to use the files contained in such
580                         <span class="productname">Service Pack</span> without the bindings of its
581                         EULA. Even the interpretation of such laws may vary.</p>
582
583                         <p>It would be a&nbsp;breach of the law by the project author to provide
584                         automatic (=hidden) functionality to download and extract the
585                         <span class="productname">Service Pack</span> files. On the other hand it
586                         is perfectly legal to ask user for his/her confirmation whether he/she is
587                         really the <span class="productname">authorized user</span> of
588                         <span class="productname">Microsoft Windows XP</span> product and
589                         download/extract the <span class="productname">Service Pack</span> files
590                         accordingly.</p>
591
592                         @{[ doc_img 'captive-install-acquire-ask','Microsoft Windows Drivers Acquire Affirmation' ]}
593
594         <h2>Project Architecture</h2>
595
596                 @{[ doc_img 'dia/arch-all','Project Components Architecture' ]}
597
598                 <p>Most of the work of this project is located in the single box called
599                 &quot;<span class="constant">libcaptive</span>&quot; located in the center
600                 of the scheme. This component implements the core W32 kernel API by
601                 <a href="#functype">various methods described in this document</a>.
602                 The &quot;<span class="constant">libcaptive</span>&quot; box cannot be
603                 further dissected as it is just an implementation of a&nbsp;set of API
604                 functions. It could be separated to several subsystems such as the Cache
605                 Manager, Memory Manager, Object Manager, Runtime Library, I/O&nbsp;Manager
606                 etc. but they have no interesting referencing structure.</p>
607
608                 <p>As this project is in fact just a&nbsp;filesystem implementation every
609                 story must begin at the device file and end at the filesystem operations
610                 interface. The unified supported interfaces are
611                 <span class="productname"><a href="http://developer.gnome.org/doc/API/2.0/glib/">GLib</a></span>
612                         (the most low level portability, data-types and utility library for Gnome)
613                 <span class="type">GIOChannel</span> (for the device access) and the custom
614                 <span class="constant">libcaptive</span> filesystem API. Each of these ends
615                 can be connected either to some direct interface (such as the
616                 <span class="constant">captive-cmdline</span> client) or it can be connected
617                 as a general $GnomeVFS filter. $GnomeVFS offers nice filter interface on
618                 the UNIX user-privileges level for transparent operation with archives and
619                 network protocols. This filter interface was used by this project to turn
620                 the device reference such as <span class="fname">/dev/hda3</span> or
621                 <span class="fname">/dev/discs/disc0/part3</span> to the fully accessible
622                 filesystem (pretending being an &quot;archive&quot; in the device
623                 reference). This device access can be specified by $GnomeVFS URLs such as:
624                 <span class="fname">file:///dev/hda3#captive-fastfat:/autoexec.bat</span></p>
625                 
626                 <p>If the passed device reference is requested by the user to be accessed
627                 either in <span class="dashdash">--ro</span> (read-only) mode or in the
628                 <span class="dashdash">--rw</span> (full read-write) mode there are no
629                 further device layers needed. Just in the case of
630                 <span class="dashdash">--blind</span> mode another layer is involved to
631                 emulate read-write device on top of the real read-only device by the method
632                 of non-persistent memory buffering of all the possible write requests.</p>
633
634                 <p>Such device is still only a&nbsp;UNIX style GLib <span
635                 class="type">GIOChannel</span> type at this point.  As we need to supply it
636                 to the W32 filesystem driver we must convert it to the W32 I/O&nbsp;Device
637                 with its capability of handling <span class="type">IRP</span>
638                         (<span class="constant">I/O Request Packet</span>; structure holding the
639                         request and result data for any W32 filesystem or W32 block device
640                         operation)
641                 requests from its upper W32 filesystem driver. Such W32 I/O&nbsp;Device can
642                 represent either <span class="type">CD-ROM</span> or
643                 <span class="type">disk</span> device type as different W32 filesystem
644                 drivers require different media types:</p>
645
646                 <h3>cdfs.sys</h3>
647
648                         <p><span class="type">CD-ROM</span> filesystem runs just on the
649                         <span class="constant">FILE_DEVICE_CD_ROM_FILE_SYSTEM</span> device type.
650                         Use <span class="dashdash">--cdrom</span> option of this project for
651                         <span class="fname">cdfs.sys</span>.</p>
652
653                 <h3>fastfat.sys</h3>
654
655                         <p><span class="type">FAT</span> filesystem supports both the (expected)
656                         <span class="constant">FILE_DEVICE_DISK_FILE_SYSTEM</span> device type
657                         but it also supports the reading of
658                         <span class="constant">FILE_DEVICE_CD_ROM_FILE_SYSTEM</span> devices as
659                         you can use <span class="type">FAT</span> filesystem on <span
660                         class="type">CD-ROM</span> media in W32 environment. It is recommended to
661                         use <span class="dashdash">--disk</span> option of this project for
662                         <span class="fname">fastfat.sys</span>.</p>
663
664                 <h3>ext2fsd.sys</h3>
665
666                         <p><span class="type">ext2</span> filesystem supports just the
667                         <span class="constant">FILE_DEVICE_DISK_FILE_SYSTEM</span> device type.
668                         Use <span class="dashdash">--disk</span> option of this project for
669                         <span class="fname">ext2fsd.sys</span>.</p>
670                 
671                 @{[ vskip("3ex") ]}
672
673                 <p>W32 media I/O&nbsp;Device is accessed from the W32 filesystem driver.
674                 The filesystem driver itself always creates volume object by
675                 <span class="function">IoCreateStreamFileObject()</span> representing the
676                 underlying W32 media I/O&nbsp;Device as the object handled by the
677                 filesystem driver itself. All the client application filesystem requests
678                 must be first resolved at the filesystem structures level, passed to the
679                 volume stream object of the same filesystem and then finally passed to the
680                 W32 media I/O&nbsp;Device (already implemented by this project as an
681                 interface to <span class="type">GIOChannel</span> noted above).</p>
682
683                 <p>The filesystem driver is called by the core W32 kernel implementation of
684                 <span class="constant">libcaptive</span> in
685                 <a href="#synchronous">synchronous way</a> in single-shot manner instead of
686                 the several reentrancies while waiting for the disk I/O completions as can
687                 be seen in the original
688                 <span class="productname">Microsoft Windows NT</span>.
689                 This single-shot synchronous behaviour is possible since all the needed
690                 resources (disk blocks etc.) can be always presented as instantly ready as
691                 their acquirement is solved by <a href="#hostosnote">Host-OS</a> outside of
692                 the W32 emulated <a href="#guestosnote">Guest-OS</a> environment.</p>
693
694                 <p><span class="constant">libcaptive</span> offers the W32 kernel
695                 filesystem API to the upper layers. This is still not the API the common
696                 W32 applications are used to as they use W32 libraries which in turn pass
697                 the call to W32 kernel.  For example
698                 <span class="function">CreateFileA()</span> is being implemented by several
699                 libraries such as <span class="fname">user32.dll</span> as a relay
700                 interface for the kernel function
701                 <span class="function">IoCreateFile()</span> implemented by this
702                 project's&nbsp;<span class="constant">libcaptive</span> W32 kernel
703                 emulation component.</p>
704
705                 <p>As it would be very inconvenient to use the legacy, bloated and UNIX
706                 style unfriendly W32 kernel filesystem API this project offers its own
707                 <a href="#client_interface">custom filesystem API interface</a> inspired by
708                 the $GnomeVFS client interface adapted to the specifics of W32 kernel API.
709                 This interface is supposed to be easily utilized by
710                 <a href="#client_interface_customapp">a&nbsp;custom application accessing
711                 the W32 filesystem driver</a>.</p>
712
713                 <p>The rest of the story is not much special for this project since this is
714                 a common UNIX problem how to offer user space implemented UNIX filesystem
715                 as a generic system filesystem (as those are usually implemented only as
716                 the components of UNIX kernel). The most thin implementation would be to
717                 implement <FIXME:LUFS><a href="#fuse_interface">FUSE \bookcitation{FUSE}
718                         (Filesystem in Userspace project for $gnulinux implemented by its own
719                         filesystem code for Linux kernel)
720                 interface</a> for the purpose but such feature is not yet implemented.
721                 Currently this project implements
722                 <a href="#offered_gnomevfs">Gnome-VFS interface</a> allowing its filesystem
723                 access even without any involvement of UNIX kernel from any
724                 $GnomeVFS aware client application (such as
725                 <span class="fname">gnome-vfs/tests/test-shell</span>).
726                 This <a href="#offered_gnomevfs">Gnome-VFS interface</a> connects the data
727                 flow of this project in two points - both as the lowest layer device image
728                 source and also as the upper layer for the filesystem operation
729                 requests.</p>
730
731                 <p>That's&nbsp;all folks!</p>
732
733         <a name="mounted_one"><h2>At Most One Mounted Filesystem</h2></a>
734
735                 <p>The project technically supports only one (exactly one...) mounted
736                 filesystem device and only one filesystem driver. There is nothing
737                 complicated to support multiple disks and multiple loaded filesystem
738                 modules but as they would share the address space it would only bring
739                 possible complications during bug reports and the bug solving
740                 itself.  It was considered as a&nbsp;more sane way to support multiple W32
741                 mounted disks by completely separately running project instances in
742                 different UNIX processes communicating from their sandboxes via
743                 <a href="#todo_sandbox">CORBA sandbox interface</a>. This sandboxing
744                 feature is not yet deployed although its code is already prepared.</p>
745
746                 <p>The project also does not support any state cleanup to be able to load
747                 filesystem&nbsp;<span class="constant">A</span>,
748                 cleanup&nbsp;<span class="constant">A</span> and load a different
749                 filesystem&nbsp;<span class="constant">B</span> in the same process address
750                 space. It complies with the preventions of the possible debugging
751                 complications as noted above. Despite this you still must call the function
752                 <span class="function">captive_shutdown()</span> to flush all the pending
753                 filesystem buffers to the disk. After calling
754                 <span class="function">captive_shutdown()</span> the process address space is
755                 no longer usable for any further project operations and the process is
756                 expected to be terminated in the manner compatible with its driving
757                 <a href="#todo_sandbox">CORBA sandbox interface</a> control master.</p>
758
759                 <p>Each sandbox executing the untrusted W32 binary filesystem driver code
760                 is connected through its
761                 <a href="#todo_sandbox">CORBA sandbox interface</a> at the point of upper
762                 layer <span class="constant">libcaptive</span>-specific filesystem API, at
763                 the point of the bottom layer of <span class="type">GIOChannel</span>
764                 device access and also for transfers of GLib logging
765                 messages/warnings/errors out of the sandbox to the user.</p>
766
767
768 <h1>Choice of the Emulation Methods</h1>
769
770         <p>The intent of the project was to get reliable read-write access to
771         <span class="productname">NTFS</span> partition. There are several possible
772         ways to achieve that:</p>
773
774         <h2>Virtualmachine Running the Original W32 Subsystem</h2>
775
776                 <p>Creating virtual-hardware PC and running the original W32 binaries
777                 including their boot-loader etc. Disk device access would be passed as
778                 virtual IDE disk (=hard disk drive). File access API would be implemented
779                 either by special escaping by some trapped instruction out of the
780                 virtualmachine while using W32 file access API or using the standard W32
781                 SMB (Server Message Block) network access through some virtual network
782                 card. The latter network access solution is almost the currently available
783                 possibility of running full-blown disk-sharing real
784                 <span class="productname">Microsoft Windows NT</span> inside virtual
785                 machine emulator such as <span class="productname">VMware</span>.</p>
786
787                 <p>pros: Full compatibility due to fully native codebase.</p>
788
789                 <p>cons: Hard to debug, missing documentation of NT booting internals,
790                 possible problems by different PC virtual-hardware than expected by NT,
791                 requirement of fully installed
792                 <span class="productname">Microsoft Windows NT</span> product.</p>
793
794         <a name="method_ntoskrnl"><h2>&quot;ntoskrnl.exe&quot; Inside Virtual Address Space</h2></a>
795
796                 <p>This solution was chosen by the project. Binary filesystem driver and
797                 also <span class="fname">ntoskrnl.exe</span> binary file are required.
798                 Unfortunately <span class="fname">ntoskrnl.exe</span> expects a&nbsp;native
799                 PC virtual-hardware missing during regular UNIX user space process
800                 emulation, therefore such instructions must be trapped and emulated/ignored
801                 from case to case.</p>
802
803                 <p>Also the <a name="init_ntoskrnl">initialization code of <span
804                 class="fname">ntoskrnl.exe</span></a> is not executed by this project since
805                 it expects to get full PC hardware access privileges and thus some
806                 datastructures do not get initialized by it (need to be trapped later at
807                 runtime stage). Some of the missing initializations are solved by
808                 <a href="#functype_wrap">API functions wrapping</a>.</p>
809
810                 <p>pros: Lightweight, easier to debug.</p>
811
812                 <p>cons: Possible incompatible emulation of
813                 <span class="fname">ntoskrnl.exe</span> parts, missing documentation needed
814                 for the implementation.</p>
815
816         <h2>Filesystem Driver Inside Virtual Address Space</h2>
817
818                 <p>Unlike <a href="#method_ntoskrnl">previous method</a> here we do not use
819                 even <span class="fname">ntoskrnl.exe</span> as the complete kernel part of
820                 W32 is <a name="native_ntoskrnl">emulated from the project source
821                 files</a>. <span class="fname">cdfs.sys</span> driver was successfully run
822                 in this manner in the former versions of this project but the possibility
823                 to run without <span class="fname">ntoskrnl.exe</span> was dropped since it
824                 had no licensing gains (you need the original
825                 <span class="productname">Microsoft Windows NT</span> files at least for
826                 the filesystem driver itself) and the emulation of undocumented parts
827                 reusable from <span class="fname">ntoskrnl.exe</span> binary was
828                 a&nbsp;pain.</p>
829
830                 <p>pros: Lightweight, easier to debug.</p>
831
832                 <p>cons: Possible incompatible emulation of the whole
833                 <span class="fname">ntoskrnl.exe</span>, its missing documentation.</p>
834
835
836 <h1>Implementation Details</h1>
837
838         <a name="functype"><h2>API Function Implementation Choices</h2></a>
839
840                 <p>For each function exported by W32
841                 <span class="fname">ntoskrnl.exe</span> and imported and called by the
842                 filesystem driver a decision needs to be made to properly implement its
843                 functionality. Currently implemented functionality statistics are provided
844                 below:</p>
845
846                 <FIXME:numbers>
847                 <table border="1" align="center">
848                         <tr><th>Function type                                        </th><th>Items</th><th>Portion</th></tr>
849                         <tr><td><a href="#functype_pass">pass</a>                    </td><td>   46</td><td>    21%</td></tr>
850                         <tr><td><a href="#functype_wrap">wrap</a>                    </td><td>    1</td><td>     0%</td></tr>
851                         <tr><td><a href="#functype_native_reactos">native-ReactOS</a></td><td>   94</td><td>    43%</td></tr>
852                         <tr><td><a href="#functype_native_libcaptive">native-own</a> </td><td>   79</td><td>    36%</td></tr>
853                         <caption>Function Implementation Types Statistics</caption>
854                 </table>
855
856                 <p>As there are several choices to implement each function the usual
857                 attempts/investigations ordering is listed in the sections below.</p>
858
859                 <p>Special care must be taken for data-type symbols since they are
860                 referenced without the possibility of catching the code flow by some
861                 breakpoints (it would be possible only in some special access cases). Data
862                 export symbols of <span class="constant">unpatched</span> libraries must
863                 contain already prepared content at the runtime. There is a&nbsp;problem
864                 with <span class="constant">patched</span> libraries where it is necessary
865                 to also fully implement the data symbol as
866                 <a href="#functype_native">native implementation</a> since there is no
867                 possibility to <a href="#functype_pass">pass</a> the data symbol instead of
868                 the original W32 data location and therefore there will be two instances of
869                 such data variable place. As there will be also the uncaught references for
870                 such W32 data location from the <span class="constant">patched</span>
871                 library itself such symbols should be usually only some constants (such as
872                 <span class="constant">KeNumberProcessors</span>).</p>
873
874                 <p>W32 platform symbols export/import can be based either on the symbol
875                 name itself or it can be also exported and imported just by its
876                 identification number called <span class="constant">Ordinal</span>.
877                 Although it saves some jumptables file binary size it is currently no
878                 longer used by W32 binaries and this project also does not support such
879                 <span class="constant">Ordinal</span> symbol reference type at all.</p>
880
881                 <p>All the exporting magic is handled by custom script
882                 <span class="fname">captivesym</span> processing the definition file
883                 <FIXME:span class="fname">src/libcaptive/ke/exports.captivesym</span> to produce
884                 the intermediate relaying code
885                 <FIXME:span class="fname">src/libcaptive/ke/exports.c</span>. For details of the
886                 <span class="fname">captivesym</span>-specific source file syntax please
887                 see its documentation: <FIXME:span class="fname">doc/captivesym-pod.html</span></p>
888
889                 <a name="functype_pass"><h3>Direct Pass to Original &quot;ntoskrnl.exe&quot;</h3></a>
890
891                         <p>Simple (standalone) functions such as
892                         <span class="function">RtlTimeToSecondsSince1970()</span> can be simply
893                         passed to the original implementation in
894                         <span class="fname">ntoskrnl.exe</span> as they make no hardware access
895                         and they do not expect any special internal data structures to be set up
896                         in advance by an earlier library initialization. A common case are all
897                         the data structures utility functions such as
898                         <span class="constant">GenericTable</span> subsystem or
899                         <span class="constant">LargeMcb</span> handling.</p>
900
901                         <a name="functype_pass_fromunix"><h4>Pass from UNIX Code</h4></a>
902
903                                 <p>Control flow begins in some standard UNIX code. Such code is always
904                                 using <a href="#calltype_cdecl">cdecl call type</a> for all its
905                                 intracalls. <a href="#functype_native_reactos">Native functions
906                                 compiled from <span class="productname">ReactOS</span> sources</a> use
907                                 their own <a href="#calltype">cdecl/stdcall/fastcall</a> declarations
908                                 but these call type modifications are discarded during compilation for
909                                 this project by the <span class="constant">LIBCAPTIVE</span>
910                                 symbol.</p>
911
912                                 <p>UNIX code calls <span class="function">FUNCTIONNAME()</span> relay
913                                 from the generated UNIX jump table. Such relay will debug dump the
914                                 passed arguments and finally pass the control to the original W32
915                                 function code in the proper call type
916                                 <a href="#calltype">cdecl/stdcall/fastcall</a> for a&nbsp;given
917                                 function.</p>
918
919                                 <p>Original W32 code entry point is always trapped by a&nbsp;breakpoint
920                                 although it would not be needed during this specific direct pass from
921                                 UNIX code to the original W32 implementation. Still the breakpoint has
922                                 to be there to catch some other (such as intra-W32) possible calls
923                                 described later. There are several more ways to define breakpoint in
924                                 the code. One way is to use processor hardware breakpoint support but
925                                 the number of breakpoints is limited.  The other way is to patch in the
926                                 <span class="instruction">@{[ 'int $3' ]}</span> instruction but it will invoke
927                                 <span class="constant">SIGTRAP</span> signal handler conflicting with
928                                 the possible debugger (<span class="productname">gdb(1)</span>)
929                                 control. This project uses the <span class="instruction">hlt</span>
930                                 instruction, which also has a&nbsp;single-byte opcode as
931                                 <span class="instruction">@{[ 'int $3' ]}</span> and it is a&nbsp;privileged
932                                 instruction forbidden to be used from the UNIX user space code.
933                                 <span class="instruction">hlt</span> invokes
934                                 <span class="constant">SIGSEGV</span> signal which can be resolved by
935                                 a&nbsp;custom signal handler without any conflict with the possible
936                                 debugger control; <span class="productname">gdb(1)</span> needs the
937                                 following command to pass through such
938                                 <span class="constant">SIGSEGV</span> signal:</p>
939
940                                 <blockquote class="command">
941                                         <p>handle SIGSEGV nostop noprint pass</p>
942                                 </blockquote>
943
944                                 <p>When a breakpoint gets caught, we usually need to return to the
945                                 running code. Unfortunately it is not possible because of the patched
946                                 breakpoint opcode. The breakpoint cannot be simply removed upon return
947                                 as it would permanently lose control over the point of entry. Even if
948                                 the return would include faking of the return address in the bottom
949                                 stack frame to patch the breakpoint back during later function exit it
950                                 still would not solve the catching of inner calls of recursive
951                                 functions. One of the working possibilities would be to patch the
952                                 original instruction back and perform a&nbsp;singlestep provided by
953                                 <span class="function">ptrace(2)</span> syscall. However such
954                                 singlestep needs another controlling UNIX process and it would again
955                                 conflict with the debuggers such as
956                                 <span class="productname">gdb(1)</span>. This project implements the
957                                 singlestep functionality by two consecutive breakpoints
958                                 (<span class="instruction">hlt</span> instructions to be specific):
959                                 The first two instruction addresses of the W32 functions are called
960                                 <span class="productname">slot #1</span> and
961                                 <span class="productname">slot #2</span>, the length of the first
962                                 function instruction has to be analyzed to get the right address of
963                                 <span class="productname">slot #2</span>. When the first breakpoint is
964                                 caught it is necessary to patch the original instruction back and also
965                                 patch another breakpoint in place of
966                                 <span class="productname">slot #2</span>.
967                                 During the <span class="productname">slot #2</span> breakpoint
968                                 invocation the operation will be reverted - the breakpoint will be put
969                                 to <span class="productname">slot #1</span> again and the instruction
970                                 of <span class="productname">slot #2</span> will be restored to be able
971                                 to continue the execution of the function.</p>
972
973                                 <p>W32 function will finish in its specific
974                                 <a href="#calltype">cdecl/stdcall/fastcall call type</a>, the control
975                                 will return to the UNIX jump table relay which will debug dump the
976                                 return value and it will finally pass the control back to the UNIX
977                                 caller in the standard UNIX
978                                 <a href="#calltype_cdecl">cdecl call type</a>.</p>
979
980                                 @{[ doc_img 'fig/functype_patched_pass_fromunix',
981                                                 'Function Type: <span class="constant">pass</span> from UNIX Code' ]}
982
983                         <a name="functype_pass_fromw32"><h4>Pass from W32 Code</h4></a>
984
985                                 <p>This function type is similar to the
986                                 <a href="#functype_pass_fromunix">previous one</a> with the exception
987                                 of more complicated entry point. Unfortunately W32 libraries call their
988                                 own functions directly, using the <span class="instruction">call</span>
989                                 instructions without any patchable jump table. Even the
990                                 <span class="instruction">call</span> argument itself cannot be patched
991                                 according to the relocation table record as such library intra-call
992                                 instruction has no relocation due to its relative argument offset on
993                                 <span class="constant">i386</span>. This time the double-breakpoint
994                                 mechanism <a href="#functype_pass_fromunix">described above</a> gets
995                                 handy since it will catch the entry point when the function gets
996                                 called.  <span class="constant">SIGSEGV</span> handler gets invoked by
997                                 the <span class="instruction">hlt</span> instruction and it will
998                                 redirect the control to the jump table relay function to debug dump the
999                                 function entry arguments (it has no other uses in this call type).</p>
1000
1001                                 <p>When the relay needs to call the original function it will reach
1002                                 exactly the same breakpoint instruction as during the recent
1003                                 <span class="constant">SIGSEGV</span> handling redirecting to this
1004                                 calling relay.  But this time the
1005                                 <span class="constant">through_w32_func</span> field of this function
1006                                 record will be set to prevent repeated redirection and to pass the
1007                                 control through the breakpoint mangle instead this time.</p>
1008
1009                                 <p>Returning is not very interesting as the first
1010                                 <span class="constant">SIGSEGV</span> handler did a&nbsp;straight jump
1011                                 for the redirection purposes without any needed consequent
1012                                 handling.</p>
1013
1014                                 <p>The jump table relay used for the callers from W32 code is
1015                                 a&nbsp;different one than the relay being used for the callers
1016                                 <a href="#functype_pass_fromunix">from UNIX code</a>. UNIX code always
1017                                 uses relay with external <a href="#calltype_cdecl">cdecl call type</a>
1018                                 but in this case a&nbsp;relay with the appropriate
1019                                 <a href="#calltype">cdecl/stdcall/fastcall call type</a> is used.</p>
1020
1021                                 @{[ doc_img 'fig/functype_patched_pass_fromw32',
1022                                                 'Function Type: <span class="constant">pass</span> from W32 Code' ]}
1023
1024                         @{[ vskip() ]}
1025
1026                         <table border="1" align="center">
1027                                 <tr><td><span class="fname">captivesym</span> keyword</td><td>pass</td></tr>
1028                                 <tr><td>Native code function name                    </td><td>(no implementation)</td></tr>
1029                                 <tr><td>W32 traced code from UNIX function name      </td><td>FUNCNAME</td></tr>
1030                                 <tr><td>W32 traced code from W32  function name      </td><td>FUNCNAME_cdecl/_stdcall/_fastcall</td></tr>
1031                                 <tr><td>Entry/exit debug tracing from UNIX code      </td><td>yes</td></tr>
1032                                 <tr><td>Entry/exit debug tracing from W32 code       </td><td>yes</td></tr>
1033                                 <caption>Function Type <span class="constant">pass</span> Characteristics</caption>
1034                         </table>
1035
1036                 <a name="functype_wrap"><h3>Wrap of the Original "ntoskrnl.exe" Function</h3></a>
1037
1038                         <a name="functype_wrap_fromunix"><h4>Wrapping of Call from UNIX Code</h4></a>
1039
1040                                 <p>The code control flow has no special hardcore features since it is
1041                                 very similar to <a href="#functype_pass_fromunix">the direct pass to
1042                                 W32 function from UNIX code</a>. All the wrapping is done in the
1043                                 standard UNIX <a href="#calltype_cdecl">cdecl call type</a> manner.
1044                                 Jump table debug dumping relays are provided twice - the
1045                                 &quot;outer&quot; one to trace the parameters from the function caller
1046                                 and the &quot;inner&quot; one to trace the call from the wrapper to the
1047                                 original W32 code. The &quot;inner&quot; relay also calls the W32 code
1048                                 with the appropriate <a href="#calltype">cdecl/stdcall/fastcall call
1049                                 type</a>.</p>
1050
1051                                 @{[ doc_img 'fig/functype_patched_wrap_fromunix',
1052                                                 'Function Type: <span class="constant">wrap</span> from UNIX Code' ]}
1053
1054                         <a name="functype_wrap_fromw32"><h4>Wrapping of Call from W32 Code</h4></a>
1055
1056                                 <p>This scheme is a&nbsp;combination of the
1057                                 <a href="#functype_wrap_fromunix">previous wrap of a&nbsp;call from
1058                                 UNIX code</a> and the <a href="#functype_pass_fromw32">direct pass from
1059                                 the W32 code</a>. The control is caught and redirected by
1060                                 <span class="constant">SIGSEGV</span> handler from the breakpoint
1061                                 placed at the entry to the original W32 function code. The second entry
1062                                 to the original W32 function with the
1063                                 <span class="constant">through_w32_func</span> field of this function
1064                                 description already set is done from the &quot;inner&quot; jump table
1065                                 relay with the appropriate
1066                                 <a href="#calltype">cdecl/stdcall/fastcall call type</a>.</p>
1067
1068                                 @{[ doc_img 'fig/functype_patched_wrap_fromw32',
1069                                                 'Function Type: <span class="constant">wrap</span> from W32 Code' ]}
1070
1071                         @{[ vskip() ]}
1072
1073                         <p>Some functions can be <a href="#functype_pass">passed to the original
1074                         code</a> but they need their parameters to be checked/prepared.
1075                         Currently, such wrapping is only needed for the
1076                         <span class="function">ExAllocateFromPagedLookasideList()</span> function
1077                         where it is required due to <a href="#init_ntoskrnl">missing execution of
1078                         <span class="fname">ntoskrnl.exe</span> initialization</a>,
1079                         which would otherwise properly initialize some internal data structures.
1080                         In this case the wrapping code detects passing of an uninitialized
1081                         parameter and will search through the whole
1082                         <span class="fname">ntoskrnl.exe</span> code body at runtime to find the
1083                         proper initialization routine containing the correct initialization
1084                         parameters.  Passed addresses of static structures must be differentiated
1085                         as each of them usually has different initialization parameters. It is
1086                         proactive not to have a fixed parameters array as these parameters may
1087                         differ across different <span class="fname">ntoskrnl.exe</span>
1088                         versions.</p>
1089
1090                         <table border="1" align="center">
1091                                 <tr><td><span class="fname">captivesym</span> keyword</td><td>wrap</td></tr>
1092                                 <tr><td>Native UNIX wrapping code function name      </td><td>FUNCNAME_wrap</td></tr>
1093                                 <tr><td>W32 traced wrapping code from UNIX func. name</td><td>FUNCNAME</td></tr>
1094                                 <tr><td>W32 traced wrapping code from W32 func. name </td><td>FUNCNAME_cdecl/_stdcall/...</td></tr>
1095                                 <tr><td>W32 traced original code function name       </td><td>FUNCNAME_orig</td></tr>
1096                                 <tr><td>Entry/exit debug tracing from UNIX code      </td><td>yes</td></tr>
1097                                 <tr><td>Entry/exit debug tracing from W32 code       </td><td>yes</td></tr>
1098                                 <caption>Function Type <span class="constant">wrap</span> Characteristics</caption>
1099                         </table>
1100
1101                 <a name="functype_native"><h3>Native Implementation</h3></a>
1102
1103                         <h4>Native Implementation Called from UNIX Code</h4>
1104
1105                                 <p>This is the simplest case of a&nbsp;function call as it is fully
1106                                 handled only by the compiler and/or linker.</p>
1107
1108                                 <p>In this case though, no debug dumping call relay is provided - such
1109                                 relay would need to rename the implementations of native functions to
1110                                 prevent its automatic linking with the caller code. This renaming would
1111                                 not be possible to do by simple <span class="constant">#define</span>
1112                                 since it would also rename any calling statements of such function in
1113                                 the same C&nbsp;sources.  One of the possibilities to solve this would be to
1114                                 utilize <span class="dashdash">--redefine-sym</span> feature of the
1115                                 <span class="productname">objcopy(1)</span> utility. On the other hand
1116                                 there is not much need to catch/debug such calls as both the caller and
1117                                 the callee are provided with full source file debug information for the
1118                                 debugger. Also the callee usually debug dumps its entry/exit parameters
1119                                 by custom debug dumps in the
1120                                 <a href="#functype_native_reactos"><span class="productname">ReactOS</span> implementations</a>.</p>
1121
1122                                 @{[ doc_img 'fig/functype_native_fromunix',
1123                                                 'Function Type: <span class="constant">native</span> from UNIX Code' ]}
1124
1125                         <a name="functype_native_fromw32"><h4>Native Implementation of
1126                                         &quot;unpatched&quot; Library Function Called from W32 Code</h4></a>
1127
1128                                 @{[ doc_img 'fig/functype_unpatched_native_fromw32',
1129                                                 'Function Type: <span class="constant">native</span> of <span class="constant">unpatched</span> from W32 Code' ]}
1130
1131                                 <p>Here comes the differentiation if the project deals either with
1132                                 a&nbsp;<span class="constant">patched</span> or an
1133                                 <span class="constant">unpatched</span> version of the library
1134                                 (<span class="constant">patched</span> is a&nbsp;loaded W32 binary
1135                                 library while <span class="constant">unpatched</span> library is
1136                                 completely provided by this project with no use of the library's
1137                                 original W32 binary file). As the project adjusts the exported symbol
1138                                 address during the patching operation, in some cases the
1139                                 <span class="constant">patched</span> library call may be handled
1140                                 simply as <span class="constant">unpatched</span> library call even for
1141                                 the <span class="constant">patched</span> libraries. Fortunately the
1142                                 distinction is not very important as the project is prepared to
1143                                 properly handle both cases.</p>
1144
1145                                 <p>The W32 caller which imported the symbol will be pointed right to
1146                                 the relaying function. The debug dumping relay will be called from W32
1147                                 code with the appropriate
1148                                 <a href="#calltype">cdecl/stdcall/fastcall call type</a> while the
1149                                 relay will call the implementation of the native function in the
1150                                 standard UNIX <a href="#calltype_cdecl">cdecl call type</a> manner.</p>
1151
1152                         <h4>Native Implementation of &quot;patched&quot; Library Function Called from W32 Code</h4>
1153
1154                                 @{[ doc_img 'fig/functype_patched_native_fromw32',
1155                                                 'Function Type: <span class="constant">native</span> of <span class="constant">patched</span> from W32 Code' ]}
1156
1157                                 <p>The calling scheme is similar to the
1158                                 <a href="#functype_native_fromw32">previous call of
1159                                 <span class="constant">unpatched</span> library function from W32
1160                                 code</a> but the call control is redirected from the entry point of the
1161                                 original W32 binary implementation by the breakpoint and its
1162                                 <span class="constant">SIGSEGV</span> handler as in
1163                                 <a href="#functype_pass_fromw32">the case of passing control from W32
1164                                 call</a>.</p>
1165
1166                                 <p>The original W32 function implementation located in the original
1167                                 loaded binary file is never executed but its entry point needs to be
1168                                 trapped by the breakpoint to be able to catch the function calls within
1169                                 the library.</p>
1170
1171                         @{[ vskip() ]}
1172
1173                         <p>In all cases the final function implementation is a&nbsp;standard UNIX
1174                         code compiled from C&nbsp;sources with full debug information available
1175                         for the debugger. Fortunately all such functions do not need to be coded
1176                         from scratch for this project since there already exist $freespeech
1177                         $ReactOS and $Wine projects and their code can be used instead.</p>
1178
1179                         <p>$Wine project is listed mostly for completeness as almost no
1180                         code was suitable for reuse as it implements W32 user space while this
1181                         project is running pure W32 kernel space environment (in $gnulinux user
1182                         space!).</p>
1183
1184                         <a name="functype_native_reactos"><h4>Native Implementation
1185                                         - <span class="productname">ReactOS</span></h4></a>
1186
1187                                 <p>Some functions are already implemented in the $ReactOS
1188                                 project and they can be used as they are.  Although it would be
1189                                 possible to <a href="#functype_pass">pass some function calls to the
1190                                 original code</a> it is more handy to provide native implementation as
1191                                 there is better control of the data handling during debugging sessions
1192                                 due to the provided debugging symbols.</p>
1193
1194                                 <p>Such functions can be found in
1195                                 <span class="fname">src/libcaptive/reactos/</span> subdirectory.
1196                                 Some functions had to be adjusted for this project
1197                                 - these modifications are compiled conditionally, depending on the
1198                                 <span class="constant">LIBCAPTIVE</span> symbol existence.</p>
1199
1200                                 <p>Later stages of this project reached the level where
1201                                 $ReactOS is yet too immature and the needed functions are usually
1202                                 written just with the sad body:</p>
1203
1204                                 <blockquote class="command">
1205                                         <p>UNIMPLEMENTED;</p>
1206                                 </blockquote>
1207
1208                                 <p>Functions that were not possible to
1209                                 <a href="#functype_pass">pass</a> were reimplemented by this project
1210                                 and placed in the project's implementation directories
1211                                 <a href="#reactos_nocare">instead of extending</a> $ReactOS code.</p>
1212
1213                         <a name="functype_native_wine"><h4>Native Implementation - <span class="productname">Wine</span></h4></a>
1214
1215                                 <p>Even though $Wine only implements the
1216                                 <span class="productname">Microsoft Windows NT</span> user space, there
1217                                 still are some common functions which could be copied from the $Wine
1218                                 project.</p>
1219
1220                         <a name="functype_native_libcaptive"><h4>Native Implementation - Project Specific</h4></a>
1221
1222                                 <p>As the last resort it was necessary to provide the project's own
1223                                 implementation of some API functions such as PC hardware dependent
1224                                 parts or memory management functions.</p>
1225
1226                         @{[ vskip() ]}
1227
1228                         <table border="1" align="center">
1229                                 <tr><td><span class="fname">captivesym</span> keyword</td><td>(none; just the symbol name)</td></tr>
1230                                 <tr><td>Native code function name                    </td><td>FUNCTIONNAME</td></tr>
1231                                 <tr><td>Native traced code from W32 code func. name  </td><td>FUNCTIONNAME_cdecl/_std...</td></tr>
1232                                 <tr><td>Entry/exit debug tracing from UNIX code      </td><td>no</td></tr>
1233                                 <tr><td>Entry/exit debug tracing from W32 code       </td><td>yes</td></tr>
1234                                 <caption>Function Type <span class="constant">native</span> Characteristics</caption>
1235                         </table>
1236
1237                 <a name="functype_undef"><h3>Undefined Function</h3></a>
1238
1239                         <p>Functions not defined by any of the previous function types cannot be
1240                         called by any W32 code including the code of the library implementing
1241                         such function. All functions of <span class="constant">patch</span>ed
1242                         libraries not listed in the <span class="fname">captivesym</span> exports
1243                         file are automatically set to be trapped as fatal program execution
1244                         errors.</p>
1245
1246                         <p>It is not necessary to list the symbols as
1247                         <span class="constant">undef</span> as long as you are just loading the
1248                         W32 <span class="constant">PE-32</span> code and the symbols belong to
1249                         <span class="constant">patch</span>ed library. On the other hand if you
1250                         are loading W32 <span class="fname">.so</span> code or if such symbol is
1251                         a&nbsp;part of <span class="constant">unpatched</span> library (and thus
1252                         being completely provided by the project) you need to list such symbol as
1253                         <span class="constant">undef</span> type to prevent unresolved symbol
1254                         reference.</p>
1255
1256                         <table border="1" align="center">
1257                                 <tr><td><span class="fname">captivesym</span> keyword</td><td>undef</td></tr>
1258                                 <tr><td>Native code function name                    </td><td>(no implementation)</td></tr>
1259                                 <tr><td>Native traced code function name             </td><td>FUNCTIONNAME_cdecl/_stdcall/_fastcall</td></tr>
1260                                 <tr><td>Debug tracing message from UNIX code         </td><td>yes</td></tr>
1261                                 <tr><td>Debug tracing message from W32 code          </td><td>yes</td></tr>
1262                                 <caption>Function Type <span class="constant">undef</span> Characteristics</caption>
1263                         </table>
1264
1265         
1266         <a name="calltype"><h2>API Function Calling Conventions</h2></a>
1267
1268                 <p>Standard UNIX code compiled by GCC (GNU C&nbsp;Compiler) running on host
1269                 $gnulinux always uses <a href="#calltype_cdecl">cdecl</a> ABI (Application
1270                 Binary Interface) calling convention. This calling convention is also the
1271                 default declaration type of UNIX functions.</p>
1272
1273                 <p>W32 uses three different calling conventions in its ABI. They are all
1274                 described in the
1275                 <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vclang/html/_core_argument_passing_and_naming_conventions.asp"><span class="productname">Microsoft</span> documentation</a>.
1276                 It is always necessary to have the proper function declaration
1277                 (prototype) in the caller scope to prevent all sorts of unexpected
1278                 crashes.</p>
1279
1280                 <p>Unfortunately some non-matching combinations of calling conventions
1281                 result in hard to debug bugs: the caller gets back an unexpected stack
1282                 pointer from the callee and upon return it will restore registers from the
1283                 wrong stack pointer place. Since the caller will finally reclaim its stack
1284                 frame from its (uncorrupted) <span class="constant">EBP</span> stack frame
1285                 pointer the caller will return to the caller of the caller correctly. Just
1286                 the registers remain corrupted causing crashes of completely unrelated code
1287                 executed far, far away...</p>
1288
1289                 <p><span class="constant">EDI</span>, <span class="constant">ESI</span> and
1290                 <span class="constant">EBX</span> registers are always saved on the stack.
1291                 They are stored on the stack in this particular order from bottom to top
1292                 addresses (using the <span class="instruction">push EBX</span>,
1293                 <span class="instruction">push ESI</span>,
1294                 <span class="instruction">push EDI</span> sequence). Fortunately $gnulinux
1295                 GCC has the same register saving behaviour. If some register corruption
1296                 occurs the calling type presented between the caller and callee should be
1297                 checked.</p>
1298
1299                 <a name="calltype_cdecl"><h3>W32 Calling Convention &quot;cdecl&quot;</h3></a>
1300
1301                         <p>The only calling convention in the UNIX world. The default one for all
1302                         the compilers. All the arguments are passed on the stack, no arguments
1303                         are cleaned by the callee. Possible inconsistencies in the number of
1304                         function arguments with the function prototype used by the caller are
1305                         harmless. Variable arguments lists can be passed by this convention.</p>
1306
1307                         @{[ doc_img 'fig/calltype_cdecl',
1308                                         'W32 Calling Convention <span class="constant">cdecl</span> Scheme' ]}
1309
1310                         <table border="1" align="center">
1311                                 <tr><td>Arguments freed by         </td><td>caller</td></tr>
1312                                 <tr><td>Arguments on the stack     </td><td>#0 ... #(n-1)</td></tr>
1313                                 <tr><td>Arguments in the registers </td><td>none</td></tr>
1314                                 <tr><td>GCC attribute              </td><td><span class="command">__attribute__((__cdecl__))</span> (default)</td></tr>
1315                                 <caption>Calling Convention <span class="constant">cdecl</span> Characteristics</caption>
1316                         </table>
1317
1318                 <h3>W32 Calling Convention &quot;stdcall&quot;</h3>
1319
1320                         @{[ doc_img 'fig/calltype_stdcall',
1321                                         'W32 Calling Convention <span class="constant">stdcall</span> Scheme' ]}
1322
1323                         <p>Convention never used in the UNIX world. It needs to be specified for
1324                         W32 compilers. All the arguments are passed on the stack, all the
1325                         arguments are cleaned by the callee. Possible inconsistencies in the
1326                         number of function arguments with the function prototype used by the
1327                         caller will result in fatal crash. Variable arguments lists cannot be
1328                         passed by this convention - use <a href="#calltype_cdecl">cdecl</a>
1329                         instead.</p>
1330
1331                         <table border="1" align="center">
1332                                 <tr><td>Arguments freed by         </td><td>callee</td></tr>
1333                                 <tr><td>Arguments on the stack     </td><td>#0 ... #(n-1)</td></tr>
1334                                 <tr><td>Arguments in the registers </td><td>none</td></tr>
1335                                 <tr><td>GCC attribute              </td><td><span class="command">__attribute__((__stdcall__))</span></td></tr>
1336                                 <caption>Calling Convention <span class="constant">stdcall</span> Characteristics</caption>
1337                         </table>
1338
1339                 <h3>W32 Calling Convention &quot;fastcall&quot;</h3>
1340
1341                         <p>Convention never used in the UNIX world. It needs to be specified for
1342                         W32 compilers. Convention used in the W32 world for its low calling
1343                         overhead. All but the first two arguments are passed on the stack, such
1344                         arguments are cleaned by the callee. First two arguments are passed in
1345                         the registers <span class="constant">ECX</span> and
1346                         <span class="constant">EDX</span> respectively. Possible inconsistencies
1347                         in the number of function arguments with the function prototype used by
1348                         the caller will result in fatal crash. Variable arguments lists cannot be
1349                         passed by this convention - use <a href="#calltype_cdecl">cdecl</a>
1350                         instead.</p>
1351
1352                         <p>GCC (GNU C&nbsp;Compiler) native support for this calling convention
1353                         is pretty fresh and it is currently present only in the recent CVS
1354                         versions since 21st December of 2002 which should get released as GCC
1355                         version 3.4. This project solved the unsupported calling convention by
1356                         declaration of arguments passed in registers by
1357                         <span class="command">__attribute__((__regparm__(3)))</span>.
1358                         W32 passes the arguments in registers in the order
1359                         <span class="constant">ECX</span>, <span class="constant">EDX</span> but
1360                         GCC passes them in registers <span class="constant">EAX</span>,
1361                         <span class="constant">EDX</span>, <span class="constant">ECX</span>.
1362                         This incompatibility is compensated at C&nbsp;source level in the
1363                         <a href="#functype">relaying code</a> generated by
1364                         <span class="fname">captivesym</span> relay generator.</p>
1365
1366                         @{[ doc_img 'fig/calltype_fastcall',
1367                                         'W32 Calling Convention <span class="constant">fastcall</span> Scheme' ]}
1368
1369                         <table border="1" align="center">
1370                                 <tr><td>Arguments freed by         </td><td>callee</td></tr>
1371                                 <tr><td>Arguments on the stack     </td><td>#2 ... #(n-1)</td></tr>
1372                                 <tr><td>Arguments in the registers </td><td><span class="constant">ECX</span>=#0,
1373                                                                             <span class="constant">EDX</span>=#1</td></tr>
1374                                 <tr><td>GCC &ge;3.4 attribute      </td><td><span class="command">__attribute__((__fastcall__))</span></td></tr>
1375                                 <tr><td>GCC &lt;3.4 attr. emulation</td><td><span class="command">__attribute__((__stdcall__))</span></td></tr>
1376                                 <tr><td>                           </td><td><span class="command">__attribute__((__regparm__(3) /* EAX,EDX,ECX */))</span></td></tr>
1377                                 <caption>Calling Convention <span class="constant">fastcall</span> Characteristics</caption>
1378                         </table>
1379
1380         <a name="synchronous"><h2>Multithreading and Multiple Processors</h2></a>
1381
1382                 <p>W32 platform stands on its&nbsp;thorough architecture parallelism. It
1383                 must lock all its objects to maintain coherence in presence of
1384                 multithreading and multiple processors. Since the author of this project
1385                 considers any parallel execution a serious obstacle for debugging, the whole
1386                 project architecture was designed to prevent any nondeterministic behaviour.
1387                 Therefore this project always emulates uniprocessor
1388                 <span class="productname">Microsoft Windows NT</span> kernel
1389                 (<span class="constant">KeNumberProcessors</span> symbol is always 1),
1390                 everything runs in the single initial thread/process and all the filesystem
1391                 operations are performed as synchronous
1392                         (&quot;synchronous&quot; by flags
1393                         <span class="constant">FILE_SYNCHRONOUS_IO_ALERT</span>,
1394                         <span class="constant">FO_SYNCHRONOUS_IO</span>,
1395                         <span class="constant">IRP_SYNCHRONOUS_API</span>,
1396                         <span class="constant">IRP_SYNCHRONOUS_PAGING_IO</span>,
1397                         forced <span class="constant">TRUE</span> result of
1398                         <span class="function">IoIsOperationSynchronous()</span>
1399                         etc.).
1400                 <span class="constant">STATUS_PENDING</span> result code indicating that
1401                 request should be completed in the next callback of the driver is
1402                 considered <a href="#paranoia">fatal</a> as it should not happen for the
1403                 requested synchronous <span class="constant">IRP</span>s (I/O Request
1404                 Packets). Since there is a&nbsp;possibility some filesystem would require
1405                 a&nbsp;real W32 parallel thread all the code that would be hit by W32
1406                 multithreading capability is marked by
1407                 <span class="constant">TODO:thread</span> comment for a&nbsp;possible
1408                 future extension.</p>
1409
1410                 <p>Multiple processors (SMP) support will never need to be implemented
1411                 since uniprocessor W32 kernels apparently run the filesystem driver modules
1412                 fine. As this project implements only the uniprocessor W32 kernel all the
1413                 processor locking functions and structures such as
1414                 <span class="constant">KSPIN_LOCK</span> etc. can be safely implemented as
1415                 no-operations.</p>
1416
1417                 <p>Asynchronous callbacks registered for
1418                 <span class="constant">IO_WORKITEM</span>s are passed as GLib idle
1419                 functions by <span class="function">g_idle_add_full()</span>. Although they
1420                 will probably never be executed during non-interactive project's batch
1421                 executions it is the&nbsp;responsibility of W32 driver implementation to
1422                 complete all the pending tasks before its W32 shutdown. Such W32 shutdown
1423                 is done during cleanup of the project's&nbsp;execution by
1424                 <span class="function">captive_shutdown()</span>.</p>
1425
1426         <a name="paranoia"><h2>Paranoia Checks</h2></a>
1427
1428                 <p>A&nbsp;general approach of software projects development is to implement
1429                 many internal sanity checks during the development stage but to produce the
1430                 most optimized final release product without those debugging checks.</p>
1431
1432                 <p>Facilities for these practices can be seen in the standard
1433                 C&nbsp;include files for example as function
1434                 <span class="function">assert()</span> which gets disabled by the
1435                 <span class="constant">NDEBUG</span> symbol used during the final optimized
1436                 executable compilation. This project uses Gnome GLib messaging subsystem
1437                 offering sanity checks discarded by symbols
1438                 <span class="constant">G_DISABLE_ASSERT</span> and
1439                 <span class="constant">G_DISABLE_CHECKS</span>.
1440                 <span class="productname">Microsoft</span> also produces two versions of
1441                 its products - regular customers use the &quot;free build&quot; (also
1442                 called &quot;retail&quot;) while the programmers should develop their code
1443                 on the &quot;checked build&quot; product releases.</p>
1444
1445                 <p>As this project will always run unknown binary code of proprietary W32
1446                 filesystem drivers, the code can never be trusted. Such code even runs in
1447                 the same unprotected address space as its controlling UNIX code. Since
1448                 there is not enough documentation for the W32 components of the system and
1449                 also such documentation is usually misleading it can never be considered as
1450                 100% emulation. Even in the final releases all the sanity checks
1451                 implemented in this project should remain active as all the project's code
1452                 always interacts with unknown and untrusted W32 binaries.</p>
1453
1454                 <p><span class="productname">Microsoft Windows NT</span> code is written in
1455                 a&nbsp;foolproof style as it accepts even invalid input values, which
1456                 it usually corrects. This makes long-term debugging a&nbsp;pain as it hides
1457                 sources of problems. &quot;Checked build&quot; releases were probably
1458                 designed to fix this flaw by strict consistency checks but it did not reach
1459                 its goals as such checks are usually missing in the code.</p>
1460
1461                 <p>This project has strict consistency checks across all the code to make
1462                 the debugging phase easy enough. Failed sanity check is not always
1463                 a&nbsp;bug - sometimes it just means the real W32 binary code is more
1464                 benevolent than it could be expected according to the documentation and
1465                 such sanity check gets removed for the next version build. In other cases
1466                 the failed sanity checks mean the execution path for some unexpected
1467                 arguments combination was not yet implemented by this project. It may also
1468                 mean a bug, of course...</p>
1469
1470                 <p>Last but not least - never miss a&nbsp;possible sanity check as its
1471                 later removal is an order of magnitude cheaper than an&nbsp;uncaught
1472                 invalid assumption. Failed assertion is not always a&nbsp;bug although it
1473                 has to be fixed, of course.</p>
1474
1475         <a name="client_interface"><h2>Client Filesystem Interface</h2></a>
1476
1477                 <p>While this project successfully communicates with the W32 filesystem
1478                 driver (considered as the lower layer) it must also somehow offer its open
1479                 filesystem interface service to some real client software (upper layer).
1480                 This project offers its own custom filesystem operations interface of <span
1481                 class="constant">libcaptive</span> library based on GLib
1482                 <span class="constant">GObject</span> OO system. Interface prototypes are
1483                 specified in the project's&nbsp;<span class="fname">client-*.h</span>
1484                 include files.</p>
1485
1486                 <p>The filesystem service can be offered in several ways:</p>
1487
1488                 <ul>
1489                         <li>
1490                                 <p>One possibility would be to write
1491                                 <a name="client_interface_customapp">a custom client application</a>
1492                                 for this project such as file manager or a&nbsp;shell. Although it
1493                                 would implement the most appropriate user interface to the set of
1494                                 functions offered by this project (and W32 filesystem API) it has the
1495                                 disadvantage of special client software. Appropriate client is provided
1496                                 by this project as:
1497                                 <span class="fname">src/client/cmdline/cmdline-captive</span></p>
1498                         </li>
1499                         <li>
1500                                 <p>The real UNIX OS filesystem implementation must be completely
1501                                 implemented inside the hosting OS kernel. This requires special coding
1502                                 methods with limited availability of coding features and libraries.
1503                                 Also it would give the full system control to the untrusted W32
1504                                 filesystem driver code with possibly fatal consequences of yet
1505                                 unhandled W32 emulation code paths. It would benefit from the best
1506                                 execution performance but this solution was never considered a real
1507                                 possibility.</p>
1508                         </li>
1509                         <li>
1510                                 <p>The common approach
1511                                 <a name="offered_NFS">of filesystem implementations</a>
1512                                 outside the UNIX OS kernel was to use custom NFS servers, usually running on the
1513                                 same machine as the NFS-connected client as such NFS server is usually
1514                                 an ordinary UNIX user space process. It would be possible to implement
1515                                 this project as a&nbsp;custom NFS server but the NFS protocol itself
1516                                 has a&nbsp;lot of fundamental flaws and complicated code for backward
1517                                 compatibility.</p>
1518                         </li>
1519                         <li>
1520                                 <p>Currently there is already implemented
1521                                 <a name="offered_gnomevfs"><a href="#offered_gnomevfs_todo">Gnome-VFS interface</a></a>
1522                                 to the custom filesystem interface of this project's&nbsp;library <span
1523                                 class="constant">libcaptive</span>.
1524                                 The $GnomeVFSmodule can be used by a&nbsp;Gnome-VFS aware client (such
1525                                 as <span class="fname">gnome-vfs/tests/test-shell</span>).</p>
1526
1527                                 <FIXME:lufs-gvfs>
1528                                 <p>The <span class="productname">Gnome-VFS-module</span> can be further
1529                                 utilized by the <span class="productname">UserVFS</span>
1530                                 \bookcitation{UserVFS-2.0} software ported to provide local <span
1531                                 class="productname">Coda</span> \bookcitation{Coda} network filesystem
1532                                 server implementation similar to the <a href="#offered_NFS">NFS
1533                                 server</a> solution but with much more acceptable network protocol ---
1534                                 more about this actual scheme can be found in \link{architecture}{the
1535                                 project architecture description}.</p>
1536                         </li>
1537                         <li>
1538                                 <FIXME:LUFS>
1539                                 <p>Direct interface for the Host-OS kernel would be provided
1540                                 by the
1541                                 \label{fuse_interface}
1542                                 <span class="productname">FUSE</span> \bookcitation{FUSE} project \link{offered_FUSE}{described
1543                                 later in this document}. This interface is currently not yet implemented.
1544                                 Although it would be much more straightforward than
1545                                 <a href="#offered_gnomevfs">Gnome-VFS interface</a> described above,
1546                                 its biggest disadvantage would be the requirement to replace/update
1547                                 the stock distribution's kernel package as it usually does not
1548                                 have the <span class="productname">FUSE</span> \bookcitation{FUSE} filesystem support while it already supports
1549                                 the <span class="productname">Coda</span> \bookcitation{Coda} interface, which is sufficient for the
1550                                 ported <span class="productname">UserVFS</span> \bookcitation{UserVFS-2.0} interface.</p>
1551                         </li>
1552                 </ul>
1553
1554         <h2>3rd Party Projects Bugfixes</h2>
1555
1556                 <p>Implementation of this project required certain bugfixes to 3rd party
1557                 software packages:</p>
1558
1559                 <h3>GNU Libtool, A&nbsp;Generic Library Support Script</h3>
1560                 
1561                         <p><span class="productname"><a href="http://www.gnu.org/software/libtool/">libtool</a></span>:
1562                         Handle duplicate object file names when performing piecewise archive
1563                         linking by renaming object files when needed.</p>
1564
1565                 <h3>dosfstools, MS-DOS FAT Filesystems Support on Linux</h3>
1566
1567                         <p><span class="productname"><a href="ftp://ftp.uni-erlangen.de/pub/Linux/LOCAL/dosfstools/">dosfstools</a></span>:
1568                         Prevent generation of <span class="constant">FAT-32</span> filesystems
1569                         not supported by the (buggy?) W32 platform
1570                         <span class="fname">fastfat.sys</span> implementation.</p>
1571
1572                 <h3>ext2fsd, Ext2 File System Driver</h3>
1573
1574                         <p><span class="productname"><a href="http://sys.xiloo.com/projects/projects.htm#ext2fsd">Ext2fsd</a></span>:
1575                         Many filesystem corruption fixes, missing filesystem unregistration
1576                         etc.</p>
1577
1578
1579 <h1>Further Development</h1>
1580
1581         <p>All the W32 filesystem operations of <span class="fname">cdfs.sys</span>,
1582         <span class="fname">fastfat.sys</span>
1583         and
1584         <span class="fname">ext2fsd.sys</span> can be successfully executed.
1585         The further development tasks include:</p>
1586
1587         <ul>
1588                 <li>
1589                         <p>The primary goal is to reach <span class="productname">NTFS</span>
1590                         filesystem (<span class="fname">ntfs.sys</span>) compatibility.
1591                         A&nbsp;lot of imported symbols are missing although it is expected most of
1592                         them can be just safely passed for execution in the original
1593                         <span class="fname">ntoskrnl.exe</span>.</p>
1594                 </li>
1595                 <li>
1596                         <p>There may still be valid code paths where some emulated W32 kernel
1597                         functionality and symbols remain unimplemented as these code paths were
1598                         just not hit during testing. The proper way would be to check all the
1599                         possibilities of such code paths execution from the filesystem driver
1600                         code disassembly.</p>
1601                 </li>
1602                 <li>
1603                         <p>No unusual error codes are expected from the filesystem drivers and
1604                         any such return codes will abort the project's execution. For example
1605                         code <span class="constant">STATUS_NO_SUCH_FILE</span> is expected and
1606                         correctly recognized but
1607                         <span class="constant">STATUS_FILE_CORRUPT_ERROR</span> will stop driver
1608                         execution.</p>
1609
1610                         <p><a name="exception_fatal">No exceptions in W32 code are allowed</a>
1611                         - any thrown exception will result in driver execution abortion (instead
1612                         of just returning some error code as in the original W32 environment).</p>
1613
1614                         <p>These issues should cease to be a&nbsp;problem after deployment of
1615                         sandbox wrapper which will restart the filesystem driver after any
1616                         unexpected error.</p>
1617                 </li>
1618                 <li>
1619                         <p><a name="todo_sandbox">Completion and activation of the sandbox
1620                         wrapper.</a> <span class="fname">src/libcaptive/sandbox/</span> sources
1621                         currently implement the base of both the client and the server sides of
1622                         CORBA interface to separate the client calling filesystem operations from
1623                         the W32 filesystem driver itself. Although CORBA usually makes sense for
1624                         crossmachine network interconnections here it gets a&nbsp;role of
1625                         inter-process interface between the regular client process and the
1626                         <span class="constant">chroot</span>ed/unprivileged/<span class="constant">ulimit</span>ed
1627                         environment of the W32 emulation address space.</p>
1628
1629                         <p>Any W32 binary file must be always considered untrusted and therefore
1630                         it is needed to be sandboxed and accessible only via the CORBA interface.
1631                         Furthermore it is needed for clean implementation of $GnomeVFSmodule as
1632                         this project always handles <a href="#mounted_one">exactly one mounted
1633                         filesystem</a> but $GnomeVFSmodule interface expects unlimited number of
1634                         mounts in the scope of one process.</p>
1635                 </li>
1636                 <li>
1637                         <p>Project offers
1638                         <a name="offered_gnomevfs_todo">the filesystem access as its custom UNIX API</a>
1639                         (<span class="fname">captive/client-*.h</span>). This API is currently
1640                         offered in the scope of $GnomeVFSmodule interface as a filter applied to
1641                         the filesystem device (or filesystem image file).
1642                         As $GnomeVFS has no officially supported method of generic $gnulinux
1643                         kernel filesystem access it may be better to provide
1644                         <FIXME:LUFS><a name="offered_FUSE">an interface</a> for <span
1645                         class="productname">FUSE</span> \bookcitation{FUSE} instead.</p>
1646
1647                         <p>To get transparent access to W32 filesystems from legacy
1648                         (=non <span class="productname">Gnome-VFS-2.0</span> aware) applications it is possible to use a draft
1649                         port \bookcitation{UserVFS-2.0} of the original <span class="productname">UserVFS</span>
1650                         \bookcitation{UserVFS} to <span class="productname">Gnome-VFS-2.0</span> interface.
1651                         It is also possible to use the test utilities of <span class="productname">Gnome-VFS-2.0</span> \bookcitation{GnomeVFS} package.</p>
1652                 </li>
1653                 <li>
1654                         <p>Implementation of interface to this project by
1655                         <span class="productname"><a href="http://surprise.sourceforge.net/">Partition Surprise</a></span>
1656                         partition manager. Although there currently exists
1657                         <span class="productname"><a href="http://mlf.linux.rulez.org/mlf/ezaz/ntfsresize.html">ntfsresize</a></span>
1658                         it is a data structures reverse engineered solution which may have
1659                         problems on various hard drives. <span class="productname">Partition
1660                         Surprise</span> project would be able to resize the disk safely by using
1661                         just the original W32 filesystem driver file although with some
1662                         performance hit.</p>
1663                 </li>
1664         </ul>
1665
1666
1667 <h1>Related Projects</h1>
1668
1669         <p>The usual solution for file exchange between $freespeech operating systems
1670         and <span class="productname">Microsoft Windows NT</span> is to use
1671         <span class="productname">FAT32</span> (called
1672         <span class="productname">vfat</span> in $gnulinux) partition and swap the files over it. This method is not
1673         very comfortable as you never have access to all the files of the other
1674         operating system.</p>
1675
1676         <a name="LinuxNTFScompet"><h2>$LinuxNTFS</h2></a>
1677
1678                 <p>Although $LinuxNTFS takes a&nbsp;completely different approach and has
1679                 a&nbsp;different architecture, the final goal is the same as for this
1680                 project - reliable read-write <span class="productname">NTFS</span>
1681                 filesystem support. $LinuxNTFS goes the way of reverse engineering
1682                 filesystem data structures (and possibly
1683                 <span class="fname">ntfs.sys</span> itself). Unfortunately after many years
1684                 of its development it did not yet reach the state of reliable read-write
1685                 access although its read-only part is considered trustworthy.</p>
1686
1687                 <p>Using $LinuxNTFS for read-only access to existing partition with
1688                 <span class="productname">Microsoft Windows NT</span> installation is
1689                 planned to be able to acquire existing <span class="fname">ntfs.sys</span>,
1690                 <span class="fname">ntoskrnl.exe</span> and possibly
1691                 <span class="fname">ksecdd.sys</span> (imported by
1692                 <span class="fname">ntfs.sys</span>) files from the user's
1693                 <span class="productname">NTFS</span> partition.</p>
1694
1695         <h2><span class="productname"><a href="http://www.cgsecurity.org/ntfs.html">NTPwd NTFS Driver</a></span></h2>
1696
1697                 <p>DOS based <a href="http://www.gnu.org/licenses/gpl.html">GPL-2.0</a>
1698                 read-write NTFS driver. Filesystem structures are reverse engineered in the
1699                 way of <a href="#LinuxNTFScompet">Linux-NTFS Project</a>. As it is not very
1700                 actively maintained it reaches a&nbsp;lower level of
1701                 <span class="productname">NTFS</span> compatibility.</p>
1702
1703         <h2>Virtual Machine with <span class="productname">Microsoft Windows NT</span></h2>
1704
1705                 <p>Original <span class="productname">Microsoft Windows NT</span>
1706                 operating system can be run inside a virtual machine running under
1707                 $gnulinux (or vice versa) and share the read-write disk partitions by using
1708                 a network file sharing through a&nbsp;virtual network card.</p>
1709
1710                 <p>Although there will be full filesystem structures compatibility the
1711                 <span class="productname">NTFS</span> partition cannot be accessed with no
1712                 system installed (or with non-bootable crashed system to repair it this
1713                 way)
1714                         (Although this project requires the original
1715                         <span class="fname">ntfs.sys</span> it can be obtained from the legal
1716                         <span class="productname">Microsoft Windows NT</span> CD.),
1717                 it will have substantial system resources requirement and you also need
1718                 a virtual machine software product such as commercial
1719                 <span class="productname"><a href="http://www.vmware.com/download/workstation.html">VMware Workstation</a></span>.</p>
1720
1721
1722 <h1>Conclusion</h1>
1723
1724         <p>The project established <a href="#existing_emulation">a&nbsp;new form</a>
1725         of W32 emulation model suitable for existing proprietary binary W32 kernel
1726         code (drivers) while being hosted in an open source operating system
1727         (currently $gnulinux). Currently, only the subsystems required by W32
1728         filesystem drivers are implemented but the project can be further extended
1729         for compatibility with various hardware-related drivers such as W32 video
1730         drivers, W32 disk interface drivers etc.</p>
1731
1732         <p>Some W32 kernel space subsystems were implemented for the first time as
1733         $freespeech code as they are still missing in the only currently available
1734         $freespeech W32 kernel implementation, $ReactOS. Some W32 kernel function
1735         behaviour expected by the drivers had to be reverse engineered and documented
1736         in this project's&nbsp;API documentation (not listed in this book) and/or in
1737         its source files, because its description in the
1738         <span class="productname">Microsoft</span> documentation is missing.</p>
1739
1740         <p>Author had to get familiar both with the W32 kernel API and also with the
1741         W32 kernel code by the reverse engineering. This experience also covers the
1742         first <span class="productname">Microsoft Windows</span> compatible code ever
1743         written by the author - <span class="fname">hal.dll</span> (Hardware
1744         Abstraction Layer) part of W32 kernel.</p>
1745
1746         <p>Certain UNIX implementation interfaces allow a regular, non-privileged
1747         user of UNIX system to mount image files with any W32 filesystem supported by
1748         this project. Such mount operation usually requires UNIX
1749         <span class="constant">root</span> privileges to do so. On the other hand the
1750         choice of supported filesystem types is very limited as only a&nbsp;few
1751         filesystem types are supported for the W32 platform.</p>
1752 HERE
1753
1754
1755 My::Web->footer();  # called once the page-body heredoc (terminated by HERE above) has ended; presumably emits the closing page markup - confirm in My::Web