CVS_ID
[www.jankratochvil.net.git] / project / captive / doc / Index.html.pl
1 #! /usr/bin/perl
2
3 # $Id$
4 # Captive project doc Index page Perl template.
5 # Copyright (C) 2003 Jan Kratochvil <project-www.jankratochvil.net@jankratochvil.net>
6
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; exactly version 2 of June 1991 is required
10
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 # GNU General Public License for more details.
15
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19
20
21 package project::captive::doc::Index;
22 require 5.6.0;  # at least 'use warnings;' but we need some 5.6.0+ modules anyway
23 our $VERSION=do { my @r=(q$Revision$=~/\d+/g); sprintf "%d.".("%03d"x$#r),@r; };
24 our $CVS_ID=q$Id$;
25 use strict;
26 use warnings;
27
# Locate the project top-level directory and add it to @INC at compile time,
# before the following "use My::Web;" gets resolved.
# The directory is the last whitespace-separated token of the first line of
# ./Makefile starting with "top_srcdir" (e.g. "top_srcdir = ../..").
# $top_dir stays undefined and @INC untouched if the Makefile cannot be
# opened or if it contains no such line.
BEGIN{
	our $top_dir;
	if (open my $makefile,"<","Makefile") {
		while (my $line=<$makefile>) {
			next if $line!~/^top_srcdir/;
			# split() drops the trailing empty fields, the last field is the value.
			$top_dir=(split /\s/,$line)[-1];
			last;
			}
		close $makefile;
		}
	# lib->import() is what "use lib" does; calling it directly avoids the
	# string eval where a quote in the path would break the evaluated code.
	if (defined $top_dir && $top_dir ne "") {
		require lib;
		lib->import($top_dir);
		}
	}
29 use My::Web;
30
31
# Initialize My::Web for this page: pass the package name, the page title
# and the page specific stylesheet classes used by the markup below.
My::Web->init(
		'__PACKAGE__'=>__PACKAGE__,
		'title'=>'Captive NTFS doc',
		# Non-interpolating heredoc; the leading "\n" keeps the value identical
		# to a quoted string opening right before the first CSS line.
		'head_css'=>"\n".<<'HEAD_CSS',
.productname { font-family: cursive; }
.fname       { font-family: monospace; }
.constant    { font-family: monospace; }
.author      { font-family: cursive; }
.stuff       { font-style: italic; font-size: larger; margin-left: 20%; margin-right: 10%; }
.function    { font-family: monospace; }
.type        { font-family: monospace; }
.command     { font-family: monospace; }
.instruction { font-style: italic; }
HEAD_CSS
		);
# NOTE(review): presumably emits the page heading -- confirm in My::Web.
My::Web->heading();
48
49
# Build a centered HTML table holding one image and its caption.
#   $img_base - passed through as the first argument of img()
#   $caption  - caption text; HTML-escaped for the <caption> element and
#               also passed as the second argument of img()
# Returns the HTML fragment as a string.
sub doc_img
{
my($img_base,$caption)=@_;

	my $r="";
	$r.='<table border="0" align="center">'."\n";
		# <caption> is permitted only immediately after the <table> start tag
		# (HTML 4.01, section 11.2.2), therefore it precedes the image row.
		$r.="\t<caption>".CGI::escapeHTML($caption)."</caption>\n";
		$r.="\t<tr><td>".img($img_base,$caption)."</td></tr>\n";
	$r.='</table>'."\n";
	return $r;
}
61
# Both "free" links share the same target URL and differ only in the link text.
my $free_sw_url='http://www.gnu.org/philosophy/free-sw.html';
my $freespeech=a_href($free_sw_url,'Free');
my $freebeer  =a_href($free_sw_url,'free (as in beer)');
64
# Wrap a linked product name into <span class="productname">.
#   $url  - link target handed to a_href()
#   $name - product name; HTML-escaped before being handed to a_href()
# Returns the HTML fragment as a string.
sub productname
{
my($url,$name)=@_;

	my $link=a_href($url,CGI::escapeHTML($name));
	return qq{<span class="productname">$link</span>};
}
# Linked product names interpolated into the page text below;
# each row is the (URL, name) pair handed to productname().
my($Wine,$ReactOS,$LinuxNTFS,$GnomeVFS,$GnomeVFSmodule)=map { scalar productname(@$_) } (
		['http://www.winehq.com/'                                       ,'Wine'],
		['http://www.reactos.com/'                                      ,'ReactOS'],
		['http://linux-ntfs.sourceforge.net/'                           ,'Linux NTFS'],
		['http://developer.gnome.org/doc/API/gnome-vfs/'                ,'Gnome-VFS'],
		['http://developer.gnome.org/doc/API/gnome-vfs/modules.html'    ,'Gnome-VFS-module'],
		);
# Plain text, no link.
my $gnulinux='GNU/Linux';
77
78
# Banner marking the document as preliminary, with the output of
# vskip("10ex") placed before and after it.
print join("",
		scalar(vskip("10ex")),
		'<h1 align="center">!!! PRELIMINARY - TO BE UPDATED !!!</h1>',"\n",
		scalar(vskip("10ex")),
		);
80
81 print <<"HERE";
82 <h1>Abstract</h1>
83
84 <p>Existing binary Microsoft Windows file system drivers were exploited
85 for accessing drives with possibly proprietary file system data structures.
86 Open file system API is provided to access these file system drivers.
87 Microsoft Windows system components required by these drivers
88 were analyzed and successfully emulated in the GNU/Linux operating system.
89 Currently the implementation allows applications running under the GNU/Linux
90 operating system to access VFAT, ISO9660 and EXT2 drives. NTFS file system
91 support is the final goal, currently being developed on the basis
92 of this project's assets.</p>
93
94
95 <h1>Reasons for the Implementation</h1>
96
97         <p>Currently there is no possibility for any of the available $freespeech
98                 ($freespeech used in the following text in the meaning of
99                 &quot;<a href="http://www.gnu.org/philosophy/free-sw.html">free as in speech</a>&quot;)
100         operating systems to reliably write to the most common disk partition
101         filesystem type - <span class="productname">Microsoft NTFS</span>. It would
102         have been supported a long time ago but there is no proper documentation of
103         <span class="productname">NTFS</span> filesystem data structures available.
104         Since <span class="productname">Microsoft</span> corporation continues in its
105         propagation of <span class="productname">Microsoft Windows NT</span>
106                 (<span class="productname">NT</span> identifier used in the following text
107                 applies to all the products of <span class="productname">Microsoft</span>
108                 <span class="productname">NT</span> series such as
109                 <span class="productname">NT&nbsp;4.0</span>,
110                 <span class="productname">2000</span> as NT-5.0
111                 and
112                 <span class="productname">XP</span> as NT-5.1.)
113         based operating systems <span class="productname">NTFS</span> is the default
114         disk file system type for new installations as described in the
115         <a href="http://www.microsoft.com/hwdev/tech/storage/ntfs-preinstallP.asp">recommendations
116         report</a> by <span class="productname">Microsoft</span>.</p>
117
118         <p>Unfortunately the <span class="productname">NTFS</span> filesystem has too
119         complex data structure to allow a complete reverse engineering process in
120         reasonable time. Currently available $freespeech solutions such as $LinuxNTFS
121         filesystem have already implemented (more or less) reliable reverse
122         engineered read-only access. However <a name="reliability">the
123         reliability</a> of the read-write part of the access requires much better
124         knowledge of the <span class="productname">NTFS</span> data structures. Also
125         any future versions of <span class="productname">NTFS</span> filesystem would
126         require another major reverse engineering effort.</p>
127
128
129 <h1>Goals of This Stage of the Project</h1>
130
131         <p>The <a name="NTFSgoal">ultimate goal</a> of this project is definitely the
132         free implementation of <a href="#reliability">reliable</a> read-write <span
133         class="productname">NTFS</span> filesystem driver. This project chose to
134         solve this problem in the style of $Wine project by using the original binary
135         <span class="fname">ntfs.sys</span> and emulating all the required layers of
136         <span class="productname">Microsoft Windows NT</span> for it.</p>
137
138         <p>Unfortunately this effort is tainted by only partial and generally
139         insufficient documentation of API between filesystem driver
140         (<span class="fname">ntfs.sys</span>) and the
141         <span class="productname">Microsoft Windows NT</span>
142         (&quot;<a href="http://mail.gnu.org/archive/html/libtool/2000-09/msg00000.html">W32</a>&quot;
143         in the following text) kernel <span class="fname">ntoskrnl.exe</span>. Note
144         that this API is different from the one being used in the $Wine project
145         since <span class="productname">Wine</span> implements only the user space
146         part of W32.</p>
147
148         <p>There also exists a $freespeech
149         <span class="fname"><a href="http://sys.xiloo.com/projects/projects.htm#ext2fsd">ext2fsd.sys</a></span>
150         W32 filesystem driver for <span class="constant">ext2</span> filesystems with
151         source files freely available for it. Moreover original
152         <span class="productname">Microsoft Windows NT</span> filesystems
153         <span class="fname">cdfs.sys</span> and
154         <span class="fname">fastfat.sys</span> (which correspond to Linux
155         <span class="productname">iso9660</span> and
156         <span class="productname">vfat</span> filesystems, resp.) are easy enough to
157         get working in reasonable time. All these filesystem drivers also use only
158         the documented filesystem data structures which makes their behaviour better
159         controllable when debugging the project.</p>
160
161         <p>Therefore this stage of the project is intended to get only the original
162         W32 binary form of <span class="fname">cdfs.sys</span> and
163         <span class="fname">fastfat.sys</span> drivers working. This goal was
164         achieved and the compatibility with <span class="fname">ext2fsd.sys</span>
165         can be considered as an additional benefit.</p>
166
167
168 <h1>Architecture</h1>
169
170         <p>Although this project attempts to be as general and crossplatform as
171         possible to avoid being needlessly bound by any resources the current
172         implementation is being developed/tested on $gnulinux. The principle of the
173         project lies in the glue between
174         <span class="productname">Microsoft Windows NT</span> kernel space
175         environment and $gnulinux user space process environment. Currently there are
176         no plans to ever extend the project's crossplatformity beyond the
177         <span class="constant">i386</span> processor
178                 (<span class="constant">i386</span> used here as
179                 <a href="http://www.intel.com/">Intel</a> architecture covering 32-bit
180                 processors compatible with <span class="constant">i386</span>,
181                 <span class="constant">i486</span>, ...).
182
183         <a name="existing_emulation"><h2>Existing Emulation Projects</h2></a>
184
185                 <p>There are two well-known $freespeech projects emulating W32 subsystems
186                 to reach the compatibility with various W32 components:
187                 $Wine and $ReactOS. Unfortunately the goals of this project do not fit
188                 very well into any role in those two projects:
189
190                 <table align="center" border="1">
191                         <tr>
192                                 <th><a href="#guestosnote">Guest-OS</a></th>
193                                 <th><a href="#hostosnote" >Host-OS</a ></th>
194                                 <th>Implements</th>
195                                 <th>W32 kernel library</th>
196                                 </tr>
197                         <tr>
198                                 <td>$Wine</td>
199                                 <td>$gnulinux</td>
200                                 <td>W32 user space</td>
201                                 <td><span class="fname">ntdll.dll</span></td>
202                                 </tr>
203                         <tr>
204                                 <td>$ReactOS</td>
205                                 <td><span class="constant">i386</span> hardware</td>
206                                 <td>W32 kernel and user space</td>
207                                 <td><span class="fname">ntoskrnl.exe</span></td>
208                                 </tr>
209                         <caption>Existing Emulation Projects Characteristics</caption>
210                 </table>
211
212                 <dl>
213                         <a name="guestosnote"><dt>Guest-OS</dt></a>
214                         <dd><a href="http://www.vmware.com/support/reference/common/glossary/#guestos">Guest OS</a>:
215                                 An operating system that runs inside a&nbsp;virtual machine.</dd>
216                         <a name="hostosnote" ><dt>Host  OS</dt></a>
217                         <dd><a href="http://www.vmware.com/support/reference/common/glossary/#hostos" >Host  OS</a>:
218                                 An operating system that runs on the host machine.</dd>
219                 </dl>
220
221                 <p>While $ReactOS provides the necessary W32 kernel subsystem emulation
222                 code we also need to run such <a href="#guestosnote">Guest-OS</a> in the <a
223                 href="#hostosnote">Host-OS</a> $gnulinux. Initially it was planned to
224                 extend $Wine with the W32 kernel space emulation functionality but
225                 fortunately <span class="author">Steven Edwards</span> pointed to the $ReactOS
226                 which better suits the needs of this project by its already implemented W32
227                 kernel space emulation.</p>
228
229                 <p>The <a name="reactos_nocare">original reasons</a> for developing
230                 $ReactOS still make no sense to the author of this project. Free
231                 implementation of W32 platform standalone running on the machine hardware
232                 is no longer free as most of the W32 applications are usually closed source
233                 and the user still loses their freedom on the application level anyway. Even
234                 in the case of available free applications there still remains the
235                 disadvantage of losing the Host-OS platform availability if implemented in
236                 the $Wine style. Because of these ideological incompatibilities not much effort was
237                 made for the acceptance of the fixes and improvements of $ReactOS by this project.
238                 Moreover new functionality is not being implemented to the $ReactOS part
239                 but it is coded in Gnome style in the project specific source files
240                 place.</p>
241
242                 <p>The most serious problem of $ReactOS is its dependence on the direct
243                 <span class="constant">i386</span> hardware instead of some
244                 <a href="#hostosnote">Host-OS</a> as required by the goals of this project.
245                 W32 is designed to be hardware-independent using its
246                 <span class="fname">hal.dll</span>. Unfortunately $ReactOS does not follow
247                 this design and thus various patches and replacements of its
248                 various parts and its hardware-dependent code are needed. Despite that the $ReactOS code
249                 base has still been a big asset for this project.</p>
250
251
252
253
254
255                 <p>Some API functions are provided both by
256                 <span class="fname">ntdll.dll</span> and
257                 <span class="fname">ntoskrnl.exe</span> in W32.
258                 <span class="author">Casper Hornstrup</span> explained that the calling
259                 conventions of such functions have to be differentiated as
260                 <span class="fname">ntdll.dll</span> lives in the user space (low address
261                 space -- below <span class="constant">0x80000000</span>) and
262                 <span class="fname">ntoskrnl.exe</span> in the kernel space (high address
263                 space -- above <span class="constant">0x80000000</span>). Although they
264                 contain slightly different set of symbols (functions)
265                 <span class="fname">ntdll.dll</span> still can be considered as a&nbsp;user
266                 space interface to the kernel space implementation by
267                 <span class="fname">ntoskrnl.exe</span>.</p>
268
269         <h2>API Function Implementation Choices</h2>
270
271                 <p>During the initial point of the project development all the API
272                 functions were defined as unimplemented, of course. Any call of such
273                 unimplemented function is fatal and results in program termination. When we
274                 need to implement any required API function we have multiple choices to do
275                 so:
276                 <a href="#functype_pass">Direct pass to original
277                                 <span class="fname">ntoskrnl.exe</span></a>,
278                 <a href="#functype_wrap">Wrap of the original
279                                 <span class="fname">ntoskrnl.exe</span> function</a>,
280                 <a href="#functype_native_reactos">Native implementation -- $ReactOS,
281                 <a href="#functype_native_wine">Native implementation -- $Wine
282                 or
283                 <a href="#functype_native_libcaptive">Native implementation
284                                 -- project specific</a>.
285                 <!-- a href="#functype_undef" Undefined function /a -->
286
287         <h2>&quot;patched&quot; vs. &quot;unpatched&quot; Libraries</h2>
288
289                 <p>Library is called <span class="constant">patched</span> if we require
290                 loading its original binary code file. Project needs to patch it to be able
291                 to trap all the function entry points. The typical current
292                 <span class="constant">patched</span> library of this project is
293                 <span class="fname">ntoskrnl.exe</span>.</p>
294
295                 <p>Library is called <span class="constant">unpatched</span> if no original
296                 binary code is needed since all of its functions are completely emulated by
297                 <a href="#functype_native">the native implementations</a> of this project.
298                 The typical <span class="constant">unpatched</span> representative is
299                 <span class="fname">hal.dll</span> as it specializes on the hardware
300                 dependent code and therefore it must be completely replaced by this project
301                 running in the $gnulinux operating system environment. Early versions of
302                 this project had also full <span class="constant">unpatched</span>
303                 <a href="#native_ntoskrnl">native implementation of
304                 <span class="fname">ntoskrnl.exe</span></a> but it no longer applies.</p>
305
306         <h2>Memory Management</h2>
307
308                 <p>Original <span class="productname">Microsoft Windows NT</span>
309                 architecture uses two address space areas - user space and kernel space.
310                 User space is mapped in the range <span class="constant">0x00000000</span>
311                 to <span class="constant">0x7FFFFFFF</span>, kernel space is mapped in the
312                 range <span class="constant">0x80000000</span>
313                 (<span class="constant">KERNEL_BASE</span> in $ReactOS sources) to
314                 <span class="constant">0xFFFFFFFF</span>. All these virtual memory ranges
315                 represent addresses after their MMU (Memory Management Unit) mapping, of
316                 course. More discussion can be found in the
317                 <a href="http://www.microsoft.com/hwdev/platform/server/PAE/PAEmem.asp">description 
318                 by <span class="productname">Microsoft</span></a>.</p>
319
320                 <p>This project runs in the virtual address space used both for the UNIX
321                 user space process part and for the W32 kernel space. Therefore this
322                 project defines that W32 kernel runs in the whole range
323                 <span class="constant">0x00000000</span> to
324                 <span class="constant">0xFFFFFFFF</span> since there are no special mapping
325                 assumptions about the UNIX user space process mapping. No W32 user space
326                 exists in this project. Such approach also nullifies any special memory
327                 moving operations between W32 kernel space and W32 user space memory areas
328                 (such as <span class="function">MmSafeCopyToUser()</span>).</p>
329
330         <h2>Unicode Strings and Characters</h2>
331
332                 <p>W32 platform uses 16-bit type <span class="type">wchar_t</span> while $gnulinux uses a
333                 32-bit one. This can be a problem during GCC (GNU C&nbsp;Compiler)
334                 compilation of combination of native UNIX C&nbsp;sources (assuming 32-bit
335                 GCC with 32-bit <span class="type">wchar_t</span>) and
336                 $ReactOS C sources (assuming W32 compiler with 16-bit
337                 <span class="type">wchar_t</span>) for literal wide strings
338                 (C source file syntax: <span class="command">L&quot;wstring&quot;</span>).
339                 Possible solutions to this issue:</p>
340
341                 <ul>
342                         <li>
343                                 <p>Use the <span class="constant">-fshort-wchar</span> GCC option and
344                                 strictly differentiate between compilation of
345                                 <span class="productname">ReactOS</span> code and UNIX code.</p>
346
347                                 <p>pros: No source modifications needed, no runtime performance hit.</p>
348
349                                 <p>cons: No type checking if some part of code has bad compilation
350                                 flags, complicated way to completely split
351                                 <span class="productname">ReactOS</span> and UNIX code.</p>
352                         </li>
353                         <li>
354                                 <p>Wrap all <span class="productname">ReactOS</span> literal constants
355                                 in a conversion function call (implemented as the macro
356                                 <span class="function">REACTOS_UCS2()</span> by this project).</p>
357
358                                 <p>pros: Any forgotten/mistaken conversions are type-checked and warned
359                                 during the compilation by GCC.</p>
360
361                                 <p>cons: All compiled <span class="productname">ReactOS</span> source
362                                 files containing literal wide strings have to be wrapped/modified,
363                                 performance hit by runtime string conversions.</p>
364
365                                 <p>This solution was chosen to get the internal sanity checking
366                                 benefit.</p>
367                         </li>
368                 </ul>
369
370         <h2>Supported Binary Formats</h2>
371
372                 <p>The native W32 binary format is identified as
373                 <span class="constant">PE-32</span> (Portable Executable 32-bit), such
374                 files have all the usual extensions such as
375                 <span class="fname">.sys</span>, <span class="fname">.exe</span>,
376                 <span class="fname">.dll</span> etc. <span class="constant">PE-32</span>
377                 loading support was already implemented by $ReactOS, its memory mapping
378                 specifics just had to be ported to $gnulinux environment by this project.
379                 This loading support does not (yet) cover importing of debug symbols from
380                 W32 <span class="fname">.PDB</span> (Program DataBase) files in $gnulinux
381                 ABI (Application Binary Interface) compatible way.</p>
382
383                 <p>This project also supports transparent loading of UNIX
384                 <span class="fname">.so</span> (Shared Object file) binary format. If you
385                 have W32 source files for some W32 library you can try to compile it by GCC
386                 to get the shared library with $gnulinux ABI compatible debug information
387                 (GCC option <span class="constant">-ggdb3</span> recommended). Beware of
388                 possible compilation problems as <span class="productname">Microsoft</span>
389                 C&nbsp;code expects <span class="constant">exception</span> handling to be
390                 supported by the compiler (definitely not the case of the plain C compiler
391                 of GCC) --- all the exception catching code should be discarded as any
392                 <a href="#exception_fatal">generated exceptions are always fatal</a> when
393                 such driver is running in the scope of this project.</p>
394
395                 <p>Be aware of some differences if you use
396                 <span class="constant">PE-32</span> binary format file vs.
397                 <span class="fname">.so</span> format file.
398                 <span class="constant">PE-32</span> use the appropriate W32 specific
399                 <a href="#calltype">cdecl/stdcall/fastcall call types</a>,
400                 <span class="fname">.so</span> must be completely compiled in the standard
401                 UNIX <a href="#calltype_cdecl">cdecl call type semantics</a>.
402                 <a href="#functype_native">Native function implementations</a> do not need
403                 to be explicitly exported by <span class="fname">captivesym</span> as they
404                 are resolved automatically by the UNIX dynamic system linker. It may be
405                 surprising you will have to fix all such missing symbol exports if you
406                 advance during the development from the debugging
407                 <span class="fname">.so</span> file to the production version of the
408                 original <span class="constant">PE-32</span> binary file.</p>
409
410         <h2>Reverse Engineering</h2>
411
412                 <p>This project has no intentions to reverse engineer and document the
413                 filesystem data structures themselves since they are being encapsulated by
414                 the filesystem driver. For these reasons the resources available in
415                 projects such as $LinuxNTFS get out of any possible use. This project goal
416                 is to provide fully compatible API interface to the rest of the W32 system
417                 to persuade the filesystem driver it is running in the native
418                 <span class="productname">Microsoft Windows XP</span> environment.</p>
419
420                 <p>All the W32 filesystem drivers are running in the W32 kernel address
421                 space and this area of W32 API is not much documented by
422                 <span class="productname">Microsoft</span>. Some API functions are not
423                 documented at all and the others are documented insufficiently for their
424                 possibly needed reimplementation from scratch. Documentation being
425                 consulted primarily consists of
426                 <span class="productname"><a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/kmarch/hh/kmarch/kmhdr_6enb.asp">MSDN (Microsoft Developer Network) Kernel-Mode Driver Architecture: Windows DDK</a></span>
427                 documentation and also various other 3rd party documentation resources such as
428                 <span class="productname"><a href="http://www.osr.com/ntinsider/1996/cacheman.htm">The NT Cache Manager Description</a></span>,
429                 <span class="productname"><a href="http://www.winntmag.com/Articles/Print.cfm?ArticleID=3864">Learn About NT's&nbsp;File-system Cache</a></span>,
430                 <span class="productname"><a href="http://www.ntfsd.org/archive/">NT File System Developers mailing list archives</a></span>
431                 including various
432                 <a href="http://www.google.com/search?q=site%3Amicrosoft.com">fulltext searches</a>
433                 through Internet from case to case.</p>
434
435                 <p>Sometimes no sufficient documentation was found and some code behaviour
436                 had to be reverse engineered directly from the binaries of
437                 <span class="fname">ntoskrnl.exe</span>,
438                 <span class="fname">cdfs.sys</span>
439                 and/or
440                 <span class="fname">fastfat.sys</span>.
441                 Up to now the code was disassembled by
442                 <span class="productname"><a href="http://www.simtel.net/pub/pd/29498.html">IDA Freeware</a></span>
443                 and by
444                 <span class="productname">dumpbin.exe</span> of
445                 <span class="productname">Microsoft Visual Studio</span>.
446                 <span class="productname">dumpbin.exe</span> is fortunately able to
447                 interpret debug symbols from W32 <span class="fname">.PDB</span>
448                 (Program DataBase) debug information files.</p>
449
450         <a name="law"><h2>Laws and Licensing Conditions</h2></a>
451
452                 <p>If you are an <span class="productname">authorized user</span> of
453                 <span class="productname">Microsoft Windows NT</span> the laws in some
454                 countries give you the right to fully handle the product in any way you
455                 want. Therefore you can disassemble the product even in the case you had
456                 to agree with the product license forbidding such disassembly as the
457                 country laws override any such license agreement.</p>
458
459                 <h3>Microsoft Service Pack</h3>
460
461                         <p>Sometimes you may have the legal license for
462                         <span class="productname">Microsoft Windows NT</span>
463                         but for various technical reasons you do not have the media and/or
464                         installation ready at the place of intended use of this project.</p>
465
466                         <p>Fortunately <span class="productname">Microsoft</span> provides
467                         $freebeer update packages for its
468                         <span class="productname">Microsoft Windows</span> products called
469                         <span class="productname">Service Packs</span>; the latest one is
470                         <span class="productname"><a href="http://www.microsoft.com/WindowsXP/pro/downloads/servicepacks/sp1/checkedbuild.asp">Microsoft Windows XP Service Pack 1a</a></span>.</p>
471
472                         <p>This downloadable file contains the full versions of the essential
473                         files needed for the current stage of this product:
474                         <span class="fname">cdfs.sys</span>,
475                         <span class="fname">fastfat.sys</span>
476                         and
477                         <span class="fname">ntoskrnl.exe</span>.
478                         It even contains <span class="fname">ntfs.sys</span> for the planned
479                         <a href="#NTFSgoal"><span class="productname">NTFS</span>
480                         functionality</a>.</p>
481
482                         <p><span class="productname">Service Pack</span> also contains
483                         EULA (End User License Agreement) paper disallowing any use of
484                         <span class="productname">Service Pack</span> outside its original
485                         intentions. According to the laws of some countries you need to be an
486                         <span class="productname">authorized user</span> of the
487                         <span class="productname">Microsoft Windows XP</span> product to be
488                         allowed to use the files contained in such
489                         <span class="productname">Service Pack</span> without the bindings of its
490                         EULA. Even the interpretation of such laws may vary.</p>
491
492                         <p>It would be a&nbsp;breach of the law by the project author to provide
493                         automatic (=hidden) functionality to download and extract the
494                         <span class="productname">Service Pack</span> files. On the other hand it
495                         is perfectly legal to ask the user for his/her confirmation whether he/she is
496                         really the <span class="productname">authorized user</span> of
497                         <span class="productname">Microsoft Windows XP</span> product and
498                         download/extract the <span class="productname">Service Pack</span> files
499                         accordingly.</p>
500
501         <h2>Project Architecture</h2>
502
503                 @{[ doc_img 'fig/architecture','Project Architecture' ]}
504
505                 <p>Most of the work of this project is located in the single box called
506                 &quot;<span class="constant">libcaptive</span>&quot; located in the center
507                 of the scheme. This component implements the core W32 kernel API by
508                 <a href="#functype">various methods described in this document</a>.
509                 The &quot;<span class="constant">libcaptive</span>&quot; box cannot be
510                 further dissected as it is just an implementation of a&nbsp;set of API
511                 functions. It could be separated to several subsystems such as the Cache
512                 Manager, Memory Manager, Object Manager, Runtime Library, I/O&nbsp;Manager
513                 etc. but they have no interesting referencing structure.</p>
514
515                 <p>As this project is in fact just a&nbsp;filesystem implementation every
516                 story must begin at the device file and end at the filesystem operations
517                 interface. The unified supported interfaces are
518                 <span class="productname"><a href="http://developer.gnome.org/doc/API/2.0/glib/">GLib</a></span>
519                         (the most low level portability, data-types and utility library for Gnome)
520                 <span class="type">GIOChannel</span> (for the device access) and the custom
521                 <span class="constant">libcaptive</span> filesystem API. Each of these ends
522                 can be connected either to some direct interface (such as the
523                 <span class="constant">captive-cmdline</span> client) or it can be connected
524                 as a general $GnomeVFS filter. $GnomeVFS offers nice filter interface on
525                 the UNIX user-privileges level for transparent operation with archives and
526                 network protocols. This filter interface was used by this project to turn
527                 the device reference such as <span class="fname">/dev/hda3</span> or
528                 <span class="fname">/dev/discs/disc0/part3</span> to the fully accessible
529                 filesystem (pretending being an &quot;archive&quot; in the device
530                 reference). This device access can be specified by $GnomeVFS URLs such as:
531                 <span class="fname">file:///dev/hda3#captive-fastfat:/autoexec.bat</span></p>
532                 
533                 <p>If the passed device reference is requested by the user to be accessed
534                 either in <span class="dashdash">--ro</span> (read-only) mode or in the
535                 <span class="dashdash">--rw</span> (full read-write) mode there are no
536                 further device layers needed. Just in the case of
537                 <span class="dashdash">--blind</span> mode another layer is involved to
538                 emulate read-write device on top of the real read-only device by the method
539                 of non-persistent memory buffering of all the possible write requests.</p>
540
541                 <p>Such device is still only a&nbsp;UNIX style GLib <span
542                 class="type">GIOChannel</span> type at this point.  As we need to supply it
543                 to the W32 filesystem driver we must convert it to the W32 I/O&nbsp;Device
544                 with its capability of handling <span class="type">IRP</span>
545                         (<span class="constant">I/O Request Packet</span>; structure holding the
546                         request and result data for any W32 filesystem or W32 block device
547                         operation)
548                 requests from its upper W32 filesystem driver. Such W32 I/O&nbsp;Device can
549                 represent either <span class="type">CD-ROM</span> or
550                 <span class="type">disk</span> device type as different W32 filesystem
551                 drivers require different media types:</p>
552
553                 <h3>cdfs.sys</h3>
554
555                         <p><span class="type">CD-ROM</span> filesystem runs just on the
556                         <span class="constant">FILE_DEVICE_CD_ROM_FILE_SYSTEM</span> device type.
557                         Use <span class="dashdash">--cdrom</span> option of this project for
558                         <span class="fname">cdfs.sys</span>.</p>
559
560                 <h3>fastfat.sys</h3>
561
562                         <p><span class="type">FAT</span> filesystem supports both the (expected)
563                         <span class="constant">FILE_DEVICE_DISK_FILE_SYSTEM</span> device type
564                         but it also supports the reading of
565                         <span class="constant">FILE_DEVICE_CD_ROM_FILE_SYSTEM</span> devices as
566                         you can use <span class="type">FAT</span> filesystem on <span
567                         class="type">CD-ROM</span> media in W32 environment. It is recommended to
568                         use <span class="dashdash">--disk</span> option of this project for
569                         <span class="fname">fastfat.sys</span>.</p>
570
571                 <h3>ext2fsd.sys</h3>
572
573                         <p><span class="type">ext2</span> filesystem supports just the
574                         <span class="constant">FILE_DEVICE_DISK_FILE_SYSTEM</span> device type.
575                         Use <span class="dashdash">--disk</span> option of this project for
576                         <span class="fname">ext2fsd.sys</span>.</p>
577                 
578                 @{[ vskip("3ex") ]}
579
580                 <p>W32 media I/O&nbsp;Device is accessed from the W32 filesystem driver.
581                 The filesystem driver itself always creates volume object by
582                 <span class="function">IoCreateStreamFileObject()</span> representing the
583                 underlying W32 media I/O&nbsp;Device as the object handled by the
584                 filesystem driver itself. All the client application filesystem requests
585                 must be first resolved at the filesystem structures level, passed to the
586                 volume stream object of the same filesystem and then finally passed to the
587                 W32 media I/O&nbsp;Device (already implemented by this project as an
588                 interface to <span class="type">GIOChannel</span> noted above).</p>
589
590                 <p>The filesystem driver is called by the core W32 kernel implementation of
591                 <span class="constant">libcaptive</span> in
592                 <a href="#synchronous">synchronous way</a> in single-shot manner instead of
593                 the several reentrancies while waiting for the disk I/O completions as can
594                 be seen in the original
595                 <span class="productname">Microsoft Windows NT</span>.
596                 This single-shot synchronous behaviour is possible since all the needed
597                 resources (disk blocks etc.) can be always presented as instantly ready as
598                 their acquirement is solved by <a href="#hostosnote">Host-OS</a> outside of
599                 the W32 emulated <a href="#guestosnote">Guest-OS</a> environment.</p>
600
601                 <p><span class="constant">libcaptive</span> offers the W32 kernel
602                 filesystem API to the upper layers. This is still not the API the common
603                 W32 applications are used to as they use W32 libraries which in turn pass
604                 the call to W32 kernel.  For example
605                 <span class="function">CreateFileA()</span> is being implemented by several
606                 libraries such as <span class="fname">user32.dll</span> as a relay
607                 interface for the kernel function
608                 <span class="function">IoCreateFile()</span> implemented by this
609                 project's&nbsp;<span class="constant">libcaptive</span> W32 kernel
610                 emulation component.</p>
611
612                 <p>As it would be very inconvenient to use the legacy, bloated and UNIX
613                 style unfriendly W32 kernel filesystem API this project offers its own
614                 <a href="#client_interface">custom filesystem API interface</a> inspired by
615                 the $GnomeVFS client interface adapted to the specifics of W32 kernel API.
616                 This interface is supposed to be easily utilized by
617                 <a href="#client_interface_customapp">a&nbsp;custom application accessing
618                 the W32 filesystem driver</a>.</p>
619
620                 <p>The rest of the story is not very special for this project since this is
621                 a common UNIX problem how to offer user space implemented UNIX filesystem
622                 as a generic system filesystem (as those are usually implemented only as
623                 the components of UNIX kernel). The most thin implementation would be to
624                 implement <FIXME:LUFS><a href="#fuse_interface">FUSE \bookcitation{FUSE}
625                         (Filesystem in Userspace project for $gnulinux implemented by its own
626                         filesystem code for Linux kernel)
627                 interface</a> for the purpose but such feature is not yet implemented.
628                 Currently this project implements
629                 <a href="#offered_gnomevfs">Gnome-VFS interface</a> allowing its filesystem
630                 access even without any involvement of UNIX kernel from any
631                 $GnomeVFS aware client application (such as
632                 <span class="fname">gnome-vfs/tests/test-shell</span>).
633                 This <a href="#offered_gnomevfs">Gnome-VFS interface</a> connects the data
634                 flow of this project in two points - both as the lowest layer device image
635                 source and also as the upper layer for the filesystem operation
636                 requests.</p>
637
638                 <p>That's&nbsp;all folks!</p>
639
640         <a name="mounted_one"><h2>At Most One Mounted Filesystem</h2></a>
641
642                 @{[ doc_img 'fig/sandbox','Multiple Filesystems by libcaptive Sandboxing' ]}
643
644                 <p>The project technically supports only one (exactly one...) mounted
645                 filesystem device and only one filesystem driver. There is nothing
646                 complicated to support multiple disks and multiple loaded filesystem
647                 modules but as they would share the address space it would only bring
648                 possible complications during bug reports and the bug solving
649                 itself.  It was considered as a&nbsp;more sane way to support multiple W32
650                 mounted disks by completely separately running project instances in
651                 different UNIX processes communicating from their sandboxes via
652                 <a href="#todo_sandbox">CORBA sandbox interface</a>. This sandboxing
653                 feature is not yet deployed although its code is already prepared.</p>
654
655                 <p>The project also does not support any state cleanup to be able to load
656                 filesystem&nbsp;<span class="constant">A</span>,
657                 cleanup&nbsp;<span class="constant">A</span> and load a different
658                 filesystem&nbsp;<span class="constant">B</span> in the same process address
659                 space. It complies with the preventions of the possible debugging
660                 complications as noted above. Despite this you still must call the function
661                 <span class="function">captive_shutdown()</span> to flush all the pending
662                 filesystem buffers to the disk. After calling
663                 <span class="function">captive_shutdown()</span> the process address space is
664                 no longer usable for any further project operations and the process is
665                 expected to be terminated in the manner compatible with its driving
666                 <a href="#todo_sandbox">CORBA sandbox interface</a> control master.</p>
667
668                 <p>Each sandbox executing the untrusted W32 binary filesystem driver code
669                 is connected through its
670                 <a href="#todo_sandbox">CORBA sandbox interface</a> at the point of upper
671                 layer <span class="constant">libcaptive</span>-specific filesystem API, at
672                 the point of the bottom layer of <span class="type">GIOChannel</span>
673                 device access and also for transfers of GLib logging
674                 messages/warnings/errors out of the sandbox to the user.</p>
675
676
677 <h1>Choice of the Emulation Methods</h1>
678
679         <p>The intent of the project was to get reliable read-write access to
680         <span class="productname">NTFS</span> partition. There are several possible
681         ways to achieve that:</p>
682
683         <h2>Virtualmachine Running the Original W32 Subsystem</h2>
684
685                 <p>Creating virtual-hardware PC and running the original W32 binaries
686                 including their boot-loader etc. Disk device access would be passed as
687                 virtual IDE disk (=hard disk drive). File access API would be implemented
688                 either by special escaping by some trapped instruction out of the
689                 virtualmachine while using W32 file access API or using the standard W32
690                 SMB (Server Message Block) network access through some virtual network
691                 card. The latter network access solution is almost the currently available
692                 possibility of running full-blown disk-sharing real
693                 <span class="productname">Microsoft Windows NT</span> inside virtual
694                 machine emulator such as <span class="productname">VMware</span>.</p>
695
696                 <p>pros: Full compatibility due to fully native codebase.</p>
697
698                 <p>cons: Hard to debug, missing documentation of NT booting internals,
699                 possible problems by different PC virtual-hardware than expected by NT,
700                 requirement of fully installed
701                 <span class="productname">Microsoft Windows NT</span> product.</p>
702
703         <a name="method_ntoskrnl"><h2>&quot;ntoskrnl.exe&quot; Inside Virtual Address Space</h2></a>
704
705                 <p>This solution was chosen by the project. Binary filesystem driver and
706                 also <span class="fname">ntoskrnl.exe</span> binary file are required.
707                 Unfortunately <span class="fname">ntoskrnl.exe</span> expects a&nbsp;native
708                 PC virtual-hardware missing during regular UNIX user space process
709                 emulation, therefore such instructions must be trapped and emulated/ignored
710                 from case to case.</p>
711
712                 <p>Also the <a name="init_ntoskrnl">initialization code of <span
713                 class="fname">ntoskrnl.exe</span></a> is not executed by this project since
714                 it expects to get full PC hardware access privileges and thus some
715                 datastructures do not get initialized by it (need to be trapped later at
716                 runtime stage). Some of the missing initializations are solved by
717                 <a href="#functype_wrap">API functions wrapping</a>.</p>
718
719                 <p>pros: Lightweight, easier to debug.</p>
720
721                 <p>cons: Possible incompatible emulation of
722                 <span class="fname">ntoskrnl.exe</span> parts, missing documentation needed
723                 for the implementation.</p>
724
725         <h2>Filesystem Driver Inside Virtual Address Space</h2>
726
727                 <p>Unlike <a href="#method_ntoskrnl">previous method</a> here we do not use
728                 even <span class="fname">ntoskrnl.exe</span> as the complete kernel part of
729                 W32 is <a name="native_ntoskrnl">emulated from the project source
730                 files</a>. <span class="fname">cdfs.sys</span> driver was successfully run
731                 in this manner in the former versions of this project but the possibility
732                 to run without <span class="fname">ntoskrnl.exe</span> was dropped since it
733                 had no licensing gains (you need the original
734                 <span class="productname">Microsoft Windows NT</span> files at least for
735                 the filesystem driver itself) and the emulation of undocumented parts
736                 reusable from <span class="fname">ntoskrnl.exe</span> binary was
737                 a&nbsp;pain.</p>
738
739                 <p>pros: Lightweight, easier to debug.</p>
740
741                 <p>cons: Possible incompatible emulation of the whole
742                 <span class="fname">ntoskrnl.exe</span>, its missing documentation.</p>
743
744
745 <h1>Implementation Details</h1>
746
747         <a name="functype"><h2>API Function Implementation Choices</h2></a>
748
749                 <p>For each function exported by W32
750                 <span class="fname">ntoskrnl.exe</span> and imported and called by the
751                 filesystem driver a decision needs to be made to properly implement its
752                 functionality. Currently implemented functionality statistics are provided
753                 below:</p>
754
755                 <FIXME:numbers>
756                 <table border="1" align="center">
757                         <tr><th>Function type                                        </th><th>Items</th><th>Portion</th></tr>
758                         <tr><td><a href="#functype_pass">pass</a>                    </td><td>   46</td><td>    21%</td></tr>
759                         <tr><td><a href="#functype_wrap">wrap</a>                    </td><td>    1</td><td>     0%</td></tr>
760                         <tr><td><a href="#functype_native_reactos">native-ReactOS</a></td><td>   94</td><td>    43%</td></tr>
761                         <tr><td><a href="#functype_native_libcaptive">native-own</a> </td><td>   79</td><td>    36%</td></tr>
762                         <caption>Function Implementation Types Statistics</caption>
763                 </table>
764
765                 <p>As there are several choices to implement each function the usual
766                 attempts/investigations ordering is listed in the sections below.</p>
767
768                 <p>Special care must be taken for data-type symbols since they are
769                 referenced without the possibility of catching the code flow by some
770                 breakpoints (it would be possible only in some special access cases). Data
771                 export symbols of <span class="constant">unpatched</span> libraries must
772                 contain already prepared content at the runtime. There is a&nbsp;problem
773                 with <span class="constant">patched</span> libraries where it is necessary
774                 to also fully implement the data symbol as
775                 <a href="#functype_native">native implementation</a> since there is no
776                 possibility to <a href="#functype_pass">pass</a> the data symbol instead of
777                 the original W32 data location and therefore there will be two instances of
778                 such data variable place. As there will be also the uncaught references for
779                 such W32 data location from the <span class="constant">patched</span>
780                 library itself such symbols should be usually only some constants (such as
781                 <span class="constant">KeNumberProcessors</span>).</p>
782
783                 <p>W32 platform symbols export/import can be based either on the symbol
784                 name itself or it can be also exported and imported just by its
785                 identification number called <span class="constant">Ordinal</span>.
786                 Although it saves some jumptables file binary size it is currently no
787                 longer used by W32 binaries and this project also does not support such
788                 <span class="constant">Ordinal</span> symbol reference type at all.</p>
789
790                 <p>All the exporting magic is handled by custom script
791                 <span class="fname">captivesym</span> processing the definition file
792                 <FIXME:span class="fname">src/libcaptive/ke/exports.captivesym</span> to produce
793                 the intermediate relaying code
794                 <FIXME:span class="fname">src/libcaptive/ke/exports.c</span>. For details of the
795                 <span class="fname">captivesym</span>-specific source file syntax please
796                 see its documentation: <FIXME:span class="fname">doc/captivesym-pod.html</span>.</p>
797
798                 <a name="functype_pass"><h3>Direct Pass to Original &quot;ntoskrnl.exe&quot;</h3></a>
799
800                         <p>Simple (standalone) functions such as
801                         <span class="function">RtlTimeToSecondsSince1970()</span> can be simply
802                         passed to the original implementation in
803                         <span class="fname">ntoskrnl.exe</span> as they make no hardware access
804                         and they do not expect any special internal data structures to be set up
805                         in advance by an earlier library initialization. A common case are all
806                         the data structures utility functions such as
807                         <span class="constant">GenericTable</span> subsystem or
808                         <span class="constant">LargeMcb</span> handling.</p>
809
810                         <a name="functype_pass_fromunix"><h4>Pass from UNIX Code</h4></a>
811
812                                 <p>Control flow begins in some standard UNIX code. Such code is always
813                                 using <a href="#calltype_cdecl">cdecl call type</a> for all its
814                                 intracalls. <a href="#functype_native_reactos">Native functions
815                                 compiled from <span class="productname">ReactOS</span> sources</a> use
816                                 their own <a href="#calltype">cdecl/stdcall/fastcall</a> declarations
817                                 but these call type modifications are discarded during compilation for
818                                 this project by the <span class="constant">LIBCAPTIVE</span>
819                                 symbol.</p>
820
821                                 <p>UNIX code calls <span class="function">FUNCTIONNAME()</span> relay
822                                 from the generated UNIX jump table. Such relay will debug dump the
823                                 passed arguments and finally pass the control to the original W32
824                                 function code in the proper call type
825                                 <a href="#calltype">cdecl/stdcall/fastcall</a> for a&nbsp;given
826                                 function.</p>
827
828                                 <p>Original W32 code entry point is always trapped by a&nbsp;breakpoint
829                                 although it would not be needed during this specific direct pass from
830                                 UNIX code to the original W32 implementation. Still the breakpoint has
831                                 to be there to catch some other (such as intra-W32) possible calls
832                                 described later. There are several more ways to define breakpoint in
833                                 the code. One way is to use processor hardware breakpoint support but
834                                 the number of breakpoints is limited.  The other way is to patch in the
835                                 <span class="instruction">@{[ 'int $3' ]}</span> instruction but it will invoke
836                                 <span class="constant">SIGTRAP</span> signal handler conflicting with
837                                 the possible debugger (<span class="productname">gdb(1)</span>)
838                                 control. This project uses the <span class="instruction">hlt</span>
839                                 instruction, which also has a&nbsp;single-byte opcode like
840                                 <span class="instruction">@{[ 'int $3' ]}</span> and it is a&nbsp;privileged
841                                 instruction forbidden to be used from the UNIX user space code.
842                                 <span class="instruction">hlt</span> invokes
843                                 <span class="constant">SIGSEGV</span> signal which can be resolved by
844                                 a&nbsp;custom signal handler without any conflict with the possible
845                                 debugger control; <span class="productname">gdb(1)</span> needs the
846                                 following command to pass through such
847                                 <span class="constant">SIGSEGV</span> signal:</p>
848
849                                 <blockquote class="command">
850                                         <p>handle SIGSEGV nostop noprint pass</p>
851                                 </blockquote>
852
853                                 <p>When a breakpoint gets caught, we usually need to return to the
854                                 running code. Unfortunately it is not possible because of the patched
855                                 breakpoint opcode. The breakpoint cannot be simply removed upon return
856                                 as it would permanently lose control over the point of entry. Even if
857                                 the return would include faking of the return address in the bottom
858                                 stack frame to patch the breakpoint back during later function exit it
859                                 still would not solve the catching of inner calls of recursive
860                                 functions. One of the working possibilities would be to patch the
861                                 original instruction back and perform a&nbsp;singlestep provided by
862                                 <span class="function">ptrace(2)</span> syscall. However such
863                                 singlestep needs another controlling UNIX process and it would again
864                                 conflict with the debuggers such as
865                                 <span class="productname">gdb(1)</span>. This project implements the
866                                 singlestep functionality by two consecutive breakpoints
867                                 (<span class="instruction">hlt</span> instructions to be specific):
868                                 The first two instruction addresses of the W32 functions are called
869                                 <span class="productname">slot #1</span> and
870                                 <span class="productname">slot #2</span>, the length of the first
871                                 function instruction has to be analyzed to get the right address of
872                                 <span class="productname">slot #2</span>. When the first breakpoint is
873                                 caught it is necessary to patch the original instruction back and also
874                                 patch another breakpoint in place of
875                                 <span class="productname">slot #2</span>.
876                                 During the <span class="productname">slot #2</span> breakpoint
877                                 invocation the operation will be reverted - the breakpoint will be put
878                                 to <span class="productname">slot #1</span> again and the instruction
879                                 of <span class="productname">slot #2</span> will be restored to be able
880                                 to continue the execution of the function.</p>
881
882                                 <p>W32 function will finish in its specific
883                                 <a href="#calltype">cdecl/stdcall/fastcall call type</a>, the control
884                                 will return to the UNIX jump table relay which will debug dump the
885                                 return value and it will finally pass the control back to the UNIX
886                                 caller in the standard UNIX
887                                 <a href="#calltype_cdecl">cdecl call type</a>.</p>
888
889                                 @{[ doc_img 'fig/functype_patched_pass_fromunix',
890                                                 'Function Type: <span class="constant">pass</span> from UNIX Code' ]}
891
892                         <a name="functype_pass_fromw32"><h4>Pass from W32 Code</h4></a>
893
894                                 <p>This function type is similar to the
895                                 <a href="#functype_pass_fromunix">previous one</a> with the exception
896                                 of more complicated entry point. Unfortunately W32 libraries call their
897                                 own functions directly, using the <span class="instruction">call</span>
898                                 instructions without any patchable jump table. Even the
899                                 <span class="instruction">call</span> argument itself cannot be patched
900                                 according to the relocation table record as such library intra-call
901                                 instruction has no relocation due to its relative argument offset on
902                                 <span class="constant">i386</span>. This time the double-breakpoint
903                                 mechanism <a href="#functype_pass_fromunix">described above</a> gets
904                                 handy since it will catch the entry point when the function gets
905                                 called.  <span class="constant">SIGSEGV</span> handler gets invoked by
906                                 the <span class="instruction">hlt</span> instruction and it will
907                                 redirect the control to the jump table relay function to debug dump the
908                                 function entry arguments (it has no other uses in this call type).</p>
909
910                                 <p>When the relay needs to call the original function it will reach
911                                 exactly the same breakpoint instruction as during the recent
912                                 <span class="constant">SIGSEGV</span> handling redirecting to this
913                                 calling relay.  But this time the
914                                 <span class="constant">through_w32_func</span> field of this function
915                                 record will be set to prevent repeated redirection and to pass the
916                                 control through the breakpoint mangle instead this time.</p>
917
918                                 <p>Returning is not very interesting as the first
919                                 <span class="constant">SIGSEGV</span> handler did a&nbsp;straight jump
920                                 for the redirection purposes without any needed consequent
921                                 handling.</p>
922
923                                 <p>The jump table relay used for the callers from W32 code is
924                                 a&nbsp;different one than the relay being used for the callers
925                                 <a href="#functype_pass_fromunix">from UNIX code</a>. UNIX code always
926                                 uses relay with external <a href="#calltype_cdecl">cdecl call type</a>
927                                 but in this case a&nbsp;relay with the appropriate
928                                 <a href="#calltype">cdecl/stdcall/fastcall call type</a> is used.</p>
929
930                                 @{[ doc_img 'fig/functype_patched_pass_fromw32',
931                                                 'Function Type: <span class="constant">pass</span> from W32 Code' ]}
932
933                         @{[ vskip() ]}
934
935                         <table border="1" align="center">
936                                 <tr><td><span class="fname">captivesym</span> keyword</td><td>pass</td></tr>
937                                 <tr><td>Native code function name                    </td><td>(no implementation)</td></tr>
938                                 <tr><td>W32 traced code from UNIX function name      </td><td>FUNCNAME</td></tr>
939                                 <tr><td>W32 traced code from W32  function name      </td><td>FUNCNAME_cdecl/_stdcall/_fastcall</td></tr>
940                                 <tr><td>Entry/exit debug tracing from UNIX code      </td><td>yes</td></tr>
941                                 <tr><td>Entry/exit debug tracing from W32 code       </td><td>yes</td></tr>
942                                 <caption>Function Type <span class="constant">pass</span> Characteristics</caption>
943                         </table>
944
945                 <a name="functype_wrap"><h3>Wrap of the Original "ntoskrnl.exe" Function</h3></a>
946
947                         <a name="functype_wrap_fromunix"><h4>Wrapping of Call from UNIX Code</h4></a>
948
949                                 <p>The code control flow has no special hardcore features since it is
950                                 very similar to <a href="#functype_pass_fromunix">the direct pass to
951                                 W32 function from UNIX code</a>. All the wrapping is done in the
952                                 standard UNIX <a href="#calltype_cdecl">cdecl call type</a> manner.
953                                 Jump table debug dumping relays are provided twice - the
954                                 &quot;outer&quot; one to trace the parameters from the function caller
955                                 and the &quot;inner&quot; one to trace the call from the wrapper to the
956                                 original W32 code. The &quot;inner&quot; relay also calls the W32 code
957                                 with the appropriate <a href="#calltype">cdecl/stdcall/fastcall call
958                                 type</a>.</p>
959
960                                 @{[ doc_img 'fig/functype_patched_wrap_fromunix',
961                                                 'Function Type: <span class="constant">wrap</span> from UNIX Code' ]}
962
963                         <a name="functype_wrap_fromw32"><h4>Wrapping of Call from W32 Code</h4></a>
964
965                                 <p>This scheme is a&nbsp;combination of the
966                                 <a href="#functype_wrap_fromunix">previous wrap of a&nbsp;call from
967                                 UNIX code</a> and the <a href="#functype_pass_fromw32">direct pass from
968                                 the W32 code</a>. The control is caught and redirected by
969                                 <span class="constant">SIGSEGV</span> handler from the breakpoint
970                                 placed at the entry to the original W32 function code. The second entry
971                                 to the original W32 function with the
972                                 <span class="constant">through_w32_func</span> field of this function
973                                 description already set is done from the &quot;inner&quot; jump table
974                                 relay with the appropriate
975                                 <a href="#calltype">cdecl/stdcall/fastcall call type</a>.</p>
976
977                                 @{[ doc_img 'fig/functype_patched_wrap_fromw32',
978                                                 'Function Type: <span class="constant">wrap</span> from W32 Code' ]}
979
980                         @{[ vskip() ]}
981
982                         <p>Some functions can be <a href="#functype_pass">passed to the original
983                         code</a> but they need their parameters to be checked/prepared.
984                         Currently, such wrapping is only needed for the
985                         <span class="function">ExAllocateFromPagedLookasideList()</span> function
986                         where it is required due to <a href="#init_ntoskrnl">missing execution of
987                         <span class="fname">ntoskrnl.exe</span> initialization</a>,
988                         which would otherwise properly initialize some internal data structures.
989                         In this case the wrapping code detects passing of an uninitialized
990                         parameter and will search through the whole
991                         <span class="fname">ntoskrnl.exe</span> code body at runtime to find the
992                         proper initialization routine containing the correct initialization
993                         parameters.  Passed addresses of static structures must be differentiated
994                         as each of them usually has different initialization parameters. It is
995                         proactive not to have a fixed parameters array as these parameters may
996                         differ across different <span class="fname">ntoskrnl.exe</span>
997                         versions.</p>
998
999                         <table border="1" align="center">
1000                                 <tr><td><span class="fname">captivesym</span> keyword</td><td>wrap</td></tr>
1001                                 <tr><td>Native UNIX wrapping code function name      </td><td>FUNCNAME_wrap</td></tr>
1002                                 <tr><td>W32 traced wrapping code from UNIX func. name</td><td>FUNCNAME</td></tr>
1003                                 <tr><td>W32 traced wrapping code from W32 func. name </td><td>FUNCNAME_cdecl/_stdcall/...</td></tr>
1004                                 <tr><td>W32 traced original code function name       </td><td>FUNCNAME_orig</td></tr>
1005                                 <tr><td>Entry/exit debug tracing from UNIX code      </td><td>yes</td></tr>
1006                                 <tr><td>Entry/exit debug tracing from W32 code       </td><td>yes</td></tr>
1007                                 <caption>Function Type <span class="constant">wrap</span> Characteristics</caption>
1008                         </table>
1009
1010                 <a name="functype_native"><h3>Native Implementation</h3></a>
1011
1012                         <h4>Native Implementation Called from UNIX Code</h4>
1013
1014                                 <p>This is the simplest case of a&nbsp;function call as it is fully
1015                                 handled only by the compiler and/or linker.</p>
1016
1017                                 <p>In this case though, no debug dumping call relay is provided - such
1018                                 relay would need to rename the implementations of native functions to
1019                                 prevent its automatic linking with the caller code. This renaming would
1020                                 not be possible to do by simple <span class="constant">#define</span>
1021                                 since it would also rename any calling statements of such function in
1022                                 the same C&nbsp;sources.  One of the possibilities to solve this would be to
1023                                 utilize <span class="dashdash">--redefine-sym</span> feature of the
1024                                 <span class="productname">objcopy(1)</span> utility. On the other hand
1025                                 there is not much need to catch/debug such calls as both the caller and
1026                                 the callee are provided with full source file debug information for the
1027                                 debugger. Also the callee usually debug dumps its entry/exit parameters
1028                                 by custom debug dumps in the
1029                                 <a href="#functype_native_reactos"><span class="productname">ReactOS</span> implementations</a>.</p>
1030
1031                                 @{[ doc_img 'fig/functype_native_fromunix',
1032                                                 'Function Type: <span class="constant">native</span> from UNIX Code' ]}
1033
1034                         <a name="functype_native_fromw32"><h4>Native Implementation of
1035                                         &quot;unpatched&quot; Library Function Called from W32 Code</h4></a>
1036
1037                                 @{[ doc_img 'fig/functype_unpatched_native_fromw32',
1038                                                 'Function Type: <span class="constant">native</span> of <span class="constant">unpatched</span> from W32 Code' ]}
1039
1040                                 <p>Here comes the differentiation if the project deals either with
1041                                 a&nbsp;<span class="constant">patched</span> or an
1042                                 <span class="constant">unpatched</span> version of the library
1043                                 (<span class="constant">patched</span> is a&nbsp;loaded W32 binary
1044                                 library while <span class="constant">unpatched</span> library is
1045                                 completely provided by this project with no use of the library's
1046                                 original W32 binary file). As the project adjusts the exported symbol
1047                                 address during the patching operation, in some cases the
1048                                 <span class="constant">patched</span> library call may be handled
1049                                 simply as <span class="constant">unpatched</span> library call even for
1050                                 the <span class="constant">patched</span> libraries. Fortunately the
1051                                 distinction is not very important as the project is prepared to
1052                                 properly handle both cases.</p>
1053
1054                                 <p>The W32 caller which imported the symbol will be pointed right to
1055                                 the relaying function. The debug dumping relay will be called from W32
1056                                 code with the appropriate
1057                                 <a href="#calltype">cdecl/stdcall/fastcall call type</a> while the
1058                                 relay will call the implementation of the native function in the
1059                                 standard UNIX <a href="#calltype_cdecl">cdecl call type</a> manner.</p>
1060
1061                         <h4>Native Implementation of &quot;patched&quot; Library Function Called from W32 Code</h4>
1062
1063                                 @{[ doc_img 'fig/functype_patched_native_fromw32',
1064                                                 'Function Type: <span class="constant">native</span> of <span class="constant">patched</span> from W32 Code' ]}
1065
1066                                 <p>The calling scheme is similar to the
1067                                 <a href="#functype_native_fromw32">previous call of
1068                                 <span class="constant">unpatched</span> library function from W32
1069                                 code</a> but the call control is redirected from the entry point of the
1070                                 original W32 binary implementation by the breakpoint and its
1071                                 <span class="constant">SIGSEGV</span> handler as in
1072                                 <a href="#functype_pass_fromw32">the case of passing control from W32
1073                                 call</a>.</p>
1074
1075                                 <p>The original W32 function implementation located in the original
1076                                 loaded binary file is never executed but its entry point needs to be
1077                                 trapped by the breakpoint to be able to catch the function calls within
1078                                 the library.</p>
1079
1080                         @{[ vskip() ]}
1081
1082                         <p>In all cases the final function implementation is a&nbsp;standard UNIX
1083                         code compiled from C&nbsp;sources with full debug information available
1084                         for the debugger. Fortunately all such functions do not need to be coded
1085                         from scratch for this project since there already exist $freespeech
1086                         $ReactOS and $Wine projects and their code can be used instead.</p>
1087
1088                         <p>$Wine project is listed mostly for completeness as almost no
1089                         code was suitable for reuse as it implements W32 user space while this
1090                         project is running pure W32 kernel space environment (in $gnulinux user
1091                         space!).</p>
1092
1093                         <a name="functype_native_reactos"><h4>Native Implementation
1094                                         - <span class="productname">ReactOS</span></h4></a>
1095
1096                                 <p>Some functions are already implemented in the $ReactOS
1097                                 project and they can be used as they are.  Although it would be
1098                                 possible to <a href="#functype_pass">pass some function calls to the
1099                                 original code</a> it is more handy to provide native implementation as
1100                                 there is better control of the data handling during debugging sessions
1101                                 due to the provided debugging symbols.</p>
1102
1103                                 <p>Such functions can be found in
1104                                 <span class="fname">src/libcaptive/reactos/</span> subdirectory.
1105                                 Some functions had to be adjusted for this project
1106                                 - these modifications are compiled conditionally, depending on the
1107                                 <span class="constant">LIBCAPTIVE</span> symbol existence.</p>
1108
1109                                 <p>Later stages of this project reached the level where
1110                                 $ReactOS is yet too immature and the needed functions are usually
1111                                 written just with the sad body:</p>
1112
1113                                 <blockquote class="command">
1114                                         <p>UNIMPLEMENTED;</p>
1115                                 </blockquote>
1116
1117                                 <p>Functions that were not possible to
1118                                 <a href="#functype_pass">pass</a> were reimplemented by this project
1119                                 and placed in the project's implementation directories
1120                                 <a href="#reactos_nocare">instead of extending</a> $ReactOS code.</p>
1121
1122                         <a name="functype_native_wine"><h4>Native Implementation -- <span class="productname">Wine</span></h4></a>
1123
1124                                 <p>Even though $Wine only implements the
1125                                 <span class="productname">Microsoft Windows NT</span> user space, there
1126                                 still are some common functions which could be copied from the $Wine
1127                                 project.</p>
1128
1129                         <a name="functype_native_libcaptive"><h4>Native Implementation - Project Specific</h4></a>
1130
1131                                 <p>As the last resort it was necessary to provide completely own
1132                                 implementation of some API functions such as PC hardware dependent
1133                                 parts or memory management functions.</p>
1134
1135                         @{[ vskip() ]}
1136
1137                         <table border="1" align="center">
1138                                 <tr><td><span class="fname">captivesym</span> keyword</td><td>(none; just the symbol name)</td></tr>
1139                                 <tr><td>Native code function name                    </td><td>FUNCTIONNAME</td></tr>
1140                                 <tr><td>Native traced code from W32 code func. name  </td><td>FUNCTIONNAME_cdecl/_std...</td></tr>
1141                                 <tr><td>Entry/exit debug tracing from UNIX code      </td><td>no</td></tr>
1142                                 <tr><td>Entry/exit debug tracing from W32 code       </td><td>yes</td></tr>
1143                                 <caption>Function Type <span class="constant">native</span> Characteristics</caption>
1144                         </table>
1145
1146                 <a name="functype_undef"><h3>Undefined Function</h3></a>
1147
1148                         <p>Functions not defined by any of the previous function types cannot be
1149                         called by any W32 code including the code of the library implementing
1150                         such function. All functions of <span class="constant">patch</span>ed
1151                         libraries not listed in the <span class="fname">captivesym</span> exports
1152                         file are automatically set to be trapped as fatal program execution
1153                         errors.</p>
1154
1155                         <p>It is not necessary to list the symbols as
1156                         <span class="constant">undef</span> as long as you are just loading the
1157                         W32 <span class="constant">PE-32</span> code and the symbols belong to
1158                         <span class="constant">patch</span>ed library. On the other hand if you
1159                         are loading W32 <span class="fname">.so</span> code or if such symbol is
1160                         a&nbsp;part of <span class="constant">unpatched</span> library (and thus
1161                         being completely provided by the project) you need to list such symbol as
1162                         <span class="constant">undef</span> type to prevent unresolved symbol
1163                         reference.</p>
1164
1165                         <table border="1" align="center">
1166                                 <tr><td><span class="fname">captivesym</span> keyword</td><td>undef</td></tr>
1167                                 <tr><td>Native code function name                    </td><td>(no implementation)</td></tr>
1168                                 <tr><td>Native traced code function name             </td><td>FUNCTIONNAME_cdecl/_stdcall/_fastcall</td></tr>
1169                                 <tr><td>Debug tracing message from UNIX code         </td><td>yes</td></tr>
1170                                 <tr><td>Debug tracing message from W32 code          </td><td>yes</td></tr>
1171                                 <caption>Function Type <span class="constant">undef</span> Characteristics</caption>
1172                         </table>
1173
1174         
1175         <a name="calltype"><h2>API Function Calling Conventions</h2></a>
1176
1177                 <p>Standard UNIX code compiled by GCC (GNU C&nbsp;Compiler) running on host
1178                 $gnulinux always uses <a href="#calltype_cdecl">cdecl</a> ABI (Application
1179                 Binary Interface) calling convention. This calling convention is also the
1180                 default declaration type of UNIX functions.</p>
1181
1182                 <p>W32 uses three different calling conventions in its ABI. They are all
1183                 described in the
1184                 <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vclang/html/_core_argument_passing_and_naming_conventions.asp"><span class="productname">Microsoft</span> documentation</a>.
1185                 It is always necessary to have the proper function declaration
1186                 (prototype) in the caller scope to prevent all sorts of unexpected
1187                 crashes.</p>
1188
1189                 <p>Unfortunately some non-matching combinations of calling conventions
1190                 result in hard to debug bugs: the caller gets back an unexpected stack
1191                 pointer from the callee and upon return it will restore registers from the
1192                 wrong stack pointer place. Since the caller will finally reclaim its stack
1193                 frame from its (uncorrupted) <span class="constant">EBP</span> stack frame
1194                 pointer the caller will return to the caller of the caller correctly. Just
1195                 the registers remain corrupted causing crashes of completely unrelated code
1196                 executed far, far away...</p>
1197
1198                 <p><span class="constant">EDI</span>, <span class="constant">ESI</span> and
1199                 <span class="constant">EBX</span> registers are always saved on the stack.
1200                 They are stored on the stack in this particular order from bottom to top
1201                 addresses (using the <span class="instruction">push EBX</span>,
1202                 <span class="instruction">push ESI</span>,
1203                 <span class="instruction">push EDI</span> sequence). Fortunately $gnulinux
1204                 GCC has the same register saving behaviour. If some register corruption
1205                 occurs the calling type presented between the caller and callee should be
1206                 checked.</p>
1207
1208                 <a name="calltype_cdecl"><h3>W32 Calling Convention &quot;cdecl&quot;</h3></a>
1209
1210                         <p>The only calling convention in the UNIX world. The default one for all
1211                         the compilers. All the arguments are passed on the stack, no arguments
1212                         are cleaned by the callee. Possible inconsistencies in the number of
1213                         function arguments with the function prototype used by the caller are
1214                         harmless. Variable arguments lists can be passed by this convention.</p>
1215
1216                         @{[ doc_img 'fig/calltype_cdecl',
1217                                         'W32 Calling Convention <span class="constant">cdecl</span> Scheme' ]}
1218
1219                         <table border="1" align="center">
1220                                 <tr><td>Arguments freed by         </td><td>caller</td></tr>
1221                                 <tr><td>Arguments on the stack     </td><td>#0 ... #(n-1)</td></tr>
1222                                 <tr><td>Arguments in the registers </td><td>none</td></tr>
1223                                 <tr><td>GCC attribute              </td><td><span class="command">__attribute__((__cdecl__))</span> (default)</td></tr>
1224                                 <caption>Calling Convention <span class="constant">cdecl</span> Characteristics</caption>
1225                         </table>
1226
1227                 <h3>W32 Calling Convention &quot;stdcall&quot;</h3>
1228
1229                         @{[ doc_img 'fig/calltype_stdcall',
1230                                         'W32 Calling Convention <span class="constant">stdcall</span> Scheme' ]}
1231
1232                         <p>Convention never used in the UNIX world. It needs to be specified for
1233                         W32 compilers. All the arguments are passed on the stack, all the
1234                         arguments are cleaned by the callee. Possible inconsistencies in the
1235                         number of function arguments with the function prototype used by the
1236                         caller will result in fatal crash. Variable arguments lists cannot be
1237                         passed by this convention - use <a href="#calltype_cdecl">cdecl</a>
1238                         instead.</p>
1239
1240                         <table border="1" align="center">
1241                                 <tr><td>Arguments freed by         </td><td>callee</td></tr>
1242                                 <tr><td>Arguments on the stack     </td><td>#0 ... #(n-1)</td></tr>
1243                                 <tr><td>Arguments in the registers </td><td>none</td></tr>
1244                                 <tr><td>GCC attribute              </td><td><span class="command">__attribute__((__stdcall__))</span></td></tr>
1245                                 <caption>Calling Convention <span class="constant">stdcall</span> Characteristics</caption>
1246                         </table>
1247
1248                 <h3>W32 Calling Convention &quot;fastcall&quot;</h3>
1249
1250                         <p>Convention never used in the UNIX world. It needs to be specified for
1251                         W32 compilers. Convention used in the W32 world for its low calling
1252                         overhead. All but the first two arguments are passed on the stack, such
1253                         arguments are cleaned by the callee. First two arguments are passed in
1254                         the registers <span class="constant">ECX</span> and
1255                         <span class="constant">EDX</span> respectively. Possible inconsistencies
1256                         in the number of function arguments with the function prototype used by
1257                         the caller will result in fatal crash. Variable arguments lists cannot be
1258                         passed by this convention - use <a href="#calltype_cdecl">cdecl</a>
1259                         instead.</p>
1260
1261                         <p>GCC (GNU C&nbsp;Compiler) native support for this calling convention
1262                         is pretty fresh and it is currently present only in the recent CVS
1263                         versions since 21st December of 2002 which should get released as GCC
1264                         version 3.4. This project solved the unsupported calling convention by
1265                         declaration of arguments passed in registers by
1266                         <span class="command">__attribute__((__regparm__(3)))</span>.
1267                         W32 passes the arguments in registers in the order
1268                         <span class="constant">ECX</span>, <span class="constant">EDX</span> but
1269                         GCC passes them in registers <span class="constant">EAX</span>,
1270                         <span class="constant">EDX</span>, <span class="constant">ECX</span>.
1271                         This incompatibility is compensated at C&nbsp;source level in the
1272                         <a href="#functype">relaying code</a> generated by
1273                         <span class="fname">captivesym</span> relay generator.</p>
1274
1275                         @{[ doc_img 'fig/calltype_fastcall',
1276                                         'W32 Calling Convention <span class="constant">fastcall</span> Scheme' ]}
1277
1278                         <table border="1" align="center">
1279                                 <tr><td>Arguments freed by         </td><td>callee</td></tr>
1280                                 <tr><td>Arguments on the stack     </td><td>#2 ... #(n-1)</td></tr>
1281                                 <tr><td>Arguments in the registers </td><td><span class="constant">ECX</span>=#0,
1282                                                                             <span class="constant">EDX</span>=#1</td></tr>
1283                                 <tr><td>GCC &ge;3.4 attribute      </td><td><span class="command">__attribute__((__fastcall__))</span></td></tr>
1284                                 <tr><td>GCC &lt;3.4 attr. emulation</td><td><span class="command">__attribute__((__stdcall__))</span></td></tr>
1285                                 <tr><td>                           </td><td><span class="command">__attribute__((__regparm__(3) /* EAX,EDX,ECX */))</span></td></tr>
1286                                 <caption>Calling Convention <span class="constant">fastcall</span> Characteristics</caption>
1287                         </table>
1288
1289         <a name="synchronous"><h2>Multithreading and Multiple Processors</h2></a>
1290
1291                 <p>W32 platform stands on its&nbsp;thorough architecture parallelism. It
1292                 must lock all its objects to maintain coherence in presence of
1293                 multithreading and multiple processors. Since the author of this project
1294                 considers any parallel execution a serious obstacle for debugging, the whole
1295                 project architecture was designed to prevent any nondeterministic behaviour.
1296                 Therefore this project always emulates uniprocessor
1297                 <span class="productname">Microsoft Windows NT</span> kernel
1298                 (<span class="constant">KeNumberProcessors</span> symbol is always 1),
1299                 everything runs in the single initial thread/process and all the filesystem
1300                 operations are performed as synchronous
1301                         (&quot;synchronous&quot; by flags
1302                         <span class="constant">FILE_SYNCHRONOUS_IO_ALERT</span>,
1303                         <span class="constant">FO_SYNCHRONOUS_IO</span>,
1304                         <span class="constant">IRP_SYNCHRONOUS_API</span>,
1305                         <span class="constant">IRP_SYNCHRONOUS_PAGING_IO</span>,
1306                         forced <span class="constant">TRUE</span> result of
1307                         <span class="function">IoIsOperationSynchronous()</span>
1308                         etc.).
1309                 <span class="constant">STATUS_PENDING</span> result code indicating that
1310                 request should be completed in the next callback of the driver is
1311                 considered <a href="#paranoia">fatal</a> as it should not happen for the
1312                 requested synchronous <span class="constant">IRP</span>s (I/O Request
1313                 Packets). Since there is a&nbsp;possibility some filesystem would require
1314                 a&nbsp;real W32 parallel thread all the code that would be hit by W32
1315                 multithreading capability is marked by
1316                 <span class="constant">TODO:thread</span> comment for a&nbsp;possible
1317                 future extension.</p>
1318
1319                 <p>Multiple processors (SMP) support will never need to be implemented
1320                 since uniprocessor W32 kernels apparently run the filesystem driver modules
1321                 fine. As this project implements only the uniprocessor W32 kernel all the
1322                 processor locking functions and structures such as
1323                 <span class="constant">KSPIN_LOCK</span> etc. can be safely implemented as
1324                 no-operations.</p>
1325
1326                 <p>Asynchronous callbacks registered for
1327                 <span class="constant">IO_WORKITEM</span>s are passed as GLib idle
1328                 functions by <span class="function">g_idle_add_full()</span>. Although they
1329                 will probably never be executed during non-interactive project's batch
1330                 executions it is the&nbsp;responsibility of W32 driver implementation to
1331                 complete all the pending tasks before its W32 shutdown. Such W32 shutdown
1332                 is done during cleanup of the project's&nbsp;execution by
1333                 <span class="function">captive_shutdown()</span>.</p>
1334
1335         <a name="paranoia"><h2>Paranoia Checks</h2></a>
1336
1337                 <p>A&nbsp;general approach of software projects development is to implement
1338                 many internal sanity checks during the development stage but to produce the
1339                 most optimized final release product without those debugging checks.</p>
1340
1341                 <p>Facilities for these practices can be seen in the standard
1342                 C&nbsp;include files for example as function
1343                 <span class="function">assert()</span> which gets disabled by the
1344                 <span class="constant">NDEBUG</span> symbol used during the final optimized
1345                 executable compilation. This project uses Gnome GLib messaging subsystem
1346                 offering sanity checks discarded by symbols
1347                 <span class="constant">G_DISABLE_ASSERT</span> and
1348                 <span class="constant">G_DISABLE_CHECKS</span>.
1349                 <span class="productname">Microsoft</span> also produces two versions of
1350                 its products - regular customers use the &quot;free build&quot; (also
1351                 called &quot;retail&quot;) while the programmers should develop their code
1352                 on the &quot;checked build&quot; product releases.</p>
1353
1354                 <p>As this project will always run unknown binary code of proprietary W32
1355                 filesystem drivers, the code can never be trusted. Such code even runs in
1356                 the same unprotected address space as its controlling UNIX code. Since
1357                 there is not enough documentation for the W32 components of the system and
1358                 also such documentation is usually misleading it can never be considered as
1359                 100% emulation. Even in the final releases all the sanity checks
1360                 implemented in this project should remain active as all the project's code
1361                 always interacts with unknown and untrusted W32 binaries.</p>
1362
1363                 <p><span class="productname">Microsoft Windows NT</span> code is written in
1364                 a&nbsp;foolproof style as it accepts even invalid input values, which
1365                 it usually corrects. This makes long-term debugging a&nbsp;pain as it hides
1366                 sources of problems. &quot;Checked build&quot; releases were probably
1367                 designed to fix this flaw by strict consistency checks but it did not reach
1368                 its goals as such checks are usually missing in the code.</p>
1369
1370                 <p>This project has strict consistency checks across all the code to make
1371                 the debugging phase easy enough. Failed sanity check is not always
1372                 a&nbsp;bug - sometimes it just means the real W32 binary code is more
1373                 benevolent than it could be expected according to the documentation and
1374                 such sanity check gets removed for the next version build. In other cases
1375                 the failed sanity checks mean the execution path for some unexpected
1376                 arguments combination was not yet implemented by this project. It may also
1377                 mean a bug, of course...</p>
1378
1379                 <p>Last but not least - never miss a&nbsp;possible sanity check as its
1380                 later removal is an order of magnitude cheaper than an&nbsp;uncaught
1381                 invalid assumption. Failed assertion is not always a&nbsp;bug although it
1382                 has to be fixed, of course.</p>
1383
1384         <a name="client_interface"><h2>Client Filesystem Interface</h2></a>
1385
1386                 <p>While this project successfully communicates with the W32 filesystem
1387                 driver (considered as the lower layer) it must also somehow offer its open
1388                 filesystem interface service to some real client software (upper layer).
1389                 This project offers its own custom filesystem operations interface of <span
1390                 class="constant">libcaptive</span> library based on GLib
1391                 <span class="constant">GObject</span> OO system. Interface prototypes are
1392                 specified in the project's&nbsp;<span class="fname">client-*.h</span>
1393                 include files.</p>
1394
1395                 <p>The filesystem service can be offered in several ways:</p>
1396
1397                 <ul>
1398                         <li>
1399                                 <p>One possibility would be to write
1400                                 <a name="client_interface_customapp">a custom client application</a>
1401                                 for this project such as file manager or a&nbsp;shell. Although it
1402                                 would implement the most appropriate user interface to the set of
1403                                 functions offered by this project (and W32 filesystem API) it has the
1404                                 disadvantage of special client software. Appropriate client is provided
1405                                 by this project as:
1406                                 <span class="fname">src/client/cmdline/cmdline-captive</span></p>
1407                         </li>
1408                         <li>
1409                                 <p>The real UNIX OS filesystem implementation must be completely
1410                                 implemented inside the hosting OS kernel. This requires special coding
1411                                 methods with limited availability of coding features and libraries.
1412                                 Also it would give the full system control to the untrusted W32
1413                                 filesystem driver code with possibly fatal consequences of yet
1414                                 unhandled W32 emulation code paths. It would benefit from the best
1415                                 execution performance but this solution was never considered a real
1416                                 possibility.</p>
1417                         </li>
1418                         <li>
1419                                 <p>The common approach
1420                                 <a name="offered_NFS">of filesystem implementations</a>
1421                                 outside UNIX OS kernel were custom NFS servers usually running on the
1422                                 same machine as the NFS-connected client as such NFS server is usually
1423                                 an ordinary UNIX user space process. It would be possible to implement
1424                                 this project as a&nbsp;custom NFS server but the NFS protocol itself
1425                                 has a&nbsp;lot of fundamental flaws and complicated code for backward
1426                                 compatibility.</p>
1427                         </li>
1428                         <li>
1429                                 <p>Currently there is already implemented
1430                                 <a name="offered_gnomevfs" href="#offered_gnomevfs_todo">Gnome-VFS interface</a>
1431                                 to the custom filesystem interface of this project's&nbsp;library <span
1432                                 class="constant">libcaptive</span>.
1433                                 The $GnomeVFSmodule can be used by a&nbsp;Gnome-VFS aware client (such
1434                                 as <span class="fname">gnome-vfs/tests/test-shell</span>).</p>
1435
1436                                 <FIXME:lufs-gvfs>
1437                                 <p>The <span class="productname">Gnome-VFS-module</span> can be further
1438                                 utilized by the <span class="productname">UserVFS</span>
1439                                 \bookcitation{UserVFS-2.0} software ported to provide local <span
1440                                 class="productname">Coda</span> \bookcitation{Coda} network filesystem
1441                                 server implementation similar to the <a href="#offered_NFS">NFS
1442                                 server</a> solution but with much more acceptable network protocol ---
1443                                 more about this actual scheme can be found in \link{architecture}{the
1444                                 project architecture description}.</p>
1445                         </li>
1446                         <li>
1447                                 <FIXME:LUFS>
1448                                 <p>Direct interface for the Host-OS kernel would be provided
1449                                 by the
1450                                 <a name="fuse_interface"></a>
1451                                 <span class="productname">FUSE</span> \bookcitation{FUSE} project <a href="#offered_FUSE">described
1452                                 later in this document</a>. This interface is currently not yet implemented.
1453                                 Although it would be much more straightforward than
1454                                 <a href="#offered_gnomevfs">Gnome-VFS interface</a> described above,
1455                                 its biggest disadvantage would be the requirement to replace/update
1456                                 the stock distributions kernel package as it usually does not
1457                                 have the <span class="productname">FUSE</span> \bookcitation{FUSE} filesystem support while it already supports
1458                                 the <span class="productname">Coda</span> \bookcitation{Coda} interface, which is sufficient for the
1459                                 ported <span class="productname">UserVFS</span> \bookcitation{UserVFS-2.0} interface.</p>
1460                         </li>
1461                 </ul>
1462
1463         <h2>3rd Party Projects Bugfixes</h2>
1464
1465                 <p>Implementation of this project required certain bugfixes to 3rd party
1466                 software packages:</p>
1467
1468                 <h3>GNU Libtool, A&nbsp;Generic Library Support Script</h3>
1469                 
1470                         <p><span class="productname"><a href="http://www.gnu.org/software/libtool/">libtool</a></span>:
1471                         Handle duplicate object file names when performing piecewise archive
1472                         linking by renaming object files when needed.</p>
1473
1474                 <h3>dosfstools, MS-DOS FAT Filesystems Support on Linux</h3>
1475
1476                         <p><span class="productname"><a href="ftp://ftp.uni-erlangen.de/pub/Linux/LOCAL/dosfstools/">dosfstools</a></span>:
1477                         Prevent generation of <span class="constant">FAT-32</span> filesystems
1478                         not supported by the (buggy?) W32 platform
1479                         <span class="fname">fastfat.sys</span> implementation.</p>
1480
1481                 <h3>ext2fsd, Ext2 File System Driver</h3>
1482
1483                         <p><span class="productname"><a href="http://sys.xiloo.com/projects/projects.htm#ext2fsd">Ext2fsd</a></span>:
1484                         Many filesystem corruption fixes, missing filesystem unregistration
1485                         etc.</p>
1486
1487
1488 <h1>Further Development</h1>
1489
1490         <p>All the W32 filesystem operations of <span class="fname">cdfs.sys</span>,
1491         <span class="fname">fastfat.sys</span>
1492         and
1493         <span class="fname">ext2fsd.sys</span> can be successfully executed.
1494         The further development tasks include:</p>
1495
1496         <ul>
1497                 <li>
1498                         <p>The primary goal is to reach <span class="productname">NTFS</span>
1499                         filesystem (<span class="fname">ntfs.sys</span>) compatibility.
1500                         A&nbsp;lot of imported symbols are missing although it is expected most of
1501                         them can be just safely passed for execution in the original
1502                         <span class="fname">ntoskrnl.exe</span>.</p>
1503                 </li>
1504                 <li>
1505                         <p>There may still be valid code paths where some emulated W32 kernel
1506                         functionality and symbols remain unimplemented as these code paths were
1507                         just not hit during testing. The proper way would be to check all the
1508                         possibilities of such code paths execution from the filesystem driver
1509                         code disassembly.</p>
1510                 </li>
1511                 <li>
1512                         <p>No unusual error codes are expected from the filesystem drivers and
1513                         any such return codes will abort the project's execution. For example
1514                         code <span class="constant">STATUS_NO_SUCH_FILE</span> is expected and
1515                         correctly recognized but
1516                         <span class="constant">STATUS_FILE_CORRUPT_ERROR</span> will stop driver
1517                         execution.</p>
1518
1519                         <p><a name="exception_fatal">No exceptions in W32 code are allowed</a>
1520                         - any thrown exception will result in driver execution abortion (instead
1521                         of just returning some error code as in the original W32 environment).</p>
1522
1523                         <p>These issues should cease to be a&nbsp;problem after deployment of
1524                         sandbox wrapper which will restart the filesystem driver after any
1525                         unexpected error.</p>
1526                 </li>
1527                 <li>
1528                         <p><a name="todo_sandbox">Completion and activation of the sandbox
1529                         wrapper.</a> <span class="fname">src/libcaptive/sandbox/</span> sources
1530                         currently implement the base of both the client and the server sides of
1531                         CORBA interface to separate the client calling filesystem operations from
1532                         the W32 filesystem driver itself. Although CORBA usually makes sense for
1533                         crossmachine network interconnections here it gets a&nbsp;role of
1534                         inter-process interface between the regular client process and the
1535                         <span class="constant">chroot</span>ed/unprivileged/<span class="constant">ulimit</span>ed
1536                         environment of the W32 emulation address space.</p>
1537
1538                         <p>Any W32 binary file must be always considered untrusted and therefore
1539                         it is needed to be sandboxed and accessible only via the CORBA interface.
1540                         Furthermore it is needed for clean implementation of $GnomeVFSmodule as
1541                         this project always handles <a href="#mounted_one">exactly one mounted
1542                         filesystem</a> but $GnomeVFSmodule interface expects unlimited number of
1543                         mounts in the scope of one process.</p>
1544                 </li>
1545                 <li>
1546                         <p>Project offers
1547                         <a name="offered_gnomevfs_todo">the filesystem access as its custom UNIX API</a>
1548                         (<span class="fname">captive/client-*.h</span>). This API is currently
1549                         offered in the scope of $GnomeVFSmodule interface as a filter applied to
1550                         the filesystem device (or filesystem image file).
1551                         As $GnomeVFS has no officially supported method of generic $gnulinux
1552                         kernel filesystem access it may be better to provide
1553                         <FIXME:LUFS><a name="offered_FUSE">an interface</a> for <span
1554                         class="productname">FUSE</span> \bookcitation{FUSE} instead.</p>
1555
1556                         <p>To get transparent access to W32 filesystems from legacy
1557                         (=non <span class="productname">Gnome-VFS-2.0</span> aware) applications it is possible to use a draft
1558                         port \bookcitation{UserVFS-2.0} of the original <span class="productname">UserVFS</span>
1559                         \bookcitation{UserVFS} to <span class="productname">Gnome-VFS-2.0</span> interface.
1560                         It is also possible to use the test utilities of <span class="productname">Gnome-VFS-2.0</span> \bookcitation{GnomeVFS} package.</p>
1561                 </li>
1562                 <li>
1563                         <p>Implementation of interface to this project by
1564                         <span class="productname"><a href="http://surprise.sourceforge.net/">Partition Surprise</a></span>
1565                         partition manager. Although there currently exists
1566                         <span class="productname"><a href="http://mlf.linux.rulez.org/mlf/ezaz/ntfsresize.html">ntfsresize</a></span>
1567                         it is a data structures reverse engineered solution which may have
1568                         problems on various hard drives. <span class="productname">Partition
1569                         Surprise</span> project would be able to resize the disk safely by using
1570                         just the original W32 filesystem driver file although with some
1571                         performance hit.</p>
1572                 </li>
1573         </ul>
1574
1575
1576 <h1>Related Projects</h1>
1577
1578         <p>The usual solution for file exchange between $freespeech operating systems
1579         and <span class="productname">Microsoft Windows NT</span> is to use
1580         <span class="productname">FAT32</span> (called
1581         <span class="productname">vfat</span> in $gnulinux) partition and swap the files over it. This method is not
1582         very comfortable as you never have access to all the files of the other
1583         operating system.</p>
1584
1585         <a name="LinuxNTFScompet"><h2>$LinuxNTFS</h2></a>
1586
1587                 <p>Although $LinuxNTFS takes a&nbsp;completely different approach and has
1588                 a&nbsp;different architecture, its final goal is the same as for this
1589                 project - reliable read-write <span class="productname">NTFS</span>
1590                 filesystem support. $LinuxNTFS goes the way of reverse engineering
1591                 filesystem data structures (and possibly
1592                 <span class="fname">ntfs.sys</span> itself). Unfortunately after many years
1593                 of its development it did not yet reach the state of reliable read-write
1594                 access although its read-only part is considered trustworthy.</p>
1595
1596                 <p>Using $LinuxNTFS for read-only access to existing partition with
1597                 <span class="productname">Microsoft Windows NT</span> installation is
1598                 planned to be able to acquire existing <span class="fname">ntfs.sys</span>,
1599                 <span class="fname">ntoskrnl.exe</span> and possibly
1600                 <span class="fname">ksecdd.sys</span> (imported by
1601                 <span class="fname">ntfs.sys</span>) files from the user's
1602                 <span class="productname">NTFS</span> partition.</p>
1603
1604         <h2><span class="productname"><a href="http://www.cgsecurity.org/ntfs.html">NTPwd NTFS Driver</a></span></h2>
1605
1606                 <p>DOS based <a href="http://www.gnu.org/licenses/gpl.html">GPL-2.0</a>
1607                 read-write NTFS driver. Filesystem structures are reverse engineered in the
1608                 way of <a href="#LinuxNTFScompet">Linux-NTFS Project</a>. As it is not very
1609                 actively maintained it reaches a&nbsp;lower level of
1610                 <span class="productname">NTFS</span> compatibility.</p>
1611
1612         <h2>Virtual Machine with <span class="productname">Microsoft Windows NT</span></h2>
1613
1614                 <p>Original <span class="productname">Microsoft Windows NT</span>
1615                 operating system can be run inside a virtual machine running under
1616                 $gnulinux (or vice versa) and share the read-write disk partitions by using
1617                 a network file sharing through a&nbsp;virtual network card.</p>
1618
1619                 <p>Although there will be full filesystem structures compatibility the
1620                 <span class="productname">NTFS</span> partition cannot be accessed with no
1621                 system installed (or with non-bootable crashed system to repair it this
1622                 way)
1623                         (Although this project requires the original
1624                         <span class="fname">ntfs.sys</span> it can be obtained from the legal
1625                         <span class="productname">Microsoft Windows NT</span> CD.),
1626                 it will have substantial system resources requirement and you also need
1627                 a virtual machine software product such as commercial
1628                 <span class="productname"><a href="http://www.vmware.com/download/workstation.html">VMware Workstation</a></span>.</p>
1629
1630
1631 <h1>Conclusion</h1>
1632
1633         <p>The project established <a href="#existing_emulation">a&nbsp;new form</a>
1634         of W32 emulation model suitable for existing proprietary binary W32 kernel
1635         code (drivers) while being hosted in an open source operating system
1636         (currently $gnulinux). Currently, only the subsystems required by W32
1637         filesystem drivers are implemented but the project can be further extended
1638         for compatibility with various hardware-related drivers such as W32 video
1639         drivers, W32 disk interface drivers etc.</p>
1640
1641         <p>Some W32 kernel space subsystems were implemented for the first time as
1642         $freespeech code as they are still missing in the only currently available
1643         $freespeech W32 kernel implementation, $ReactOS. Some W32 kernel function
1644         behaviour expected by the drivers had to be reverse engineered and documented
1645         in this project's&nbsp;API documentation (not listed in this book) and/or in
1646         its source files, because its description in the
1647         <span class="productname">Microsoft</span> documentation is missing.</p>
1648
1649         <p>Author had to get familiar both with the W32 kernel API and also with the
1650         W32 kernel code by the reverse engineering. This experience also covers the
1651         first <span class="productname">Microsoft Windows</span> compatible code ever
1652         written by the author - <span class="fname">hal.dll</span> (Hardware
1653         Abstraction Layer) part of W32 kernel.</p>
1654
1655         <p>Certain UNIX implementation interfaces allow a regular, non-privileged
1656         user of UNIX system to mount image files with any W32 filesystem supported by
1657         this project. Such mount operation usually requires UNIX
1658         <span class="constant">root</span> privileges to do so. On the other hand the
1659         choice of supported filesystem types is very limited as only a&nbsp;few
1660         filesystem types are supported for the W32 platform.</p>
1661 HERE
1662
1663
1664 My::Web->footer();