modperl bootstrap
[www.jankratochvil.net.git] / project / captive / doc / Index.html.pl
1 #! /usr/bin/perl
2
3 # $Id$
4 # Captive project doc Index page Perl template.
5 # Copyright (C) 2003 Jan Kratochvil <project-www.jankratochvil.net@jankratochvil.net>
6
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; exactly version 2 of June 1991 is required
10
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 # GNU General Public License for more details.
15
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19
20
21 package project::captive::doc::Index;
22 require 5.6.0;  # at least 'use warnings;' but we need some 5.6.0+ modules anyway
23 use vars qw($VERSION $CVS_ID);
24 $VERSION=do { my @r=(q$Revision$=~/\d+/g); sprintf "%d.".("%03d"x$#r),@r; };
25 $CVS_ID=q$Id$;
26 use strict;
27 use warnings;
28
29 BEGIN{ our $top_dir=undef; if (open my $mk,'<','Makefile') { $top_dir=(split /\s/,(grep /^top_srcdir/,<$mk>)[0])[-1]; close $mk; } else { warn "Cannot open Makefile to find top_srcdir: $!\n"; } if (defined $top_dir) { require lib; lib->import($top_dir); } }  # take the last word of the Makefile's first top_srcdir line and put it on @INC; lexical handle, checked 3-arg open, no string eval
30 use My::Web;
31
32
33 My::Web->init(  # page setup: hands My::Web the package name, the page title and extra CSS
34                 "__PACKAGE__"=>__PACKAGE__,  # literal key "__PACKAGE__" mapped to this package's name
35                 "title"=>'Captive NTFS doc',
36                 "head_css"=>"
37 .productname { font-family: cursive; }
38 .fname       { font-family: monospace; }
39 .constant    { font-family: monospace; }
40 .author      { font-family: cursive; }
41 .stuff       { font-style: italic; font-size: larger; margin-left: 20%; margin-right: 10%; }
42 .function    { font-family: monospace; }
43 .type        { font-family: monospace; }
44 .command     { font-family: monospace; }
45 .instruction { font-style: italic; }
46 ",  # end of the "head_css" string: CSS rules for classes such as productname/fname used in the page text below
47                 );
48 My::Web->heading();  # NOTE(review): presumably emits the page header -- confirm in My::Web
49
50
51 sub doc_img  # Returns the HTML of a centered, borderless one-cell table holding an image and its caption.
52 {
53 my($img_base,$caption)=@_;
54         # Both arguments go to img() unchanged; $caption is additionally HTML-escaped for the <caption> element.
55         my $r="";
56         $r.='<table border="0" align="center">'."\n";
57                 $r.="\t<caption>".CGI::escapeHTML($caption)."</caption>\n";  # HTML 4.01 permits <caption> only right after the <table> start tag
58                 $r.="\t<tr><td>".img($img_base,$caption)."</td></tr>\n";
59         $r.='</table>'."\n";
60         return $r;
61 }
62
63 my $freespeech=a_href('http://www.gnu.org/philosophy/free-sw.html','Free');  # "Free" in the free-as-in-speech sense, as the page text explains
64 my $freebeer=a_href('http://www.gnu.org/philosophy/free-sw.html','free (as in beer)');  # same target URL, gratis wording
65
66 sub productname  # Wraps the result of a_href() for the given URL and name in <span class="productname">.
67 {
68 my($link,$label)=@_;
69         my $anchor=a_href($link,CGI::escapeHTML($label));  # the label is HTML-escaped before being passed on
70         return '<span class="productname">'.$anchor.'</span>';
71 }
72 my $Wine=productname('http://www.winehq.com/','Wine');  # ready-made product markup interpolated into the page text below
73 my $ReactOS=productname('http://www.reactos.com/','ReactOS');
74 my $LinuxNTFS=productname('http://linux-ntfs.sourceforge.net/','Linux NTFS');
75 my $GnomeVFS=productname('http://developer.gnome.org/doc/API/gnome-vfs/','Gnome-VFS');
76 my $GnomeVFSmodule=productname('http://developer.gnome.org/doc/API/gnome-vfs/modules.html','Gnome-VFS-module');
77 my $gnulinux="GNU/Linux";  # plain text, no link
78
79
80 print vskip('10ex').'<h1 align="center">!!! PRELIMINARY - TO BE UPDATED !!!</h1>'."\n".vskip('10ex');  # banner heading with a vskip("10ex") before and after
81
82 print <<"HERE";
83 <h1>Abstract</h1>
84
85 <p>Existing binary Microsoft Windows file system drivers were exploited
86 for accessing drives with possibly proprietary file system data structures.
87 Open file system API is provided to access these file system drivers.
88 Microsoft Windows system components required by these drivers
89 were analyzed and successfully emulated in the GNU/Linux operating system.
90 Currently the implementation allows applications running under the GNU/Linux
91 operating system to access VFAT, ISO9660 and EXT2 drives. NTFS file system
92 support is the final goal; it is currently being developed on the basis
93 of this project's assets.</p>
94
95
96 <h1>Reasons for the Implementation</h1>
97
98         <p>Currently there is no possibility for any of the available $freespeech
99                 ($freespeech used in the following text in the meaning of
100                 &quot;<a href="http://www.gnu.org/philosophy/free-sw.html">free as in speech</a>&quot;)
101         operating systems to reliably write to the most common disk partition
102         filesystem type - <span class="productname">Microsoft NTFS</span>. It would
103         be already supported a long time ago but there is no proper documentation of
104         <span class="productname">NTFS</span> filesystem data structures available.
105         Since <span class="productname">Microsoft</span> corporation continues in its
106         propagation of <span class="productname">Microsoft Windows NT</span>
107                 (<span class="productname">NT</span> identifier used in the following text
108                 applies to all the products of <span class="productname">Microsoft</span>
109                 <span class="productname">NT</span> series such as
110                 <span class="productname">NT&nbsp;4.0</span>,
111                 <span class="productname">2000</span> as NT-5.0
112                 and
113                 <span class="productname">XP</span> as NT-5.1.)
114         based operating systems <span class="productname">NTFS</span> is the default
115         disk file system type for new installations as described in the
116         <a href="http://www.microsoft.com/hwdev/tech/storage/ntfs-preinstallP.asp">recommendations
117         report</a> by <span class="productname">Microsoft</span>.</p>
118
119         <p>Unfortunately the <span class="productname">NTFS</span> filesystem has too
120         complex data structure to allow a complete reverse engineering process in
121         reasonable time. Currently available $freespeech solutions such as $LinuxNTFS
122         filesystem have already implemented (more or less) reliable reverse
123         engineered read-only access. However <a name="reliability">the
124         reliability</a> of the read-write part of the access requires much better
125         knowledge of the <span class="productname">NTFS</span> data structures. Also
126         any future versions of <span class="productname">NTFS</span> filesystem would
127         require another major reverse engineering effort.</p>
128
129
130 <h1>Goals of This Stage of the Project</h1>
131
132         <p>The <a name="NTFSgoal">ultimate goal</a> of this project is definitely the
133         free implementation of <a href="#reliability">reliable</a> read-write <span
134         class="productname">NTFS</span> filesystem driver. This project chose to
135         solve this problem in the style of $Wine project by using the original binary
136         <span class="fname">ntfs.sys</span> and emulating all the required layers of
137         <span class="productname">Microsoft Windows NT</span> for it.</p>
138
139         <p>Unfortunately this effort is tainted by only partial and generally
140         insufficient documentation of API between filesystem driver
141         (<span class="fname">ntfs.sys</span>) and the
142         <span class="productname">Microsoft Windows NT</span>
143         (&quot;<a href="http://mail.gnu.org/archive/html/libtool/2000-09/msg00000.html">W32</a>&quot;
144         in the following text) kernel <span class="fname">ntoskrnl.exe</span>. Note
145         that this API is different from the one being used in the $Wine project
146         since <span class="productname">Wine</span> implements only the user space
147         part of W32.</p>
148
149         <p>There also exists a $freespeech
150         <span class="fname"><a href="http://sys.xiloo.com/projects/projects.htm#ext2fsd">ext2fsd.sys</a></span>
151         W32 filesystem driver for <span class="constant">ext2</span> filesystems with
152         source files freely available for it. Moreover original
153         <span class="productname">Microsoft Windows NT</span> filesystems
154         <span class="fname">cdfs.sys</span> and
155         <span class="fname">fastfat.sys</span> (which correspond to Linux
156         <span class="productname">iso9660</span> and
157         <span class="productname">vfat</span> filesystems, resp.) are easy enough to
158         get working in reasonable time. All these filesystem drivers also use only
159         the documented filesystem data structures which makes their behaviour better
160         controllable when debugging the project.</p>
161
162         <p>Therefore this stage of the project is intended to get only the original
163         W32 binary form of <span class="fname">cdfs.sys</span> and
164         <span class="fname">fastfat.sys</span> drivers working. This goal was
165         achieved and the compatibility with <span class="fname">ext2fsd.sys</span>
166         can be considered as an additional benefit.</p>
167
168
169 <h1>Architecture</h1>
170
171         <p>Although this project attempts to be as general and crossplatform as
172         possible to avoid being needlessly bound by any resources the current
173         implementation is being developed/tested on $gnulinux. The principle of the
174         project lies in the glue between
175         <span class="productname">Microsoft Windows NT</span> kernel space
176         environment and $gnulinux user space process environment. Currently there are
177         no plans to ever extend the project's crossplatformity beyond the
178         <span class="constant">i386</span> processor
179                 (<span class="constant">i386</span> used here as
180                 <a href="http://www.intel.com/">Intel</a> architecture covering 32-bit
181                 processors compatible with <span class="constant">i386</span>,
182                 <span class="constant">i486</span>, ...).</p>
183
184         <h2><a name="existing_emulation">Existing Emulation Projects</a></h2>
185
186                 <p>There are two well-known $freespeech projects emulating W32 subsystems
187                 to reach the compatibility with various W32 components:
188                 $Wine and $ReactOS. Unfortunately the goals of this project do not fit
189                 very well into any role in those two projects:</p>
190
191                 <table align="center" border="1">
192                         <tr>
193                                 <th><a href="#guestosnote">Guest-OS</a></th>
194                                 <th><a href="#hostosnote" >Host-OS</a ></th>
195                                 <th>Implements</th>
196                                 <th>W32 kernel library</th>
197                                 </tr>
198                         <tr>
199                                 <td>$Wine</td>
200                                 <td>$gnulinux</td>
201                                 <td>W32 user space</td>
202                                 <td><span class="fname">ntdll.dll</span></td>
203                                 </tr>
204                         <tr>
205                                 <td>$ReactOS</td>
206                                 <td><span class="constant">i386</span> hardware</td>
207                                 <td>W32 kernel and user space</td>
208                                 <td><span class="fname">ntoskrnl.exe</span></td>
209                                 </tr>
210                         <caption>Existing Emulation Projects Characteristics</caption>
211                 </table>
212
213                 <dl>
214                         <dt><a name="guestosnote">Guest-OS</a></dt>
215                         <dd><a href="http://www.vmware.com/support/reference/common/glossary/#guestos">Guest OS</a>:
216                                 An operating system that runs inside a&nbsp;virtual machine.</dd>
217                         <dt><a name="hostosnote">Host-OS</a></dt>
218                         <dd><a href="http://www.vmware.com/support/reference/common/glossary/#hostos" >Host  OS</a>:
219                                 An operating system that runs on the host machine.</dd>
220                 </dl>
221
222                 <p>While $ReactOS provides the necessary W32 kernel subsystem emulation
223                 code we also need to run such <a href="#guestosnote">Guest-OS</a> in the <a
224                 href="#hostosnote">Host-OS</a> $gnulinux. Initially it was planned to
225                 extend $Wine with the W32 kernel space emulation functionality but
226                 fortunately <span class="author">Steven Edwards</span> pointed to the $ReactOS
227                 which better suits the needs of this project by its already implemented W32
228                 kernel space emulation.</p>
229
230                 <p>The <a name="reactos_nocare">original reasons</a> for developing
231                 $ReactOS still make no sense to the author of this project. Free
232                 implementation of W32 platform standalone running on the machine hardware
233                 is no longer free as most of the W32 applications are usually closed source
234                 and the user still loses his/her freedom on the application level anyway. Even
235                 in the case of available free applications there still remains the
236                 disadvantage of losing the Host-OS platform availability if implemented in
237                 the $Wine style. For these ideology incompatibilities not much effort was
238                 made for the acceptance of the fixes and improvements of $ReactOS by this project.
239                 Moreover new functionality is not being implemented to the $ReactOS part
240                 but it is coded in Gnome style in the project specific source files
241                 place.</p>
242
243                 <p>The most serious problem of $ReactOS is its dependence on the direct
244                 <span class="constant">i386</span> hardware instead of some
245                 <a href="#hostosnote">Host-OS</a> as required by the goals of this project.
246                 W32 is designed to be hardware-independent using its
247                 <span class="fname">hal.dll</span>. Unfortunately $ReactOS does not follow
248                 this design and thus various patches and replacements of its
249                 various parts and its hardware-dependent code are needed. Despite this the $ReactOS code
250                 base was still a big asset for this project.</p>
251
252
253
254
255
256                 <p>Some API functions are provided both by
257                 <span class="fname">ntdll.dll</span> and
258                 <span class="fname">ntoskrnl.exe</span> in W32.
259                 <span class="author">Casper Hornstrup</span> explained that the calling conventions
260                 of such functions have to be differentiated as
261                 <span class="fname">ntdll.dll</span> lives in the user space (low address
262                 space -- below <span class="constant">0x80000000</span>) and
263                 <span class="fname">ntoskrnl.exe</span> in the kernel space (high address
264                 space -- above <span class="constant">0x80000000</span>). Although they
265                 contain slightly different set of symbols (functions)
266                 <span class="fname">ntdll.dll</span> still can be considered as a&nbsp;user
267                 space interface to the kernel space implementation by
268                 <span class="fname">ntoskrnl.exe</span>.</p>
269
270         <h2>API Function Implementation Choices</h2>
271
272                 <p>During the initial point of the project development all the API
273                 functions were defined as unimplemented, of course. Any call of such
274                 unimplemented function is fatal and results in program termination. When we
275                 need to implement any required API function we have multiple choices to do
276                 so:
277                 <a href="#functype_pass">Direct pass to original
278                                 <span class="fname">ntoskrnl.exe</span></a>,
279                 <a href="#functype_wrap">Wrap of the original
280                                 <span class="fname">ntoskrnl.exe</span> function</a>,
281                 <a href="#functype_native_reactos">Native implementation</a> -- $ReactOS,
282                 <a href="#functype_native_wine">Native implementation</a> -- $Wine
283                 or
284                 <a href="#functype_native_libcaptive">Native implementation
285                                 -- project specific</a>.</p>
286                 <!-- a href="#functype_undef" Undefined function /a -->
287
288         <h2>&quot;patched&quot; vs. &quot;unpatched&quot; Libraries</h2>
289
290                 <p>Library is called <span class="constant">patched</span> if we require
291                 loading its original binary code file. Project needs to patch it to be able
292                 to trap all the function entry points. The typical current
293                 <span class="constant">patched</span> library of this project is
294                 <span class="fname">ntoskrnl.exe</span>.</p>
295
296                 <p>Library is called <span class="constant">unpatched</span> if no original
297                 binary code is needed since all of its functions are completely emulated by
298                 <a href="#functype_native">the native implementations</a> of this project.
299                 The typical <span class="constant">unpatched</span> representative is
300                 <span class="fname">hal.dll</span> as it specializes on the hardware
301                 dependent code and therefore it must be completely replaced by this project
302                 running in the $gnulinux operating system environment. Early versions of
303                 this project had also full <span class="constant">unpatched</span>
304                 <a href="#native_ntoskrnl">native implementation of
305                 <span class="fname">ntoskrnl.exe</span></a> but it no longer applies.</p>
306
307         <h2>Memory Management</h2>
308
309                 <p>Original <span class="productname">Microsoft Windows NT</span>
310                 architecture uses two address space areas - user space and kernel space.
311                 User space is mapped in the range <span class="constant">0x00000000</span>
312                 to <span class="constant">0x7FFFFFFF</span>, kernel space is mapped in the
313                 range <span class="constant">0x80000000</span>
314                 (<span class="constant">KERNEL_BASE</span> in $ReactOS sources) to
315                 <span class="constant">0xFFFFFFFF</span>. All these virtual memory ranges
316                 represent addresses after their MMU (Memory Management Unit) mapping, of
317                 course. More discussion can be found in the
318                 <a href="http://www.microsoft.com/hwdev/platform/server/PAE/PAEmem.asp">description 
319                 by <span class="productname">Microsoft</span></a>.</p>
320
321                 <p>This project runs in the virtual address space used both for the UNIX
322                 user space process part and for the W32 kernel space. Therefore this
323                 project defines that W32 kernel runs in the whole range
324                 <span class="constant">0x00000000</span> to
325                 <span class="constant">0xFFFFFFFF</span> since there are no special mapping
326                 assumptions about the UNIX user space process mapping. No W32 user space
327                 exists in this project. Such approach also nullifies any special memory
328                 moving operations between W32 kernel space and W32 user space memory areas
329                 (such as <span class="function">MmSafeCopyToUser()</span>).</p>
330
331         <h2>Unicode Strings and Characters</h2>
332
333                 <p>W32 platform uses 16-bit type <span class="type">wchar_t</span> while $gnulinux uses a
334                 32-bit one. This can be a problem during GCC (GNU C&nbsp;Compiler)
335                 compilation of combination of native UNIX C&nbsp;sources (assuming 32-bit
336                 GCC with 32-bit <span class="type">wchar_t</span>) and
337                 $ReactOS C sources (assuming W32 compiler with 16-bit
338                 <span class="type">wchar_t</span>) for literal wide strings
339                 (C source file syntax: <span class="command">L&quot;wstring&quot;</span>).
340                 The possible ways to solve this issue are:</p>
341
342                 <ul>
343                         <li>
344                                 <p>Using <span class="constant">-fshort-wchar</span> GCC option and
345                                 strictly differentiate between compilation of
346                                 <span class="productname">ReactOS</span> code and UNIX code.</p>
347
348                                 <p>pros: No source modifications needed, no runtime performance hit.</p>
349
350                                 <p>cons: No type checking if some part of code has bad compilation
351                                 flags, complicated way to completely split
352                                 <span class="productname">ReactOS</span> and UNIX code.</p>
353                         </li>
354                         <li>
355                                 <p>Wrap all <span class="productname">ReactOS</span> literal constants
356                                 by some conversion function call (implemented as macro
357                                 <span class="function">REACTOS_UCS2()</span> by this project).</p>
358
359                                 <p>pros: Any forgotten/mistaken conversions are type-checked and warned
360                                 during the compilation by GCC.</p>
361
362                                 <p>cons: All compiled <span class="productname">ReactOS</span> source
363                                 files containing literal wide strings have to be wrapped/modified,
364                                 performance hit by runtime string conversions.</p>
365
366                                 <p>This solution was chosen to get the internal sanity checking
367                                 benefit.</p>
368                         </li>
369                 </ul>
370
371         <h2>Supported Binary Formats</h2>
372
373                 <p>The native W32 binary format is identified as
374                 <span class="constant">PE-32</span> (Portable Executable 32-bit), such
375                 files have all the usual extensions such as
376                 <span class="fname">.sys</span>, <span class="fname">.exe</span>,
377                 <span class="fname">.dll</span> etc. <span class="constant">PE-32</span>
378                 loading support was already implemented by $ReactOS, its memory mapping
379                 specifics just had to be ported to $gnulinux environment by this project.
380                 This loading support does not (yet) cover importing of debug symbols from
381                 W32 <span class="fname">.PDB</span> (Program DataBase) files in $gnulinux
382                 ABI (Application Binary Interface) compatible way.</p>
383
384                 <p>This project also supports transparent loading of UNIX
385                 <span class="fname">.so</span> (Shared Object file) binary format. If you
386                 have W32 source files for some W32 library you can try to compile it by GCC
387                 to get the shared library with $gnulinux ABI compatible debug information
388                 (GCC option <span class="constant">-ggdb3</span> recommended). Beware of
389                 possible compilation problems as <span class="productname">Microsoft</span>
390                 C&nbsp;code expects <span class="constant">exception</span> handling to be
391                 supported by the compiler (definitely not the case of the plain C compiler
392                 of GCC) --- all the exception catching code should be discarded as any
393                 <a href="#exception_fatal">generated exceptions are always fatal</a> when
394                 such driver is running in the scope of this project.</p>
395
396                 <p>Be aware of some differences if you use
397                 <span class="constant">PE-32</span> binary format file vs.
398                 <span class="fname">.so</span> format file.
399                 <span class="constant">PE-32</span> uses the appropriate W32 specific
400                 <a href="#calltype">cdecl/stdcall/fastcall call types</a>,
401                 <span class="fname">.so</span> must be completely compiled in the standard
402                 UNIX <a href="#calltype_cdecl">cdecl call type semantics</a>.
403                 <a href="#functype_native">Native function implementations</a> do not need
404                 to be explicitly exported by <span class="fname">captivesym</span> as they
405                 are resolved automatically by the UNIX dynamic system linker. It may be
406                 surprising you will have to fix all such missing symbol exports if you
407                 advance during the development from the debugging
408                 <span class="fname">.so</span> file to the production version of the
409                 original <span class="constant">PE-32</span> binary file.</p>
410
411         <h2>Reverse Engineering</h2>
412
413                 <p>This project has no intentions to reverse engineer and document the
414                 filesystem data structures themselves since they are being encapsulated by
415                 the filesystem driver. For these reasons the resources available in
416                 projects such as $LinuxNTFS get out of any possible use. This project goal
417                 is to provide fully compatible API interface to the rest of the W32 system
418                 to persuade the filesystem driver it is running in the native
419                 <span class="productname">Microsoft Windows XP</span> environment.</p>
420
421                 <p>All the W32 filesystem drivers are running in the W32 kernel address
422                 space and this area of W32 API is not much documented by
423                 <span class="productname">Microsoft</span>. Some API functions are not
424                 documented at all and the others are documented insufficiently for their
425                 possibly needed reimplementation from scratch. Documentation being
426                 consulted primarily consists of
427                 <span class="productname"><a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/kmarch/hh/kmarch/kmhdr_6enb.asp">MSDN (Microsoft Developer Network) Kernel-Mode Driver Architecture: Windows DDK</a></span>
428                 documentation and also various other 3rd party documentation resources such as
429                 <span class="productname"><a href="http://www.osr.com/ntinsider/1996/cacheman.htm">The NT Cache Manager Description</a></span>,
430                 <span class="productname"><a href="http://www.winntmag.com/Articles/Print.cfm?ArticleID=3864">Learn About NT's&nbsp;File-system Cache</a></span>,
431                 <span class="productname"><a href="http://www.ntfsd.org/archive/">NT File System Developers mailing list archives</a></span>
432                 including various
433                 <a href="http://www.google.com/search?q=site%3Amicrosoft.com">fulltext searches</a>
434                 through Internet from case to case.</p>
435
436                 <p>Sometimes no sufficient documentation was found and some code behaviour
437                 had to be reverse engineered directly from the binaries of
438                 <span class="fname">ntoskrnl.exe</span>,
439                 <span class="fname">cdfs.sys</span>
440                 and/or
441                 <span class="fname">fastfat.sys</span>.
442                 Up to now the code was disassembled by
443                 <span class="productname"><a href="http://www.simtel.net/pub/pd/29498.html">IDA Freeware</a></span>
444                 and by
445                 <span class="productname">dumpbin.exe</span> of
446                 <span class="productname">Microsoft Visual Studio</span>.
447                 <span class="productname">dumpbin.exe</span> is fortunately able to
448                 interpret debug symbols from W32 <span class="fname">.PDB</span>
449                 (Program DataBase) debug information files.</p>
450
451         <h2><a name="law">Laws and Licensing Conditions</a></h2>
452
453                 <p>If you are an <span class="productname">authorized user</span> of
454                 <span class="productname">Microsoft Windows NT</span> the laws in some
455                 countries give you the right to fully handle the product in any way you
456                 want. Therefore you can disassemble the product even in the case you had
457                 to agree with the product license forbidding such disassembly as the
458                 country laws override any such license agreement.</p>
459
460                 <h3>Microsoft Service Pack</h3>
461
462                         <p>Sometimes you may have the legal license for
463                         <span class="productname">Microsoft Windows NT</span>
464                         but for various technical reasons you do not have the media and/or
465                         installation ready at the place of intended use of this project.</p>
466
467                         <p>Fortunately <span class="productname">Microsoft</span> provides
468                         $freebeer update packages for its
469                         <span class="productname">Microsoft Windows</span> products called
470                         <span class="productname">Service Packs</span>; the latest one is
471                         <span class="productname"><a href="http://www.microsoft.com/WindowsXP/pro/downloads/servicepacks/sp1/checkedbuild.asp">Microsoft Windows XP Service Pack 1a</a></span>.</p>
472
473                         <p>This downloadable file contains the full versions of the essential
474                         files needed for the current stage of this product:
475                         <span class="fname">cdfs.sys</span>,
476                         <span class="fname">fastfat.sys</span>
477                         and
478                         <span class="fname">ntoskrnl.exe</span>.
479                         It even contains <span class="fname">ntfs.sys</span> for the planned
480                         <a href="#NTFSgoal"><span class="productname">NTFS</span>
481                         functionality</a>.</p>
482
483                         <p><span class="productname">Service Pack</span> also contains
484                         EULA (End User License Agreement) paper disallowing any use of
485                         <span class="productname">Service Pack</span> outside its original
486                         intentions. According to the laws of some countries you need to be an
487                         <span class="productname">authorized user</span> of the
488                         <span class="productname">Microsoft Windows XP</span> product to be
489                         allowed to use the files contained in such
490                         <span class="productname">Service Pack</span> without the bindings of its
491                         EULA. Even the interpretation of such laws may vary.</p>
492
493                         <p>It would be a&nbsp;breach of the law by the project author to provide
494                         automatic (=hidden) functionality to download and extract the
495                         <span class="productname">Service Pack</span> files. On the other hand it
496                         is perfectly legal to ask user for his/her confirmation whether he/she is
497                         really the <span class="productname">authorized user</span> of
498                         <span class="productname">Microsoft Windows XP</span> product and
499                         download/extract the <span class="productname">Service Pack</span> files
500                         accordingly.</p>
501
502         <h2>Project Architecture</h2>
503
504                 @{[ doc_img 'fig/architecture','Project Architecture' ]}
505
506                 <p>Most of the work of this project is located in the single box called
507                 &quot;<span class="constant">libcaptive</span>&quot; located in the center
508                 of the scheme. This component implements the core W32 kernel API by
509                 <a href="#functype">various methods described in this document</a>.
510                 The &quot;<span class="constant">libcaptive</span>&quot; box cannot be
511                 further dissected as it is just an implementation of a&nbsp;set of API
512                 functions. It could be separated to several subsystems such as the Cache
513                 Manager, Memory Manager, Object Manager, Runtime Library, I/O&nbsp;Manager
514                 etc. but they have no interesting referencing structure.</p>
515
516                 <p>As this project is in fact just a&nbsp;filesystem implementation every
517                 story must begin at the device file and end at the filesystem operations
518                 interface. The unified supported interfaces are
519                 <span class="productname"><a href="http://developer.gnome.org/doc/API/2.0/glib/">GLib</a></span>
520                         (the most low level portability, data-types and utility library for Gnome)
521                 <span class="type">GIOChannel</span> (for the device access) and the custom
522                 <span class="constant">libcaptive</span> filesystem API. Each of these ends
523                 can be connected either to some direct interface (such as the
524                 <span class="constant">captive-cmdline</span> client) or it can be connected
525                 as a general $GnomeVFS filter. $GnomeVFS offers nice filter interface on
526                 the UNIX user-privileges level for transparent operation with archives and
527                 network protocols. This filter interface was used by this project to turn
528                 the device reference such as <span class="fname">/dev/hda3</span> or
529                 <span class="fname">/dev/discs/disc0/part3</span> to the fully accessible
530                 filesystem (pretending being an &quot;archive&quot; in the device
531                 reference). This device access can be specified by $GnomeVFS URLs such as:
532                 <span class="fname">file:///dev/hda3#captive-fastfat:/autoexec.bat</span></p>
533                 
534                 <p>If the passed device reference is requested by the user to be accessed
535                 either in <span class="dashdash">--ro</span> (read-only) mode or in the
536                 <span class="dashdash">--rw</span> (full read-write) mode there are no
537                 further device layers needed. Just in the case of
538                 <span class="dashdash">--blind</span> mode another layer is involved to
539                 emulate read-write device on top of the real read-only device by the method
540                 of non-persistent memory buffering of all the possible write requests.</p>
541
542                 <p>Such device is still only a&nbsp;UNIX style GLib <span
543                 class="type">GIOChannel</span> type at this point.  As we need to supply it
544                 to the W32 filesystem driver we must convert it to the W32 I/O&nbsp;Device
545                 with its capability of handling <span class="type">IRP</span>
546                         (<span class="constant">I/O Request Packet</span>; structure holding the
547                         request and result data for any W32 filesystem or W32 block device
548                         operation)
549                 requests from its upper W32 filesystem driver. Such W32 I/O&nbsp;Device can
550                 represent either <span class="type">CD-ROM</span> or
551                 <span class="type">disk</span> device type as different W32 filesystem
552                 drivers require different media types:</p>
553
554                 <h3>cdfs.sys</h3>
555
556                         <p><span class="type">CD-ROM</span> filesystem runs just on the
557                         <span class="constant">FILE_DEVICE_CD_ROM_FILE_SYSTEM</span> device type.
558                         Use <span class="dashdash">--cdrom</span> option of this project for
559                         <span class="fname">cdfs.sys</span>.</p>
560
561                 <h3>fastfat.sys</h3>
562
563                         <p><span class="type">FAT</span> filesystem supports both the (expected)
564                         <span class="constant">FILE_DEVICE_DISK_FILE_SYSTEM</span> device type
565                         but it also supports the reading of
566                         <span class="constant">FILE_DEVICE_CD_ROM_FILE_SYSTEM</span> devices as
567                         you can use <span class="type">FAT</span> filesystem on <span
568                         class="type">CD-ROM</span> media in W32 environment. It is recommended to
569                         use <span class="dashdash">--disk</span> option of this project for
570                         <span class="fname">fastfat.sys</span>.</p>
571
572                 <h3>ext2fsd.sys</h3>
573
574                         <p><span class="type">ext2</span> filesystem supports just the
575                         <span class="constant">FILE_DEVICE_DISK_FILE_SYSTEM</span> device type.
576                         Use <span class="dashdash">--disk</span> option of this project for
577                         <span class="fname">ext2fsd.sys</span>.</p>
578                 
579                 @{[ vskip("3ex") ]}
580
581                 <p>W32 media I/O&nbsp;Device is accessed from the W32 filesystem driver.
582                 The filesystem driver itself always creates volume object by
583                 <span class="function">IoCreateStreamFileObject()</span> representing the
584                 underlying W32 media I/O&nbsp;Device as the object handled by the
585                 filesystem driver itself. All the client application filesystem requests
586                 must be first resolved at the filesystem structures level, passed to the
587                 volume stream object of the same filesystem and then finally passed to the
588                 W32 media I/O&nbsp;Device (already implemented by this project as an
589                 interface to <span class="type">GIOChannel</span> noted above).</p>
590
591                 <p>The filesystem driver is called by the core W32 kernel implementation of
592                 <span class="constant">libcaptive</span> in
593                 <a href="#synchronous">synchronous way</a> in single-shot manner instead of
594                 the several reentrancies while waiting for the disk I/O completions as can
595                 be seen in the original
596                 <span class="productname">Microsoft Windows NT</span>.
597                 This single-shot synchronous behaviour is possible since all the needed
598                 resources (disk blocks etc.) can be always presented as instantly ready as
599                 their acquirement is solved by <a href="#hostosnote">Host-OS</a> outside of
600                 the W32 emulated <a href="#guestosnote">Guest-OS</a> environment.</p>
601
602                 <p><span class="constant">libcaptive</span> offers the W32 kernel
603                 filesystem API to the upper layers. This is still not the API the common
604                 W32 applications are used to as they use W32 libraries which in turn pass
605                 the call to W32 kernel.  For example
606                 <span class="function">CreateFileA()</span> is being implemented by several
607                 libraries such as <span class="fname">user32.dll</span> as a relay
608                 interface for the kernel function
609                 <span class="function">IoCreateFile()</span> implemented by this
610                 project's&nbsp;<span class="constant">libcaptive</span> W32 kernel
611                 emulation component.</p>
612
613                 <p>As it would be very inconvenient to use the legacy, bloated and UNIX
614                 style unfriendly W32 kernel filesystem API this project offers its own
615                 <a href="#client_interface">custom filesystem API interface</a> inspired by
616                 the $GnomeVFS client interface adapted to the specifics of W32 kernel API.
617                 This interface is supposed to be easily utilized by
618                 <a href="#client_interface_customapp">a&nbsp;custom application accessing
619                 the W32 filesystem driver</a>.</p>
620
621                 <p>The rest of the story is not much special for this project since this is
622                 a common UNIX problem how to offer user space implemented UNIX filesystem
623                 as a generic system filesystem (as those are usually implemented only as
624                 the components of UNIX kernel). The thinnest implementation would be to
625                 implement <FIXME:LUFS><a href="#fuse_interface">FUSE \bookcitation{FUSE}
626                         (Filesystem in Userspace project for $gnulinux implemented by its own
627                         filesystem code for Linux kernel)
628                 interface</a> for the purpose but such feature is not yet implemented.
629                 Currently this project implements
630                 <a href="#offered_gnomevfs">Gnome-VFS interface</a> allowing its filesystem
631                 access even without any involvement of UNIX kernel from any
632                 $GnomeVFS aware client application (such as
633                 <span class="fname">gnome-vfs/tests/test-shell</span>).
634                 This <a href="#offered_gnomevfs">Gnome-VFS interface</a> connects the data
635                 flow of this project in two points - both as the lowest layer device image
636                 source and also as the upper layer for the filesystem operation
637                 requests.</p>
638
639                 <p>That's&nbsp;all folks!</p>
640
641         <a name="mounted_one"><h2>At Most One Mounted Filesystem</h2></a>
642
643                 @{[ doc_img 'fig/sandbox','Multiple Filesystems by libcaptive Sandboxing' ]}
644
645                 <p>The project technically supports only one (exactly one...) mounted
646                 filesystem device and only one filesystem driver. There is nothing
647                 complicated to support multiple disks and multiple loaded filesystem
648                 modules but as they would share the address space it would only bring
649                 possible complications during bug reports and the bug solving
650                 itself.  It was considered as a&nbsp;more sane way to support multiple W32
651                 mounted disks by completely separately running project instances in
652                 different UNIX processes communicating from their sandboxes via
653                 <a href="#todo_sandbox">CORBA sandbox interface</a>. This sandboxing
654                 feature is not yet deployed although its code is already prepared.</p>
655
656                 <p>The project also does not support any state cleanup to be able to load
657                 filesystem&nbsp;<span class="constant">A</span>,
658                 cleanup&nbsp;<span class="constant">A</span> and load a different
659                 filesystem&nbsp;<span class="constant">B</span> in the same process address
660                 space. It complies with the preventions of the possible debugging
661                 complications as noted above. Despite this you still must call the function
662                 <span class="function">captive_shutdown()</span> to flush all the pending
663                 filesystem buffers to the disk. After calling
664                 <span class="function">captive_shutdown()</span> the process address space is
665                 no longer usable for any further project operations and the process is
666                 expected to be terminated in the manner compatible with its driving
667                 <a href="#todo_sandbox">CORBA sandbox interface</a> control master.</p>
668
669                 <p>Each sandbox executing the untrusted W32 binary filesystem driver code
670                 is connected through its
671                 <a href="#todo_sandbox">CORBA sandbox interface</a> at the point of upper
672                 layer <span class="constant">libcaptive</span>-specific filesystem API, at
673                 the point of the bottom layer of <span class="type">GIOChannel</span>
674                 device access and also for transfers of GLib logging
675                 messages/warnings/errors out of the sandbox to the user.</p>
676
677
678 <h1>Choice of the Emulation Methods</h1>
679
680         <p>The intent of the project was to get reliable read-write access to
681         <span class="productname">NTFS</span> partition. There are several possible
682         ways to achieve that:</p>
683
684         <h2>Virtualmachine Running the Original W32 Subsystem</h2>
685
686                 <p>Creating virtual-hardware PC and running the original W32 binaries
687                 including their boot-loader etc. Disk device access would be passed as
688                 virtual IDE disk (=hard disk drive). File access API would be implemented
689                 either by special escaping by some trapped instruction out of the
690                 virtualmachine while using W32 file access API or using the standard W32
691                 SMB (Server Message Block) network access through some virtual network
692                 card. The latter network access solution is almost the currently available
693                 possibility of running full-blown disk-sharing real
694                 <span class="productname">Microsoft Windows NT</span> inside virtual
695                 machine emulator such as <span class="productname">VMware</span>.</p>
696
697                 <p>pros: Full compatibility due to fully native codebase.</p>
698
699                 <p>cons: Hard to debug, missing documentation of NT booting internals,
700                 possible problems by different PC virtual-hardware than expected by NT,
701                 requirement of fully installed
702                 <span class="productname">Microsoft Windows NT</span> product.</p>
703
704         <a name="method_ntoskrnl"><h2>&quot;ntoskrnl.exe&quot; Inside Virtual Address Space</h2></a>
705
706                 <p>This solution was chosen by the project. Binary filesystem driver and
707                 also <span class="fname">ntoskrnl.exe</span> binary file are required.
708                 Unfortunately <span class="fname">ntoskrnl.exe</span> expects a&nbsp;native
709                 PC virtual-hardware missing during regular UNIX user space process
710                 emulation, therefore such instructions must be trapped and emulated/ignored
711                 from case to case.</p>
712
713                 <p>Also the <a name="init_ntoskrnl">initialization code of <span
714                 class="fname">ntoskrnl.exe</span></a> is not executed by this project since
715                 it expects to get full PC hardware access privileges and thus some
716                 datastructures do not get initialized by it (need to be trapped later at
717                 runtime stage). Some of the missing initializations are solved by
718                 <a href="#functype_wrap">API functions wrapping</a>.</p>
719
720                 <p>pros: Lightweight, easier to debug.</p>
721
722                 <p>cons: Possible incompatible emulation of
723                 <span class="fname">ntoskrnl.exe</span> parts, missing documentation needed
724                 for the implementation.</p>
725
726         <h2>Filesystem Driver Inside Virtual Address Space</h2>
727
728                 <p>Unlike <a href="#method_ntoskrnl">previous method</a> here we do not use
729                 even <span class="fname">ntoskrnl.exe</span> as the complete kernel part of
730                 W32 is <a name="native_ntoskrnl">emulated from the project source
731                 files</a>. <span class="fname">cdfs.sys</span> driver was successfully run
732                 in this manner in the former versions of this project but the possibility
733                 to run without <span class="fname">ntoskrnl.exe</span> was dropped since it
734                 had no licensing gains (you need the original
735                 <span class="productname">Microsoft Windows NT</span> files at least for
736                 the filesystem driver itself) and the emulation of undocumented parts
737                 reusable from <span class="fname">ntoskrnl.exe</span> binary was
738                 a&nbsp;pain.</p>
739
740                 <p>pros: Lightweight, easier to debug.</p>
741
742                 <p>cons: Possible incompatible emulation of the whole
743                 <span class="fname">ntoskrnl.exe</span>, its missing documentation.</p>
744
745
746 <h1>Implementation Details</h1>
747
748         <a name="functype"><h2>API Function Implementation Choices</h2></a>
749
750                 <p>For each function exported by W32
751                 <span class="fname">ntoskrnl.exe</span> and imported and called by the
752                 filesystem driver a decision needs to be made to properly implement its
753                 functionality. Currently implemented functionality statistics are provided
754                 below:</p>
755
756                 <FIXME:numbers>
757                 <table border="1" align="center">
758                         <tr><th>Function type                                        </th><th>Items</th><th>Portion</th></tr>
759                         <tr><td><a href="#functype_pass">pass</a>                    </td><td>   46</td><td>    21%</td></tr>
760                         <tr><td><a href="#functype_wrap">wrap</a>                    </td><td>    1</td><td>     0%</td></tr>
761                         <tr><td><a href="#functype_native_reactos">native-ReactOS</a></td><td>   94</td><td>    43%</td></tr>
762                         <tr><td><a href="#functype_native_libcaptive">native-own</a> </td><td>   79</td><td>    36%</td></tr>
763                         <caption>Function Implementation Types Statistics</caption>
764                 </table>
765
766                 <p>As there are several choices to implement each function the usual
767                 attempts/investigations ordering is listed in the sections below.</p>
768
769                 <p>Special care must be taken for data-type symbols since they are
770                 referenced without the possibility of catching the code flow by some
771                 breakpoints (it would be possible only in some special access cases). Data
772                 export symbols of <span class="constant">unpatched</span> libraries must
773                 contain already prepared content at the runtime. There is a&nbsp;problem
774                 with <span class="constant">patched</span> libraries where it is necessary
775                 to also fully implement the data symbol as
776                 <a href="#functype_native">native implementation</a> since there is no
777                 possibility to <a href="#functype_pass">pass</a> the data symbol instead of
778                 the original W32 data location and therefore there will be two instances of
779                 such data variable place. As there will be also the uncaught references for
780                 such W32 data location from the <span class="constant">patched</span>
781                 library itself such symbols should be usually only some constants (such as
782                 <span class="constant">KeNumberProcessors</span>).</p>
783
784                 <p>W32 platform symbols export/import can be based either on the symbol
785                 name itself or it can be also exported and imported just by its
786                 identification number called <span class="constant">Ordinal</span>.
787                 Although it saves some jumptables file binary size it is currently no
788                 longer used by W32 binaries and this project also does not support such
789                 <span class="constant">Ordinal</span> symbol reference type at all.</p>
790
791                 <p>All the exporting magic is handled by custom script
792                 <span class="fname">captivesym</span> processing the definition file
793                 <FIXME:span class="fname">src/libcaptive/ke/exports.captivesym</span> to produce
794                 the intermediate relaying code
795                 <FIXME:span class="fname">src/libcaptive/ke/exports.c</span>. For details of the
796                 <span class="fname">captivesym</span>-specific source file syntax please
797                 see its documentation: <FIXME:span class="fname">doc/captivesym-pod.html</span></p>
798
799                 <a name="functype_pass"><h3>Direct Pass to Original &quot;ntoskrnl.exe&quot;</h3></a>
800
801                         <p>Simple (standalone) functions such as
802                         <span class="function">RtlTimeToSecondsSince1970()</span> can be simply
803                         passed to the original implementation in
804                         <span class="fname">ntoskrnl.exe</span> as they make no hardware access
805                         and they do not expect any special internal data structures to be set up
806                         in advance by an earlier library initialization. A common case are all
807                         the data structures utility functions such as
808                         <span class="constant">GenericTable</span> subsystem or
809                         <span class="constant">LargeMcb</span> handling.</p>
810
811                         <a name="functype_pass_fromunix"><h4>Pass from UNIX Code</h4></a>
812
813                                 <p>Control flow begins in some standard UNIX code. Such code is always
814                                 using <a href="#calltype_cdecl">cdecl call type</a> for all its
815                                 intracalls. <a href="#functype_native_reactos">Native functions
816                                 compiled from <span class="productname">ReactOS</span> sources</a> use
817                                 their own <a href="#calltype">cdecl/stdcall/fastcall</a> declarations
818                                 but these call type modifications are discarded during compilation for
819                                 this project by the <span class="constant">LIBCAPTIVE</span>
820                                 symbol.</p>
821
822                                 <p>UNIX code calls <span class="function">FUNCTIONNAME()</span> relay
823                                 from the generated UNIX jump table. Such relay will debug dump the
824                                 passed arguments and finally pass the control to the original W32
825                                 function code in the proper call type
826                                 <a href="#calltype">cdecl/stdcall/fastcall</a> for a&nbsp;given
827                                 function.</p>
828
829                                 <p>Original W32 code entry point is always trapped by a&nbsp;breakpoint
830                                 although it would not be needed during this specific direct pass from
831                                 UNIX code to the original W32 implementation. Still the breakpoint has
832                                 to be there to catch some other (such as intra-W32) possible calls
833                                 described later. There are several more ways to define breakpoint in
834                                 the code. One way is to use processor hardware breakpoint support but
835                                 the number of breakpoints is limited.  The other way is to patch in the
836                                 <span class="instruction">@{[ 'int $3' ]}</span> instruction but it will invoke
837                                 <span class="constant">SIGTRAP</span> signal handler conflicting with
838                                 the possible debugger (<span class="productname">gdb(1)</span>)
839                                 control. This project uses the <span class="instruction">hlt</span>
840                                 instruction, which has a&nbsp;single-byte opcode just like
841                                 <span class="instruction">@{[ 'int $3' ]}</span> and it is a&nbsp;privileged
842                                 instruction forbidden to be used from the UNIX user space code.
843                                 <span class="instruction">hlt</span> invokes
844                                 <span class="constant">SIGSEGV</span> signal which can be resolved by
845                                 a&nbsp;custom signal handler without any conflict with the possible
846                                 debugger control; <span class="productname">gdb(1)</span> needs the
847                                 following command to pass through such
848                                 <span class="constant">SIGSEGV</span> signal:</p>
849
850                                 <blockquote class="command">
851                                         <p>handle SIGSEGV nostop noprint pass</p>
852                                 </blockquote>
853
854                                 <p>When a breakpoint gets caught, we usually need to return to the
855                                 running code. Unfortunately it is not possible because of the patched
856                                 breakpoint opcode. The breakpoint cannot be simply removed upon return
857                                 as it would permanently lose control over the point of entry. Even if
858                                 the return would include faking of the return address in the bottom
859                                 stack frame to patch the breakpoint back during later function exit it
860                                 still would not solve the catching of inner calls of recursive
861                                 functions. One of the working possibilities would be to patch the
862                                 original instruction back and perform a&nbsp;singlestep provided by
863                                 <span class="function">ptrace(2)</span> syscall. However such
864                                 singlestep needs another controlling UNIX process and it would again
865                                 conflict with the debuggers such as
866                                 <span class="productname">gdb(1)</span>. This project implements the
867                                 singlestep functionality by two consecutive breakpoints
868                                 (<span class="instruction">hlt</span> instructions to be specific):
869                                 The first two instruction addresses of the W32 functions are called
870                                 <span class="productname">slot #1</span> and
871                                 <span class="productname">slot #2</span>, the length of the first
872                                 function instruction has to be analyzed to get the right address of
873                                 <span class="productname">slot #2</span>. When the first breakpoint is
874                                 caught it is necessary to patch the original instruction back and also
875                                 patch another breakpoint in place of
876                                 <span class="productname">slot #2</span>.
877                                 During the <span class="productname">slot #2</span> breakpoint
878                                 invocation the operation will be reverted - the breakpoint will be put
879                                 to <span class="productname">slot #1</span> again and the instruction
880                                 of <span class="productname">slot #2</span> will be restored to be able
881                                 to continue the execution of the function.</p>
882
883                                 <p>W32 function will finish in its specific
884                                 <a href="#calltype">cdecl/stdcall/fastcall call type</a>, the control
885                                 will return to the UNIX jump table relay which will debug dump the
886                                 return value and it will finally pass the control back to the UNIX
887                                 caller in the standard UNIX
888                                 <a href="#calltype_cdecl">cdecl call type</a>.</p>
889
890                                 @{[ doc_img 'fig/functype_patched_pass_fromunix',
891                                                 'Function Type: <span class="constant">pass</span> from UNIX Code' ]}
892
893                         <a name="functype_pass_fromw32"><h4>Pass from W32 Code</h4></a>
894
895                                 <p>This function type is similar to the
896                                 <a href="#functype_pass_fromunix">previous one</a> with the exception
897                                 of more complicated entry point. Unfortunately W32 libraries call their
898                                 own functions directly, using the <span class="instruction">call</span>
899                                 instructions without any patchable jump table. Even the
900                                 <span class="instruction">call</span> argument itself cannot be patched
901                                 according to the relocation table record as such library intra-call
902                                 instruction has no relocation due to its relative argument offset on
903                                 <span class="constant">i386</span>. This time the double-breakpoint
904                                 mechanism <a href="#functype_pass_fromunix">described above</a> comes in
905                                 handy since it will catch the entry point when the function gets
906                                 called.  <span class="constant">SIGSEGV</span> handler gets invoked by
907                                 the <span class="instruction">hlt</span> instruction and it will
908                                 redirect the control to the jump table relay function to debug dump the
909                                 function entry arguments (it has no other uses in this call type).</p>
910
911                                 <p>When the relay needs to call the original function it will reach
912                                 exactly the same breakpoint instruction as during the recent
913                                 <span class="constant">SIGSEGV</span> handling redirecting to this
914                                 calling relay.  But this time the
915                                 <span class="constant">through_w32_func</span> field of this function
916                                 record will be set to prevent repeated redirection and to pass the
917                                 control through the breakpoint mangle instead this time.</p>
918
919                                 <p>Returning is not very interesting as the first
920                                 <span class="constant">SIGSEGV</span> handler did a&nbsp;straight jump
921                                 for the redirection purposes without any needed consequent
922                                 handling.</p>
923
924                                 <p>The jump table relay used for the callers from W32 code is
925                                 a&nbsp;different one than the relay being used for the callers
926                                 <a href="#functype_pass_fromunix">from UNIX code</a>. UNIX code always
927                                 uses relay with external <a href="#calltype_cdecl">cdecl call type</a>
928                                 but in this case a&nbsp;relay with the appropriate
929                                 <a href="#calltype">cdecl/stdcall/fastcall call type</a> is used.</p>
930
931                                 @{[ doc_img 'fig/functype_patched_pass_fromw32',
932                                                 'Function Type: <span class="constant">pass</span> from W32 Code' ]}
933
934                         @{[ vskip() ]}
935
936                         <table border="1" align="center">
937                                 <tr><td><span class="fname">captivesym</span> keyword</td><td>pass</td></tr>
938                                 <tr><td>Native code function name                    </td><td>(no implementation)</td></tr>
939                                 <tr><td>W32 traced code from UNIX function name      </td><td>FUNCNAME</td></tr>
940                                 <tr><td>W32 traced code from W32  function name      </td><td>FUNCNAME_cdecl/_stdcall/_fastcall</td></tr>
941                                 <tr><td>Entry/exit debug tracing from UNIX code      </td><td>yes</td></tr>
942                                 <tr><td>Entry/exit debug tracing from W32 code       </td><td>yes</td></tr>
943                                 <caption>Function Type <span class="constant">pass</span> Characteristics</caption>
944                         </table>
945
946                 <a name="functype_wrap"><h3>Wrap of the Original "ntoskrnl.exe" Function</h3></a>
947
948                         <a name="functype_wrap_fromunix"><h4>Wrapping of Call from UNIX Code</h4></a>
949
950                                 <p>The code control flow has no special hardcore features since it is
951                                 very similar to <a href="#functype_pass_fromunix">the direct pass to
952                                 W32 function from UNIX code</a>. All the wrapping is done in the
953                                 standard UNIX <a href="#calltype_cdecl">cdecl call type</a> manner.
954                                 Jump table debug dumping relays are provided twice - the
955                                 &quot;outer&quot; one to trace the parameters from the function caller
956                                 and the &quot;inner&quot; one to trace the call from the wrapper to the
957                                 original W32 code. The &quot;inner&quot; relay also calls the W32 code
958                                 with the appropriate <a href="#calltype">cdecl/stdcall/fastcall call
959                                 type</a>.</p>
960
961                                 @{[ doc_img 'fig/functype_patched_wrap_fromunix',
962                                                 'Function Type: <span class="constant">wrap</span> from UNIX Code' ]}
963
964                         <a name="functype_wrap_fromw32"><h4>Wrapping of Call from W32 Code</h4></a>
965
966                                 <p>This scheme is a&nbsp;combination of the
967                                 <a href="#functype_wrap_fromunix">previous wrap of a&nbsp;call from
968                                 UNIX code</a> and the <a href="#functype_pass_fromw32">direct pass from
969                                 the W32 code</a>. The control is caught and redirected by
970                                 <span class="constant">SIGSEGV</span> handler from the breakpoint
971                                 placed at the entry to the original W32 function code. The second entry
972                                 to the original W32 function with the
973                                 <span class="constant">through_w32_func</span> field of this function
974                                 description already set is done from the &quot;inner&quot; jump table
975                                 relay with the appropriate
976                                 <a href="#calltype">cdecl/stdcall/fastcall call type</a>.</p>
977
978                                 @{[ doc_img 'fig/functype_patched_wrap_fromw32',
979                                                 'Function Type: <span class="constant">wrap</span> from W32 Code' ]}
980
981                         @{[ vskip() ]}
982
983                         <p>Some functions can be <a href="#functype_pass">passed to the original
984                         code</a> but they need their parameters to be checked/prepared.
985                         Currently, such wrapping is only needed for the
986                         <span class="function">ExAllocateFromPagedLookasideList()</span> function
987                         where it is required due to <a href="#init_ntoskrnl">missing execution of
988                         <span class="fname">ntoskrnl.exe</span> initialization</a>,
989                         which would otherwise properly initialize some internal data structures.
990                         In this case the wrapping code detects passing of an uninitialized
991                         parameter and will search through the whole
992                         <span class="fname">ntoskrnl.exe</span> code body at runtime to find the
993                         proper initialization routine containing the correct initialization
994                         parameters.  Passed addresses of static structures must be differentiated
995                         as each of them usually has different initialization parameters. It is
996                         proactive not to have a fixed parameters array as these parameters may
997                         differ across different <span class="fname">ntoskrnl.exe</span>
998                         versions.</p>
999
1000                         <table border="1" align="center">
1001                                 <tr><td><span class="fname">captivesym</span> keyword</td><td>wrap</td></tr>
1002                                 <tr><td>Native UNIX wrapping code function name      </td><td>FUNCNAME_wrap</td></tr>
1003                                 <tr><td>W32 traced wrapping code from UNIX func. name</td><td>FUNCNAME</td></tr>
1004                                 <tr><td>W32 traced wrapping code from W32 func. name </td><td>FUNCNAME_cdecl/_stdcall/...</td></tr>
1005                                 <tr><td>W32 traced original code function name       </td><td>FUNCNAME_orig</td></tr>
1006                                 <tr><td>Entry/exit debug tracing from UNIX code      </td><td>yes</td></tr>
1007                                 <tr><td>Entry/exit debug tracing from W32 code       </td><td>yes</td></tr>
1008                                 <caption>Function Type <span class="constant">wrap</span> Characteristics</caption>
1009                         </table>
1010
1011                 <a name="functype_native"><h3>Native Implementation</h3></a>
1012
1013                         <h4>Native Implementation Called from UNIX Code</h4>
1014
1015                                 <p>This is the simplest case of a&nbsp;function call as it is fully
1016                                 handled only by the compiler and/or linker.</p>
1017
1018                                 <p>In this case though, no debug dumping call relay is provided - such
1019                                 relay would need to rename the implementations of native functions to
1020                                 prevent its automatic linking with the caller code. This renaming would
1021                                 not be possible to do by simple <span class="constant">#define</span>
1022                                 since it would also rename any calling statements of such function in
1023                                 the same C&nbsp;sources.  One of the possibilities to solve would be to
1024                                 utilize <span class="dashdash">--redefine-sym</span> feature of the
1025                                 <span class="productname">objcopy(1)</span> utility. On the other hand
1026                                 there is not much need to catch/debug such calls as both the caller and
1027                                 the callee are provided with full source file debug information for the
1028                                 debugger. Also the callee usually debug dumps its entry/exit parameters
1029                                 by custom debug dumps in the
1030                                 <a href="#functype_native_reactos"><span class="productname">ReactOS</span> implementations</a>.</p>
1031
1032                                 @{[ doc_img 'fig/functype_native_fromunix',
1033                                                 'Function Type: <span class="constant">native</span> from UNIX Code' ]}
1034
1035                         <a name="functype_native_fromw32"><h4>Native Implementation of
1036                                         &quot;unpatched&quot; Library Function Called from W32 Code</h4></a>
1037
1038                                 @{[ doc_img 'fig/functype_unpatched_native_fromw32',
1039                                                 'Function Type: <span class="constant">native</span> of <span class="constant">unpatched</span> from W32 Code' ]}
1040
1041                                 <p>Here comes the differentiation if the project deals either with
1042                                 a&nbsp;<span class="constant">patched</span> or an
1043                                 <span class="constant">unpatched</span> version of the library
1044                                 (<span class="constant">patched</span> is a&nbsp;loaded W32 binary
1045                                 library while <span class="constant">unpatched</span> library is
1046                                 completely provided by this project with no use of the library's
1047                                 original W32 binary file). As the project adjusts the exported symbol
1048                                 address during the patching operation, in some cases the
1049                                 <span class="constant">patched</span> library call may be handled
1050                                 simply as <span class="constant">unpatched</span> library call even for
1051                                 the <span class="constant">patched</span> libraries. Fortunately the
1052                                 distinction is not very important as the project is prepared to
1053                                 properly handle both cases.</p>
1054
1055                                 <p>The W32 caller which imported the symbol will be pointed right to
1056                                 the relaying function. The debug dumping relay will be called from W32
1057                                 code with the appropriate
1058                                 <a href="#calltype">cdecl/stdcall/fastcall call type</a> while the
1059                                 relay will call the implementation of the native function in the
1060                                 standard UNIX <a href="#calltype_cdecl">cdecl call type</a> manner.</p>
1061
1062                         <h4>Native Implementation of &quot;patched&quot; Library Function Called from W32 Code</h4>
1063
1064                                 @{[ doc_img 'fig/functype_patched_native_fromw32',
1065                                                 'Function Type: <span class="constant">native</span> of <span class="constant">patched</span> from W32 Code' ]}
1066
1067                                 <p>The calling scheme is similar to the
1068                                 <a href="#functype_native_fromw32">previous call of
1069                                 <span class="constant">unpatched</span> library function from W32
1070                                 code</a> but the call control is redirected from the entry point of the
1071                                 original W32 binary implementation by the breakpoint and its
1072                                 <span class="constant">SIGSEGV</span> handler as in
1073                                 <a href="#functype_pass_fromw32">the case of passing control from W32
1074                                 call</a>.</p>
1075
1076                                 <p>The original W32 function implementation located in the original
1077                                 loaded binary file is never executed but its entry point needs to be
1078                                 trapped by the breakpoint to be able to catch the function calls within
1079                                 the library.</p>
1080
1081                         @{[ vskip() ]}
1082
1083                         <p>In all cases the final function implementation is a&nbsp;standard UNIX
1084                         code compiled from C&nbsp;sources with full debug information available
1085                         for the debugger. Fortunately all such functions do not need to be coded
1086                         from scratch for this project since there already exist $freespeech
1087                         $ReactOS and $Wine projects and their code can be used instead.</p>
1088
1089                         <p>$Wine project is listed mostly for completeness as almost no
1090                         code was suitable for reuse as it implements W32 user space while this
1091                         project is running pure W32 kernel space environment (in $gnulinux user
1092                         space!).</p>
1093
1094                         <a name="functype_native_reactos"><h4>Native Implementation
1095                                         - <span class="productname">ReactOS</span></h4></a>
1096
1097                                 <p>Some functions are already implemented in the $ReactOS
1098                                 project and they can be used as they are.  Although it would be
1099                                 possible to <a href="#functype_pass">pass some function calls to the
1100                                 original code</a> it is more handy to provide native implementation as
1101                                 there is better control of the data handling during debugging sessions
1102                                 due to the provided debugging symbols.</p>
1103
1104                                 <p>Such functions can be found in
1105                                 <span class="fname">src/libcaptive/reactos/</span> subdirectory.
1106                                 Some functions had to be adjusted for this project
1107                                 - these modifications are compiled conditionally, depending on the
1108                                 <span class="constant">LIBCAPTIVE</span> symbol existence.</p>
1109
1110                                 <p>Later stages of this project reached the level where
1111                                 $ReactOS is yet too immature and the needed functions are usually
1112                                 written just with the sad body:</p>
1113
1114                                 <blockquote class="command">
1115                                         <p>UNIMPLEMENTED;</p>
1116                                 </blockquote>
1117
1118                                 <p>Functions that were not possible to
1119                                 <a href="#functype_pass">pass</a> were reimplemented by this project
1120                                 and placed in the project's implementation directories
1121                                 <a href="#reactos_nocare">instead of extending</a> $ReactOS code.</p>
1122
1123                         <a name="functype_native_wine"><h4>Native Implementation - <span class="productname">Wine</span></h4></a>
1124
1125                                 <p>Even though $Wine only implements the
1126                                 <span class="productname">Microsoft Windows NT</span> user space, there
1127                                 still are some common functions which could be copied from the $Wine
1128                                 project.</p>
1129
1130                         <a name="functype_native_libcaptive"><h4>Native Implementation - Project Specific</h4></a>
1131
1132                                 <p>As the last resort it was necessary to provide completely own
1133                                 implementation of some API functions such as PC hardware dependent
1134                                 parts or memory management functions.</p>
1135
1136                         @{[ vskip() ]}
1137
1138                         <table border="1" align="center">
1139                                 <tr><td><span class="fname">captivesym</span> keyword</td><td>(none; just the symbol name)</td></tr>
1140                                 <tr><td>Native code function name                    </td><td>FUNCTIONNAME</td></tr>
1141                                 <tr><td>Native traced code from W32 code func. name  </td><td>FUNCTIONNAME_cdecl/_std...</td></tr>
1142                                 <tr><td>Entry/exit debug tracing from UNIX code      </td><td>no</td></tr>
1143                                 <tr><td>Entry/exit debug tracing from W32 code       </td><td>yes</td></tr>
1144                                 <caption>Function Type <span class="constant">native</span> Characteristics</caption>
1145                         </table>
1146
1147                 <a name="functype_undef"><h3>Undefined Function</h3></a>
1148
1149                         <p>Functions not defined by any of the previous function types cannot be
1150                         called by any W32 code including the code of the library implementing
1151                         such function. All functions of <span class="constant">patch</span>ed
1152                         libraries not listed in the <span class="fname">captivesym</span> exports
1153                         file are automatically set to be trapped as fatal program execution
1154                         errors.</p>
1155
1156                         <p>It is not necessary to list the symbols as
1157                         <span class="constant">undef</span> as long as you are just loading the
1158                         W32 <span class="constant">PE-32</span> code and the symbols belong to
1159                         <span class="constant">patch</span>ed library. On the other hand if you
1160                         are loading W32 <span class="fname">.so</span> code or if such symbol is
1161                         a&nbsp;part of <span class="constant">unpatched</span> library (and thus
1162                         being completely provided by the project) you need to list such symbol as
1163                         <span class="constant">undef</span> type to prevent unresolved symbol
1164                         reference.</p>
1165
1166                         <table border="1" align="center">
1167                                 <tr><td><span class="fname">captivesym</span> keyword</td><td>undef</td></tr>
1168                                 <tr><td>Native code function name                    </td><td>(no implementation)</td></tr>
1169                                 <tr><td>Native traced code function name             </td><td>FUNCTIONNAME_cdecl/_stdcall/_fastcall</td></tr>
1170                                 <tr><td>Debug tracing message from UNIX code         </td><td>yes</td></tr>
1171                                 <tr><td>Debug tracing message from W32 code          </td><td>yes</td></tr>
1172                                 <caption>Function Type <span class="constant">undef</span> Characteristics</caption>
1173                         </table>
1174
1175         
1176         <a name="calltype"><h2>API Function Calling Conventions</h2></a>
1177
1178                 <p>Standard UNIX code compiled by GCC (GNU C&nbsp;Compiler) running on host
1179                 $gnulinux always uses <a href="#calltype_cdecl">cdecl</a> ABI (Application
1180                 Binary Interface) calling convention. This calling convention is also the
1181                 default declaration type of UNIX functions.</p>
1182
1183                 <p>W32 uses three different calling conventions in its ABI. They are all
1184                 described in the
1185                 <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vclang/html/_core_argument_passing_and_naming_conventions.asp"><span class="productname">Microsoft</span> documentation</a>.
1186                 It is always necessary to have the proper function declaration
1187                 (prototype) in the caller scope to prevent all sorts of unexpected
1188                 crashes.</p>
1189
1190                 <p>Unfortunately some non-matching combinations of calling conventions
1191                 result in hard to debug bugs: the caller gets back an unexpected stack
1192                 pointer from the callee and upon return it will restore registers from the
1193                 wrong stack pointer place. Since the caller will finally reclaim its stack
1194                 frame from its (uncorrupted) <span class="constant">EBP</span> stack frame
1195                 pointer the caller will return to the caller of the caller correctly. Just
1196                 the registers remain corrupted causing crashes of completely unrelated code
1197                 executed far, far away...</p>
1198
1199                 <p><span class="constant">EDI</span>, <span class="constant">ESI</span> and
1200                 <span class="constant">EBX</span> registers are always saved on the stack.
1201                 They are stored on the stack in this particular order from bottom to top
1202                 addresses (using the <span class="instruction">push EBX</span>,
1203                 <span class="instruction">push ESI</span>,
1204                 <span class="instruction">push EDI</span> sequence). Fortunately $gnulinux
1205                 GCC has the same register saving behaviour. If some register corruption
1206                 occurs the calling type presented between the caller and callee should be
1207                 checked.</p>
1208
1209                 <a name="calltype_cdecl"><h3>W32 Calling Convention &quot;cdecl&quot;</h3></a>
1210
1211                         <p>The only calling convention in the UNIX world. The default one for all
1212                         the compilers. All the arguments are passed on the stack, no arguments
1213                         are cleaned by the callee. Possible inconsistencies in the number of
1214                         function arguments with the function prototype used by the caller are
1215                         harmless. Variable arguments lists can be passed by this convention.</p>
1216
1217                         @{[ doc_img 'fig/calltype_cdecl',
1218                                         'W32 Calling Convention <span class="constant">cdecl</span> Scheme' ]}
1219
1220                         <table border="1" align="center">
1221                                 <tr><td>Arguments freed by         </td><td>caller</td></tr>
1222                                 <tr><td>Arguments on the stack     </td><td>#0 ... #(n-1)</td></tr>
1223                                 <tr><td>Arguments in the registers </td><td>none</td></tr>
1224                                 <tr><td>GCC attribute              </td><td><span class="command">__attribute__((__cdecl__))</span> (default)</td></tr>
1225                                 <caption>Calling Convention <span class="constant">cdecl</span> Characteristics</caption>
1226                         </table>
1227
1228                 <h3>W32 Calling Convention &quot;stdcall&quot;</h3>
1229
1230                         @{[ doc_img 'fig/calltype_stdcall',
1231                                         'W32 Calling Convention <span class="constant">stdcall</span> Scheme' ]}
1232
1233                         <p>Convention never used in the UNIX world. It needs to be specified for
1234                         W32 compilers. All the arguments are passed on the stack, all the
1235                         arguments are cleaned by the callee. Possible inconsistencies in the
1236                         number of function arguments with the function prototype used by the
1237                         caller will result in fatal crash. Variable arguments lists cannot be
1238                         passed by this convention - use <a href="#calltype_cdecl">cdecl</a>
1239                         instead.</p>
1240
1241                         <table border="1" align="center">
1242                                 <tr><td>Arguments freed by         </td><td>callee</td></tr>
1243                                 <tr><td>Arguments on the stack     </td><td>#0 ... #(n-1)</td></tr>
1244                                 <tr><td>Arguments in the registers </td><td>none</td></tr>
1245                                 <tr><td>GCC attribute              </td><td><span class="command">__attribute__((__stdcall__))</span></td></tr>
1246                                 <caption>Calling Convention <span class="constant">stdcall</span> Characteristics</caption>
1247                         </table>
1248
1249                 <h3>W32 Calling Convention &quot;fastcall&quot;</h3>
1250
1251                         <p>Convention never used in the UNIX world. It needs to be specified for
1252                         W32 compilers. Convention used in the W32 world for its low calling
1253                         overhead. All but the first two arguments are passed on the stack, such
1254                         arguments are cleaned by the callee. First two arguments are passed in
1255                         the registers <span class="constant">ECX</span> and
1256                         <span class="constant">EDX</span> respectively. Possible inconsistencies
1257                         in the number of function arguments with the function prototype used by
1258                         the caller will result in fatal crash. Variable arguments lists cannot be
1259                         passed by this convention - use <a href="#calltype_cdecl">cdecl</a>
1260                         instead.</p>
1261
1262                         <p>GCC (GNU C&nbsp;Compiler) native support for this calling convention
1263                         is pretty fresh and it is currently present only in the recent CVS
1264                         versions since 21st December of 2002 which should get released as GCC
1265                         version 3.4. This project solved the unsupported calling convention by
1266                         declaration of arguments passed in registers by
1267                         <span class="command">__attribute__((__regparm__(3)))</span>.
1268                         W32 passes the arguments in registers in the order
1269                         <span class="constant">ECX</span>, <span class="constant">EDX</span> but
1270                         GCC passes them in registers <span class="constant">EAX</span>,
1271                         <span class="constant">EDX</span>, <span class="constant">ECX</span>.
1272                         This incompatibility is compensated at C&nbsp;source level in the
1273                         <a href="#functype">relaying code</a> generated by
1274                         <span class="fname">captivesym</span> relay generator.</p>
1275
1276                         @{[ doc_img 'fig/calltype_fastcall',
1277                                         'W32 Calling Convention <span class="constant">fastcall</span> Scheme' ]}
1278
1279                         <table border="1" align="center">
1280                                 <tr><td>Arguments freed by         </td><td>callee</td></tr>
1281                                 <tr><td>Arguments on the stack     </td><td>#2 ... #(n-1)</td></tr>
1282                                 <tr><td>Arguments in the registers </td><td><span class="constant">ECX</span>=#0,
1283                                                                             <span class="constant">EDX</span>=#1</td></tr>
1284                                 <tr><td>GCC &ge;3.4 attribute      </td><td><span class="command">__attribute__((__fastcall__))</span></td></tr>
1285                                 <tr><td>GCC &lt;3.4 attr. emulation</td><td><span class="command">__attribute__((__stdcall__))</span></td></tr>
1286                                 <tr><td>                           </td><td><span class="command">__attribute__((__regparm__(3) /* EAX,EDX,ECX */))</span></td></tr>
1287                                 <caption>Calling Convention <span class="constant">fastcall</span> Characteristics</caption>
1288                         </table>
1289
1290         <a name="synchronous"><h2>Multithreading and Multiple Processors</h2></a>
1291
1292                 <p>W32 platform stands on its&nbsp;thorough architecture parallelism. It
1293                 must lock all its objects to maintain coherence in presence of
1294                 multithreading and multiple processors. Since the author of this project
1295                 considers any parallel execution a serious obstacle for debugging, the whole
1296                 project architecture was designed to prevent any nondeterministic behaviour.
1297                 Therefore this project always emulates a uniprocessor
1298                 <span class="productname">Microsoft Windows NT</span> kernel
1299                 (<span class="constant">KeNumberProcessors</span> symbol is always 1),
1300                 everything runs in the single initial thread/process and all the filesystem
1301                 operations are performed as synchronous
1302                         (&quot;synchronous&quot; by flags
1303                         <span class="constant">FILE_SYNCHRONOUS_IO_ALERT</span>,
1304                         <span class="constant">FO_SYNCHRONOUS_IO</span>,
1305                         <span class="constant">IRP_SYNCHRONOUS_API</span>,
1306                         <span class="constant">IRP_SYNCHRONOUS_PAGING_IO</span>,
1307                         forced <span class="constant">TRUE</span> result of
1308                         <span class="function">IoIsOperationSynchronous()</span>
1309                         etc.).
1310                 <span class="constant">STATUS_PENDING</span> result code indicating that
1311                 request should be completed in the next callback of the driver is
1312                 considered <a href="#paranoia">fatal</a> as it should not happen for the
1313                 requested synchronous <span class="constant">IRP</span>s (I/O Request
1314                 Packets). Since there is a&nbsp;possibility some filesystem would require
1315                 a&nbsp;real W32 parallel thread all the code that would be hit by W32
1316                 multithreading capability is marked by
1317                 <span class="constant">TODO:thread</span> comment for a&nbsp;possible
1318                 future extension.</p>
1319
1320                 <p>Multiple processors (SMP) support will never need to be implemented
1321                 since uniprocessor W32 kernels apparently run the filesystem driver modules
1322                 fine. As this project implements only the uniprocessor W32 kernel all the
1323                 processor locking functions and structures such as
1324                 <span class="constant">KSPIN_LOCK</span> etc. can be safely implemented as
1325                 no-operations.</p>
1326
1327                 <p>Asynchronous callbacks registered for
1328                 <span class="constant">IO_WORKITEM</span>s are passed as GLib idle
1329                 functions by <span class="function">g_idle_add_full()</span>. Although they
1330                 will probably never be executed during non-interactive project's batch
1331                 executions it is the&nbsp;responsibility of W32 driver implementation to
1332                 complete all the pending tasks before its W32 shutdown. Such W32 shutdown
1333                 is done during cleanup of the project's&nbsp;execution by
1334                 <span class="function">captive_shutdown()</span>.</p>
1335
1336         <a name="paranoia"><h2>Paranoia Checks</h2></a>
1337
1338                 <p>A&nbsp;general approach of software projects development is to implement
1339                 many internal sanity checks during the development stage but to produce the
1340                 most optimized final release product without those debugging checks.</p>
1341
1342                 <p>Facilities for these practices can be seen in the standard
1343                 C&nbsp;include files for example as function
1344                 <span class="function">assert()</span> which gets disabled by the
1345                 <span class="constant">NDEBUG</span> symbol used during the final optimized
1346                 executable compilation. This project uses Gnome GLib messaging subsystem
1347                 offering sanity checks discarded by symbols
1348                 <span class="constant">G_DISABLE_ASSERT</span> and
1349                 <span class="constant">G_DISABLE_CHECKS</span>.
1350                 <span class="productname">Microsoft</span> also produces two versions of
1351                 its products - regular customers use the &quot;free build&quot; (also
1352                 called &quot;retail&quot;) while the programmers should develop their code
1353                 on the &quot;checked build&quot; product releases.</p>
1354
1355                 <p>As this project will always run unknown binary code of proprietary W32
1356                 filesystem drivers, the code can never be trusted. Such code even runs in
1357                 the same unprotected address space as its controlling UNIX code. Since
1358                 there is not enough documentation for the W32 components of the system and
1359                 also such documentation is usually misleading it can never be considered as
1360                 100% emulation. Even in the final releases all the sanity checks
1361                 implemented in this project should remain active as all the project's code
1362                 always interacts with unknown and untrusted W32 binaries.</p>
1363
1364                 <p><span class="productname">Microsoft Windows NT</span> code is written in
1365                 a&nbsp;foolproof style as it accepts even invalid input values, which
1366                 it usually corrects. This makes long-term debugging a&nbsp;pain as it hides
1367                 sources of problems. &quot;Checked build&quot; releases were probably
1368                 designed to fix this flaw by strict consistency checks but it did not reach
1369                 its goals as such checks are usually missing in the code.</p>
1370
1371                 <p>This project has strict consistency checks across all the code to make
1372                 the debugging phase easy enough. Failed sanity check is not always
1373                 a&nbsp;bug - sometimes it just means the real W32 binary code is more
1374                 benevolent than it could be expected according to the documentation and
1375                 such sanity check gets removed for the next version build. In other cases
1376                 the failed sanity checks mean the execution path for some unexpected
1377                 arguments combination was not yet implemented by this project. It may also
1378                 mean a bug, of course...</p>
1379
1380                 <p>Last but not least - never miss a&nbsp;possible sanity check as its
1381                 later removal is an order of magnitude cheaper than an&nbsp;uncaught
1382                 invalid assumption. Failed assertion is not always a&nbsp;bug although it
1383                 has to be fixed, of course.</p>
1384
1385         <a name="client_interface"><h2>Client Filesystem Interface</h2></a>
1386
1387                 <p>While this project successfully communicates with the W32 filesystem
1388                 driver (considered as the lower layer) it must also somehow offer its open
1389                 filesystem interface service to some real client software (upper layer).
1390                 This project offers its own custom filesystem operations interface of <span
1391                 class="constant">libcaptive</span> library based on GLib
1392                 <span class="constant">GObject</span> OO system. Interface prototypes are
1393                 specified in the project's&nbsp;<span class="fname">client-*.h</span>
1394                 include files.</p>
1395
1396                 <p>The filesystem service can be offered in several ways:</p>
1397
1398                 <ul>
1399                         <li>
1400                                 <p>One possibility would be to write
1401                                 <a name="client_interface_customapp">a custom client application</a>
1402                                 for this project such as file manager or a&nbsp;shell. Although it
1403                                 would implement the most appropriate user interface to the set of
1404                                 functions offered by this project (and W32 filesystem API) it has the
1405                                 disadvantage of special client software. Appropriate client is provided
1406                                 by this project as:
1407                                 <span class="fname">src/client/cmdline/cmdline-captive</span></p>
1408                         </li>
1409                         <li>
1410                                 <p>The real UNIX OS filesystem implementation must be completely
1411                                 implemented inside the hosting OS kernel. This requires special coding
1412                                 methods with limited availability of coding features and libraries.
1413                                 Also it would give the full system control to the untrusted W32
1414                                 filesystem driver code with possibly fatal consequences of yet
1415                                 unhandled W32 emulation code paths. It would benefit from the best
1416                                 execution performance but this solution was never considered a real
1417                                 possibility.</p>
1418                         </li>
1419                         <li>
1420                                 <p>The common approach
1421                                 <a name="offered_NFS">of filesystem implementations</a>
1422                                 outside the UNIX OS kernel was a&nbsp;custom NFS server, usually running on the
1423                                 same machine as the NFS-connected client, as such an NFS server is usually
1424                                 an ordinary UNIX user space process. It would be possible to implement
1425                                 this project as a&nbsp;custom NFS server but the NFS protocol itself
1426                                 has a&nbsp;lot of fundamental flaws and complicated code for backward
1427                                 compatibility.</p>
1428                         </li>
1429                         <li>
1430                                 <p>Currently there is already implemented
1431                                 <a name="offered_gnomevfs" href="#offered_gnomevfs_todo">Gnome-VFS interface</a>
1432                                 to the custom filesystem interface of this project's&nbsp;library <span
1433                                 class="constant">libcaptive</span>.
1434                                 The $GnomeVFSmodule can be used by a&nbsp;Gnome-VFS aware client (such
1435                                 as <span class="fname">gnome-vfs/tests/test-shell</span>).</p>
1436
1437                                 <!-- FIXME:lufs-gvfs -->
1438                                 <p>The <span class="productname">Gnome-VFS-module</span> can be further
1439                                 utilized by the <span class="productname">UserVFS</span>
1440                                 \bookcitation{UserVFS-2.0} software ported to provide local <span
1441                                 class="productname">Coda</span> \bookcitation{Coda} network filesystem
1442                                 server implementation similar to the <a href="#offered_NFS">NFS
1443                                 server</a> solution but with a&nbsp;much more acceptable network protocol &mdash;
1444                                 more about this actual scheme can be found in <a href="#architecture">the
1445                                 project architecture description</a>.</p>
1446                         </li>
1447                         <li>
1448                                 <!-- FIXME:LUFS -->
1449                                 <p>Direct interface for the Host-OS kernel would be provided
1450                                 by the
1451                                 <a name="fuse_interface"></a>
1452                                 <span class="productname">FUSE</span> \bookcitation{FUSE} project <a href="#offered_FUSE">described
1453                                 later in this document</a>. This interface is currently not yet implemented.
1454                                 Although it would be much more straightforward than
1455                                 <a href="#offered_gnomevfs">Gnome-VFS interface</a> described above,
1456                                 its biggest disadvantage would be the requirement to replace/update
1457                                 the stock distributions kernel package as it usually does not
1458                                 have the <span class="productname">FUSE</span> \bookcitation{FUSE} filesystem support while it already supports
1459                                 the <span class="productname">Coda</span> \bookcitation{Coda} interface, which is sufficient for the
1460                                 ported <span class="productname">UserVFS</span> \bookcitation{UserVFS-2.0} interface.</p>
1461                         </li>
1462                 </ul>
1463
1464         <h2>3rd Party Projects Bugfixes</h2>
1465
1466                 <p>Implementation of this project required certain bugfixes to 3rd party
1467                 software packages:</p>
1468
1469                 <h3>GNU Libtool, A&nbsp;Generic Library Support Script</h3>
1470                 
1471                         <p><span class="productname"><a href="http://www.gnu.org/software/libtool/">libtool</a></span>:
1472                         Handle duplicate object file names when performing piecewise archive
1473                         linking by renaming object files when needed.</p>
1474
1475                 <h3>dosfstools, MS-DOS FAT Filesystems Support on Linux</h3>
1476
1477                         <p><span class="productname"><a href="ftp://ftp.uni-erlangen.de/pub/Linux/LOCAL/dosfstools/">dosfstools</a></span>:
1478                         Prevent generation of <span class="constant">FAT-32</span> filesystems
1479                         not supported by the (buggy?) W32 platform
1480                         <span class="fname">fastfat.sys</span> implementation.</p>
1481
1482                 <h3>ext2fsd, Ext2 File System Driver</h3>
1483
1484                         <p><span class="productname"><a href="http://sys.xiloo.com/projects/projects.htm#ext2fsd">Ext2fsd</a></span>:
1485                         Many filesystem corruption fixes, missing filesystem unregistration
1486                         etc.</p>
1487
1488
1489 <h1>Further Development</h1>
1490
1491         <p>All the W32 filesystem operations of <span class="fname">cdfs.sys</span>,
1492         <span class="fname">fastfat.sys</span>
1493         and
1494         <span class="fname">ext2fsd.sys</span> can be successfully executed.
1495         The further development tasks include:</p>
1496
1497         <ul>
1498                 <li>
1499                         <p>The primary goal is to reach <span class="productname">NTFS</span>
1500                         filesystem (<span class="fname">ntfs.sys</span>) compatibility.
1501                         A&nbsp;lot of imported symbols are missing although it is expected most of
1502                         them can be just safely passed for execution in the original
1503                         <span class="fname">ntoskrnl.exe</span>.</p>
1504                 </li>
1505                 <li>
1506                         <p>There may still be valid code paths where some emulated W32 kernel
1507                         functionality and symbols remain unimplemented as these code paths were
1508                         just not hit during testing. The proper way would be to check all the
1509                         possibilities of such code paths execution from the filesystem driver
1510                         code disassembly.</p>
1511                 </li>
1512                 <li>
1513                         <p>No unusual error codes are expected from the filesystem drivers and
1514                         any such return codes will abort the project's execution. For example
1515                         code <span class="constant">STATUS_NO_SUCH_FILE</span> is expected and
1516                         correctly recognized but
1517                         <span class="constant">STATUS_FILE_CORRUPT_ERROR</span> will stop driver
1518                         execution.</p>
1519
1520                         <p><a name="exception_fatal">No exceptions in W32 code are allowed</a>
1521                         - any thrown exception will result in driver execution abortion (instead
1522                         of just returning some error code as in the original W32 environment).</p>
1523
1524                         <p>These issues should cease to be a&nbsp;problem after deployment of
1525                         sandbox wrapper which will restart the filesystem driver after any
1526                         unexpected error.</p>
1527                 </li>
1528                 <li>
1529                         <p><a name="todo_sandbox">Completion and activation of the sandbox
1530                         wrapper.</a> <span class="fname">src/libcaptive/sandbox/</span> sources
1531                         currently implement the base of both the client and the server sides of
1532                         CORBA interface to separate the client calling filesystem operations from
1533                         the W32 filesystem driver itself. Although CORBA usually makes sense for
1534                         crossmachine network interconnections here it gets a&nbsp;role of
1535                         inter-process interface between the regular client process and the
1536                         <span class="constant">chroot</span>ed/unprivileged/<span class="constant">ulimit</span>ed
1537                         environment of the W32 emulation address space.</p>
1538
1539                         <p>Any W32 binary file must always be considered untrusted and therefore
1540                         it needs to be sandboxed and accessible only via the CORBA interface.
1541                         Furthermore it is needed for clean implementation of $GnomeVFSmodule as
1542                         this project always handles <a href="#mounted_one">exactly one mounted
1543                         filesystem</a> but $GnomeVFSmodule interface expects unlimited number of
1544                         mounts in the scope of one process.</p>
1545                 </li>
1546                 <li>
1547                         <p>Project offers
1548                         <a name="offered_gnomevfs_todo">the filesystem access as its custom UNIX API</a>
1549                         (<span class="fname">captive/client-*.h</span>). This API is currently
1550                         offered in the scope of $GnomeVFSmodule interface as a filter applied to
1551                         the filesystem device (or filesystem image file).
1552                         As $GnomeVFS has no officially supported method of generic $gnulinux
1553                         kernel filesystem access it may be better to provide
1554                         <!-- FIXME:LUFS --><a name="offered_FUSE">an interface</a> for <span
1555                         class="productname">FUSE</span> \bookcitation{FUSE} instead.</p>
1556
1557                         <p>To get transparent access to W32 filesystems from legacy
1558                         (=non <span class="productname">Gnome-VFS-2.0</span> aware) applications it is possible to use a draft
1559                         port \bookcitation{UserVFS-2.0} of the original <span class="productname">UserVFS</span>
1560                         \bookcitation{UserVFS} to <span class="productname">Gnome-VFS-2.0</span> interface.
1561                         It is also possible to use the test utilities of <span class="productname">Gnome-VFS-2.0</span> \bookcitation{GnomeVFS} package.</p>
1562                 </li>
1563                 <li>
1564                         <p>Implementation of interface to this project by
1565                         <span class="productname"><a href="http://surprise.sourceforge.net/">Partition Surprise</a></span>
1566                         partition manager. Although there currently exists
1567                         <span class="productname"><a href="http://mlf.linux.rulez.org/mlf/ezaz/ntfsresize.html">ntfsresize</a></span>
1568                         it is a data structures reverse engineered solution which may have
1569                         problems on various hard drives. <span class="productname">Partition
1570                         Surprise</span> project would be able to resize the disk safely by using
1571                         just the original W32 filesystem driver file although with some
1572                         performance hit.</p>
1573                 </li>
1574         </ul>
1575
1576
1577 <h1>Related Projects</h1>
1578
1579         <p>The usual solution for file exchange between $freespeech operating systems
1580         and <span class="productname">Microsoft Windows NT</span> is to use
1581         <span class="productname">FAT32</span> (called <span class="productname">vfat</span>
1582         in $gnulinux) partition and swap the files over it. This method is not
1583         very comfortable as you never have access to all the files of the other
1584         operating system.</p>
1585
1586         <a name="LinuxNTFScompet"><h2>$LinuxNTFS</h2></a>
1587
1588                 <p>Although $LinuxNTFS takes a&nbsp;completely different approach and has
1589                 a&nbsp;different architecture, its final goal is the same as for this
1590                 project - reliable read-write <span class="productname">NTFS</span>
1591                 filesystem support. $LinuxNTFS goes the way of reverse engineering
1592                 filesystem data structures (and possibly
1593                 <span class="fname">ntfs.sys</span> itself). Unfortunately after many years
1594                 of its development it has not yet reached the state of reliable read-write
1595                 access although its read-only part is considered trustworthy.</p>
1596
1597                 <p>Using $LinuxNTFS for read-only access to existing partition with
1598                 <span class="productname">Microsoft Windows NT</span> installation is
1599                 planned to be able to acquire existing <span class="fname">ntfs.sys</span>,
1600                 <span class="fname">ntoskrnl.exe</span> and possibly
1601                 <span class="fname">ksecdd.sys</span> (imported by
1602                 <span class="fname">ntfs.sys</span>) files from the user's
1603                 <span class="productname">NTFS</span> partition.</p>
1604
1605         <h2><span class="productname"><a href="http://www.cgsecurity.org/ntfs.html">NTPwd NTFS Driver</a></span></h2>
1606
1607                 <p>DOS based <a href="http://www.gnu.org/licenses/gpl.html">GPL-2.0</a>
1608                 read-write NTFS driver. Filesystem structures are reverse engineered in the
1609                 way of <a href="#LinuxNTFScompet">Linux-NTFS Project</a>. As it is not very
1610                 actively maintained it reaches a&nbsp;lower level of
1611                 <span class="productname">NTFS</span> compatibility.</p>
1612
1613         <h2>Virtual Machine with <span class="productname">Microsoft Windows NT</span></h2>
1614
1615                 <p>Original <span class="productname">Microsoft Windows NT</span>
1616                 operating system can be run inside a virtual machine running under
1617                 $gnulinux (or vice versa) and share the read-write disk partitions by using
1618                 a network file sharing through a&nbsp;virtual network card.</p>
1619
1620                 <p>Although there will be full filesystem structures compatibility the
1621                 <span class="productname">NTFS</span> partition cannot be accessed with no
1622                 system installed (or with non-bootable crashed system to repair it this
1623                 way)
1624                         (Although this project requires the original
1625                         <span class="fname">ntfs.sys</span> it can be obtained from the legal
1626                         <span class="productname">Microsoft Windows NT</span> CD.),
1627                 it will have substantial system resources requirement and you also need
1628                 a virtual machine software product such as commercial
1629                 <span class="productname"><a href="http://www.vmware.com/download/workstation.html">VMware Workstation</a></span>.</p>
1630
1631
1632 <h1>Conclusion</h1>
1633
1634         <p>The project established <a href="#existing_emulation">a&nbsp;new form</a>
1635         of W32 emulation model suitable for existing proprietary binary W32 kernel
1636         code (drivers) while being hosted in an open source operating system
1637         (currently $gnulinux). Currently, only the subsystems required by W32
1638         filesystem drivers are implemented but the project can be further extended
1639         for compatibility with various hardware-related drivers such as W32 video
1640         drivers, W32 disk interface drivers etc.</p>
1641
1642         <p>Some W32 kernel space subsystems were implemented for the first time as
1643         $freespeech code as they are still missing in the only currently available
1644         $freespeech W32 kernel implementation, $ReactOS. Some W32 kernel function
1645         behaviour expected by the drivers had to be reverse engineered and documented
1646         in this project's&nbsp;API documentation (not listed in this book) and/or in
1647         its source files, because its description in the
1648         <span class="productname">Microsoft</span> documentation is missing.</p>
1649
1650         <p>The author had to get familiar both with the W32 kernel API and with the
1651         W32 kernel code through reverse engineering. This experience also covers the
1652         first <span class="productname">Microsoft Windows</span> compatible code ever
1653         written by the author - <span class="fname">hal.dll</span> (Hardware
1654         Abstraction Layer) part of W32 kernel.</p>
1655
1656         <p>Certain UNIX implementation interfaces allow a regular, non-privileged
1657         user of UNIX system to mount image files with any W32 filesystem supported by
1658         this project. Such a&nbsp;mount operation usually requires UNIX
1659         <span class="constant">root</span> privileges. On the other hand the
1660         choice of supported filesystem types is very limited as only a&nbsp;few
1661         filesystem types are supported for the W32 platform.</p>
1662 HERE
1663
1664
1665 My::Web->footer();