+TraceFS known bugs
[www.jankratochvil.net.git] / project / captive / doc / Index.html.pl
1 #! /usr/bin/perl
2
3 # $Id$
4 # Captive project doc Index page Perl template.
5 # Copyright (C) 2003 Jan Kratochvil <project-www.jankratochvil.net@jankratochvil.net>
6
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; exactly version 2 of June 1991 is required
10
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 # GNU General Public License for more details.
15
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19
20
21 package project::captive::doc::Index;
22 require 5.6.0;  # at least 'use warnings;' but we need some 5.6.0+ modules anyway
23 our $VERSION=do { my @r=(q$Revision$=~/\d+/g); sprintf "%d.".("%03d"x$#r),@r; };
24 our $CVS_ID=q$Id$;
25 use strict;
26 use warnings;
27
BEGIN {
        # Find the top of the source tree and add it to @INC before the
        # 'use My::Web' below is compiled.  The directory is the last
        # whitespace-separated word of the first line of ./Makefile that
        # starts with "top_srcdir".
        our $top_dir;
        if (open my $makefile,"<","Makefile") {
                my($top_srcdir_line)=grep /^top_srcdir/,<$makefile>;
                close $makefile;
                $top_dir=(split /\s/,$top_srcdir_line)[-1] if defined $top_srcdir_line;
        }
        else {
                # Stay best-effort as before, but say what actually went wrong.
                warn "Cannot open Makefile: $!\n";
        }
        # lib->import() is what 'use lib' does at compile time; calling it
        # directly avoids a string eval that broke on a quote in the path.
        if (defined $top_dir && $top_dir ne "") {
                require lib;
                lib->import($top_dir);
        }
}
29 use My::Web;
30 require "CGI";
31
32
# Register this page with the site framework (package, title and the
# page-local stylesheet) and then emit the page heading.
My::Web->init(
        '__PACKAGE__'=>__PACKAGE__,
        'title'=>'Captive NTFS doc',
        # The stylesheet text starts with an empty line; the heredoc is
        # non-interpolating as the CSS contains no Perl variables.
        'head_css'=>"\n".<<'HEAD_CSS',
.productname { font-family: cursive; }
.fname       { font-family: monospace; }
.constant    { font-family: monospace; }
.author      { font-family: cursive; }
.stuff       { font-style: italic; font-size: larger; margin-left: 20%; margin-right: 10%; }
.function    { font-family: monospace; }
.type        { font-family: monospace; }
.command     { font-family: monospace; }
.instruction { font-style: italic; }
HEAD_CSS
        );
My::Web->heading();
49
50
# Build the HTML of a centered, captioned figure followed by a vertical skip.
#   $img_base - image name handed to img() (defined outside this file)
#   $caption  - caption HTML; it is also passed to img() as its second argument
# Returns the HTML as one string.
sub doc_img ($$)
{
my($img_base,$caption)=@_;

        my $r="";
        $r.='<table border="0" align="center">'."\n";
                # HTML permits <caption> only immediately after the <table> start tag.
                $r.="\t<caption>$caption</caption>\n";
                $r.="\t<tr><td>".img($img_base,$caption)."</td></tr>\n";
        $r.='</table>'."\n";
        $r.=vskip "2ex";
        return $r;
}
63
# Link to the HEAD revision of a Captive source file in the ViewCVS repository.
#   $filename - path of the file below priv/captive/
#   $text     - optional link label; $filename is used when it is missing or false
# Returns whatever a_href() returns for that URL and label.
sub captive_srcfile ($;$)
{
my($filename,$text)=@_;

        my $url='http://cvs.jankratochvil.net/viewcvs/*checkout*/priv/captive/'.$filename.'?rev=HEAD';
        my $label=($text ? $text : $filename);
        return a_href($url,$label);
}
71
# Links used by the page text below; both point to the GNU free software definition.
my $freespeech=a_href('http://www.gnu.org/philosophy/free-sw.html','Free');
my $freebeer=a_href('http://www.gnu.org/philosophy/free-sw.html','free (as in beer)');
74
# Wrap a linked product name in a <span class="productname"> element.
#   $url  - link target, passed to a_href() as given
#   $name - plain-text product name; it is HTML-escaped here
sub productname
{
my($url,$name)=@_;

        my $link=a_href($url,CGI::escapeHTML($name));
        return '<span class="productname">'.$link.'</span>';
}
# Ready-made product links interpolated into the page text below.
my $Wine          =productname('http://www.winehq.com/','Wine');
my $ReactOS       =productname('http://www.reactos.com/','ReactOS');
my $LinuxNTFS     =productname('http://linux-ntfs.sourceforge.net/','Linux NTFS');
my $GnomeVFS      =productname('http://developer.gnome.org/doc/API/gnome-vfs/','Gnome-VFS');
my $GnomeVFSmodule=productname('http://developer.gnome.org/doc/API/gnome-vfs/modules.html','Gnome-VFS-module');
# Plain text, not a link.
my $gnulinux='GNU/Linux';
87
88
89 print <<"HERE";
90
91
92 <h1>About</h1>
93
94         <h2>Reasons for the Implementation</h2>
95
96                 <p>Currently there is no possibility for any of the available $freespeech
97                         ($freespeech used in the following text in the meaning of
98                         &quot;@{[ a_href 'http://www.gnu.org/philosophy/free-sw.html','free as in speech' ]}&quot;)
99                 operating systems to reliably write to the most common disk partition
100                 filesystem type &ndash; <span class="productname">Microsoft NTFS</span>. It would
101                 have been supported a long time ago but there is no proper documentation of
102                 <span class="productname">NTFS</span> filesystem data structures available.
103                 Since <span class="productname">Microsoft</span> corporation continues in its
104                 propagation of <span class="productname">Microsoft Windows NT</span>
105                         (<span class="productname">NT</span> identifier used in the following text
106                         applies to all the products of <span class="productname">Microsoft</span>
107                         <span class="productname">NT</span> series such as
108                         <span class="productname">NT&nbsp;4.0</span>,
109                         <span class="productname">2000</span> as NT-5.0
110                         and
111                         <span class="productname">XP</span> as NT-5.1.)
112                 based operating systems <span class="productname">NTFS</span> is the default
113                 disk file system type for vendor preinstalled <span class="productname">Microsoft Windows</span>.
114
115                 <p>Unfortunately the <span class="productname">NTFS</span> filesystem has too
116                 complex data structure to allow a complete reverse engineering process in
117                 reasonable time. Currently available $freespeech solutions such as $LinuxNTFS
118                 filesystem have already implemented reliable reverse
119                 engineered read-only access. However <a name="reliability">reliable</a>
120                 read-write part of the access would require much better
121                 knowledge of the <span class="productname">NTFS</span> data structures.
122                 Currently only rewriting of already existing file data blocks is supported
123                 by $LinuxNTFS &mdash; no file creation, no file deletion, no directory operations etc.
124                 Also any future versions of <span class="productname">NTFS</span> filesystem
125                 would require another major reverse engineering effort.</p>
126
127
128         <h2>Challenges of the Project</h2>
129
130                 <p>The <a name="NTFSgoal">ultimate goal</a> of this project is definitely the
131                 free implementation of @{[ a_href '#reliability','reliable' ]} read-write <span
132                 class="productname">NTFS</span> filesystem driver. This project chose to
133                 solve this problem in the style of $Wine project by using the original binary
134                 <span class="fname">ntfs.sys</span> and emulating all the required layers of
135                 <span class="productname">Microsoft Windows NT</span> for it.</p>
136
137                 <p>Unfortunately this effort is tainted by only partial and generally
138                 insufficient documentation of API between filesystem driver
139                 (<span class="fname">ntfs.sys</span>) and the
140                 <span class="productname">Microsoft Windows NT</span>
141                 (&quot;@{[ a_href 'http://mail.gnu.org/archive/html/libtool/2000-09/msg00000.html','W32' ]}&quot;
142                 in the following text) kernel <span class="fname">ntoskrnl.exe</span>. Note
143                 that this API is different from the one being used in the $Wine project
144                 since <span class="productname">Wine</span> implements only the user space
145                 part of W32.</p>
146
147
148         <h2>Microsoft Windows Versions Compatibility</h2>
149
150                 <p>Currently this project supports only driver files of
151                 <span class="productname">Microsoft Windows XP</span> (NT-5.1)
152                 in the following releases:</p>
153
154                 <ul>
155                         <li>Microsoft Windows XP Service Pack 1a Checked Build U.S.</li>
156                         <li>Microsoft Windows XP Service Pack 1a Free Build U.S.</li>
157                         <li>Microsoft Windows XP (No Service Pack) Checked Build U.S.</li>
158                         <li>Microsoft Windows XP (No Service Pack) Free Build U.S.</li>
159                 </ul>
160
161                 <p>The latest list of supported driver files can be found in:
162                 @{[ captive_srcfile 'src/install/acquire/w32-mod-id.captivemodid.xml' ]}</p>
163
164                 <p>Microsoft Windows NTFS filesystem driver is capable of accessing even
165                 the older formats of the filesystem. Porting to Microsoft Windows 2003
166                 Server is expected to be done soon. There can be also a danger of Microsoft
167                 Windows upgrading NTFS disk filesystem where you would no longer be able
168                 to access your NTFS disk by your original
169                 <span class="productname">Microsoft Windows</span> version.
170                 This upgrade does not happen as it occurs only during complete CD-ROM
171                 Microsoft Windows system installation &ndash; such operation is not performed
172                 by this project.</p>
173
174                 <p>There is no problem technically obtaining the needed driver files of
175                 <span class="productname">Microsoft Windows XP</span> as they are freely
176                 downloadable at:
177                 @{[ a_href 'http://www.microsoft.com/WindowsXP/pro/downloads/servicepacks/sp1/checkedbuild.asp' ]}</p>
178                 <p>There may be @{[ a_href '#law','legal reasons' ]} you would not be allowed
179                 to use these files if you own a license to a different version of
180                 <span class="productname">Microsoft Windows</span>. Legal rights will vary
181                 depending on your country.</p>
182
183
184 <h1>Architecture</h1>
185
186         <p>The principle of the
187         project lies in the glue between
188         <span class="productname">Microsoft Windows NT</span> kernel space
189         environment and $gnulinux user space process environment:</p>
190
191         @{[ doc_img 'arch-W32','Microsoft Windows Subsystems Architecture' ]}
192         @{[ doc_img 'arch-captive','Captive Subsystems Architecture' ]}
193         
194         <a name="existing_emulation"><h2>Existing Emulation Projects</h2></a>
195
196                 <p>There were two well-known $freespeech projects emulating W32 subsystems
197                 to reach the compatibility with various W32 components:
198                 $Wine and $ReactOS. Sad moment is that the goals of this project do not fit
199                 very well into any role in those two ones. Therefore this project went
200                 its own way of emulation:</p>
201
202                 <table align="center" border="1">
203                         <tr>
204                                 <th>@{[ a_href '#guestosnote','Guest-OS' ]}</th>
205                                 <th>@{[ a_href '#hostosnote' ,'Host-OS'  ]}</th>
206                                 <th>Implements</th>
207                                 <th>W32 kernel library</th>
208                                 </tr>
209                         <tr>
210                                 <td>$Wine</td>
211                                 <td>$gnulinux</td>
212                                 <td>W32 user space</td>
213                                 <td><span class="fname">ntdll.dll</span></td>
214                                 </tr>
215                         <tr>
216                                 <td>$ReactOS</td>
217                                 <td><span class="constant">i386</span> hardware</td>
218                                 <td>W32 kernel and user space</td>
219                                 <td><span class="fname">ntoskrnl.exe</span></td>
220                                 </tr>
221                         <tr style="height: 1ex;"></tr>
222                         <tr>
223                                 <td>this project</td>
224                                 <td>$gnulinux</td>
225                                 <td>W32 kernel</td>
226                                 <td><span class="fname">ntoskrnl.exe</span></td>
227                                 </tr>
228                         <caption>Emulation Projects Characteristics</caption>
229                 </table>
230
231                 <dl>
232                         <a name="guestosnote"><dt>Guest-OS</dt></a>
233                         <dd>@{[ a_href 'http://www.vmware.com/support/reference/common/glossary/#guestos','Guest OS' ]}:
234                                 An operating system that runs inside a&nbsp;virtual machine.</dd>
235                         <a name="hostosnote" ><dt>Host  OS</dt></a>
236                         <dd>@{[ a_href 'http://www.vmware.com/support/reference/common/glossary/#hostos' ,'Host  OS' ]}:
237                                 An operating system that runs on the host machine.</dd>
238                 </dl>
239
240                 <p>While $ReactOS provides the necessary W32 kernel subsystem emulation
241                 code we also need to run such @{[ a_href '#guestosnote','Guest-OS' ]} in the
242                 @{[ a_href '#hostosnote','Host-OS' ]} $gnulinux. Initially it was planned to
243                 extend $Wine with the W32 kernel space emulation functionality but
244                 fortunately <span class="author">Steven Edwards</span> pointed to the $ReactOS
245                 which better suits the needs of this project by its already implemented W32
246                 kernel space emulation.</p>
247
248                 <p>The <a name="reactos_nocare">original reasons</a> for developing
249                 $ReactOS still make no sense to the author of this project. Free
250                 implementation of W32 platform standalone running on the machine hardware
251                 is no longer free as most of the W32 applications are usually closed source
252                 and the user still loses their freedom on the application level anyway. Even
253                 in the case of available free applications there still remains the
254                 disadvantage of losing the Host-OS platform availability if implemented in
255                 the $Wine style. For these ideology incompatibilities not much effort was
256                 made for acceptance of the fixes and improvements of $ReactOS by this project.
257                 Moreover new functionality is not being implemented to the $ReactOS part
258                 but it is coded in Gnome style in the project specific source files
259                 place.</p>
260
261                 <p>The most serious problem of $ReactOS is its dependence on the direct
262                 <span class="constant">i386</span> hardware instead of some
263                 @{[ a_href '#hostosnote','Host-OS' ]} as required by the goals of this project.
264                 W32 is designed to be hardware-independent using its
265                 <span class="fname">hal.dll</span>. Unfortunately $ReactOS does not follow
266                 this design and thus various patches and replacements of its
267                 various parts and its hardware-dependent code are needed. Despite that the $ReactOS code
268                 base was still a big asset for this project.</p>
269
270                 <p class="stuff">... and @{[ a_href 'http://www.reactos.com/','ReactOS' ]} cannot run on Linux!<br /></p>
271
272
273
274                 <p>Some API functions are provided both by
275                 <span class="fname">ntdll.dll</span> and
276                 <span class="fname">ntoskrnl.exe</span> in W32.
277                 <span class="author">Casper Hornstrup</span> explained that the calling
278                 conventions of such functions have to be differentiated as
279                 <span class="fname">ntdll.dll</span> lives in the user space (low address
280                 space &ndash; below <span class="constant">0x80000000</span>) and
281                 <span class="fname">ntoskrnl.exe</span> in the kernel space (high address
282                 space &ndash; above <span class="constant">0x80000000</span>). Although they
283                 contain slightly different set of symbols (functions)
284                 <span class="fname">ntdll.dll</span> still can be considered as a&nbsp;user
285                 space interface to the kernel space implementation by
286                 <span class="fname">ntoskrnl.exe</span>.</p>
287
288                 <p>Currently there are
289                 no plans to ever extend the project's crossplatformity beyond the
290                 <span class="constant">i386</span> processor
291                         (<span class="constant">i386</span> used here as
292                         @{[ a_href 'http://www.intel.com/','Intel' ]} architecture covering 32-bit
293                         processors compatible with <span class="constant">i386</span>,
294                         <span class="constant">i486</span>, ...).</p>
295
296         <h2>API Function Implementation Choices</h2>
297
298                 <p>During the initial point of the project development all the API
299                 functions were defined as unimplemented, of course. Any call of such
300                 unimplemented function is fatal and results in program termination. When we
301                 need to implement any required API function we have multiple choices to do
302                 so:
303                 @{[ a_href '#functype_pass','Direct pass to original <span class="fname">ntoskrnl.exe</span>' ]},
304                 @{[ a_href '#functype_wrap','Wrap of the original <span class="fname">ntoskrnl.exe</span> function' ]},
305                 @{[ a_href '#functype_native_reactos','Native implementation &ndash; $ReactOS' ]},
306                 @{[ a_href '#functype_native_wine','Native implementation &ndash; $Wine' ]}
307                 or
308                 @{[ a_href '#functype_native_libcaptive','Native implementation &ndash; project specific' ]}.</p>
309                 <!-- a_href '#functype_undef','Undefined function' -->
310
311         <h2>&quot;patched&quot; vs. &quot;unpatched&quot; Libraries</h2>
312
313                 <p>Library is called <span class="constant">patched</span> if we require
314                 loading its original binary code file. Project needs to patch it to be able
315                 to trap all the function entry points. The only currently
316                 <span class="constant">patched</span> library of this project is
317                 <span class="fname">ntoskrnl.exe</span>.</p>
318
319                 <p>Library is called <span class="constant">unpatched</span> if no original
320                 binary code is needed since all of its functions are completely emulated by
321                 @{[ a_href '#functype_native','the native implementations' ]} of this project.
322                 The typical <span class="constant">unpatched</span> representative is
323                 <span class="fname">hal.dll</span> as it specializes on the hardware
324                 dependent code and therefore it must be completely replaced by this project
325                 running in the $gnulinux operating system environment. Early versions of
326                 this project had also full <span class="constant">unpatched</span>
327                 <a href="#native_ntoskrnl">native implementation of
328                 <span class="fname">ntoskrnl.exe</span></a> but it no longer applies.</p>
329
330         <h2>Memory Management</h2>
331
332                 <p>Original <span class="productname">Microsoft Windows NT</span>
333                 architecture uses two address space areas &ndash; user space and kernel space.
334                 User space is mapped in the range <span class="constant">0x00000000</span>
335                 to <span class="constant">0x7FFFFFFF</span>, kernel space is mapped in the
336                 range <span class="constant">0x80000000</span>
337                 (<span class="constant">KERNEL_BASE</span> in $ReactOS sources) to
338                 <span class="constant">0xFFFFFFFF</span>. All these virtual memory ranges
339                 represent addresses after their MMU (Memory Management Unit) mapping, of
340                 course. More discussion can be found in the
341                 <a href="http://www.microsoft.com/hwdev/platform/server/PAE/PAEmem.asp">description 
342                 by <span class="productname">Microsoft</span></a>.</p>
343
344                 <p>This project runs in the virtual address space used both for the UNIX
345                 user space process part and for the W32 kernel space. Therefore this
346                 project defines that W32 kernel runs in the whole range
347                 <span class="constant">0x00000000</span> to
348                 <span class="constant">0xFFFFFFFF</span> since there are no special mapping
349                 assumptions about the UNIX user space process mapping. No W32 user space
350                 exists in this project. Such approach also nullifies any special memory
351                 moving operations between W32 kernel space and W32 user space memory areas
352                 (such as <span class="function">MmSafeCopyToUser()</span>).</p>
353
354         <h2>Unicode Strings and Characters</h2>
355
356                 <p>W32 platform uses 16-bit type <span class="type">wchar_t</span> while $gnulinux uses a
357                 32-bit one. This can be a problem during GCC (GNU C&nbsp;Compiler)
358                 compilation of combination of native UNIX C&nbsp;sources (assuming 32-bit
359                 GCC with 32-bit <span class="type">wchar_t</span>) and
360                 $ReactOS C sources (assuming W32 compiler with 16-bit
361                 <span class="type">wchar_t</span>) for literal wide strings
362                 (C source file syntax: <span class="command">L&quot;wstring&quot;</span>).
363                 Possible ways to solve this issue:</p>
364
365                 <ul>
366                         <li>
367                                 <p>Using <span class="constant">-fshort-wchar</span> GCC option and
368                                 strictly differentiate between compilation of
369                                 <span class="productname">ReactOS</span> code and UNIX code.</p>
370
371                                 <p>pros: No source modifications needed, no runtime performance hit.</p>
372
373                                 <p>cons: No type checking if some part of code has bad compilation
374                                 flags, complicated way to completely split
375                                 <span class="productname">ReactOS</span> and UNIX code.</p>
376                         </li>
377                         <li>
378                                 <p>Wrap all <span class="productname">ReactOS</span> literal constants
379                                 by some conversions function call (implemented as macro
380                                 <span class="function">REACTOS_UCS2()</span> by this project).</p>
381
382                                 <p>pros: Any forgotten/mistaken conversions are type-checked and warned
383                                 during the compilation by GCC.</p>
384
385                                 <p>cons: All compiled <span class="productname">ReactOS</span> sources
386                                 files containing literal wide strings have to be wrapped/modified,
387                                 performance hit by runtime string conversions.</p>
388
389                                 <p>This solution was chosen to get the internal sanity checking
390                                 benefit.</p>
391                         </li>
392                 </ul>
393
394         <h2>Supported Binary Formats</h2>
395
396                 <p>The native W32 binary format is identified as
397                 <span class="constant">PE-32</span> (Portable Executable 32-bit), such
398                 files have all the usual extensions such as
399                 <span class="fname">.sys</span>, <span class="fname">.exe</span>,
400                 <span class="fname">.dll</span> etc. <span class="constant">PE-32</span>
401                 loading support was already implemented by $ReactOS, its memory mapping
402                 specifics just had to be ported to $gnulinux environment by this project.
403                 This loading support does not (yet) cover importing of debug symbols from
404                 W32 <span class="fname">.PDB</span> (Program DataBase) files in $gnulinux
405                 ABI (Application Binary Interface) compatible way.</p>
406
407                 <p>This project also supports transparent loading of UNIX
408                 <span class="fname">.so</span> (Shared Object file) binary format. If you
409                 have W32 source files for some W32 library you can try to compile it by GCC
410                 to get the shared library with $gnulinux ABI compatible debug information
411                 (GCC option <span class="constant">-ggdb3</span> recommended). Beware of
412                 possible compilation problems as <span class="productname">Microsoft</span>
413                 C&nbsp;code expects <span class="constant">exception</span> handling to be
414                 supported by the compiler (definitely not the case of the plain C compiler
415                 of GCC) &mdash; all the exception catching code should be discarded as any
416                 @{[ a_href '#exception_fatal','generated exceptions are always fatal' ]} when
417                 such driver is running in the scope of this project. You can use the
418                 following script of this project to compile W32 filesystem source files as
419                 UNIX <span class="fname">.so</span>:
420                 @{[ captive_srcfile 'src/w32-mod/ext2fsd.so-build.sh' ]}</p>
421                 
422                 <p>Be aware of some differences if you use
423                 <span class="constant">PE-32</span> binary format file vs.
424                 <span class="fname">.so</span> format file.
425                 <span class="constant">PE-32</span> use the appropriate W32 specific
426                 @{[ a_href '#calltype','cdecl/stdcall/fastcall call types' ]},
427                 <span class="fname">.so</span> must be completely compiled in the standard
428                 UNIX @{[ a_href '#calltype_cdecl','cdecl call type semantics' ]}.
429                 @{[ a_href '#functype_native','Native function implementations' ]} do not need
430                 to be explicitly exported by <span class="fname">captivesym</span> as they
431                 are resolved automatically by the UNIX dynamic system linker. It may be
432                 surprising you will have to fix all such missing symbol exports if you
433                 advance during the development from the debugging
434                 <span class="fname">.so</span> file for the production version of the
435                 original <span class="constant">PE-32</span> binary file.</p>
436
437         <a name="reverse"><h2>Reverse Engineering</h2></a>
438
439                 <p>This project has no intentions to reverse engineer and document the
440                 filesystem data structures themselves since they are being encapsulated by
441                 the filesystem driver. For these reasons the resources available in
442                 projects such as $LinuxNTFS get out of any possible use. This project goal
443                 is to provide fully compatible API interface to the rest of the W32 system
444                 to persuade the filesystem driver it is running in the native
445                 <span class="productname">Microsoft Windows XP</span> environment.</p>
446
447                 <p>All the W32 filesystem drivers are running in the W32 kernel address
448                 space and this area of W32 API is not much documented by
449                 <span class="productname">Microsoft</span>. Some API functions are not
450                 documented at all and the others are documented insufficiently for their
451                 possibly needed reimplementation from scratch. Documentation being
452                 consulted primarily consists of
453                 <span class="productname">@{[ a_href 'http://msdn.microsoft.com/library/default.asp?url=/library/en-us/kmarch/hh/kmarch/kmhdr_6enb.asp','MSDN (Microsoft Developer Network) Kernel-Mode Driver Architecture: Windows DDK' ]}</span>
454                 documentation and also various other 3rd party documentation resources such as
455                 <span class="productname">@{[ a_href 'http://www.osr.com/ntinsider/1996/cacheman.htm',
456                                 'The NT Cache Manager Description' ]}</span>,
457                 <span class="productname">@{[ a_href 'http://www.winntmag.com/Articles/Print.cfm?ArticleID=3864',
458                                 'Learn About NT'."'".'s&nbsp;File-system Cache' ]}</span>,
459                 <span class="productname">@{[ a_href 'http://www.ntfsd.org/archive/',
460                                 'NT File System Developers mailing list archives' ]}</span>
461                 including various
462                 @{[ a_href 'http://www.google.com/search?q=site%3Amicrosoft.com','fulltext searches' ]}
463                 through Internet from case to case.</p>
464
465                 <p>Sometimes no sufficient documentation was found and some code behaviour
466                 had to be reverse engineered directly from the binaries of
467                 <span class="fname">ntoskrnl.exe</span>,
468                 <span class="fname">cdfs.sys</span>,
469                 <span class="fname">fastfat.sys</span>
470                 and primarily
471                 <span class="fname">ntfs.sys</span>.
472                 Up to now the code was disassembled by
473                 <span class="productname">@{[ a_href 'http://www.simtel.net/pub/pd/29498.html','IDA Freeware' ]}</span>
474                 and by
475                 <span class="productname">dumpbin.exe</span> of
476                 <span class="productname">Microsoft Visual Studio</span>.
477                 <span class="productname">dumpbin.exe</span> is fortunately able to
478                 interpret debug symbols from W32 <span class="fname">.PDB</span>
479                 (Program DataBase) debug information files.</p>
480
481                 <h3><span class="productname">dumpbin.exe</span>:</h3>
482
483                         <p>You should use the following options for
484                         <span class="productname">dumpbin.exe</span>:</p>
485
486                         <blockquote class="command">
487                                 <p>dumpbin.exe /all /rawdata:none /disasm /pdbpath:verbose FILENAME.SYS</p>
488                         </blockquote>
489
490                         <p>You should see the following line in the output:</p>
491
492                         <blockquote class="command">
493                                 <p>PDB file found at '.\\FILENAME.pdb'</p>
494                         </blockquote>
495
496                 <a name="WinDbg"><h3><span class="productname">WinDbg</span> Windows NT kernel debugging</h3></a>
497
498                         <p><span class="productname">WinDbg</span> is downloadable from:
499                         @{[ a_href 'http://www.microsoft.com/whdc/ddk/debugging/installx86.mspx' ]}</p>
500
501                         <p>This is (the only?) tool able to debug filesystem drivers incl.
502                         <span class="fname">ntfs.sys</span>. You will need two computers running
503                         <span class="productname">Microsoft Windows</span> &mdash; one computer will run
504                         <span class="productname">WinDbg</span> while the other one will be
505                         frozen in remote Windows NT kernel debug mode. It does not matter which
506                         <span class="productname">Microsoft Windows</span> version will be run
507                         on the <span class="productname">WinDbg</span> side.</p>
508
509                         <p>The easiest way to set up two computers is to use the commercial
510                         <span class="productname">@{[ a_href 'http://www.vmware.com/download/workstation.html','VMware Workstation' ]}</span>
511                         where you can run two virtual machines simultaneously on single PC
512                         hardware and you can connect them by a virtual serial port provided by
513                         <span class="productname">VMware</span>.</p>
514
515                         <h4><span class="productname">WinDbg</span> side setup</h4>
516
517                                 @{[ doc_img 'ntdebug-vmware-windbg',
518                                                 '<span class="productname">VMware</span> virtual serial port'
519                                                                 .' of <span class="productname">WinDbg</span> side' ]}
520
521                                 <p>You should set up <span class="productname">WinDbg</span> according
522                                 to:</p>
523
524                                 @{[ doc_img 'ntdebug-windbg-port','Port settings of <span class="productname">WinDbg</span>' ]}
525                                 @{[ doc_img 'ntdebug-windbg-sym','Symbols files location of <span class="productname">WinDbg</span>' ]}
526
527                                 <p><span class="constant">Symbols</span> should point to the directory containing
528                                 the files extracted from the symbol archive for your version of
529                                 <span class="productname">Microsoft Windows</span>. In the case of the
530                                 recommended <span class="productname">Microsoft Windows XP Service Pack 1 Checked Build</span>
531                                 you should use:
532                                 @{[ a_href 'http://msdl.microsoft.com/download/symbols/packages/windowsxp/xpsp1sym_x86_chk.exe' ]}</p>
533
534                                 <blockquote class="command">
535                                         <p># Rename xpsp1sym_x86_chk.exe contents .pdb files for WinDbg<br />
536                                         @{[ CGI::escapeHTML(q{for i in *.pdb*;do ext="`echo $i|sed 's/^.*\.pdb\.\(.*\)$/\1/'`";if [ "$i" = "$ext" ];then echo "BAD:$i";break;fi;base="`echo $i|sed 's/\(\.pdb\)\..*$/\1/'`";echo "md $ext";echo "move /-y $i $ext\\$base";done|sort -u|sed 's/$/'`echo -ne '\r'`'/g' >/tmp/rename.bat}) ]}</p>
537                                 </blockquote>
538
539                                 <p>The resulting <span class="command">rename.bat</span> for
540                                 <span class="command">xpsp1sym_x86_chk.exe</span> can be found at:
541                                 @{[ a_href 'xpsp1sym_x86_chk-rename.bat.zip' ]}</p>
542
543                                 <p>The resulting directory should contain at least
544                                 <span class="command">sys\\ntfs.pdb</span>
545                                 and
546                                 <span class="command">exe\\ntoskrnl.pdb</span>.</p>
547
548                                 <p>Your successfully connected target (after the steps described
549                                 below) should look like:</p>
550
551                                 @{[ doc_img 'ntdebug-windbg-boot','Successfully connected <span class="productname">WinDbg</span>' ]}
552
553                         <h4>Setup of the side being kernel-debugged</h4>
554
555                                 @{[ doc_img 'ntdebug-vmware-xpdebug',
556                                                 '<span class="productname">VMware</span> virtual serial port'
557                                                                 .' of the side being kernel-debugged' ]}
558
559                                 <p>You must use the following options in your
560                                 <span class="command">c:\\boot.ini</span> command-line:</p>
561
562                                 <blockquote class="command">
563                                         <p>/debug /debugport=COM1 /baudrate=115200</p>
564                                 </blockquote>
565
566                                 <p>After booting this <span class="command">boot.ini</span>-entry
567                                 should freeze at this point
568                                 (if no <span class="productname">WinDbg</span> is waiting in the other
569                                 virtual machine):</p>
570
571                                 @{[ doc_img 'ntdebug-wait','Side being kernel-debugged waiting for <span class="productname">WinDbg</span>' ]}
572
573
574         <a name="law"><h2>Laws and Licensing Conditions</h2></a>
575
576                 <p>If you are an <span class="productname">authorized user</span> of
577                 <span class="productname">Microsoft Windows NT</span> the laws in some
578                 countries give you the right to fully handle the product in any way you
579                 want. Therefore you can disassemble the product even in the case you had
580                 to agree with the product license forbidding such disassembly as the
581                 country laws override any such license agreement.</p>
582
583                 <h3>Microsoft Service Pack</h3>
584
585                         <p>Sometimes you may have the legal license for
586                         <span class="productname">Microsoft Windows NT</span>
587                         but for various technical reasons you do not have the media and/or
588                         installation ready at the place of intended use of this project.</p>
589
590                         <p>Fortunately <span class="productname">Microsoft</span> provides
591                         $freebeer update packages for its
592                         <span class="productname">Microsoft Windows</span> products called
593                         <span class="productname">Service Packs</span>; the latest one is
594                         <span class="productname">@{[ a_href 'http://www.microsoft.com/WindowsXP/pro/downloads/servicepacks/sp1/checkedbuild.asp','Microsoft Windows XP Service Pack 1a' ]}</span>.</p>
595
596                         <p>This downloadable file contains the full versions of the essential
597                         files needed for the current stage of this product:
598                         <span class="fname">ntfs.sys</span>
599                         and
600                         <span class="fname">ntoskrnl.exe</span>.
601                         It even contains
602                         <span class="fname">cdfs.sys</span> and
603                         <span class="fname">fastfat.sys</span> for testing purposes.</p>
604
605                         <p><span class="productname">Service Pack</span> also contains
606                         EULA (End User License Agreement) paper disallowing any use of
607                         <span class="productname">Service Pack</span> outside its original
608                         intentions. According to the laws of some countries you need to be
609                         <span class="productname">authorized user</span> of the
610                         <span class="productname">Microsoft Windows XP</span> product to be
611                         allowed to use the files contained in such
612                         <span class="productname">Service Pack</span> without the bindings of its
613                         EULA. Even the interpretation of such laws may vary.</p>
614
615                         <p>It would be a&nbsp;breach of the law by the project author to provide
616                         automatic (=hidden) functionality to download and extract the
617                         <span class="productname">Service Pack</span> files. On the other hand it
618                         is perfectly legal to ask user for his/her confirmation whether he/she is
619                         really the <span class="productname">authorized user</span> of
620                         <span class="productname">Microsoft Windows XP</span> product and
621                         download/extract the <span class="productname">Service Pack</span> files
622                         accordingly.</p>
623
624                         @{[ doc_img 'captive-install-acquire-ask','Microsoft Windows Drivers Acquire Affirmation' ]}
625
626         <h2>Project Architecture</h2>
627
628                 @{[ doc_img 'dia/arch-all','Project Components Architecture' ]}
629
630                 <p>Most of the work of this project is located in the single box called
631                 &quot;<span class="constant">libcaptive</span>&quot; located in the center
632                 of the scheme. This component implements the core W32 kernel API by
633                 various methods described in this document.
634                 The &quot;<span class="constant">libcaptive</span>&quot; box cannot be
635                 further dissected as it is just an implementation of a&nbsp;set of
636                 @{[ captive_srcfile 'src/libcaptive/ke/exports.captivesym','API functions' ]}.
637                 It could be separated to several subsystems such as the
638                 @{[ a_href '#cache_manager','Cache Manager' ]},
639                 Memory Manager, Object Manager, Runtime Library, I/O&nbsp;Manager
640                 etc. but they have no interesting referencing structure.</p>
641
642                 <p>As this project is in fact just a&nbsp;filesystem implementation every
643                 story must begin at the device file and end at the filesystem operations
644                 interface. The unified supported interfaces are
645                 <span class="productname">@{[ a_href 'http://developer.gnome.org/doc/API/2.0/glib/','GLib' ]}</span>
646                         (the most low level portability, data-types and utility library for Gnome)
647                 <span class="type">GIOChannel</span> (for the device access) and the custom
648                 <span class="constant">libcaptive</span> filesystem API. Each of these ends
649                 can be connected either to some direct interface (such as the
650                 <span class="constant">captive-cmdline</span> client),
651                 @{[ a_href 'http://lufs.sourceforge.net/lufs/','Linux Userland File System (LUFS)' ]}
652                 or as a general $GnomeVFS filter.
653                 @{[ a_href 'http://lufs.sourceforge.net/lufs/','LUFS' ]} will be used in
654                 most cases as it offers standard filesystem interface by Linux kernel.
655                 
656                 You can also use $GnomeVFS as it offers nice filter interface on
657                 the UNIX user-privileges level for transparent operation with archives and
658                 network protocols. This filter interface was used by this project to turn
659                 the device reference such as <span class="fname">/dev/hda3</span> or <span
660                 class="fname">/dev/discs/disc0/part3</span> to the fully accessible
661                 filesystem (pretending being an &quot;archive&quot; in the device
662                 reference). This device access can be specified by $GnomeVFS URLs such as:
663                 <span
664                 class="fname">file:///dev/hda3#captive-fastfat:/autoexec.bat</span></p>
665
666                 <p><span class="constant">captive-bug-replay</span> serves just for debugging
667                 purposes &mdash; you can 'replay' an existing
668                 <span class="fname">file.captivebug.xml.gz</span> automatically being
669                 generated during W32 filesystem failure. This bugreport file will contain
670                 all the touched data blocks of the device used in the moment of the
671                 failure. <span class="constant">captive-bug-replay</span> will therefore
672                 emulate internal virtual writable device out of these bugreported data.</p>
673
674                 <p>If the passed device reference is requested by the user to be accessed
675                 either in <span class="dashdash">--ro</span> (read-only) mode or in the
676                 <span class="dashdash">--rw</span> (full read-write) mode there are no
677                 further device layers needed. Just in the case of <span
678                 class="dashdash">--blind</span> mode another layer is involved to emulate
679                 read-write device on top of the real read-only device by the method of
680                 non-persistent memory buffering of all the possible write requests.</p>
681
682                 <p><span class="constant">sandbox commit buffer</span> is involved only in the
683                 case @{[ a_href '#sandbox','sandboxing feature' ]} is active. It will
684                 buffer any writes to the device during the sandbox run to prevent
685                 filesystem damage if the driver would fail in the meantime. If the
686                 filesystem gets finally successfully unmounted this sandbox buffer can be
687                 <a name="safe_flush">safely flushed</a>
688                 to its underlying physical media. The buffer will be dropped
689                 in the case of filesystem failure, of course. The filesystem should be
690                 unmounted from time to time &mdash; it can be transparently unmounted and mounted
691                 by <span class="command">commit</span> of
692                 <span class="constant">captive-cmdline</span> custom client. Currently you
693                 cannot force remounting when using
694                 @{[ a_href 'http://lufs.sourceforge.net/lufs/','LUFS' ]} interface client
695                 but it will be remounted after approx each 1MB data written automatically
696                 due to @{[ a_href '#log_file_full','NTFS log file full' ]}.</p>
697
698                 <p>Now we need to transparently
699                 @{[ captive_srcfile 'src/libcaptive/sandbox/sandbox.idl','connect' ]}
700                 the device interface of <span class="type">GIOChannel</span> type through
701                 @{[ a_href '#sandbox','CORBA/ORBit' ]} to the sandboxed slave.</p>
702
703                 <p>Such device is still only a&nbsp;UNIX style GLib <span
704                 class="type">GIOChannel</span> type at this point. As we need to supply it
705                 to the W32 filesystem driver we must convert it to the W32 I/O&nbsp;Device
706                 with its capability of handling <span class="type">IRP</span>
707                         (<span class="constant">I/O Request Packet</span>; structure holding the
708                         request and result data for any W32 filesystem or W32 block device
709                         operation)
710                 requests from its upper W32 filesystem driver. Such W32 I/O&nbsp;Device can
711                 represent either <span class="type">CD-ROM</span> or
712                 <span class="type">disk</span> device type as different W32 filesystem
713                 drivers require different media types &mdash; currently only
714                 <span class="fname">cdfs.sys</span> requires
715                 <span class="type">CD-ROM</span> type.</p>
716
717                 <p>W32 media I/O&nbsp;Device is accessed from the W32 filesystem driver.
718                 The filesystem driver itself always creates volume object by
719                 <span class="function">IoCreateStreamFileObject()</span> representing the
720                 underlying W32 media I/O&nbsp;Device as the object handled by the
721                 filesystem driver itself. All the client application filesystem requests
722                 must be first resolved at the filesystem structures level, passed to the
723                 volume stream object of the same filesystem and then finally passed to the
724                 W32 media I/O&nbsp;Device (already implemented by this project as an
725                 interface to <span class="type">GIOChannel</span> noted above).</p>
726
727                 <p>The filesystem driver is called by the core W32 kernel implementation of
728                 <span class="constant">libcaptive</span> in
729                 @{[ a_href '#synchronous','synchronous way' ]} in single-shot manner instead of
730                 the several reentrancies while waiting for the disk I/O completions as can
731                 be seen in the original
732                 <span class="productname">Microsoft Windows NT</span>.
733                 This single-shot synchronous behaviour is possible since all the needed
734                 resources (disk blocks etc.) can be always presented as instantly ready as
735                 their acquirement is solved by @{[ a_href '#hostosnote','Host-OS' ]} outside of
736                 the W32 emulated @{[ a_href '#guestosnote','Guest-OS' ]} environment.
737                 For several cases needed only by <span class="fname">ntfs.sys</span> 
738                 there had to be supported asynchronous access &mdash; parallel execution
739                 is emulated by GLib <span class="function">g_idle_add_full()</span>
740                 with <span class="function">g_main_context_iteration()</span> called during
741                 <span class="function">KeWaitForSingleObject()</span>.</p>
742
743                 <p><span class="constant">libcaptive</span> offers the W32 kernel
744                 filesystem API to the upper layers. This is still not the API the common
745                 W32 applications are used to as they use W32 libraries which in turn pass
746                 the call to W32 kernel.  For example
747                 <span class="function">CreateFileA()</span> is being implemented by several
748                 libraries such as <span class="fname">kernel32.dll</span> as a relay
749                 interface for the kernel function
750                 <span class="function">IoCreateFile()</span> implemented by this
751                 project's&nbsp;<span class="constant">libcaptive</span> W32 kernel
752                 emulation component.</p>
753
754                 <p>As it would be very inconvenient to use the legacy, bloated and UNIX
755                 style unfriendly W32 kernel filesystem API this project offers its own
756                 @{[ a_href '#client_interface','custom filesystem API interface' ]} inspired by
757                 the $GnomeVFS client interface adapted to the specifics of W32 kernel API.
758                 This interface is supposed to be easily utilized by
759                 <a href="#client_interface_customapp">a&nbsp;custom application accessing
760                 the W32 filesystem driver</a>.</p>
761
762                 <p>@{[ a_href '#sandbox','CORBA/ORBit' ]} hits us again &ndash; we need to
763                 @{[ captive_srcfile 'src/libcaptive/sandbox/sandbox.idl','translate' ]}
764                 the @{[ a_href '#client_interface','custom filesystem API interface' ]}
765                 out of the sandboxed slave to the UNIX space.</p>
766
767                 <p><span class="constant">captive sandbox master</span> provides the
768                 functionality of covering any possible sandboxed slave restarts and its
769                 communication. It is also capable of
770                 <a name="demultiplexing_master">demultiplexing single API operations</a>
771                 to its multiple connected sandbox slaves in a transparent way
772                 as each of them handles
773                 @{[ a_href '#mounted_one','just one filesystem device' ]}.</p>
774
775                 <p>The rest of the story is not much special for this project since this is
776                 a common UNIX problem how to offer user space implemented UNIX filesystem
777                 as a generic system filesystem (as those are usually implemented only as
778                 the components of the UNIX kernel).</p>
779
780                 <p>The filesystem service can be offered in several ways:</p>
781
782                 <dl>
783                         <dt>Custom client</dt>
784                         <dd>
785                                 <p>One possibility would be to write
786                                 <a name="client_interface_customapp">a custom client application</a>
787                                 for this project such as file manager or a&nbsp;shell. Although it
788                                 would implement the most appropriate user interface to the set of
789                                 functions offered by this project (and W32 filesystem API) it has the
790                                 disadvantage of special client software. Appropriate client is provided
791                                 by this project as:
792                                 <span class="fname">src/client/cmdline/cmdline-captive</span></p>
793                         </dd>
794
795                         <dt>@{[ a_href 'http://lufs.sourceforge.net/lufs/','Linux Userland File System (LUFS)' ]}</dt>
796                         <dd>
797                                 <p>The most usable interface is the
798                                 @{[ a_href 'http://lufs.sourceforge.net/lufs/','LUFS' ]} client
799                                 by <span class="constant">liblufs-captivefs</span>.
800                                 As @{[ a_href 'http://lufs.sourceforge.net/lufs/','LUFS' ]}
801                                 already assigns separate process for each filesystem mount the
802                                 @{[ a_href '#demultiplexing_master','demultiplexing feature' ]}
803                                 is not utilized in this case.</p>
804
805                                 <p>@{[ a_href 'http://lufs.sourceforge.net/lufs/','LUFS' ]}
806                                 needs multiple operating threads (each UNIX kernel operation needs
807                                 one free lufsd slot/thread so as not to fail immediately).
808                                 As <span class="constant">libcaptive</span> is
809                                 @{[ a_href '#synchronous','single-threaded' ]} all the operations
810                                 get always synchronized by
811                                 <span class="constant">liblufs-captivefs</span>
812                                 before their pass over to <span class="constant">libcaptive</span>.</p>
813                         </dd>
814
815                         <dt>@{[ a_href '#offered_gnomevfs','Gnome-VFS' ]}</dt>
816                         <dd>
817                                 <p>This client allows filesystem access even without any
818                                 involvement of the UNIX kernel from any $GnomeVFS aware client application
819                                 (such as <span class="fname">gnome-vfs/tests/test-shell</span>).
820                                 This @{[ a_href '#offered_gnomevfs','Gnome-VFS interface' ]} connects the
821                                 data flow of this project in two points &mdash; both as the lowest layer
822                                 device image source and also as the upper layer for the filesystem
823                                 operation requests.</p>
824                         </dd>
825                 </dl>
826
827                 <p>Unimplemented and deprecated methods for providing filesystem
828                 service:</p>
829
830                 <dl>
831                         <dt>W32 filesystem in UNIX OS kernel</dt>
832                         <dd>
833                                 <p>The real UNIX OS filesystem implementation must be completely
834                                 implemented inside the hosting OS kernel. This requires special coding
835                                 methods with limited availability of coding features and libraries.
836                                 Also it would give the full system control to the untrusted W32
837                                 filesystem driver code with possibly fatal consequences of yet
838                                 unhandled W32 emulation code paths. It would benefit from the best
839                                 execution performance but this solution was never considered a real
840                                 possibility.</p>
841                         </dd>
842
843                         <dt>Custom NFS server</dt>
844                         <dd>
845                                 <p>The common approach
846                                 <a name="offered_NFS">of filesystem implementations</a>
847                                 outside the UNIX OS kernel was to use custom NFS servers, usually running on the
848                                 same machine as the NFS-connected client as such NFS server is usually
849                                 an ordinary UNIX user space process. It would be possible to implement
850                                 this project as a&nbsp;custom NFS server but the NFS protocol itself
851                                 has a&nbsp;lot of fundamental flaws and complicated code for backward
852                                 compatibility.</p>
853                         </dd>
854                 </dl>
855
856
857         <a name="mounted_one"><h2>At Most One Mounted Filesystem</h2></a>
858
859                 <p>The project technically supports only one (exactly one...) mounted
860                 filesystem device and only one filesystem driver. There is nothing
861                 complicated to support multiple disks and multiple loaded filesystem
862                 modules but as they would share the address space it would only bring
863                 possible complications during bug reports and the bug solving
864                 itself.  It was considered as a&nbsp;more sane way to support multiple W32
865                 mounted disks by completely separately running project instances in
866                 different UNIX processes communicating from their sandboxes via
867                 @{[ a_href '#sandbox','CORBA sandbox interface' ]}. This sandboxing
868                 feature is not yet deployed although its code is already prepared.</p>
869
870                 <p>The project also does not support any state cleanup to be able to load
871                 filesystem&nbsp;<span class="constant">A</span>,
872                 cleanup&nbsp;<span class="constant">A</span> and load a different
873                 filesystem&nbsp;<span class="constant">B</span> in the same process address
874                 space. It complies with the preventions of the possible debugging
875                 complications as noted above. Despite this you still must call the function
876                 <span class="function">captive_shutdown()</span> to flush all the pending
877                 filesystem buffers to the disk. After calling
878                 <span class="function">captive_shutdown()</span> the process address space is
879                 no longer usable for any further project operations and the process is
880                 expected to be terminated in the manner compatible with its driving
881                 @{[ a_href '#sandbox','CORBA sandbox interface' ]} control master.</p>
882
883                 <p>Each sandbox executing the untrusted W32 binary filesystem driver code
884                 is connected through its
885                 @{[ a_href '#sandbox','CORBA sandbox interface' ]} at the point of upper
886                 layer <span class="constant">libcaptive</span>-specific filesystem API, at
887                 the point of the bottom layer of <span class="type">GIOChannel</span>
888                 device access and also for transfers of GLib logging
889                 messages/warnings/errors out of the sandbox to the user.</p>
890
891
892         <a name="sandbox"><h2>Sandboxing of W32 filesystem</h2></a>
893
894                 <p>The emulated W32 environment running the original W32 filesystem driver
895                 is separated from the rest of UNIX OS. It achieves the following goals:</p>
896
897                 <ul>
898                         <li><b>Restartable</b>: W32 driver can be restarted in clean state if it crashed</li>
899                         <li><b>Secure</b>: Malicious W32 code cannot affect the security of UNIX OS</li>
900                         <li><b>Stable</b>: Buggy W32 cannot crash any part of UNIX OS</li>
901                 </ul>
902
903                 <p>Sandboxing is provided with the following attributes:</p>
904                 
905                 <ul>
906                         <li>standalone UNIX process with separate memory space</li>
907                         <li>chroot(2) in empty directory to prevent any UNIX OS filesystem access</li>
908                         <li>setuid(2) to own user/group to prevent interaction with UNIX processes</li>
909                         <li>setrlimit(2) to limit system resources available for W32 environment</li>
910                         <li>the only connection with the UNIX OS by CORBA/ORBit RPC</li>
911                 </ul>
912
913                 <p>This security is almost the same as provided by
914                 emulated virtual machines such as
915                 @{[ a_href 'http://www.vmware.com/solutions/security.html','VMware' ]}.</p>
916
917                 @{[ doc_img 'dia/inheritance','Sandboxing Scheme' ]}
918
919                 <p>Project can be also used in non-sandboxed mode by
920                 <span class="command">--no-sandbox</span> option as it is easier to debug
921                 without CORBA/ORBit RPC. In this case the
922                 <span class="type">DirectorySlave</span>/<span class="type">FileSlave</span>
923                 options are used directly instead of their
924                 <span class="type">DirectoryParent</span>/<span class="type">FileParent</span>
925                 peers.</p>
926
927
928 <h1>Choice of the Emulation Methods</h1>
929
930         <p>The intent of the project was to get reliable read-write access to
931         <span class="productname">NTFS</span> partition. There are several possible
932         ways to achieve that:</p>
933
934         <h2>Virtualmachine Running the Original W32 Subsystem</h2>
935
936                 <p>Creating virtual-hardware PC and running the original W32 binaries
937                 including their boot-loader etc. Disk device access would be passed as
938                 virtual IDE disk (=hard disk drive). File access API would be implemented
939                 either by special escaping by some trapped instruction out of the
940                 virtualmachine while using W32 file access API or using the standard W32
941                 SMB (Server Message Block) network access through some virtual network
942                 card. The latter network access solution is almost the currently available
943                 possibility of running full-blown disk-sharing real
944                 <span class="productname">Microsoft Windows NT</span> inside virtual
945                 machine emulator such as <span class="productname">VMware</span>.</p>
946
947                 <p>pros: Full compatibility due to fully native codebase.</p>
948
949                 <p>cons: Hard to debug, missing documentation of NT booting internals,
950                 possible problems by different PC virtual-hardware than expected by NT,
951                 requirement of fully installed
952                 <span class="productname">Microsoft Windows NT</span> product.</p>
953
954         <a name="method_ntoskrnl"><h2>&quot;ntoskrnl.exe&quot; Inside Virtual Address Space</h2></a>
955
956                 <p>This solution was chosen by the project. Binary filesystem driver and
957                 also <span class="fname">ntoskrnl.exe</span> binary file are required.
958                 Unfortunately <span class="fname">ntoskrnl.exe</span> expects a&nbsp;native
959                 PC virtual-hardware missing during regular UNIX user space process
960                 emulation, therefore such instructions must be trapped and emulated/ignored
961                 from case to case.</p>
962
963                 <p>Also the <a name="init_ntoskrnl">initialization code of <span
964                 class="fname">ntoskrnl.exe</span></a> is not executed by this project since
965                 it expects to get full PC hardware access privileges and thus some
966                 datastructures do not get initialized by it (need to be trapped later at
967                 runtime stage). Some of the missing initializations are solved by
968                 @{[ a_href '#functype_wrap','API functions wrapping' ]}.</p>
969
970                 <p>pros: Lightweight, easier to debug.</p>
971
972                 <p>cons: Possible incompatible emulation of
973                 <span class="fname">ntoskrnl.exe</span> parts, missing documentation needed
974                 for the implementation.</p>
975
976         <h2>Filesystem Driver Inside Virtual Address Space</h2>
977
978                 <p>Unlike @{[ a_href '#method_ntoskrnl','previous method' ]} here we do not use
979                 even <span class="fname">ntoskrnl.exe</span> as the complete kernel part of
980                 W32 is <a name="native_ntoskrnl">emulated from the project source
981                 files</a>. <span class="fname">cdfs.sys</span> driver was successfully run
982                 in this manner in the former versions of this project but the possibility
983                 to run without <span class="fname">ntoskrnl.exe</span> was dropped since it
984                 had no licensing gains (you need the original
985                 <span class="productname">Microsoft Windows NT</span> files at least for
986                 the filesystem driver itself) and the emulation of undocumented parts
987                 reusable from <span class="fname">ntoskrnl.exe</span> binary was
988                 a&nbsp;pain.</p>
989
990                 <p>pros: Lightweight, easier to debug.</p>
991
992                 <p>cons: Possible incompatible emulation of the whole
993                 <span class="fname">ntoskrnl.exe</span>, its missing documentation.</p>
994
995
996 <h1>Implementation Details</h1>
997
998         <a name="cache_manager"><h2>NT Cache Manager</h2></a>
999
1000                 <p>Although there exist some 3rd party documents about
1001                 <span class="productname">NT Cache Manager</span> W32 subsystem such as
1002                 <span class="productname">@{[ a_href 'http://www.osr.com/ntinsider/1996/cacheman.htm',
1003                                 'The NT Cache Manager Description' ]}</span> or
1004                 <span class="productname">@{[ a_href 'http://www.winntmag.com/Articles/Print.cfm?ArticleID=3864',
1005                                 'Learn About NT'."'".'s&nbsp;File-system Cache' ]}</span>
1006                 they are definitely insufficient for compatible
1007                 <span class="productname">NT Cache Manager</span> reimplementation.</p>
1008
1009                 <p><span class="productname">NT Cache Manager</span> is about mapping
1010                 filesystem objects such as regular file data, filesystem bitmap or
1011                 journalling zone (log file). It is also being used by the filesystem for
1012                 mapping of virtual volume files representing the whole underlying
1013                 filesystem device.</p>
1014
1015                 <p>The original W32 <span class="productname">NT Cache Manager</span>
1016                 is much more complicated as it must coordinate its effort with
1017                 other W32 subsystems like mapping of executable files
1018                 (<span class="type">ImageSectionObject</span>), insufficient system
1019                 resources from <span class="productname">NT Memory Manager</span>
1020                 or general effort to perform caching features for system performance.</p>
1021                 <p><span class="productname">NT Cache Manager</span> of this project has a much
1022                 simpler goal - it just needs to provide compatible
1023                 <span class="productname">NT Cache Manager</span> functionality while
1024                 the other goals of its W32 counterpart are left to be successfully handled
1025                 by UNIX OS in much more efficient way.</p>
1026
1027                 @{[ doc_img 'dia/cache-manager',
1028                                 '<span class="productname">NT Cache Manager</span> Architecture' ]}
1029
1030                 <p>Cache Manager objects are always bound to
1031                 <span class="type">FCB</span> (File Control Block).
1032                 <span class="type">FileObject</span> (or its associated
1033                 <span class="type">HANDLE</span>) serve only as reference
1034                 to <span class="type">FCB</span> and there can be multiple
1035                 <span class="type">FileObject</span>/<span class="type">HANDLE</span>
1036                 items for one <span class="type">FCB</span>. It is a bit misleading
1037                 you must use <span class="type">FileObject</span> pointer while calling
1038                 most of the Cache Manager functions.</p>
1039
1040                 <p>Before using any other Cache Manager functions you must first call
1041                 <span class="function">CcInitializeCacheMap()</span>. You must give the
1042                 maximum mapped object offset. Each mapped object byte must have at most one
1043                 mapped memory location - no shared pages are allowed. Also any subsequent
1044                 mapping request is expected to be mapped into continuous memory region.
1045                 It implies you must reserve the memory region for possible future mapping
1046                 during the initial <span class="function">CcInitializeCacheMap()</span>
1047                 moment sized according to the given maximum mapped object offset. 
1048                 This is the approach currently implemented by this project although it
1049                 cannot be used for 3rd party <span class="fname">ext2fsd.sys</span>
1050                 driver as it initialized Cache Manager by the whole media device size
1051                 and it surprisingly succeeds for original
1052                 <span class="productname">Microsoft Windows</span> 
1053                 <span class="productname">Cache Manager</span>.
1054                 I expect the space reservation should be postponed to the first mapping
1055                 request and expect no multiple mappings will be done in the case
1056                 of memory-exceeding <span class="function">CcInitializeCacheMap()</span>
1057                 reservation request. <span class="function">CcSetFileSizes()</span>
1058                 changing the reserved memory area size may assume no existing Map
1059                 or Pin mappings exist.</p>
1060
1061                 <p><span class="type">PCACHE_MANAGER_CALLBACKS</span> argument can be
1062                 safely ignored:</p>
1063
1064                 <dl>
1065                         <dt><span class="function">AcquireForReadAhead()</span>/<span class="function">ReleaseFromReadAhead()</span></dt>
1066                         <dd>
1067                                 <p>As any readahead functionality is optional these entries are
1068                                 never used by Cache Manager implementation of this project.</p>
1069                         </dd>
1070
1071                         <dt><span class="function">AcquireForLazyWrite()</span>/<span class="function">ReleaseFromLazyWrite()</span></dt>
1072                         <dd>
1073                                 <p>Even the write-behind functionality is optional for Cache Manager.
1074                                 It is being done in asynchronous way in the original
1075                                 <span class="productname">Microsoft Windows</span>
1076                                 <span class="productname">Cache Manager</span>
1077                                 implementation and it is ignored by Cache Manager implementation of
1078                                 this project.</p>
1079
1080                                 <p>Cache Manager does not need to write any data if not explicitly
1081                                 requested by the driver. It is even expected to silently drop any
1082                                 pending dirty data blocks during filesystem shutdown.
1083                                 Forced dirty block write by function
1084                                 <span class="function">CcFlushCache()</span> should be written without
1085                                 any wrapping surrounding
1086                                 <span class="function">AcquireForLazyWrite()</span>/<span class="function">ReleaseFromLazyWrite()</span>
1087                                 pair.</p>
1088                         </dd>
1089                 </dl>
1090
1091                 <p><span class="function">CcUninitializeCacheMap()</span> is just
1092                 a suggestion for Cache Manager that driver will no longer reference
1093                 given <span class="type">SharedCacheMap</span>. The uninitialization
1094                 can be postponed to any later moment in original 
1095                 <span class="productname">Microsoft Windows</span> 
1096                 <span class="productname">Cache Manager</span>
1097                 as it may be locked by existing
1098                 <span class="type">ImageSectionObject</span>
1099                 of some file being executed etc.
1100                 <a name="sharedcachemap_leak">It is fatal to destroy
1101                 <span class="type">SharedCacheMap</span></a>
1102                 in the moment you see no other
1103                 references to it as the driver will access it for some moment
1104                 even after <span class="function">CcUninitializeCacheMap()</span>.
1105                 I am not sure if it is a bug of the driver or whether there are some rules
1106                 how long after <span class="function">CcUninitializeCacheMap()</span>
1107                 completion given <span class="type">SharedCacheMap</span> still exists.
1108                 Fortunately it is safe to never destroy
1109                 <span class="type">SharedCacheMap</span> and leave it leaked - everything
1110                 gets clean in the
1111                 @{[ a_href '#sandbox','sandboxed environment' ]} soon anyway.</p>
1112
1113                 <p>There exist Map and Pin type objects for each
1114                 <span class="type">SharedCacheMap</span> although they look very similar.
1115                 Only these objects give you access to any memory data
1116                 &mdash; <span class="type">SharedCacheMap</span> only reserved the space
1117                 to ensure continuous mapping of the forthcoming mappings but it did not map
1118                 any data into it.</p>
1119
1120                 <p>Mapping of 'new' Map or Pin will create the new object only in the case
1121                 no such mapping exists now. Otherwise you will just get the reference to
1122                 the existing object with increased usecount.</p>
1123
1124                 <dl>
1125                         <dt>Map</dt>
1126                         <dd>
1127                                 <p>Map mapping is always at most one for each
1128                                 <span class="type">SharedCacheMap</span>. Base offset/length of such
1129                                 mapping have no meaning as there can be only single Map.</p>
1130
1131                                 <p>Apparently Map size can be arbitrarily long according
1132                                 to its <span class="type">SharedCacheMap</span> reserved space.</p>
1133
1134                                 <p>You cannot modify the memory mapped by Map in any way.
1135                                 As it is the same memory area (address) as the pages used by Pin
1136                                 objects you always access the last modified version by possible
1137                                 Pin of the same page.</p>
1138                         </dd>
1139
1140                         <dt>Pin</dt>
1141                         <dd>
1142                                 <p>Pin mapping always represents just one physical page
1143                                 (<span class="constant">PAGE_SIZE</span> = 4096 for i386).
1144                                 Its base offset/length can be safely extended to be aligned to the
1145                                 requested page.</p>
1146
1147                                 <p>Pin can have associated pair of oldest and newest
1148                                 <span class="type">LSN</span> (Linear Sequence Number). It can be
1149                                 set by <span class="function">CcSetDirtyPinnedData()</span>
1150                                 and Cache Manager always tracks the lowest and highest
1151                                 reported <span class="type">LSN</span> for each page.
1152                                 <span class="type">LSN</span> is assumed to be
1153                                 <span class="constant">0</span> if not set.</p>
1154
1155                                 <p>Any existing Pin mapping will be reused for further mappings
1156                                 as long as it is not ThreadOwned. In the moment you use
1157                                 <span class="function">CcSetBcbOwnerPointer()</span> you will detach
1158                                 the associated Pin pages from its
1159                                 <span class="type">SharedCacheMap</span>.
1160                                 Although they will further act as valid Pin mappings they will be no
1161                                 longer reused during new Pin mapping of the same page.
1162                                 There can exist multiple Pin mappings of the same page (although
1163                                 sharing the same memory space). This detaching must be implemented
1164                                 even in the
1165                                 @{[ a_href '#synchronous','single-threaded' ]} W32 implementation
1166                                 of this project as it is affecting the behaviour of Cache Manager.
1167                                 It was never
1168                                 @{[ a_href '#TraceFS','seen' ]} how to behave if multiple dirty Pin
1169                                 mappings of the same page exist.</p>
1170                         </dd>
1171                 </dl>
1172
1173                 <p>Only the pages not yet present in the memory must be read from the disk.
1174                 You must not read any pages you do not need to as the driver does not
1175                 expect it and it would corrupt its data buffers.</p>
1176
1177                 <p>Cache Manager of this project will destroy Pin or Map mappings after
1178                 their last unreferencing (as opposed to the
1179                 @{[ a_href '#sharedcachemap_leak','leaked <span class="type">SharedCacheMap</span>' ]}).
1180                 Despite that any dirty pages may still be held as the pages
1181                 (including their <span class="type">LSN</span>s) are cached associated
1182                 with <span class="type">SharedCacheMap</span>. It may be also possible
1183                 original <span class="productname">Microsoft Windows</span> 
1184                 <span class="productname">Cache Manager</span>
1185                 postpones Pin mapping destroy to later time but it does not matter.</p>
1186
1187
1188                 <a name="TraceFS"><h3>TraceFS NT Cache Manager Tracer</h3></a>
1189
1190                         <p>@{[ a_href '#cache_manager','Cache Manager behaviour' ]} would be hard
1191                         to analyze just by @{[ a_href '#reverse','reverse engineering' ]} as it
1192                         is pretty complicated code cooperating with many other W32 kernel
1193                         subsystems. It was chosen as easier way to trace it instead and validate
1194                         all the Cache Manager assumptions by Cache Manager simulator.</p>
1195
1196                         @{[ doc_img 'dia/TraceFS','TraceFS Hooking' ]}
1197
1198                         <p>You must prepare your driver to be hooked
1199                         (<span class="fname">ntfs.sys</span> in this case):</p>
1200
1201                         <blockquote class="command">
1202                                 <p>@{[ captive_srcfile './src/TraceFS/hookfs.pl' ]} ntfs.sys ./src/TraceFS/TraceFS-W32/TraceFS.sys &gt;hooked/ntfs.sys</p>
1203                         </blockquote>
1204
1205                         <p>This <span class="fname">hooked/ntfs.sys</span> file must be replaced
1206                         in the <span class="fname">%System32%\\drivers</span> directory.
1207                         Beware as
1208                         <span class="productname">Microsoft Windows</span>
1209                         has many backups of these system files such as
1210                         <span class="fname">%System32%\\dllcache</span> &mdash; delete them
1211                         all!</p>
1212
1213                         <p>You also need to install
1214                         <span class="fname">./src/TraceFS/TraceFS-W32/TraceFS.sys</span>
1215                         into <span class="fname">%System32%\\drivers</span> directory
1216                         and import <span class="fname">TraceFS/TraceFS-W32/TraceFS.reg</span>
1217                         registry file to initialize the debug driver during system boot.</p>
1218
1219                         <p>You can now pray a bit and snap the resulting Cache Manager tracing
1220                         from <span class="productname">WinDbg</span> by
1221                         @{[ a_href '#WinDbg','W32 remote kernel debugging' ]}:</p>
1222
1223                         @{[ doc_img 'ntdebug-windbg-boot','Successfully connected <span class="productname">WinDbg</span>' ]}
1224
1225                         <p>The resulting trace file should be processed by
1226                         @{[ captive_srcfile './src/TraceFS/checktrace.pl' ]} Perl Cache Manager
1227                         implementation to validate its assumptions about Cache Manager behaviour.
1228                         Any seen incompatibilities will be reported &mdash; your target is to reach
1229                         as few error messages as possible.</p>
1230
1231                         <p>KNOWN BUGS: Combination of message synchronization primitives and
1232                         implemented refusal to create journalling thread of
1233                         <span class="fname">ntfs.sys</span>
1234                         causes fatal system lockup in several advanced operations
1235                         such as setting compression attribute. Despite it more common operations
1236                         can be successfully traced during the whole
1237                         <span class="productname">Microsoft Windows</span>
1238                         session including its final shutdown and such traces provide enough
1239                         material to be food to
1240                         @{[ captive_srcfile './src/TraceFS/checktrace.pl' ]} Perl Cache Manager
1241                         validator.</p>
1242
1243                         <h4>TraceFS for general API tracing</h4>
1244
1245                         <p>Although TraceFS was up to now used only for tracing of
1246                         <span class="productname">NT Cache Manager</span> it can be easily
1247                         used even for any other NT kernel API tracing. You need to provide
1248                         appropriate function wrappers in the main source file
1249                         @{[ captive_srcfile './src/TraceFS/TraceFS-W32/TraceFS.c' ]}
1250                         and you must also export them in
1251                         @{[ captive_srcfile './src/TraceFS/TraceFS-W32/TraceFS.def' ]}.
1252                         @{[ captive_srcfile './src/TraceFS/hookfs.pl' ]} has no hardcoded
1253                         function names &ndash; it will hook exactly the exported entries.</p>
1254
1255                         <p>Framework for thread synchronizations and debug tracing is provided to
1256                         prevent mangling of messages while running by multiple threads at once.
1257                         Testing was done just on uniprocessor machine, SMP kernel may need some
1258                         fixes.</p>
1259                         
1260
1261         <a name="functype"><h2>API Function Implementation Choices</h2></a>
1262
1263                 <p>For each function exported by W32
1264                 <span class="fname">ntoskrnl.exe</span> and imported and called by the
1265                 filesystem driver a decision needs to be made to properly implement its
1266                 functionality. Currently implemented functionality statistics are provided
1267                 below:</p>
1268
1269                 <table border="1" align="center">
1270                         <tr><th>Function type                                            </th><th>Items</th><th>Portion</th></tr>
1271                         <tr><td>@{[ a_href '#functype_pass','pass' ]}                    </td><td>   81</td><td>    26%</td></tr>
1272                         <tr><td>@{[ a_href '#functype_wrap','wrap' ]}                    </td><td>    2</td><td>     0%</td></tr>
1273                         <tr><td>@{[ a_href '#functype_native_reactos','native-ReactOS' ]}</td><td>  113</td><td>    36%</td></tr>
1274                         <tr><td>@{[ a_href '#functype_native_libcaptive','native-own' ]} </td><td>  116</td><td>    38%</td></tr>
1275                         <caption>Function Implementation Types Statistics</caption>
1276                 </table>
1277
1278                 <p>As there are several choices to implement each function the usual
1279                 attempts/investigations ordering is listed in the sections below.</p>
1280
1281                 <p>Special care must be taken for data-type symbols since they are
1282                 referenced without the possibility of catching the code flow by some
1283                 breakpoints (it would be possible only in some special access cases). Data
1284                 export symbols of <span class="constant">unpatched</span> libraries must
1285                 contain already prepared content at the runtime. There is a&nbsp;problem
1286                 with <span class="constant">patched</span> libraries where it is necessary
1287                 to also fully implement the data symbol as
1288                 @{[ a_href '#functype_native','native implementation' ]} since there is no
1289                 possibility to @{[ a_href '#functype_pass','pass' ]} the data symbol instead of
1290                 the original W32 data location and therefore there will be two instances of
1291                 such data variable place. As there will be also the uncaught references for
1292                 such W32 data location from the <span class="constant">patched</span>
1293                 library itself such symbols should be usually only some constants (such as
1294                 <span class="constant">KeNumberProcessors</span>).</p>
1295
1296                 <p>W32 platform symbols export/import can be based either on the symbol
1297                 name itself or it can be also exported and imported just by its
1298                 identification number called <span class="constant">Ordinal</span>.
1299                 Although it saves some jumptables file binary size it is currently no
1300                 longer used by W32 binaries and this project also does not support such
1301                 <span class="constant">Ordinal</span> symbol reference type at all.</p>
1302
1303                 <p>All the exporting magic is handled by custom script
1304                 <span class="fname">captivesym</span> processing the definition file
1305                 <span class="fname">@{[ captive_srcfile 'src/libcaptive/ke/exports.captivesym' ]}</span>
1306                 to produce the intermediate relaying code
1307                 <span class="fname">src/libcaptive/ke/exports.c</span>. For details of the
1308                 <span class="fname">captivesym</span>-specific source file syntax please
1309                 see its documentation:
1310                 <span class="fname">@{[ a_href
1311                                 $W->{"top_dir"}.'/project/Pod2Html.html.pl?cvs=priv/captive/src/libcaptive/ke/captivesym.pl',
1312                                 'src/libcaptive/ke/captivesym.pl' ]}</span>.</p>
1313
1314                 <a name="functype_pass"><h3>Direct Pass to Original &quot;ntoskrnl.exe&quot;</h3></a>
1315
1316                         <p>Simple (standalone) functions such as
1317                         <span class="function">RtlTimeToSecondsSince1970()</span> can be simply
1318                         passed to the original implementation in
1319                         <span class="fname">ntoskrnl.exe</span> as they make no hardware access
1320                         and they do not expect any special internal data structures to be set up
1321                         in advance by an earlier library initialization. A common case are all
1322                         the data structures utility functions such as
1323                         <span class="constant">GenericTable</span> subsystem or
1324                         <span class="constant">LargeMcb</span> handling.</p>
1325
1326                         <a name="functype_pass_fromunix"><h4>Pass from UNIX Code</h4></a>
1327
1328                                 <p>Control flow begins in some standard UNIX code. Such code is always
1329                                 using @{[ a_href '#calltype_cdecl','cdecl call type' ]} for all its
1330                                 intracalls. <a href="#functype_native_reactos">Native functions
1331                                 compiled from <span class="productname">ReactOS</span> sources</a> use
1332                                 their own @{[ a_href '#calltype','cdecl/stdcall/fastcall' ]} declarations
1333                                 but these call type modifications are discarded during compilation for
1334                                 this project by the <span class="constant">LIBCAPTIVE</span>
1335                                 symbol.</p>
1336
1337                                 <p>UNIX code calls <span class="function">FUNCTIONNAME()</span> relay
1338                                 from the generated UNIX jump table. Such relay will debug dump the
1339                                 passed arguments and finally pass the control to the original W32
1340                                 function code in the proper call type
1341                                 @{[ a_href '#calltype','cdecl/stdcall/fastcall' ]} for a&nbsp;given
1342                                 function.</p>
1343
1344                                 <p>Original W32 code entry point is always trapped by a&nbsp;breakpoint
1345                                 although it would not be needed during this specific direct pass from
1346                                 UNIX code to the original W32 implementation. Still the breakpoint has
1347                                 to be there to catch some other (such as intra-W32) possible calls
1348                                 described later. There are several more ways to define breakpoint in
1349                                 the code. One way is to use processor hardware breakpoint support but
1350                                 the number of breakpoints is limited.  The other way is to patch in the
1351                                 <span class="instruction">@{[ 'int $3' ]}</span> instruction but it will invoke
1352                                 <span class="constant">SIGTRAP</span> signal handler conflicting with
1353                                 the possible debugger (<span class="productname">gdb(1)</span>)
1354                                 control. This project uses the <span class="instruction">hlt</span>
1355                                 instruction, which also has a&nbsp;single-byte opcode as
1356                                 <span class="instruction">@{[ 'int $3' ]}</span> and it is a&nbsp;privileged
1357                                 instruction forbidden to be used from the UNIX user space code.
1358                                 <span class="instruction">hlt</span> invokes
1359                                 <span class="constant">SIGSEGV</span> signal which can be resolved by
1360                                 a&nbsp;custom signal handler without any conflict with the possible
1361                                 debugger control; <span class="productname">gdb(1)</span> needs the
1362                                 following command to pass through such
1363                                 <span class="constant">SIGSEGV</span> signal:</p>
1364
1365                                 <blockquote class="command">
1366                                         <p>handle SIGSEGV nostop noprint pass</p>
1367                                 </blockquote>
1368
1369                                 <p>When a breakpoint gets caught, we usually need to return to the
1370                                 running code. Unfortunately it is not possible because of the patched
1371                                 breakpoint opcode. The breakpoint cannot be simply removed upon return
1372                                 as it would permanently lose control over the point of entry. Even if
1373                                 the return would include faking of the return address in the bottom
1374                                 stack frame to patch the breakpoint back during later function exit it
1375                                 still would not solve the catching of inner calls of recursive
1376                                 functions. One of the working possibilities would be to patch the
1377                                 original instruction back and perform a&nbsp;singlestep provided by
1378                                 <span class="function">ptrace(2)</span> syscall. However such
1379                                 singlestep needs another controlling UNIX process and it would again
1380                                 conflict with the debuggers such as
1381                                 <span class="productname">gdb(1)</span>. This project implements the
1382                                 singlestep functionality by two consecutive breakpoints
1383                                 (<span class="instruction">hlt</span> instructions to be specific):
1384                                 The first two instruction addresses of the W32 functions are called
1385                                 <span class="productname">slot #1</span> and
1386                                 <span class="productname">slot #2</span>, the length of the first
1387                                 function instruction has to be analyzed to get the right address of
1388                                 <span class="productname">slot #2</span>. When the first breakpoint is
1389                                 caught it is necessary to patch the original instruction back and also
1390                                 patch another breakpoint in place of
1391                                 <span class="productname">slot #2</span>.
1392                                 During the <span class="productname">slot #2</span> breakpoint
1393                                 invocation the operation will be reverted &mdash; the breakpoint will be put
1394                                 to <span class="productname">slot #1</span> again and the instruction
1395                                 of <span class="productname">slot #2</span> will be restored to be able
1396                                 to continue the execution of the function.</p>
1397
1398                                 <p>W32 function will finish in its specific
1399                                 @{[ a_href '#calltype','cdecl/stdcall/fastcall call type' ]}, the control
1400                                 will return to the UNIX jump table relay which will debug dump the
1401                                 return value and it will finally pass the control back to the UNIX
1402                                 caller in the standard UNIX
1403                                 @{[ a_href '#calltype_cdecl','cdecl call type' ]}.</p>
1404
1405                                 @{[ doc_img 'fig/functype_patched_pass_fromunix',
1406                                                 'Function Type: <span class="constant">pass</span> from UNIX Code' ]}
1407
1408                         <a name="functype_pass_fromw32"><h4>Pass from W32 Code</h4></a>
1409
1410                                 <p>This function type is similar to the
1411                                 @{[ a_href '#functype_pass_fromunix','previous one' ]} with the exception
1412                                 of a&nbsp;more complicated entry point. Unfortunately W32 libraries call their
1413                                 own functions directly, using the <span class="instruction">call</span>
1414                                 instructions without any patchable jump table. Even the
1415                                 <span class="instruction">call</span> argument itself cannot be patched
1416                                 according to the relocation table record as such library intra-call
1417                                 instruction has no relocation due to its relative argument offset on
1418                                 <span class="constant">i386</span>. This time the double-breakpoint
1419                                 mechanism @{[ a_href '#functype_pass_fromunix','described above' ]} comes in
1420                                 handy since it will catch the entry point when the function gets
1421                                 called.  <span class="constant">SIGSEGV</span> handler gets invoked by
1422                                 the <span class="instruction">hlt</span> instruction and it will
1423                                 redirect the control to the jump table relay function to debug dump the
1424                                 function entry arguments (it has no other uses in this call type).</p>
1425
1426                                 <p>When the relay needs to call the original function it will reach
1427                                 exactly the same breakpoint instruction as during the recent
1428                                 <span class="constant">SIGSEGV</span> handling redirecting to this
1429                                 calling relay.  But this time the
1430                                 <span class="constant">through_w32_func</span> field of this function
1431                                 record will be set to prevent repeated redirection and to pass the
1432                                 control through the breakpoint mangle instead this time.</p>
1433
1434                                 <p>Returning is not very interesting as the first
1435                                 <span class="constant">SIGSEGV</span> handler did a&nbsp;straight jump
1436                                 for the redirection purposes without any needed consequent
1437                                 handling.</p>
1438
1439                                 <p>The jump table relay used for the callers from W32 code is
1440                                 a&nbsp;different one than the relay being used for the callers
1441                                 @{[ a_href '#functype_pass_fromunix','from UNIX code' ]}. UNIX code always
1442                                 uses relay with external @{[ a_href '#calltype_cdecl','cdecl call type' ]}
1443                                 but in this case a&nbsp;relay with the appropriate
1444                                 @{[ a_href '#calltype','cdecl/stdcall/fastcall call type' ]} is used.</p>
1445
1446                                 @{[ doc_img 'fig/functype_patched_pass_fromw32',
1447                                                 'Function Type: <span class="constant">pass</span> from W32 Code' ]}
1448
1449                         @{[ vskip() ]}
1450
1451                         <table border="1" align="center">
1452                                 <tr><td><span class="fname">captivesym</span> keyword</td><td>pass</td></tr>
1453                                 <tr><td>Native code function name                    </td><td>(no implementation)</td></tr>
1454                                 <tr><td>W32 traced code from UNIX function name      </td><td>FUNCNAME</td></tr>
1455                                 <tr><td>W32 traced code from W32  function name      </td><td>FUNCNAME_cdecl/_stdcall/_fastcall</td></tr>
1456                                 <tr><td>Entry/exit debug tracing from UNIX code      </td><td>yes</td></tr>
1457                                 <tr><td>Entry/exit debug tracing from W32 code       </td><td>yes</td></tr>
1458                                 <caption>Function Type <span class="constant">pass</span> Characteristics</caption>
1459                         </table>
1460
1461                 <a name="functype_wrap"><h3>Wrap of the Original "ntoskrnl.exe" Function</h3></a>
1462
1463                         <a name="functype_wrap_fromunix"><h4>Wrapping of Call from UNIX Code</h4></a>
1464
1465                                 <p>The code control flow has no special hardcore features since it is
1466                                 very similar to <a href="#functype_pass_fromunix">the direct pass to
1467                                 W32 function from UNIX code</a>. All the wrapping is done in the
1468                                 standard UNIX @{[ a_href '#calltype_cdecl','cdecl call type' ]} manner.
1469                                 Jump table debug dumping relays are provided twice &mdash; the
1470                                 &quot;outer&quot; one to trace the parameters from the function caller
1471                                 and the &quot;inner&quot; one to trace the call from the wrapper to the
1472                                 original W32 code. The &quot;inner&quot; relay also calls the W32 code
1473                                 with the appropriate <a href="#calltype">cdecl/stdcall/fastcall call
1474                                 type</a>.</p>
1475
1476                                 @{[ doc_img 'fig/functype_patched_wrap_fromunix',
1477                                                 'Function Type: <span class="constant">wrap</span> from UNIX Code' ]}
1478
1479                         <a name="functype_wrap_fromw32"><h4>Wrapping of Call from W32 Code</h4></a>
1480
1481                                 <p>This scheme is a&nbsp;combination of the
1482                                 <a href="#functype_wrap_fromunix">previous wrap of a&nbsp;call from
1483                                 UNIX code</a> and the <a href="#functype_pass_fromw32">direct pass from
1484                                 the W32 code</a>. The control is caught and redirected by
1485                                 <span class="constant">SIGSEGV</span> handler from the breakpoint
1486                                 placed at the entry to the original W32 function code. The second entry
1487                                 to the original W32 function with the
1488                                 <span class="constant">through_w32_func</span> field of this function
1489                                 description already set is done from the &quot;inner&quot; jump table
1490                                 relay with the appropriate
1491                                 @{[ a_href '#calltype','cdecl/stdcall/fastcall call type' ]}.</p>
1492
1493                                 @{[ doc_img 'fig/functype_patched_wrap_fromw32',
1494                                                 'Function Type: <span class="constant">wrap</span> from W32 Code' ]}
1495
1496                         @{[ vskip() ]}
1497
1498                         <p>Some functions can be <a href="#functype_pass">passed to the original
1499                         code</a> but they need their parameters to be checked/prepared.
1500                         Currently, such wrapping is only needed for the
1501                         <span class="function">ExAllocateFromPagedLookasideList()</span> function
1502                         where it is required due to <a href="#init_ntoskrnl">missing execution of
1503                         <span class="fname">ntoskrnl.exe</span> initialization</a>,
1504                         which would otherwise properly initialize some internal data structures.
1505                         In this case the wrapping code detects passing of an uninitialized
1506                         parameter and will search through the whole
1507                         <span class="fname">ntoskrnl.exe</span> code body at runtime to find the
1508                         proper initialization routine containing the correct initialization
1509                         parameters.  Passed addresses of static structures must be differentiated
1510                         as each of them usually has different initialization parameters. It is
1511                         proactive not to have a&nbsp;fixed parameters array as these parameters may
1512                         differ across different <span class="fname">ntoskrnl.exe</span>
1513                         versions.</p>
1514
1515                         <table border="1" align="center">
1516                                 <tr><td><span class="fname">captivesym</span> keyword</td><td>wrap</td></tr>
1517                                 <tr><td>Native UNIX wrapping code function name      </td><td>FUNCNAME_wrap</td></tr>
1518                                 <tr><td>W32 traced wrapping code from UNIX func. name</td><td>FUNCNAME</td></tr>
1519                                 <tr><td>W32 traced wrapping code from W32 func. name </td><td>FUNCNAME_cdecl/_stdcall/...</td></tr>
1520                                 <tr><td>W32 traced original code function name       </td><td>FUNCNAME_orig</td></tr>
1521                                 <tr><td>Entry/exit debug tracing from UNIX code      </td><td>yes</td></tr>
1522                                 <tr><td>Entry/exit debug tracing from W32 code       </td><td>yes</td></tr>
1523                                 <caption>Function Type <span class="constant">wrap</span> Characteristics</caption>
1524                         </table>
1525
1526                 <a name="functype_native"><h3>Native Implementation</h3></a>
1527
1528                         <h4>Native Implementation Called from UNIX Code</h4>
1529
1530                                 <p>This is the simplest case of a&nbsp;function call as it is fully
1531                                 handled only by the compiler and/or linker.</p>
1532
1533                                 <p>In this case though, no debug dumping call relay is provided &mdash; such
1534                                 relay would need to rename the implementations of native functions to
1535                                 prevent its automatic linking with the caller code. This renaming would
1536                                 not be possible to do by simple <span class="constant">#define</span>
1537                                 since it would also rename any calling statements of such function in
1538                                 the same C&nbsp;sources.  One of the possibilities to solve would be to
1539                                 utilize <span class="dashdash">--redefine-sym</span> feature of the
1540                                 <span class="productname">objcopy(1)</span> utility. On the other hand
1541                                 there is not much need to catch/debug such calls as both the caller and
1542                                 the callee are provided with full source file debug information for the
1543                                 debugger. Also the callee usually debug dumps its entry/exit parameters
1544                                 by custom debug dumps in the
1545                                 <a href="#functype_native_reactos"><span class="productname">ReactOS</span> implementations</a>.</p>
1546
1547                                 @{[ doc_img 'fig/functype_native_fromunix',
1548                                                 'Function Type: <span class="constant">native</span> from UNIX Code' ]}
1549
1550                         <a name="functype_native_fromw32"><h4>Native Implementation of
1551                                         &quot;unpatched&quot; Library Function Called from W32 Code</h4></a>
1552
1553                                 @{[ doc_img 'fig/functype_unpatched_native_fromw32',
1554                                                 'Function Type: <span class="constant">native</span> of <span class="constant">unpatched</span> from W32 Code' ]}
1555
1556                                 <p>Here a&nbsp;distinction arises: whether the project deals with
1557                                 a&nbsp;<span class="constant">patched</span> or an
1558                                 <span class="constant">unpatched</span> version of the library
1559                                 (<span class="constant">patched</span> is a&nbsp;loaded W32 binary
1560                                 library while <span class="constant">unpatched</span> library is
1561                                 completely provided by this project with no use of the library's
1562                                 original W32 binary file). As the project adjusts the exported symbol
1563                                 address during the patching operation, in some cases the
1564                                 <span class="constant">patched</span> library call may be handled
1565                                 simply as <span class="constant">unpatched</span> library call even for
1566                                 the <span class="constant">patched</span> libraries. Fortunately the
1567                                 distinction is not very important as the project is prepared to
1568                                 properly handle both cases.</p>
1569
1570                                 <p>The W32 caller which imported the symbol will be pointed right to
1571                                 the relaying function. The debug dumping relay will be called from W32
1572                                 code with the appropriate
1573                                 @{[ a_href '#calltype','cdecl/stdcall/fastcall call type' ]} while the
1574                                 relay will call the implementation of the native function in the
1575                                 standard UNIX @{[ a_href '#calltype_cdecl','cdecl call type' ]} manner.</p>
1576
1577                         <h4>Native Implementation of &quot;patched&quot; Library Function Called from W32 Code</h4>
1578
1579                                 @{[ doc_img 'fig/functype_patched_native_fromw32',
1580                                                 'Function Type: <span class="constant">native</span> of <span class="constant">patched</span> from W32 Code' ]}
1581
1582                                 <p>The calling scheme is similar to the
1583                                 <a href="#functype_native_fromw32">previous call of
1584                                 <span class="constant">unpatched</span> library function from W32
1585                                 code</a> but the call control is redirected from the entry point of the
1586                                 original W32 binary implementation by the breakpoint and its
1587                                 <span class="constant">SIGSEGV</span> handler as in
1588                                 <a href="#functype_pass_fromw32">the case of passing control from W32
1589                                 call</a>.</p>
1590
1591                                 <p>The original W32 function implementation located in the original
1592                                 loaded binary file is never executed but its entry point needs to be
1593                                 trapped by the breakpoint to be able to catch the function calls within
1594                                 the library.</p>
1595
1596                         @{[ vskip() ]}
1597
1598                         <p>In all cases the final function implementation is a&nbsp;standard UNIX
1599                         code compiled from C&nbsp;sources with full debug information available
1600                         for the debugger. Fortunately all such functions do not need to be coded
1601                         from scratch for this project since there already exist $freespeech
1602                         $ReactOS and $Wine projects and their code can be used instead.</p>
1603
1604                         <p>$Wine project is listed mostly for completeness as almost no
1605                         code was suitable for reuse as it implements W32 user space while this
1606                         project is running pure W32 kernel space environment (in $gnulinux user
1607                         space!).</p>
1608
1609                         <a name="functype_native_reactos"><h4>Native Implementation
1610                                         &ndash; <span class="productname">ReactOS</span></h4></a>
1611
1612                                 <p>Some functions are already implemented in the $ReactOS
1613                                 project and they can be used as they are.  Although it would be
1614                                 possible to <a href="#functype_pass">pass some function calls to the
1615                                 original code</a> it is more handy to provide native implementation as
1616                                 there is better control of the data handling during debugging sessions
1617                                 due to the provided debugging symbols.</p>
1618
1619                                 <p>Such functions can be found in
1620                                 <span class="fname">src/libcaptive/reactos/</span> subdirectory.
1621                                 Some functions had to be adjusted for this project
1622                                 &mdash; these modifications are compiled conditionally, depending on the
1623                                 <span class="constant">LIBCAPTIVE</span> symbol existence.</p>
1624
1625                                 <p>Later stages of this project reached the level where
1626                                 $ReactOS is still too immature and the needed functions are usually
1627                                 written just with the sad body:</p>
1628
1629                                 <blockquote class="command">
1630                                         <p>UNIMPLEMENTED;</p>
1631                                 </blockquote>
1632
1633                                 <p>Functions that were not possible to
1634                                 @{[ a_href '#functype_pass','pass' ]} were reimplemented by this project
1635                                 and placed in the project's implementation directories
1636                                 @{[ a_href '#reactos_nocare','instead of extending' ]} $ReactOS code.</p>
1637
1638                         <a name="functype_native_wine"><h4>Native Implementation &ndash; <span class="productname">Wine</span></h4></a>
1639
1640                                 <p>Even though $Wine only implements the
1641                                 <span class="productname">Microsoft Windows NT</span> user space, there
1642                                 still are some common functions which could be copied from the $Wine
1643                                 project.</p>
1644
1645                         <a name="functype_native_libcaptive"><h4>Native Implementation &ndash; Project Specific</h4></a>
1646
1647                                 <p>As a&nbsp;last resort it was necessary to provide a&nbsp;completely custom
1648                                 implementation of some API functions such as PC hardware dependent
1649                                 parts or memory management functions.</p>
1650
1651                         @{[ vskip() ]}
1652
1653                         <table border="1" align="center">
1654                                 <tr><td><span class="fname">captivesym</span> keyword</td><td>(none; just the symbol name)</td></tr>
1655                                 <tr><td>Native code function name                    </td><td>FUNCTIONNAME</td></tr>
1656                                 <tr><td>Native traced code from W32 code func. name  </td><td>FUNCTIONNAME_cdecl/_std...</td></tr>
1657                                 <tr><td>Entry/exit debug tracing from UNIX code      </td><td>no</td></tr>
1658                                 <tr><td>Entry/exit debug tracing from W32 code       </td><td>yes</td></tr>
1659                                 <caption>Function Type <span class="constant">native</span> Characteristics</caption>
1660                         </table>
1661
1662                 <a name="functype_undef"><h3>Undefined Function</h3></a>
1663
1664                         <p>Functions not defined by any of the previous function types cannot be
1665                         called by any W32 code including the code of the library implementing
1666                         such function. All functions of <span class="constant">patch</span>ed
1667                         libraries not listed in the <span class="fname">captivesym</span> exports
1668                         file are automatically set to be trapped as fatal program execution
1669                         errors.</p>
1670
1671                         <p>It is not necessary to list the symbols as
1672                         <span class="constant">undef</span> as long as you are just loading the
1673                         W32 <span class="constant">PE-32</span> code and the symbols belong to
1674                         <span class="constant">patch</span>ed library. On the other hand if you
1675                         are loading W32 <span class="fname">.so</span> code or if such symbol is
1676                         a&nbsp;part of <span class="constant">unpatched</span> library (and thus
1677                         being completely provided by the project) you need to list such symbol as
1678                         <span class="constant">undef</span> type to prevent unresolved symbol
1679                         reference.</p>
1680
1681                         <table border="1" align="center">
1682                                 <tr><td><span class="fname">captivesym</span> keyword</td><td>undef</td></tr>
1683                                 <tr><td>Native code function name                    </td><td>(no implementation)</td></tr>
1684                                 <tr><td>Native traced code function name             </td><td>FUNCTIONNAME_cdecl/_stdcall/_fastcall</td></tr>
1685                                 <tr><td>Debug tracing message from UNIX code         </td><td>yes</td></tr>
1686                                 <tr><td>Debug tracing message from W32 code          </td><td>yes</td></tr>
1687                                 <caption>Function Type <span class="constant">undef</span> Characteristics</caption>
1688                         </table>
1689
1690         
1691         <a name="calltype"><h2>API Function Calling Conventions</h2></a>
1692
1693                 <p>Standard UNIX code compiled by GCC (GNU C&nbsp;Compiler) running on host
1694                 $gnulinux always uses @{[ a_href '#calltype_cdecl','cdecl' ]} ABI (Application
1695                 Binary Interface) calling convention. This calling convention is also the
1696                 default declaration type of UNIX functions.</p>
1697
1698                 <p>W32 uses three different calling conventions in its ABI. They are all
1699                 described in the
1700                 <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vclang/html/_core_argument_passing_and_naming_conventions.asp"><span class="productname">Microsoft</span> documentation</a>.
1701                 It is always necessary to have the proper function declaration
1702                 (prototype) in the caller scope to prevent all sorts of unexpected
1703                 crashes.</p>
1704
1705                 <p>Unfortunately some non-matching combinations of calling conventions
1706                 result in hard to debug bugs: the caller gets back an unexpected stack
1707                 pointer from the callee and upon return it will restore registers from the
1708                 wrong stack pointer place. Since the caller will finally reclaim its stack
1709                 frame from its (uncorrupted) <span class="constant">EBP</span> stack frame
1710                 pointer the caller will return to the caller of the caller correctly. Just
1711                 the registers remain corrupted causing crashes of completely unrelated code
1712                 executed far, far away...</p>
1713
1714                 <p><span class="constant">EDI</span>, <span class="constant">ESI</span> and
1715                 <span class="constant">EBX</span> registers are always saved on the stack.
1716                 They are stored on the stack in this particular order from bottom to top
1717                 addresses (using the <span class="instruction">push EBX</span>,
1718                 <span class="instruction">push ESI</span>,
1719                 <span class="instruction">push EDI</span> sequence). Fortunately $gnulinux
1720                 GCC has the same register saving behaviour. If some register corruption
1721                 occurs the calling type presented between the caller and callee should be
1722                 checked.</p>
1723
1724                 <a name="calltype_cdecl"><h3>W32 Calling Convention &quot;cdecl&quot;</h3></a>
1725
1726                         <p>The only calling convention in the UNIX world. The default one for all
1727                         the compilers. All the arguments are passed on the stack, no arguments
1728                         are cleaned by the callee. Possible inconsistencies in the number of
1729                         function arguments with the function prototype used by the caller are
1730                         harmless. Variable arguments lists can be passed by this convention.</p>
1731
1732                         @{[ doc_img 'fig/calltype_cdecl',
1733                                         'W32 Calling Convention <span class="constant">cdecl</span> Scheme' ]}
1734
1735                         <table border="1" align="center">
1736                                 <tr><td>Arguments freed by         </td><td>caller</td></tr>
1737                                 <tr><td>Arguments on the stack     </td><td>#0 ... #(n-1)</td></tr>
1738                                 <tr><td>Arguments in the registers </td><td>none</td></tr>
1739                                 <tr><td>GCC attribute              </td><td><span class="command">__attribute__((__cdecl__))</span> (default)</td></tr>
1740                                 <caption>Calling Convention <span class="constant">cdecl</span> Characteristics</caption>
1741                         </table>
1742
1743                 <h3>W32 Calling Convention &quot;stdcall&quot;</h3>
1744
1745                         @{[ doc_img 'fig/calltype_stdcall',
1746                                         'W32 Calling Convention <span class="constant">stdcall</span> Scheme' ]}
1747
1748                         <p>Convention never used in the UNIX world. It needs to be specified for
1749                         W32 compilers. All the arguments are passed on the stack, all the
1750                         arguments are cleaned by the callee. Possible inconsistencies in the
1751                         number of function arguments with the function prototype used by the
1752                         caller will result in fatal crash. Variable arguments lists cannot be
1753                         passed by this convention &ndash; use @{[ a_href '#calltype_cdecl','cdecl' ]}
1754                         instead.</p>
1755
1756                         <table border="1" align="center">
1757                                 <tr><td>Arguments freed by         </td><td>callee</td></tr>
1758                                 <tr><td>Arguments on the stack     </td><td>#0 ... #(n-1)</td></tr>
1759                                 <tr><td>Arguments in the registers </td><td>none</td></tr>
1760                                 <tr><td>GCC attribute              </td><td><span class="command">__attribute__((__stdcall__))</span></td></tr>
1761                                 <caption>Calling Convention <span class="constant">stdcall</span> Characteristics</caption>
1762                         </table>
1763
1764                 <h3>W32 Calling Convention &quot;fastcall&quot;</h3>
1765
1766                         <p>Convention never used in the UNIX world. It needs to be specified for
1767                         W32 compilers. Convention used in the W32 world for its low calling
1768                         overhead. All but the first two arguments are passed on the stack, such
1769                         arguments are cleaned by the callee. First two arguments are passed in
1770                         the registers <span class="constant">ECX</span> and
1771                         <span class="constant">EDX</span> respectively. Possible inconsistencies
1772                         in the number of function arguments with the function prototype used by
1773                         the caller will result in fatal crash. Variable arguments lists cannot be
1774                         passed by this convention &ndash; use @{[ a_href '#calltype_cdecl','cdecl' ]}
1775                         instead.</p>
1776
1777                         <p>GCC (GNU C&nbsp;Compiler) native support for this calling convention
1778                         is pretty fresh and it is currently present only in the recent CVS
1779                         versions since 21st December of 2002 which should get released as GCC
1780                         version 3.4. This project solved the unsupported calling convention by
1781                         declaration of arguments passed in registers by
1782                         <span class="command">__attribute__((__regparm__(3)))</span>.
1783                         W32 passes the arguments in registers in the order
1784                         <span class="constant">ECX</span>, <span class="constant">EDX</span> but
1785                         GCC passes them in registers <span class="constant">EAX</span>,
1786                         <span class="constant">EDX</span>, <span class="constant">ECX</span>.
1787                         This incompatibility is compensated at C&nbsp;source level in the
1788                         @{[ a_href '#functype','relaying code' ]} generated by
1789                         <span class="fname">captivesym</span> relay generator.</p>
1790
1791                         @{[ doc_img 'fig/calltype_fastcall',
1792                                         'W32 Calling Convention <span class="constant">fastcall</span> Scheme' ]}
1793
1794                         <table border="1" align="center">
1795                                 <tr><td>Arguments freed by         </td><td>callee</td></tr>
1796                                 <tr><td>Arguments on the stack     </td><td>#2 ... #(n-1)</td></tr>
1797                                 <tr><td>Arguments in the registers </td><td><span class="constant">ECX</span>=#0,
1798                                                                             <span class="constant">EDX</span>=#1</td></tr>
1799                                 <tr><td>GCC &ge;3.4 attribute      </td><td><span class="command">__attribute__((__fastcall__))</span></td></tr>
1800                                 <tr><td>GCC &lt;3.4 attr. emulation</td><td><span class="command">__attribute__((__stdcall__))</span></td></tr>
1801                                 <tr><td>                           </td><td><span class="command">__attribute__((__regparm__(3) /* EAX,EDX,ECX */))</span></td></tr>
1802                                 <caption>Calling Convention <span class="constant">fastcall</span> Characteristics</caption>
1803                         </table>
1804
1805         <a name="synchronous"><h2>Multithreading and Multiple Processors</h2></a>
1806
1807                 <p>W32 platform stands on its&nbsp;thorough architecture parallelism. It
1808                 must lock all its objects to maintain coherence in presence of
1809                 multithreading and multiple processors. Since the author of this project
1810                 considers any parallel execution a serious obstacle for debugging, the whole
1811                 project architecture was designed to prevent any nondeterministic behaviour.
1812                 Therefore this project always emulates uniprocessor
1813                 <span class="productname">Microsoft Windows NT</span> kernel
1814                 (<span class="constant">KeNumberProcessors</span> symbol is always 1),
1815                 everything runs in the single initial thread/process and all the filesystem
1816                 operations are performed as synchronous
1817                         (&quot;synchronous&quot; by flags
1818                         <span class="constant">FILE_SYNCHRONOUS_IO_ALERT</span>,
1819                         <span class="constant">FO_SYNCHRONOUS_IO</span>,
1820                         <span class="constant">IRP_SYNCHRONOUS_API</span>,
1821                         <span class="constant">IRP_SYNCHRONOUS_PAGING_IO</span>,
1822                         forced <span class="constant">TRUE</span> result of
1823                         <span class="function">IoIsOperationSynchronous()</span>
1824                         etc.).
1825                 For several cases needed only by <span class="fname">ntfs.sys</span> there
1826                 had to be supported asynchronous access
1827                 (<span class="constant">STATUS_PENDING</span> return code) &ndash; parallel
1828                 execution is emulated by GLib
1829                 <span class="function">g_idle_add_full()</span> with
1830                 <span class="function">g_main_context_iteration()</span> called during
1831                 <span class="function">KeWaitForSingleObject()</span>.
1832                 Since there is a&nbsp;possibility a&nbsp;real W32 parallel threading would
1833                 be yet needed in the future all the code that would be hit by W32
1834                 multithreading capability is marked by
1835                 <span class="constant">TODO:thread</span> comment.</p>
1836
1837                 <p>Multiple processors (SMP) support will never need to be implemented
1838                 since uniprocessor W32 kernels apparently run the filesystem driver modules
1839                 fine. As this project implements only the uniprocessor W32 kernel all the
1840                 processor locking functions and structures such as
1841                 <span class="constant">KSPIN_LOCK</span> etc. can be safely implemented as
1842                 no-operations.</p>
1843
1844                 <p>Asynchronous callbacks registered for
1845                 <span class="constant">IO_WORKITEM</span>s are passed as GLib idle
1846                 functions by <span class="function">g_idle_add_full()</span>. Although they
1847                 will probably never be executed during non-interactive project's batch
1848                 executions it is the&nbsp;responsibility of W32 driver implementation to
1849                 complete all the pending tasks before its W32 shutdown. Such W32 shutdown
1850                 is done during cleanup of the project's&nbsp;execution by
1851                 <span class="function">captive_shutdown()</span>.</p>
1852
1853         <a name="paranoia"><h2>Paranoia Checks</h2></a>
1854
1855                 <p>A&nbsp;general approach of software projects development is to implement
1856                 many internal sanity checks during the development stage but to produce the
1857                 most optimized final release product without those debugging checks.</p>
1858
1859                 <p>Facilities for these practices can be seen in the standard
1860                 C&nbsp;include files for example as function
1861                 <span class="function">assert()</span> which gets disabled by the
1862                 <span class="constant">NDEBUG</span> symbol used during the final optimized
1863                 executable compilation. This project uses Gnome GLib messaging subsystem
1864                 offering sanity checks discarded by symbols
1865                 <span class="constant">G_DISABLE_ASSERT</span> and
1866                 <span class="constant">G_DISABLE_CHECKS</span>.
1867                 <span class="productname">Microsoft</span> also produces two versions of
1868                 its products &ndash; regular customers use the &quot;free build&quot; (also
1869                 called &quot;retail&quot;) while the programmers should develop their code
1870                 on the &quot;checked build&quot; product releases.</p>
1871
1872                 <p>As this project will always run unknown binary code of proprietary W32
1873                 filesystem drivers, the code can never be trusted. Such code even runs in
1874                 the same unprotected address space as its controlling UNIX code. Since
1875                 there is not enough documentation for the W32 components of the system and
1876                 also such documentation is usually misleading it can never be considered as
1877                 100% emulation. Even in the final releases all the sanity checks
1878                 implemented in this project should remain active as all the project's code
1879                 always interacts with unknown and untrusted W32 binaries.</p>
1880
1881                 <p><span class="productname">Microsoft Windows NT</span> code is written in
1882                 a&nbsp;foolproof style as it accepts even invalid input values, which
1883                 it usually corrects. This makes long-term debugging a&nbsp;pain as it hides
1884                 sources of problems. &quot;Checked build&quot; releases were probably
1885                 designed to fix this flaw by strict consistency checks but it did not reach
1886                 its goals as such checks are usually missing in the code.</p>
1887
1888                 <p>This project has strict consistency checks across all the code to make
1889                 the debugging phase easy enough. Failed sanity check is not always
1890                 a&nbsp;bug &ndash; sometimes it just means the real W32 binary code is more
1891                 benevolent than it could be expected according to the documentation and
1892                 such sanity check gets removed for the next version build. In other cases
1893                 the failed sanity checks mean the execution path for some unexpected
1894                 arguments combination was not yet implemented by this project. It may also
1895                 mean a bug, of course...</p>
1896
1897                 <p>Last but not least &ndash; never miss a&nbsp;possible sanity check as its
1898                 later removal is an order of magnitude cheaper than an&nbsp;uncaught
1899                 invalid assumption. Failed assertion is not always a&nbsp;bug although it
1900                 has to be fixed, of course.</p>
1901
1902
1903         <h2>STATUS_LOG_FILE_FULL</h2>
1904
1905                 <p>After writing approx. 1MB of data on NTFS test partition NTFS driver
1906                 returns for any further write requests
1907                 <span class="constant">STATUS_LOG_FILE_FULL</span> error code.
1908                 Apparently it is caused by the fact this project is
1909                 @{[ a_href '#synchronous','single-threaded' ]} and it ignores the spawn
1910                 of parallel journalling thread during <span class="fname">ntfs.sys</span>
1911                 initialization.</p>
1912
1913                 <p>Fortunately <span class="fname">ntfs.sys</span> will clear its
1914                 journalling log file during filesystem unmount. This project will therefore
1915                 remount the volume if <span class="constant">STATUS_LOG_FILE_FULL</span>
1916                 is detected to work around the missing journalling thread.</p>
1917
1918                 <p>Similar behaviour can be seen during write of compressed files &mdash;
1919                 the file gets written uncompressed and its compression will proceed only
1920                 during the final filesystem unmount.</p>
1921
1922                 <p>For these reasons it was mandatory to support
1923                 @{[ a_href '#parent_connector','transparent volume remounting' ]}.</p>
1924
1925
1926         <a name="parent_connector"><h2><span class="constant">ParentConnector</span> volume remounter</h2></a>
1927
1928                 <p>The sandbox master component of this project has control of restarting
1929                 its sandbox slaves containing the W32 filesystem. Target goal of
1930                 <span class="constant">ParentConnector</span> component is to transparently
1931                 provide persistent view of files and directories over the sandboxed slaves
1932                 being restarted.</p>
1933                 
1934                 <p>In the case of read-only operations it would be simple as we could only
1935                 save our state of currently opened filesystem objects with their read
1936                 file/directory offset. Write operations can be handled as the read-only
1937                 ones as long as all the operations are successful. In the case of W32
1938                 filesystem crash we lose all the past write operations. If we would redo
1939                 all the write operations we could very easily invoke the same crash.
1940                 Therefore we write:</p>
1941
1942                         <blockquote class="command">
1943                                 <p>Filesystem crash broke dirty object: FILE/PATH/NAME</p>
1944                         </blockquote>
1945
1946                 <p>message to syslog and refuse any further operations with this
1947                 object.</p>
1948
1949                 @{[ doc_img 'dia/parent-connector','Parent Connector' ]}
1950
1951                 <p><span class="constant">HANDLE</span> represents W32 object open in
1952                 existing W32 filesystem. <span class="constant">HANDLE</span> is created
1953                 on-demand according to the saved state of the object (such as its
1954                 pathname). Even the whole <span class="constant">VFS</span> sandbox slave
1955                 is spawned on-demand if some object operation requests it.</p>
1956
1957                 <p>W32 filesystem crash can obviously occur at any moment - it generates
1958                 @{[ a_href 'http://developer.gnome.org/doc/API/2.0/gobject/','GObject' ]}
1959                 @{[ a_href 'http://developer.gnome.org/doc/API/2.0/gobject/gobject-Signals.html','signal' ]}
1960                 <span class="constant">abort</span>. Successful filesystem unmount
1961                 (even as the part of remount operation) must be first preceded by
1962                 <span class="constant">detach</span> signal to close all existing
1963                 W32 <span class="constant">HANDLE</span>s. After their close the filesystem
1964                 gets the unmount requests. Only in the case all the close operations
1965                 succeeded including the final filesystem unmount the signal
1966                 <span class="constant">cease</span> can be activated to notify all the
1967                 dirty (written) objects they are now clean. During this
1968                 <span class="constant">cease</span> signal the project will also
1969                 @{[ a_href '#safe_flush','flush' ]} the sandbox commit buffer to its
1970                 underlying media.</p>
1971
1972                 <p>Objects never written remain in <span class="constant">clean</span>
1973                 state and they can be transparently reopened even if W32 filesystem crash
1974                 occurs.</p>
1975
1976
1977 <h1>TODO: Fsck of NTFS</h1>
1978
1979         <p>Currently this project does not support checking of data structures
1980         of NTFS volume as being provided by <span class="command">chkdsk.exe</span>
1981         in W32 environment and <span class="command">fsck</span> in UNIX OS.</p>
1982
1983         <p>W32 has its disk checking functionality split to
1984         <span class="fname">untfs.dll</span> W32 userland library
1985         according to
1986         @{[ a_href 'http://www.sysinternals.com/ntw2k/source/fmifs.shtml',
1987                         'Chkdskx and Formatx' ]}
1988         by @{[ a_href 'http://www.sysinternals.com/aboutus.shtml',
1989                         'Mark Russinovich' ]}.</p>
1990
1991         <p>I&nbsp;assume its execution falls completely
1992         @{[ a_href '#existing_emulation','out of scope' ]}
1993         of this project as it is W32 userland.</p>
1994
1995         <p>This possibility was not yet investigated in any way.</p>
1996
1997
1998 <h1>TODO: NTFS Support for
1999                 <span class="productname">@{[ a_href 'http://surprise.sourceforge.net/','Partition Surprise' ]}</span></h1>
2000
2001         <p>Although there currently exists
2002         <span class="productname">@{[ a_href 'http://mlf.linux.rulez.org/mlf/ezaz/ntfsresize.html','ntfsresize' ]}</span>
2003         I am not sure whether it is really reliable for all NTFS filesystems.
2004         <span class="productname">@{[ a_href 'http://surprise.sourceforge.net/','Partition Surprise' ]}</span>
2005         is the only partition manager capable of safely resizing the disk
2006         by using just the original W32 filesystem driver by full rebuild of
2007         filesystem metadata.
2008         Almost no file data blocks would be moved even on these generic filesystems
2009         as W32 supports <span class="constant">FSCTL_MOVE_FILE</span> request
2010         according to
2011         @{[ a_href 'http://www.sysinternals.com/ntw2k/info/defrag.shtml',
2012                         'Inside Windows NT Disk Defragmenting' ]}
2013         by @{[ a_href 'http://www.sysinternals.com/aboutus.shtml',
2014                         'Mark Russinovich' ]}.</p>
2015
2016
2017 <h1>Related Projects</h1>
2018
2019         <p>The usual solution for file exchange between $freespeech operating systems
2020         and <span class="productname">Microsoft Windows NT</span> is to use
2021         <span class="productname">FAT32</span> (<span class="productname">vfat</span>
2022         called in $gnulinux) partition and swap the files over it. This method is not
2023         very comfortable as you never have access to all the files of the other
2024         operating system.</p>
2025
2026         <a name="LinuxNTFScompet"><h2>$LinuxNTFS</h2></a>
2027
2028                 <p>Although this project takes a&nbsp;completely different approach and has
2029                 a&nbsp;different architecture, the final goal is the same as for this
2030                 project &ndash; reliable read-write <span class="productname">NTFS</span>
2031                 filesystem support. $LinuxNTFS goes the way of reverse engineering
2032                 filesystem data structures (and possibly
2033                 <span class="fname">ntfs.sys</span> itself). Unfortunately after many years
2034                 of its development it did not yet reach the state of reliable read-write
2035                 access although its read-only part is considered trustworthy.</p>
2036
2037                 <p>Using $LinuxNTFS for read-only access to existing partition with
2038                 <span class="productname">Microsoft Windows NT</span> installation is
2039                 planned to be able to acquire existing <span class="fname">ntfs.sys</span>,
2040                 <span class="fname">ntoskrnl.exe</span> and possibly
2041                 <span class="fname">ksecdd.sys</span> (imported by
2042                 <span class="fname">ntfs.sys</span>) files from the user's
2043                 <span class="productname">NTFS</span> partition.</p>
2044
2045         <h2><span class="productname">@{[ a_href 'http://www.cgsecurity.org/ntfs.html','NTPwd NTFS Driver' ]}</span></h2>
2046
2047                 <p>DOS based @{[ a_href 'http://www.gnu.org/licenses/gpl.html','GPL-2.0' ]}
2048                 read-write NTFS driver. Filesystem structures are reverse engineered in the
2049                 way of @{[ a_href '#LinuxNTFScompet','Linux-NTFS Project' ]}. As it is not very
2050                 actively maintained it reaches a&nbsp;lower level of
2051                 <span class="productname">NTFS</span> compatibility.</p>
2052
2053         <h2>@{[ a_href 'http://www.vmware.com/download/workstation.html','VMware Workstation' ]}</h2>
2054
2055                 <p>The only real competition: Closed-source read/write @{[ '$299' ]} equivalent.</p>
2056
2057                 <p>Original Microsoft Windows operating system can be run inside a virtual
2058                 machine running under GNU/Linux and share the read-write NTFS disk by using
2059                 a network file sharing through a&nbsp;VMware virtual network card.</p>
2060
2061                 <p>You need @{[ '$299' ]} for this product and you need to
2062                 give up your system security by running un@{[ a_href '#sandbox','sandbox' ]}ed
2063                 closed-source program in your GNU/Linux.</p>
2064
2065         <h2>@{[ a_href 'http://www.winehq.com/','Wine Project' ]}</h2>
2066
2067                 <p>No code could be shared &ndash; Wine emulates only Microsoft Windows userland.
2068                 Filesystem drivers completely belong to Microsoft Windows kernelland.</p>
2069
2070         <h2>@{[ a_href 'http://www.sysinternals.com/ntw2k/freeware/ntfswin98.shtml','NTFS for Windows 98' ]}</h2>
2071
2072                 <p>Closed-source read-only-crippled @{[ '$0' ]} equivalent for Microsoft Windows.</p>
2073
2074                 <p>There is a @{[ a_href 'http://www.sysinternals.com/images/screenshots/ntfs98ap.gif',
2075                                 'diagram' ]} showing exactly the principle of Captive NTFS project.
2076                 There is apparently disabled read/write functionality in <i>NTFS for
2077                 Windows 98</i> as the same company also sells the following product sharing
2078                 the same codebase:</p>
2079
2080         <h2>@{[ a_href 'http://www.winternals.com/products/repairandrecovery/ntfsdospro.asp','NTFSDOS Professional' ]}</h2>
2081
2082                 <p>Closed-source read/write @{[ '$299' ]} equivalent for MS-DOS.</p>
2083
2084                 <p>This product is the closest equivalent to Captive NTFS but it is
2085                 a commercial product, closed-source and it has filesystem interface only
2086                 for MS-DOS.</p>
2087
2088
2089 <h1>Re: @{[ a_href 'http://linux-ntfs.sourceforge.net/info/ntfs.html#7.7',
2090                 "7.7 Can't we write a wrapper for Windows' driver?" ]}</h1>
2091
2092         <p class="re">&gt; It sounds like a great idea, to start with, but there are numerous
2093         problems.</p>
2094
2095         <p><span class="re">&gt; The largest technical problem is joining the Windows
2096         system DLL to the Linux VFS. It could be done, but it wouldn't be pretty.</span><br />
2097         Yep. :-)</p>
2098
2099         <p><span class="re">&gt; It would have to run as part of the kernel which would mean
2100         that if it went wrong it could crash the machine. With no source, we might not
2101         be able to work around the problem.</span><br />
2102         @{[ a_href '#sandbox','Nope' ]},
2103         @{[ a_href 'http://lufs.sourceforge.net/lufs/','Linux Userland File System (LUFS)' ]}
2104         moves the filesystem implementation to UNIX userland where the Microsoft
2105         Windows filesystem is completely unarmed by Captive jail of chroot(2),
2106         setuid(2) and setrlimit(2). There only remains one narrow connection to the rest of
2107         system (by CORBA/ORBit). The filesystem's life environment gets kill(2)ed when
2108         UNIX is no longer satisfied with it. Safety similar to
2109         @{[ a_href 'http://www.vmware.com/solutions/security.html','VMware sandbox' ]}.</p>
2110
2111         <p><span class="re">&gt; The next major problem is compati<!--orig. text typo-->bility.
2112         Which version of the Windows system file would we use? Picking one would limit
2113         its use, making the wrapper versatile for all of them would be a programming
2114         nightmare.</span><br />
2115         Microsoft Windows NTFS filesystem driver is capable of accessing older formats
2116         of the filesystem. This project currently runs Microsoft Windows XP version,
2117         porting to Microsoft Windows 2003 Server expected. (Microsoft Windows upgrades
2118         NTFS disk filesystem to its own version during complete CD-ROM Microsoft
2119         Windows system installation &ndash; such an operation is not a threat to the use of this project.)</p>
2120
2121         <p><span class="re">&gt; And it gets worse. The legal implications of
2122         distributing Windows systems files would cause problems.</span><br />
2123         User must be careful to obey all licensing restrictions according to his
2124         local country laws.<br />
2125         <span class="re">&gt; Also the proprietary nature of the driver would mean that
2126         the other kernel coders would not investigate any problems if someone had used
2127         the NTFS wrapper.</span><br />
2128         It does not apply to this project due to the implemented
2129         @{[ a_href '#sandbox','filesystem separation' ]}.</p>
2130
2131
2132 HERE
2133
2134
2135 My::Web->footer();