Skip to content

Commit 13c0387

Browse files
FGaspertoddr
authored andcommitted
Use atomic filesystem operations for cPanel::StateFile.
This eliminates the likelihood of partially-written files. The major changes are twofold: 1) Ensure that a stat() on the filename returns the same inode as the filehandle during _open(). 2) Write to a temporary file in the same directory, then rename() that file into place. This prevents corrupt files.
1 parent a5fd875 commit 13c0387

3 files changed

Lines changed: 234 additions & 36 deletions

File tree

Makefile.PL

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ my %WriteMakefileArgs = (
2424
"Fcntl" => 0,
2525
"File::Path" => 0,
2626
"File::Spec" => 0,
27+
"File::Temp" => 0,
2728
"Getopt::Long" => 0,
2829
"POSIX" => 0,
2930
"Scalar::Util" => 0,
@@ -40,10 +41,12 @@ my %WriteMakefileArgs = (
4041
"TEST_REQUIRES" => {
4142
"Carp" => 0,
4243
"Cwd" => 0,
43-
"File::Temp" => 0,
44+
"File::Slurper" => 0,
4445
"FindBin" => 0,
4546
"Test::Exception" => 0,
4647
"Test::More" => 0,
48+
"Time::HiRes" => 0,
49+
"autodie" => 0,
4750
"lib" => 0
4851
},
4952
"VERSION" => "0.900",
@@ -58,6 +61,7 @@ my %FallbackPrereqs = (
5861
"Cwd" => 0,
5962
"Fcntl" => 0,
6063
"File::Path" => 0,
64+
"File::Slurper" => 0,
6165
"File::Spec" => 0,
6266
"File::Temp" => 0,
6367
"FindBin" => 0,
@@ -69,8 +73,10 @@ my %FallbackPrereqs = (
6973
"Test::Exception" => 0,
7074
"Test::More" => 0,
7175
"Text::Wrap" => 0,
76+
"Time::HiRes" => 0,
7277
"Unix::PID" => "0.21",
7378
"YAML::Syck" => 0,
79+
"autodie" => 0,
7480
"base" => 0,
7581
"lib" => 0,
7682
"parent" => 0,

lib/cPanel/StateFile.pm

Lines changed: 66 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -183,12 +183,7 @@ sub import {
183183
if ( $state_file->{file_handle} ) {
184184

185185
# TODO probably need to check for failure, but then what do I do?
186-
eval {
187-
local $SIG{'ALRM'} = sub { die "flock 8 timeout\n"; };
188-
my $orig_alarm = alarm $state_file->{flock_timeout};
189-
flock $state_file->{file_handle}, 8;
190-
alarm $orig_alarm;
191-
};
186+
eval { flock $state_file->{file_handle}, 8; };
192187
close $state_file->{file_handle};
193188
$state_file->{file_handle} = undef;
194189

@@ -222,29 +217,58 @@ sub import {
222217
return;
223218
}
224219

220+
#Tested directly because this is critical logic.
225221
sub _open {
226-
my ( $self, $mode ) = @_;
222+
my ($self) = @_;
227223
my $state_file = $self->{state_file};
228224
$state_file->throw('Cannot open state file inside a call_unlocked call.') unless defined $self->{lock_file};
229225

230-
open my $fh, $mode, $state_file->{file_name}
231-
or $state_file->throw("Unable to open state file '$state_file->{file_name}': $!");
226+
OPEN_FLOCK: {
227+
sysopen( my $fh, $state_file->{file_name}, Fcntl::O_CREAT() | Fcntl::O_RDWR(), 0600 )
228+
or $state_file->throw("Unable to open state file '$state_file->{file_name}': $!");
229+
230+
$self->_flock_after_open($fh);
231+
232+
#We might have blocked on the flock() long enough for
233+
#another process to have rename()d over the file that
234+
#we just locked. So we need to ensure that the file we
235+
#have locked is the same file as $state_file.
236+
my $fh_inode = ( stat $fh )[1];
237+
my $path_inode = ( stat $state_file->{file_name} )[1];
238+
if ( $fh_inode != $path_inode ) {
239+
redo OPEN_FLOCK;
240+
}
241+
242+
$state_file->{file_handle} = $fh;
243+
}
244+
}
245+
246+
sub _flock_after_open {
247+
my ( $self, $fh ) = @_;
248+
249+
my $timed_out;
250+
232251
eval {
233-
local $SIG{'ALRM'} = sub { die "flock 2 timeout\n"; };
234-
my $orig_alarm = alarm $state_file->{flock_timeout};
235-
flock $fh, 2;
252+
local $SIG{'ALRM'} = sub {
253+
$timed_out = 1;
254+
die "flock LOCK_EX timeout\n";
255+
};
256+
257+
my $orig_alarm = alarm $self->{state_file}{flock_timeout};
258+
flock $fh, Fcntl::LOCK_EX();
236259
alarm $orig_alarm;
237260
1;
238261
} or do {
239262
close($fh);
240-
if ( $@ eq "flock 2 timeout\n" ) {
241-
$state_file->throw('Guard timed out trying to open state file.');
242-
}
243-
else {
244-
$state_file->throw($@);
263+
264+
if ($timed_out) {
265+
$self->{state_file}->throw("Guard timed out trying to lock state file “$self->{state_file}{flock_timeout}”.");
245266
}
267+
268+
$self->{state_file}->throw($@);
246269
};
247-
$state_file->{file_handle} = $fh;
270+
271+
return;
248272
}
249273

250274
sub update_file {
@@ -253,26 +277,33 @@ sub import {
253277
$state_file->throw('Cannot update_file inside a call_unlocked call.') unless defined $self->{lock_file};
254278

255279
if ( !$state_file->{file_handle} ) {
256-
if ( -e $state_file->{file_name} ) {
257-
$self->_open('+<');
258-
}
259-
else {
260-
sysopen( my $fh, $state_file->{file_name}, &Fcntl::O_CREAT | &Fcntl::O_EXCL | &Fcntl::O_RDWR )
261-
or $state_file->throw("Cannot create state file '$state_file->{file_name}': $!");
262-
$state_file->{file_handle} = $fh;
263-
}
280+
$self->_open();
264281
}
265-
seek( $state_file->{file_handle}, 0, 0 );
266-
truncate( $state_file->{file_handle}, 0 )
267-
or $state_file->throw("Unable to truncate the state file: $!");
268282

269-
$state_file->{data_object}->save_to_cache( $state_file->{file_handle} );
270-
$state_file->{file_mtime} = ( stat( $state_file->{file_handle} ) )[9];
283+
#Set UNLINK in case we die().
284+
require File::Temp;
285+
my ( $fh, $path ) = File::Temp::tempfile( DIR => $self->{_file_dir}, UNLINK => 1 );
286+
287+
#We lock the temp file so that it’s “pre-locked”
288+
#when we rename() it into place below. That way the
289+
#production path stays consistently locked.
290+
$self->_flock_after_open($fh);
291+
292+
$state_file->{data_object}->save_to_cache($fh);
293+
294+
rename $path => $state_file->{file_name} or $state_file->throw("Failed to rename($path => $state_file->{file_name}: $!");
295+
296+
flock $state_file->{file_handle}, Fcntl::LOCK_UN();
297+
close $state_file->{file_handle};
298+
299+
@{$state_file}{ 'file_handle', 'file_size', 'file_mtime' } = (
300+
$fh,
301+
( stat $fh )[ 7, 9 ],
302+
);
271303

272304
# Make certain we are at end of file.
273-
seek( $state_file->{file_handle}, 0, 2 )
274-
or $state_file->throw("Unable to go to end of file: $!");
275-
$state_file->{file_size} = tell( $state_file->{file_handle} );
305+
seek( $fh, 0, Fcntl::SEEK_END() ) or $state_file->throw("Unable to go to end of file “$state_file->{file_name}”: $!");
306+
276307
return;
277308
}
278309

@@ -430,7 +461,7 @@ sub import {
430461

431462
# File is newer or a different size
432463
$guard ||= cPanel::StateFile::Guard->new( { state => $self } );
433-
$guard->_open('+<');
464+
$guard->_open();
434465
$self->{data_object}->load_from_cache( $self->{file_handle} );
435466
( $self->{file_mtime}, $self->{file_size} ) = ( stat( $self->{file_handle} ) )[ 9, 7 ];
436467
}

t/statefile_lock_replace.t

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#!/usr/bin/perl
2+
3+
use Test::More tests => 1;
4+
5+
use strict;
6+
use warnings;
7+
use autodie;
8+
9+
use Fcntl ();
10+
use File::Temp ();
11+
use File::Slurper ();
12+
use Time::HiRes ();
13+
14+
use cPanel::StateFile ();
15+
16+
my $dir = File::Temp::tempdir( CLEANUP => 1 );
17+
18+
my $file_path = "$dir/the_file";
19+
20+
our $suffix = substr( rand, 2 );
21+
22+
#----------------------------------------------------------------------
23+
# This is a deep test of object internals. For conciseness,
24+
# it recreates objects directly rather than via constructors.
25+
# This is generally bad practice, but the alternatives would be either
26+
# an unwieldy, brittle test or a significant refactor, neither of which
27+
# seems justified here. (Even these tests are more complicated than
28+
# would be ideal.)
29+
#
30+
# Unfortunately, it means that any change to the implementation of
31+
# cPanel::StateFile or cPanel::StateFile::Guard is not unlikely to
32+
# cause a spurious failure here.
33+
#----------------------------------------------------------------------
34+
35+
my $data_obj = bless( {}, 'MockDataObject' );
36+
37+
sub _create_hack_guard {
38+
my $hack_state = bless {
39+
data_object => $data_obj,
40+
file_name => $file_path,
41+
flock_timeout => 60,
42+
},
43+
'cPanel::StateFile';
44+
45+
return bless {
46+
state_file => $hack_state,
47+
lock_file => 'this_does_not_really_exist',
48+
},
49+
'cPanel::StateFile::Guard';
50+
}
51+
52+
#----------------------------------------------------------------------
53+
54+
local $SIG{'CHLD'} = 'IGNORE';
55+
56+
my $subprocess_timeout = 60; # 1 minute
57+
58+
my $renamer_pid = fork or do {
59+
my $end = time + $subprocess_timeout;
60+
61+
while ( time < $end ) {
62+
open my $fh, '>>', $file_path;
63+
flock $fh, Fcntl::LOCK_EX();
64+
65+
do { open my $fh, '>>', "$file_path.tmp" };
66+
67+
Time::HiRes::sleep(0.01);
68+
69+
rename "$file_path.tmp" => $file_path;
70+
}
71+
72+
print "# $$: Renamer process has outlived its usefulness. Exiting …$/";
73+
74+
exit;
75+
};
76+
77+
for my $iteration ( 1 .. 10 ) {
78+
note "_open() iteration $iteration";
79+
80+
my $hguard = _create_hack_guard();
81+
$hguard->_open();
82+
83+
my $fh_inode = ( stat $hguard->{'state_file'}{'file_handle'} )[1];
84+
my $path_inode = ( stat $hguard->{'state_file'}{'file_name'} )[1];
85+
if ( $fh_inode != $path_inode ) {
86+
die "_open() did flock() a file handle that isn’t the path!";
87+
}
88+
89+
note "\t… done.";
90+
}
91+
92+
if ( CORE::kill 'KILL', $renamer_pid ) {
93+
note "Renamer process ($renamer_pid) sent SIGKILL";
94+
}
95+
else {
96+
diag "kill() of renamer process ($renamer_pid): $! (timed out?)";
97+
}
98+
99+
#----------------------------------------------------------------------
100+
101+
_create_hack_guard()->update_file();
102+
103+
for my $iteration ( 1 .. 20 ) {
104+
$suffix = substr( rand, 2 );
105+
$suffix = substr( $suffix, 0, int rand length $suffix );
106+
107+
note "Integrity test: running iteration $iteration";
108+
109+
my $expected_content = qr<\APID [0-9]+-$suffix\z>;
110+
111+
my $check_content_cr = sub {
112+
my $content = File::Slurper::read_binary($file_path);
113+
114+
if ( $content !~ $expected_content ) {
115+
die "FAIL: content “$content” doesn’t match expected “$expected_content";
116+
}
117+
};
118+
119+
my $pid = fork or do {
120+
alarm $subprocess_timeout;
121+
my $hack_guard = _create_hack_guard();
122+
$hack_guard->update_file();
123+
do { open my $fh, '>', "$dir/wrote-$iteration" };
124+
$hack_guard->update_file() while 1;
125+
};
126+
127+
Time::HiRes::sleep(0.01) while !-e "$dir/wrote-$iteration";
128+
129+
my $start = time;
130+
$check_content_cr->() while time < ( $start + 1 );
131+
132+
if ( !CORE::kill 'KILL', $pid ) {
133+
diag "kill() of updater process ($pid): $! (timed out?)";
134+
}
135+
136+
$check_content_cr->();
137+
}
138+
139+
ok 1, 'Done';
140+
141+
#----------------------------------------------------------------------
142+
143+
package MockDataObject;
144+
145+
sub load_from_cache {
146+
my ( $self, $fh ) = @_;
147+
148+
sysread( $fh, $self->{'_content'}, 32768 );
149+
150+
return;
151+
}
152+
153+
sub save_to_cache {
154+
my ( $self, $fh ) = @_;
155+
156+
syswrite( $fh, "PID $$-$main::suffix" );
157+
158+
return;
159+
}
160+
161+
1;

0 commit comments

Comments
 (0)