From: Matthias Heiserer <m.heiserer@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH v1 http-server 1/2] AnyEvent: fix #3990 - rewrite file upload
Date: Fri, 22 Apr 2022 16:43:57 +0200 [thread overview]
Message-ID: <20220422144358.3217098-1-m.heiserer@proxmox.com> (raw)
Uploading of files of arbitrary size is now possible.
Some (regex) lines are long, but IMO they are more readable this way than
split across multiple lines, because they mirror how the matched text appears in the HTTP request.
Signed-off-by: Matthias Heiserer <m.heiserer@proxmox.com>
---
src/PVE/APIServer/AnyEvent.pm | 152 +++++++++++++++++-----------------
1 file changed, 78 insertions(+), 74 deletions(-)
diff --git a/src/PVE/APIServer/AnyEvent.pm b/src/PVE/APIServer/AnyEvent.pm
index 7dd7d2d..ade3c05 100644
--- a/src/PVE/APIServer/AnyEvent.pm
+++ b/src/PVE/APIServer/AnyEvent.pm
@@ -1157,64 +1157,74 @@ sub handle_request {
sub file_upload_multipart {
my ($self, $reqstate, $auth, $method, $path, $rstate) = @_;
-
eval {
my $boundary = $rstate->{boundary};
my $hdl = $reqstate->{hdl};
-
my $startlen = length($hdl->{rbuf});
- if ($rstate->{phase} == 0) { # skip everything until start
- if ($hdl->{rbuf} =~ s/^.*?--\Q$boundary\E \015?\012
- ((?:[^\015]+\015\012)* ) \015?\012//xs) {
- my $header = $1;
- my ($ct, $disp, $name, $filename);
- foreach my $line (split(/\015?\012/, $header)) {
- # assume we have single line headers
- if ($line =~ m/^Content-Type\s*:\s*(.*)/i) {
- $ct = parse_content_type($1);
- } elsif ($line =~ m/^Content-Disposition\s*:\s*(.*)/i) {
- ($disp, $name, $filename) = parse_content_disposition($1);
- }
- }
+ my $newline = qr/\015?\012/;
+ my $delimiter = qr/--\Q$boundary\E${newline}/;
+ my $closeDelimiter = qr/--\Q$boundary\E--${newline}/;
- if (!($disp && $disp eq 'form-data' && $name)) {
- syslog('err', "wrong content disposition in multipart - abort upload");
- $rstate->{phase} = -1;
- } else {
+ my $check_disposition = sub {
+ my ($disp) = @_;
+ die "wrong Content-Disposition in multipart, expected `form-data` - abort upload"
+ if $disp ne 'form-data';
+ };
- $rstate->{fieldname} = $name;
+ # Phase 0 - preserve boundary, but remove everything before
+ if ($rstate->{phase} == 0 && $hdl->{rbuf} =~ s/^.*?($delimiter)/$1/xs) {
+ $rstate->{read} += $startlen - length($hdl->{rbuf});
+ $rstate->{phase} = 1;
+ }
- if ($filename) {
- if ($name eq 'filename') {
- # found file upload data
- $rstate->{phase} = 1;
- $rstate->{filename} = $filename;
- } else {
- syslog('err', "wrong field name for file upload - abort upload");
- $rstate->{phase} = -1;
- }
- } else {
- # found form data for field $name
- $rstate->{phase} = 2;
- }
- }
- } else {
- my $len = length($hdl->{rbuf});
- substr($hdl->{rbuf}, 0, $len - $rstate->{maxheader}, '')
- if $len > $rstate->{maxheader}; # skip garbage
- }
- } elsif ($rstate->{phase} == 1) { # inside file - dump until end marker
- if ($hdl->{rbuf} =~ s/^(.*?)\015?\012(--\Q$boundary\E(--)? \015?\012(.*))$/$2/xs) {
+ # Phase 1 - parse payload without file data
+ if ($rstate->{phase} == 1 && $hdl->{rbuf} =~
+ s/^${delimiter}Content-Disposition: (.*?); name="content"(.*?)($delimiter)/$3/s
+ ) {
+ $check_disposition->($1);
+ $rstate->{params}->{content} = trim($2);
+ syslog('info', "timeout: " . $hdl->{timeout});
+ }
+
+ if ($rstate->{phase} == 1 && $hdl->{rbuf} =~
+ s/^${delimiter}Content-Disposition: (.*?); name="checksum-algorithm"(.*?)($delimiter)/$3/s
+ ) {
+ $check_disposition->($1);
+ $rstate->{params}->{"checksum-algorithm"} = trim($2);
+ }
+
+ if ($rstate->{phase} == 1 && $hdl->{rbuf} =~
+ s/^${delimiter}Content-Disposition: (.*?); name="checksum"(.*?)($delimiter)/$3/s
+ ) {
+ $check_disposition->($1);
+ $rstate->{params}->{checksum} = trim($2);
+ }
+
+ if ($rstate->{phase} == 1 && $hdl->{rbuf} =~
+ s/^${delimiter}
+ Content-Disposition:\ (.*?);\ name="(.*?)";\ filename="([^"]+)"${newline}
+ Content-Type:\ [^\s]*\s+ #remove all whitespace until begin of data
+ //sxx
+ ) {
+ $check_disposition->($1);
+ die "wrong field `name` for file upload, expected `filename` - abort upload"
+ if $2 ne "filename";
+ $rstate->{phase} = 2;
+ $rstate->{params}->{filename} = trim($3);
+ }
+
+ # Phase 2 - dump content into file
+ if ($rstate->{phase} == 2) {
+ if ($hdl->{rbuf} =~ s/^(.*?)${newline}?+${closeDelimiter}.*$//s) {
my ($rest, $eof) = ($1, $3);
my $len = length($rest);
die "write to temporary file failed - $!"
if syswrite($rstate->{outfh}, $rest) != $len;
$rstate->{ctx}->add($rest);
- $rstate->{params}->{filename} = $rstate->{filename};
$rstate->{md5sum} = $rstate->{ctx}->hexdigest;
$rstate->{bytes} += $len;
- $rstate->{phase} = $eof ? 100 : 0;
+ $rstate->{phase} = 100;
} else {
my $len = length($hdl->{rbuf});
my $wlen = $len - $rstate->{boundlen};
@@ -1226,42 +1236,29 @@ sub file_upload_multipart {
$rstate->{ctx}->add($data);
}
}
- } elsif ($rstate->{phase} == 2) { # inside normal field
+ }
- if ($hdl->{rbuf} =~ s/^(.*?)\015?\012(--\Q$boundary\E(--)? \015?\012(.*))$/$2/xs) {
- my ($rest, $eof) = ($1, $3);
- my $len = length($rest);
- $rstate->{post_size} += $len;
- if ($rstate->{post_size} < $limit_max_post) {
- $rstate->{params}->{$rstate->{fieldname}} = $rest;
- $rstate->{phase} = $eof ? 100 : 0;
- } else {
- syslog('err', "form data to large - abort upload");
- $rstate->{phase} = -1; # skip
- }
- }
- } else { # skip
- my $len = length($hdl->{rbuf});
- substr($hdl->{rbuf}, 0, $len, ''); # empty rbuf
+ # Phase 100 - transfer finished
+ if ($rstate->{phase} == 100) {
+ my $elapsed = tv_interval($rstate->{starttime});
+
+ my $rate = int($rstate->{bytes} / ($elapsed * 1024 * 1024));
+ syslog('info',
+ "multipart upload complete (size: %d time: %ds rate: %.2fMiB/s md5sum: %s)",
+ $rstate->{bytes}, $elapsed, $rate, $rstate->{md5sum}
+ );
+ $self->handle_api2_request($reqstate, $auth, $method, $path, $rstate);
}
- $rstate->{read} += ($startlen - length($hdl->{rbuf}));
+ $rstate->{read} += $startlen - length($hdl->{rbuf});
- if (!$rstate->{done} && ($rstate->{read} + length($hdl->{rbuf})) >= $rstate->{size}) {
- $rstate->{done} = 1; # make sure we dont get called twice
- if ($rstate->{phase} < 0 || !$rstate->{md5sum}) {
- die "upload failed\n";
- } else {
- my $elapsed = tv_interval($rstate->{starttime});
-
- my $rate = int($rstate->{bytes} / ($elapsed * 1024 * 1024));
- syslog('info',
- "multipart upload complete (size: %d time: %ds rate: %.2fMiB/s md5sum: %s)",
- $rstate->{bytes}, $elapsed, $rate, $rstate->{md5sum}
- );
- $self->handle_api2_request($reqstate, $auth, $method, $path, $rstate);
- }
+ if (
+ $rstate->{read} + length($hdl->{rbuf}) >= $rstate->{size}
+ && $rstate->{phase} != 100
+ ) {
+ die "upload failed";
}
+
};
if (my $err = $@) {
syslog('err', $err);
@@ -1269,6 +1266,13 @@ sub file_upload_multipart {
}
}
+# Strip leading and trailing whitespace; inner whitespace (e.g. in file names) is kept.
+sub trim {
+    my ($string) = @_;
+    $string =~ s/\A\s+|\s+\z//g;
+    return $string;
+}
+
sub parse_content_type {
my ($ctype) = @_;
--
2.30.2
next reply other threads:[~2022-04-22 14:44 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-04-22 14:43 Matthias Heiserer [this message]
2022-04-22 14:43 ` [pve-devel] [PATCH v1 http-server 2/2] AnyEvent: disable upload timeout Matthias Heiserer
2022-04-22 16:54 ` Thomas Lamprecht
2022-05-06 11:44 ` Matthias Heiserer
2022-04-22 16:54 ` [pve-devel] [PATCH v1 http-server 1/2] AnyEvent: fix #3990 - rewrite file upload Thomas Lamprecht
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220422144358.3217098-1-m.heiserer@proxmox.com \
--to=m.heiserer@proxmox.com \
--cc=pve-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox