// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package writers

import (
	"fmt"
19
	"io"
R
Robert Swain 已提交
20 21
	"net/http"
	"net/url"
22
	"path"
R
Robert Swain 已提交
23 24 25

	log "github.com/Sirupsen/logrus"
	"github.com/matrix-org/dendrite/clientapi/jsonerror"
26
	"github.com/matrix-org/dendrite/common/config"
27 28
	"github.com/matrix-org/dendrite/mediaapi/fileutils"
	"github.com/matrix-org/dendrite/mediaapi/storage"
29
	"github.com/matrix-org/dendrite/mediaapi/thumbnailer"
R
Robert Swain 已提交
30
	"github.com/matrix-org/dendrite/mediaapi/types"
31
	"github.com/matrix-org/gomatrixserverlib"
R
Robert Swain 已提交
32 33 34 35 36 37 38 39 40 41 42
	"github.com/matrix-org/util"
)

// uploadRequest metadata included in or derivable from an upload request
// https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload
// NOTE: The members come from HTTP request metadata such as headers, query parameters or can be derived from such
type uploadRequest struct {
	// MediaMetadata describes the media being uploaded. Origin, FileSizeBytes, ContentType and
	// UploadName are populated from the request by parseAndValidateRequest; FileSizeBytes,
	// Base64Hash and MediaID are (re)set by doUpload once the body has been written to disk.
	MediaMetadata *types.MediaMetadata
	// Logger is the request-scoped logger. doUpload replaces it with an entry that also
	// carries the "MediaID" field once the media ID is known.
	Logger        *log.Entry
}

// uploadResponse defines the format of the JSON response
// https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload
type uploadResponse struct {
	// ContentURI is the URI of the uploaded media, serialised as "content_uri".
	// Callers in this file build it as "mxc://<server name>/<media ID>".
	ContentURI string `json:"content_uri"`
}

// Upload implements /upload
// This endpoint involves uploading potentially significant amounts of data to the homeserver.
// This implementation supports a configurable maximum file size limit in bytes. If a user tries to upload more than this, they will receive an error that their upload is too large.
// Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory.
// TODO: We should time out requests if they have not received any data within a configured timeout period.
54
func Upload(req *http.Request, cfg *config.Dendrite, db *storage.Database, activeThumbnailGeneration *types.ActiveThumbnailGeneration) util.JSONResponse {
R
Robert Swain 已提交
55 56 57 58 59
	r, resErr := parseAndValidateRequest(req, cfg)
	if resErr != nil {
		return *resErr
	}

60
	if resErr = r.doUpload(req.Body, cfg, db, activeThumbnailGeneration); resErr != nil {
61 62 63 64 65 66
		return *resErr
	}

	return util.JSONResponse{
		Code: 200,
		JSON: uploadResponse{
67
			ContentURI: fmt.Sprintf("mxc://%s/%s", cfg.Matrix.ServerName, r.MediaMetadata.MediaID),
68 69 70 71 72 73 74
		},
	}
}

// parseAndValidateRequest parses the incoming upload request to validate and extract
// all the metadata about the media being uploaded.
// Returns either an uploadRequest or an error formatted as a util.JSONResponse
75
func parseAndValidateRequest(req *http.Request, cfg *config.Dendrite) (*uploadRequest, *util.JSONResponse) {
76 77
	if req.Method != "POST" {
		return nil, &util.JSONResponse{
78
			Code: 405,
79 80 81 82 83 84
			JSON: jsonerror.Unknown("HTTP request method must be POST."),
		}
	}

	r := &uploadRequest{
		MediaMetadata: &types.MediaMetadata{
85
			Origin:        cfg.Matrix.ServerName,
86 87 88 89
			FileSizeBytes: types.FileSizeBytes(req.ContentLength),
			ContentType:   types.ContentType(req.Header.Get("Content-Type")),
			UploadName:    types.Filename(url.PathEscape(req.FormValue("filename"))),
		},
90
		Logger: util.GetLogger(req.Context()).WithField("Origin", cfg.Matrix.ServerName),
91 92
	}

93
	if resErr := r.Validate(*cfg.Media.MaxFileSizeBytes); resErr != nil {
94 95 96 97 98 99
		return nil, resErr
	}

	return r, nil
}

100
func (r *uploadRequest) doUpload(reqReader io.Reader, cfg *config.Dendrite, db *storage.Database, activeThumbnailGeneration *types.ActiveThumbnailGeneration) *util.JSONResponse {
101
	r.Logger.WithFields(log.Fields{
102 103
		"UploadName":    r.MediaMetadata.UploadName,
		"FileSizeBytes": r.MediaMetadata.FileSizeBytes,
104
		"ContentType":   r.MediaMetadata.ContentType,
105 106 107 108 109
	}).Info("Uploading file")

	// The file data is hashed and the hash is used as the MediaID. The hash is useful as a
	// method of deduplicating files to save storage, as well as a way to conduct
	// integrity checks on the file data in the repository.
110
	// Data is truncated to maxFileSizeBytes. Content-Length was reported as 0 < Content-Length <= maxFileSizeBytes so this is OK.
111
	hash, bytesWritten, tmpDir, err := fileutils.WriteTempFile(reqReader, *cfg.Media.MaxFileSizeBytes, cfg.Media.AbsBasePath)
112 113
	if err != nil {
		r.Logger.WithError(err).WithFields(log.Fields{
114
			"MaxFileSizeBytes": *cfg.Media.MaxFileSizeBytes,
115
		}).Warn("Error while transferring file")
116
		fileutils.RemoveDir(tmpDir, r.Logger)
117
		return &util.JSONResponse{
118
			Code: 400,
119
			JSON: jsonerror.Unknown("Failed to upload"),
120 121 122 123 124 125 126
		}
	}

	r.MediaMetadata.FileSizeBytes = bytesWritten
	r.MediaMetadata.Base64Hash = hash
	r.MediaMetadata.MediaID = types.MediaID(hash)

127 128
	r.Logger = r.Logger.WithField("MediaID", r.MediaMetadata.MediaID)

129
	r.Logger.WithFields(log.Fields{
130 131 132
		"Base64Hash":    r.MediaMetadata.Base64Hash,
		"UploadName":    r.MediaMetadata.UploadName,
		"FileSizeBytes": r.MediaMetadata.FileSizeBytes,
133
		"ContentType":   r.MediaMetadata.ContentType,
134 135 136 137
	}).Info("File uploaded")

	// check if we already have a record of the media in our database and if so, we can remove the temporary directory
	mediaMetadata, err := db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin)
138 139
	if err != nil {
		r.Logger.WithError(err).Error("Error querying the database.")
140 141
		resErr := jsonerror.InternalServerError()
		return &resErr
142 143 144
	}

	if mediaMetadata != nil {
145 146
		r.MediaMetadata = mediaMetadata
		fileutils.RemoveDir(tmpDir, r.Logger)
147
		return &util.JSONResponse{
148 149
			Code: 200,
			JSON: uploadResponse{
150
				ContentURI: fmt.Sprintf("mxc://%s/%s", cfg.Matrix.ServerName, r.MediaMetadata.MediaID),
151 152 153 154
			},
		}
	}

155
	if resErr := r.storeFileAndMetadata(tmpDir, cfg.Media.AbsBasePath, db, cfg.Media.ThumbnailSizes, activeThumbnailGeneration, cfg.Media.MaxThumbnailGenerators); resErr != nil {
156
		return resErr
R
Robert Swain 已提交
157 158
	}

159
	return nil
R
Robert Swain 已提交
160 161 162
}

// Validate validates the uploadRequest fields
163
func (r *uploadRequest) Validate(maxFileSizeBytes config.FileSizeBytes) *util.JSONResponse {
R
Robert Swain 已提交
164 165
	if r.MediaMetadata.FileSizeBytes < 1 {
		return &util.JSONResponse{
166
			Code: 411,
R
Robert Swain 已提交
167 168 169
			JSON: jsonerror.Unknown("HTTP Content-Length request header must be greater than zero."),
		}
	}
170
	if maxFileSizeBytes > 0 && r.MediaMetadata.FileSizeBytes > types.FileSizeBytes(maxFileSizeBytes) {
R
Robert Swain 已提交
171
		return &util.JSONResponse{
172
			Code: 413,
R
Robert Swain 已提交
173 174 175 176 177 178 179 180 181 182
			JSON: jsonerror.Unknown(fmt.Sprintf("HTTP Content-Length is greater than the maximum allowed upload size (%v).", maxFileSizeBytes)),
		}
	}
	// TODO: Check if the Content-Type is a valid type?
	if r.MediaMetadata.ContentType == "" {
		return &util.JSONResponse{
			Code: 400,
			JSON: jsonerror.Unknown("HTTP Content-Type request header must be set."),
		}
	}
183 184 185 186 187 188
	if r.MediaMetadata.UploadName[0] == '~' {
		return &util.JSONResponse{
			Code: 400,
			JSON: jsonerror.Unknown("File name must not begin with '~'."),
		}
	}
R
Robert Swain 已提交
189 190 191 192 193 194 195
	// TODO: Validate filename - what are the valid characters?
	if r.MediaMetadata.UserID != "" {
		// TODO: We should put user ID parsing code into gomatrixserverlib and use that instead
		//       (see https://github.com/matrix-org/gomatrixserverlib/blob/3394e7c7003312043208aa73727d2256eea3d1f6/eventcontent.go#L347 )
		//       It should be a struct (with pointers into a single string to avoid copying) and
		//       we should update all refs to use UserID types rather than strings.
		// https://github.com/matrix-org/synapse/blob/v0.19.2/synapse/types.py#L92
196
		if _, _, err := gomatrixserverlib.SplitID('@', string(r.MediaMetadata.UserID)); err != nil {
R
Robert Swain 已提交
197 198 199 200 201 202 203 204
			return &util.JSONResponse{
				Code: 400,
				JSON: jsonerror.BadJSON("user id must be in the form @localpart:domain"),
			}
		}
	}
	return nil
}
205

206 207 208 209 210
// storeFileAndMetadata moves the temporary file to its final path based on metadata and stores the metadata in the database
// See getPathFromMediaMetadata in fileutils for details of the final path.
// The order of operations is important as it avoids metadata entering the database before the file
// is ready, and if we fail to move the file, it never gets added to the database.
// Returns a util.JSONResponse error and cleans up directories in case of error.
211
func (r *uploadRequest) storeFileAndMetadata(tmpDir types.Path, absBasePath config.Path, db *storage.Database, thumbnailSizes []config.ThumbnailSize, activeThumbnailGeneration *types.ActiveThumbnailGeneration, maxThumbnailGenerators int) *util.JSONResponse {
212 213 214 215 216
	finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger)
	if err != nil {
		r.Logger.WithError(err).Error("Failed to move file.")
		return &util.JSONResponse{
			Code: 400,
217
			JSON: jsonerror.Unknown("Failed to upload"),
218 219 220 221 222 223 224 225 226 227 228
		}
	}
	if duplicate {
		r.Logger.WithField("dst", finalPath).Info("File was stored previously - discarding duplicate")
	}

	if err = db.StoreMediaMetadata(r.MediaMetadata); err != nil {
		r.Logger.WithError(err).Warn("Failed to store metadata")
		// If the file is a duplicate (has the same hash as an existing file) then
		// there is valid metadata in the database for that file. As such we only
		// remove the file if it is not a duplicate.
E
Erik Johnston 已提交
229
		if !duplicate {
R
Robert Swain 已提交
230
			fileutils.RemoveDir(types.Path(path.Dir(string(finalPath))), r.Logger)
231 232 233
		}
		return &util.JSONResponse{
			Code: 400,
234
			JSON: jsonerror.Unknown("Failed to upload"),
235 236 237
		}
	}

238 239 240 241 242 243 244 245 246 247
	go func() {
		busy, err := thumbnailer.GenerateThumbnails(finalPath, thumbnailSizes, r.MediaMetadata, activeThumbnailGeneration, maxThumbnailGenerators, db, r.Logger)
		if err != nil {
			r.Logger.WithError(err).Warn("Error generating thumbnails")
		}
		if busy {
			r.Logger.Warn("Maximum number of active thumbnail generators reached. Skipping pre-generation.")
		}
	}()

248 249
	return nil
}