diff --git a/tools/licenses/lib/filesystem.dart b/tools/licenses/lib/filesystem.dart
index 1183afb913408ef85cd49cac4920c9b2b77bc101..65ea72d7ec26641ad88d309b07b53345018511bf 100644
--- a/tools/licenses/lib/filesystem.dart
+++ b/tools/licenses/lib/filesystem.dart
@@ -10,6 +10,7 @@ import 'package:path/path.dart' as path;
 import 'package:archive/archive.dart' as a;
 
 import 'cache.dart';
+import 'limits.dart';
 
 enum FileType {
   binary, // won't have its own license block
@@ -38,6 +39,22 @@ bool matchesSignature(List bytes, List signature) {
   return true;
 }
 
+/// Returns whether [signature] occurs in [bytes] at a start index below [limit].
+/// A -1 in [signature] matches any byte; an empty [signature] matches if limit > 0.
+bool hasSubsequence(List bytes, List signature, int limit) {
+  // Later starts are out of scope or cannot fit the whole signature.
+  final int lastStart = bytes.length - signature.length;
+  for (int index = 0; index < limit && index <= lastStart; index += 1) {
+    int offset = 0;
+    while (offset < signature.length &&
+           (signature[offset] == -1 || bytes[index + offset] == signature[offset]))
+      offset += 1;
+    if (offset == signature.length)
+      return true;
+  }
+  return false;
+}
+
 const String kMultiLicenseFileHeader = 'Notices for files contained in';
 
 bool isMultiLicenseNotice(Reader reader) {
@@ -50,7 +67,8 @@ FileType identifyFile(String name, Reader reader) {
   if ((path.split(name).reversed.take(6).toList().reversed.join('/') == 'third_party/icu/source/extra/uconv/README') || // This specific ICU README isn't in UTF-8.
       (path.split(name).reversed.take(6).toList().reversed.join('/') == 'third_party/icu/source/samples/uresb/sr.txt') || // This specific sample contains non-UTF-8 data (unlike other sr.txt files).
       (path.split(name).reversed.take(2).toList().reversed.join('/') == 'builds/detect.mk') || // This specific freetype sample contains non-UTF-8 data (unlike other .mk files).
- (path.split(name).reversed.take(4).toList().reversed.join('/') == 'third_party/freetype2/docs/FTL.TXT')) // This file has a copyright symbol in Latin1 in it + (path.split(name).reversed.take(4).toList().reversed.join('/') == 'third_party/freetype2/docs/FTL.TXT') || // This file has a copyright symbol in Latin1 in it + (path.split(name).reversed.take(3).toList().reversed.join('/') == 'third_party/cares/cares.rc')) // This file has a copyright symbol in Latin1 in it return FileType.latin1Text; if (path.split(name).reversed.take(6).toList().reversed.join('/') == 'dart/runtime/tests/vm/dart/bad_snapshot' || // Not any particular format path.split(name).reversed.take(8).toList().reversed.join('/') == 'third_party/android_tools/ndk/sources/cxx-stl/stlport/src/stlport.rc') // uses the word "copyright" but doesn't have a copyright header @@ -61,6 +79,12 @@ FileType identifyFile(String name, Reader reader) { if (matchesSignature(bytes, [0x00, 0x05, 0x16, 0x07, 0x00, 0x02, 0x00, 0x00, 0x4d, 0x61, 0x63, 0x20, 0x4f, 0x53, 0x20, 0x58])) return FileType.metadata; // The ._* files in Mac OS X archives that gives icons and stuff } + if (path.split(name).contains('cairo')) { + bytes ??= reader(); + // "Copyright " + if (hasSubsequence(bytes, [0x43, 0x6f, 0x70, 0x79, 0x72, 0x69, 0x67, 0x68, 0x74, 0x20, 0xA9, 0x20], kMaxSize)) + return FileType.latin1Text; + } switch (base) { // Build files case 'DEPS': return FileType.text; @@ -74,6 +98,7 @@ FileType identifyFile(String name, Reader reader) { case 'Changes': return FileType.text; case 'change.log': return FileType.text; case 'ChangeLog': return FileType.text; + case 'CHANGES.0': return FileType.latin1Text; case 'README': return FileType.text; case 'TODO': return FileType.text; case 'NEWS': return FileType.text; @@ -87,6 +112,7 @@ FileType identifyFile(String name, Reader reader) { case 'ECLIPSE_.RSA': return FileType.binary; // Binary data files case 'tzdata': return FileType.binary; + case 'compressed_atrace_data.txt': return 
FileType.binary; // Source files that don't use UTF-8 case 'Messages_de_DE.properties': // has a few non-ASCII characters they forgot to escape (from gnu-libstdc++) case 'mmx_blendtmp.h': // author name in comment contains latin1 (mesa) @@ -140,7 +166,14 @@ FileType identifyFile(String name, Reader reader) { // LLVM bitcode case '.bc': return FileType.binary; // Python code - case '.py': return FileType.text; + case '.py': + bytes ??= reader(); + // # -*- coding: Latin-1 -*- + if (matchesSignature(bytes, [0x23, 0x20, 0x2d, 0x2a, 0x2d, 0x20, 0x63, 0x6f, 0x64, + 0x69, 0x6e, 0x67, 0x3a, 0x20, 0x4c, 0x61, 0x74, 0x69, + 0x6e, 0x2d, 0x31, 0x20, 0x2d, 0x2a, 0x2d])) + return FileType.latin1Text; + return FileType.text; case '.pyc': return FileType.binary; // compiled Python bytecode // Machine code case '.so': return FileType.binary; // ELF shared object diff --git a/tools/licenses/lib/licenses.dart b/tools/licenses/lib/licenses.dart index 441bc5cd7ee1e2ca35ee63a4b855dd59807cf45e..7584ff9c63d2b1a46a1136696f958584dc96ecb3 100644 --- a/tools/licenses/lib/licenses.dart +++ b/tools/licenses/lib/licenses.dart @@ -7,9 +7,7 @@ import 'dart:io' as system; import 'cache.dart'; import 'patterns.dart'; - -// TODO(ianh): vastly increase this before checkin -const int kMaxSize = 5 * 1024; // only look for copyrights and licenses at the top of the file +import 'limits.dart'; class FetchedContentsOf extends Key { FetchedContentsOf(dynamic value) : super(value); } @@ -26,7 +24,10 @@ LicenseType convertLicenseNameToType(String name) { return LicenseType.bsd; case 'LICENSE-LGPL-2': case 'LICENSE-LGPL-2.1': + case 'COPYING-LGPL-2.1': return LicenseType.lgpl; + case 'COPYING-GPL-3': + return LicenseType.gpl; case 'FTL.TXT': return LicenseType.freetype; case 'zlib.h': @@ -40,6 +41,7 @@ LicenseType convertLicenseNameToType(String name) { case 'OpenSSL': return LicenseType.openssl; case 'LICENSE.MPLv2': + case 'COPYING-MPL-1.1': return LicenseType.mpl; // common file names that don't say what 
the type is case 'COPYING': @@ -55,6 +57,7 @@ LicenseType convertLicenseNameToType(String name) { case 'NOTICE.txt': case 'Copyright': case 'copyright': + case 'license.txt': return LicenseType.unknown; // particularly weird file names case 'LICENSE-APPLE': @@ -64,6 +67,7 @@ LicenseType convertLicenseNameToType(String name) { case 'javolution.license.txt': case 'libyaml-license.txt': case 'license.patch': + case 'license.rst': case 'mh-bsd-gcc': case 'pivotal.labs.license.txt': return LicenseType.unknown; diff --git a/tools/licenses/lib/limits.dart b/tools/licenses/lib/limits.dart new file mode 100644 index 0000000000000000000000000000000000000000..cd3a3cf37d33fe3df0253edc25e5b1aab1a89d40 --- /dev/null +++ b/tools/licenses/lib/limits.dart @@ -0,0 +1,6 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// TODO(ianh): vastly increase this before checkin +const int kMaxSize = 5 * 1024; // only look for copyrights and licenses at the top of the file