diff --git a/pcre2/132html b/pcre2/132html new file mode 100755 index 000000000..1bd62ba24 --- /dev/null +++ b/pcre2/132html @@ -0,0 +1,314 @@ +#! /usr/bin/perl -w + +# Script to turn PCRE2 man pages into HTML + + +# Subroutine to handle font changes and other escapes + +sub do_line { +my($s) = $_[0]; + +$s =~ s/ +$s =~ s/>/>/g; +$s =~ s"\\fI(.*?)\\f[RP]"$1"g; +$s =~ s"\\fB(.*?)\\f[RP]"$1"g; +$s =~ s"\\e"\\"g; +$s =~ s/(?<=Copyright )\(c\)/©/g; +$s; +} + +# Subroutine to ensure not in a paragraph + +sub end_para { +if ($inpara) + { + print TEMP "\n" if ($inpre); + print TEMP "

\n"; + } +$inpara = $inpre = 0; +$wrotetext = 0; +} + +# Subroutine to start a new paragraph + +sub new_para { +&end_para(); +print TEMP "

\n"; +$inpara = 1; +} + + +# Main program + +$innf = 0; +$inpara = 0; +$inpre = 0; +$wrotetext = 0; +$toc = 0; +$ref = 1; + +while ($#ARGV >= 0 && $ARGV[0] =~ /^-/) + { + $toc = 1 if $ARGV[0] eq "-toc"; + shift; + } + +# Initial output to STDOUT + +print < + +$ARGV[0] specification + + +

$ARGV[0] man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+End + +print "

\n" if ($toc); + +# Copy the remainder to the standard output + +close(TEMP); +open(TEMP, "/tmp/$$") || die "Can't open /tmp/$$ for input\n"; + +print while (); + +print < +Return to the PCRE2 index page. +

+End + +close(TEMP); +unlink("/tmp/$$"); + +# End diff --git a/pcre2/AUTHORS b/pcre2/AUTHORS new file mode 100644 index 000000000..f001cb770 --- /dev/null +++ b/pcre2/AUTHORS @@ -0,0 +1,36 @@ +THE MAIN PCRE2 LIBRARY CODE +--------------------------- + +Written by: Philip Hazel +Email local part: Philip.Hazel +Email domain: gmail.com + +University of Cambridge Computing Service, +Cambridge, England. + +Copyright (c) 1997-2020 University of Cambridge +All rights reserved + + +PCRE2 JUST-IN-TIME COMPILATION SUPPORT +-------------------------------------- + +Written by: Zoltan Herczeg +Email local part: hzmester +Emain domain: freemail.hu + +Copyright(c) 2010-2020 Zoltan Herczeg +All rights reserved. + + +STACK-LESS JUST-IN-TIME COMPILER +-------------------------------- + +Written by: Zoltan Herczeg +Email local part: hzmester +Emain domain: freemail.hu + +Copyright(c) 2009-2020 Zoltan Herczeg +All rights reserved. + +#### diff --git a/pcre2/CMakeLists.txt b/pcre2/CMakeLists.txt index e86bfe5a1..9091c687d 100644 --- a/pcre2/CMakeLists.txt +++ b/pcre2/CMakeLists.txt @@ -92,11 +92,13 @@ # library versioning. # 2020-04-25 Carlo added function check for mkostemp used in ProtExecAllocator # 2020-04-28 PH added function check for memfd_create based on Carlo's patch +# 2020-05-25 PH added a check for Intel CET +# 2020-12-03 PH altered the definition of pcre2test as suggested by Daniel PROJECT(PCRE2 C) -# Increased minimum to 2.8.0 to support newer add_test features. -CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0) +# Increased minimum to 2.8.5 to support GNUInstallDirs. +CMAKE_MINIMUM_REQUIRED(VERSION 2.8.5) # Set policy CMP0026 to avoid warnings for the use of LOCATION in # GET_TARGET_PROPERTY. This should no longer be required. 
@@ -108,7 +110,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0) LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) -SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -I${PROJECT_SOURCE_DIR}/src") +SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -I\"${PROJECT_SOURCE_DIR}/src\"") # external packages FIND_PACKAGE( BZip2 QUIET ) @@ -123,6 +125,7 @@ INCLUDE(CheckFunctionExists) INCLUDE(CheckSymbolExists) INCLUDE(CheckIncludeFile) INCLUDE(CheckTypeSize) +INCLUDE(GNUInstallDirs) # for CMAKE_INSTALL_LIBDIR CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H) CHECK_INCLUDE_FILE(stdint.h HAVE_STDINT_H) @@ -132,11 +135,11 @@ CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H) CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H) CHECK_INCLUDE_FILE(windows.h HAVE_WINDOWS_H) -CHECK_FUNCTION_EXISTS(bcopy HAVE_BCOPY) -CHECK_FUNCTION_EXISTS(memfd_create HAVE_MEMFD_CREATE) -CHECK_FUNCTION_EXISTS(memmove HAVE_MEMMOVE) -CHECK_FUNCTION_EXISTS(secure_getenv HAVE_SECURE_GETENV) -CHECK_FUNCTION_EXISTS(strerror HAVE_STRERROR) +CHECK_SYMBOL_EXISTS(bcopy "strings.h" HAVE_BCOPY) +CHECK_SYMBOL_EXISTS(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE) +CHECK_SYMBOL_EXISTS(memmove "string.h" HAVE_MEMMOVE) +CHECK_SYMBOL_EXISTS(secure_getenv "stdlib.h" HAVE_SECURE_GETENV) +CHECK_SYMBOL_EXISTS(strerror "string.h" HAVE_STRERROR) set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror") @@ -146,6 +149,24 @@ CHECK_C_SOURCE_COMPILES( ) set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS}) +# Check whether Intel CET is enabled, and if so, adjust compiler flags. This +# code was written by PH, trying to imitate the logic from the autotools +# configuration. 
+ +CHECK_C_SOURCE_COMPILES( + "#ifndef __CET__ + #error CET is not enabled + #endif + int main() { return 0; }" + INTEL_CET_ENABLED +) + +IF (INTEL_CET_ENABLED) + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk") +ENDIF(INTEL_CET_ENABLED) + + + # User-configurable options # # Note: CMakeSetup displays these in alphabetical order, regardless of @@ -445,7 +466,7 @@ foreach(configure_line ${configure_lines}) foreach(_substitution_variable ${SEARCHED_VARIABLES}) string(TOUPPER ${_substitution_variable} _substitution_variable_upper) if (NOT ${_substitution_variable_upper}) - string(REGEX MATCH "m4_define\\(${_substitution_variable}, *\\[(.*)\\]" MACTHED_STRING ${configure_line}) + string(REGEX MATCH "m4_define\\(${_substitution_variable}, *\\[(.*)\\]" MATCHED_STRING ${configure_line}) if (CMAKE_MATCH_1) set(${_substitution_variable_upper} ${CMAKE_MATCH_1}) endif() @@ -475,14 +496,23 @@ CONFIGURE_FILE(src/pcre2.h.in ${PROJECT_BINARY_DIR}/pcre2.h @ONLY) +# Make sure to not link debug libs +# against release libs and vice versa +IF(WIN32) + SET(CMAKE_DEBUG_POSTFIX "d") +ENDIF(WIN32) + # Generate pkg-config files SET(PACKAGE_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}") SET(prefix ${CMAKE_INSTALL_PREFIX}) SET(exec_prefix "\${prefix}") -SET(libdir "\${exec_prefix}/lib") +SET(libdir "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}") SET(includedir "\${prefix}/include") +IF(WIN32 AND (CMAKE_BUILD_TYPE MATCHES Debug)) + SET(LIB_POSTFIX ${CMAKE_DEBUG_POSTFIX}) +ENDIF() CONFIGURE_FILE(libpcre2-posix.pc.in libpcre2-posix.pc @ONLY) SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-posix.pc") @@ -622,11 +652,6 @@ IF(MSVC) ENDIF(MSVC) SET(CMAKE_INCLUDE_CURRENT_DIR 1) -# needed to make sure to not link debug libs -# against release libs and vice versa -IF(WIN32) - SET(CMAKE_DEBUG_POSTFIX "d") -ENDIF(WIN32) SET(targets) @@ -827,7 +852,9 @@ if test \"$?\" != \"0\"; then exit 1; fi \@echo off setlocal SET srcdir=\"${winsrc}\" -SET pcre2test=\"${winexe}\" +# The next line was 
replaced by the following one after a user comment. +# SET pcre2test=\"${winexe}\" +SET pcre2test=\"${winbin}\\pcre2test.exe\" if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\" call %srcdir%\\RunTest.Bat if errorlevel 1 exit /b 1 @@ -863,9 +890,9 @@ SET(CMAKE_INSTALL_ALWAYS 1) INSTALL(TARGETS ${targets} RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib) -INSTALL(FILES ${pkg_config_files} DESTINATION lib/pkgconfig) + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) +INSTALL(FILES ${pkg_config_files} DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2-config" DESTINATION bin # Set 0755 permissions @@ -916,7 +943,7 @@ IF(PCRE2_SHOW_REPORT) ENDIF(CMAKE_C_FLAGS) MESSAGE(STATUS "") MESSAGE(STATUS "") - MESSAGE(STATUS "PCRE2 configuration summary:") + MESSAGE(STATUS "PCRE2-${PCRE2_MAJOR}.${PCRE2_MINOR} configuration summary:") MESSAGE(STATUS "") MESSAGE(STATUS " Install prefix .................. : ${CMAKE_INSTALL_PREFIX}") MESSAGE(STATUS " C compiler ...................... : ${CMAKE_C_COMPILER}") diff --git a/pcre2/COPYING b/pcre2/COPYING new file mode 100644 index 000000000..c233950f6 --- /dev/null +++ b/pcre2/COPYING @@ -0,0 +1,5 @@ +PCRE2 LICENCE + +Please see the file LICENCE in the PCRE2 distribution for licensing details. + +End diff --git a/pcre2/ChangeLog b/pcre2/ChangeLog new file mode 100644 index 000000000..2e20bdbb6 --- /dev/null +++ b/pcre2/ChangeLog @@ -0,0 +1,2434 @@ +Change Log for PCRE2 +-------------------- + +Version 10.36-RC1 04-December-2020 +---------------------------------- + +1. Add CET_CFLAGS so that when Intel CET is enabled, pass -mshstk to +compiler. This fixes https://bugs.exim.org/show_bug.cgi?id=2578. Patch for +Makefile.am and configure.ac by H.J. Lu. Equivalent patch for CMakeLists.txt +invented by PH. + +2. 
Fix infinite loop when a single byte newline is searched in JIT when +invalid utf8 mode is enabled. + +3. Updated CMakeLists.txt with patch from Wolfgang Stöggl (Bugzilla #2584): + + - Include GNUInstallDirs and use ${CMAKE_INSTALL_LIBDIR} instead of hardcoded + lib. This allows differentiation between lib and lib64. + CMAKE_INSTALL_LIBDIR is used for installation of libraries and also for + pkgconfig file generation. + + - Add the version of PCRE2 to the configuration summary like ./configure + does. + + - Fix typo: MACTHED_STRING->MATCHED_STRING + +4. Updated CMakeLists.txt with another patch from Wolfgang Stöggl (Bugzilla +#2588): + + - Add escaped double quotes around include directory in CMakeLists.txt to + allow spaces in directory names. + + - This fixes a cmake error, if the path of the pcre2 source contains a space. + +5. Updated CMakeLists.txt with a patch from B. Scott Michel: CMake's +documentation suggests using CHECK_SYMBOL_EXISTS over CHECK_FUNCTION_EXISTS. +Moreover, these functions come from specific header files, which need to be +specified (and, thankfully, are the same on both the Linux and WinXX +platforms.) + +6. Added a (uint32_t) cast to prevent a compiler warning in pcre2_compile.c. + +7. Applied a patch from Wolfgang Stöggl (Bugzilla #2600) to fix postfix for +debug Windows builds using CMake. This also updated configure so that it +generates *.pc files and pcre2-config with the same content, as in the past. + +8. If a pattern ended with (?(VERSION=n.d where n is any number but d is just a +single digit, the code unit beyond d was being read (i.e. there was a read +buffer overflow). Fixes ClusterFuzz 23779. + +9. After the rework in r1235, certain character ranges were incorrectly +handled by an optimization in JIT. Furthermore a wrong offset was used to +read a value from a buffer which could lead to memory overread. + +10. 
Unnoticed for many years was the fact that delimiters other than / in the +testinput1 and testinput4 files could cause incorrect behaviour when these +files were processed by perltest.sh. There were several tests that used quotes +as delimiters, and it was just luck that they didn't go wrong with perltest.sh. +All the patterns in testinput1 and testinput4 now use / as their delimiter. +This fixes Bugzilla #2641. + +11. Perl has started to give an error for \K within lookarounds (though there +are cases where it doesn't). PCRE2 still allows this, so the tests that include +this case have been moved from test 1 to test 2. + +12. Further to 10 above, pcre2test has been updated to detect and grumble if a +delimiter other than / is used after #perltest. + +13. Fixed a bug with PCRE2_MATCH_INVALID_UTF in 8-bit mode when PCRE2_CASELESS +was set and PCRE2_NO_START_OPTIMIZE was not set. The optimization for finding +the start of a match was not resetting correctly after a failed match on the +first valid fragment of the subject, possibly causing incorrect "no match" +returns on subsequent fragments. For example, the pattern /A/ failed to match +the subject \xe5A. Fixes Bugzilla #2642. + +14. Fixed a bug in character set matching when JIT is enabled and both unicode +scripts and unicode classes are present at the same time. + +15. Added GNU grep's -m (aka --max-count) option to pcre2grep. + +16. Refactored substitution processing in pcre2grep strings, both for the -O +option and when dealing with callouts. There is now a single function that +handles $ expansion in all cases (instead of multiple copies of almost +identical code). This means that the same escape sequences are available +everywhere, which was not previously the case. At the same time, the escape +sequences $x{...} and $o{...} have been introduced, to allow for characters +whose code points are greater than 255 in Unicode mode. + +17. Applied the patch from Bugzilla #2628 to RunGrepTest. 
This does an explicit +test for a version of sed that can handle binary zero, instead of assuming that +any Linux version will work. Later: replaced $(...) by `...` because not all +shells recognize the former. + +18. Fixed a word boundary check bug in JIT when partial matching is enabled. + +19. Fix ARM64 compilation warning in JIT. Patch by Carlo. + +20. A bug in the RunTest script meant that if the first part of test 2 failed, +the failure was not reported. + +21. Test 2 was failing when run from a directory other than the source +directory. This failure was previously missed in RunTest because of 20 above. +Fixes added to both RunTest and RunTest.bat. + +22. Patch to CMakeLists.txt from Daniel to fix problem with testing under +Windows. + + +Version 10.35 09-May-2020 +--------------------------- + +1. Use PCRE2_MATCH_EMPTY flag to detect empty matches in JIT. + +2. Fix ARMv5 JIT improper handling of labels right after a constant pool. + +3. A JIT bug is fixed which allowed to read the fields of the compiled +pattern before its existence is checked. + +4. Back in the PCRE1 day, capturing groups that contained recursive back +references to themselves were made atomic (version 8.01, change 18) because +after the end a repeated group, the captured substrings had their values from +the final repetition, not from an earlier repetition that might be the +destination of a backtrack. This feature was documented, and was carried over +into PCRE2. However, it has now been realized that the major refactoring that +was done for 10.30 has made this atomicizing unnecessary, and it is confusing +when users are unaware of it, making some patterns appear not to be working as +expected. Capture values of recursive back references in repeated groups are +now correctly backtracked, so this unnecessary restriction has been removed. + +5. Added PCRE2_SUBSTITUTE_LITERAL. + +6. Avoid some VS compiler warnings. + +7. Added PCRE2_SUBSTITUTE_MATCHED. + +8. 
Added (?* and (?<* as synonyms for (*napla: and (*naplb: to match another +regex engine. The Perl regex folks are aware of this usage and have made a note +about it. + +9. When an assertion is repeated, PCRE2 used to limit the maximum repetition to +1, believing that repeating an assertion is pointless. However, if a positive +assertion contains capturing groups, repetition can be useful. In any case, an +assertion could always be wrapped in a repeated group. The only restriction +that is now imposed is that an unlimited maximum is changed to one more than +the minimum. + +10. Fix *THEN verbs in lookahead assertions in JIT. + +11. Added PCRE2_SUBSTITUTE_REPLACEMENT_ONLY. + +12. The JIT stack should be freed when the low-level stack allocation fails. + +13. In pcre2grep, if the final line in a scanned file is output but does not +end with a newline sequence, add a newline according to the --newline setting. + +14. (?(DEFINE)...) groups were not being handled correctly when checking for +the fixed length of a lookbehind assertion. Such a group within a lookbehind +should be skipped, as it does not contribute to the length of the group. +Instead, the (DEFINE) group was being processed, and if at the end of the +lookbehind, that end was not correctly recognized. Errors such as "lookbehind +assertion is not fixed length" and also "internal error: bad code value in +parsed_skip()" could result. + +15. Put a limit of 1000 on recursive calls in pcre2_study() when searching +nested groups for starting code units, in order to avoid stack overflow issues. +If the limit is reached, it just gives up trying for this optimization. + +16. The control verb chain list must always be restored when exiting from a +recurse function in JIT. + +17. Fix a crash which occurs when the character type of an invalid UTF +character is decoded in JIT. + +18. 
Changes in many areas of the code so that when Unicode is supported and +PCRE2_UCP is set without PCRE2_UTF, Unicode character properties are used for +upper/lower case computations on characters whose code points are greater than +127. + +19. The function for checking UTF-16 validity was returning an incorrect offset +for the start of the error when a high surrogate was not followed by a valid +low surrogate. This caused incorrect behaviour, for example when +PCRE2_MATCH_INVALID_UTF was set and a match started immediately following the +invalid high surrogate, such as /aa/ matching "\x{d800}aa". + +20. If a DEFINE group immediately preceded a lookbehind assertion, the pattern +could be mis-compiled and therefore not match correctly. This is the example +that found this: /(?(DEFINE)(?bar))(? has been raised to +50, (b) the new --om-capture option changes the limit, (c) an error is raised +if -o asks for a group that is above the limit. + +12. The quantifier {1} was always being ignored, but this is incorrect when it +is made possessive and applied to an item in parentheses, because a +parenthesized item may contain multiple branches or other backtracking points, +for example /(a|ab){1}+c/ or /(a+){1}+a/. + +13. For partial matches, pcre2test was always showing the maximum lookbehind +characters, flagged with "<", which is misleading when the lookbehind didn't +actually look behind the start (because it was later in the pattern). Showing +all consulted preceding characters for partial matches is now controlled by the +existing "allusedtext" modifier and, as for complete matches, this facility is +available only for non-JIT matching, because JIT does not maintain the first +and last consulted characters. + +14. DFA matching (using pcre2_dfa_match()) was not recognising a partial match +if the end of the subject was encountered in a lookahead (conditional or +otherwise), an atomic group, or a recursion. + +15. 
Give error if pcre2test -t, -T, -tm or -TM is given an argument of zero. + +16. Check for integer overflow when computing lookbehind lengths. Fixes +Clusterfuzz issue 15636. + +17. Implemented non-atomic positive lookaround assertions. + +18. If a lookbehind contained a lookahead that contained another lookbehind +within it, the nested lookbehind was not correctly processed. For example, if +/(?<=(?=(?<=a)))b/ was matched to "ab" it gave no match instead of matching +"b". + +19. Implemented pcre2_get_match_data_size(). + +20. Two alterations to partial matching: + + (a) The definition of a partial match is slightly changed: if a pattern + contains any lookbehinds, an empty partial match may be given, because this + is another situation where adding characters to the current subject can + lead to a full match. Example: /c*+(?<=[bc])/ with subject "ab". + + (b) Similarly, if a pattern could match an empty string, an empty partial + match may be given. Example: /(?![ab]).*/ with subject "ab". This case + applies only to PCRE2_PARTIAL_HARD. + + (c) An empty string partial hard match can be returned for \z and \Z as it + is documented that they shouldn't match. + +21. A branch that started with (*ACCEPT) was not being recognized as one that +could match an empty string. + +22. Corrected pcre2_set_character_tables() tables data type: was const unsigned +char * instead of const uint8_t *, as generated by pcre2_maketables(). + +23. Upgraded to Unicode 12.1.0. + +24. Add -jitfast command line option to pcre2test (to make all the jit options +available directly). + +25. Make pcre2test -C show if libreadline or libedit is supported. + +26. If the length of one branch of a group exceeded 65535 (the maximum value +that is remembered as a minimum length), the whole group's length was +incorrectly recorded as 65535, leading to incorrect "no match" when start-up +optimizations were in force. + +27. 
The "rightmost consulted character" value was not always correct; in +particular, if a pattern ended with a negative lookahead, characters that were +inspected in that lookahead were not included. + +28. Add the pcre2_maketables_free() function. + +29. The start-up optimization that looks for a unique initial matching +code unit in the interpretive engines uses memchr() in 8-bit mode. When the +search is caseless, it was doing so inefficiently, which ended up slowing down +the match drastically when the subject was very long. The revised code (a) +remembers if one case is not found, so it never repeats the search for that +case after a bumpalong and (b) when one case has been found, it searches only +up to that position for an earlier occurrence of the other case. This fix +applies to both interpretive pcre2_match() and to pcre2_dfa_match(). + +30. While scanning to find the minimum length of a group, if any branch has +minimum length zero, there is no need to scan any subsequent branches (a small +compile-time performance improvement). + +31. Installed a .gitignore file on a user's suggestion. When using the svn +repository with git (through git svn) this helps keep it tidy. + +32. Add underflow check in JIT which may occur when the value of subject +string pointer is close to 0. + +33. Arrange for classes such as [Aa] which contain just the two cases of the +same character, to be treated as a single caseless character. This causes the +first and required code unit optimizations to kick in where relevant. + +34. Improve the bitmap of starting bytes for positive classes that include wide +characters, but no property types, in UTF-8 mode. Previously, on encountering +such a class, the bits for all bytes greater than \xc4 were set, thus +specifying any character with codepoint >= 0x100. Now the only bits that are +set are for the relevant bytes that start the wide characters. This can give a +noticeable performance improvement. + +35. 
If the bitmap of starting code units contains only 1 or 2 bits, replace it +with a single starting code unit (1 bit) or a caseless single starting code +unit if the two relevant characters are case-partners. This is particularly +relevant to the 8-bit library, though it applies to all. It can give a +performance boost for patterns such as [Ww]ord and (word|WORD). However, this +optimization doesn't happen if there is a "required" code unit of the same +value (because the search for a "required" code unit starts at the match start +for non-unique first code unit patterns, but after a unique first code unit, +and patterns such as a*a need the former action). + +36. Small patch to pcre2posix.c to set the erroroffset field to -1 immediately +after a successful compile, instead of at the start of matching to avoid a +sanitizer complaint (regexec is supposed to be thread safe). + +37. Add NEON vectorization to JIT to speed up matching of first character and +pairs of characters on ARM64 CPUs. + +38. If a non-ASCII character was the first in a starting assertion in a +caseless match, the "first code unit" optimization did not get the casing +right, and the assertion failed to match a character in the other case if it +did not start with the same code unit. + +39. Fixed the incorrect computation of jump sizes on x86 CPUs in JIT. A masking +operation was incorrectly removed in r1136. Reported by Ralf Junker. + + +Version 10.33 16-April-2019 +--------------------------- + +1. Added "allvector" to pcre2test to make it easy to check the part of the +ovector that shouldn't be changed, in particular after substitute and failed or +partial matches. + +2. Fix subject buffer overread in JIT when UTF is disabled and \X or \R has +a greater than 1 fixed quantifier. This issue was found by Yunho Kim. + +3. Added support for callouts from pcre2_substitute(). 
After 10.33-RC1, but +prior to release, fixed a bug that caused a crash if pcre2_substitute() was +called with a NULL match context. + +4. The POSIX functions are now all called pcre2_regcomp() etc., with wrapper +functions that use the standard POSIX names. However, in pcre2posix.h the POSIX +names are defined as macros. This should help avoid linking with the wrong +library in some environments while still exporting the POSIX names for +pre-existing programs that use them. (The Debian alternative names are also +defined as macros, but not documented.) + +5. Fix an xclass matching issue in JIT. + +6. Implement PCRE2_EXTRA_ESCAPED_CR_IS_LF (see Bugzilla 2315). + +7. Implement the Perl 5.28 experimental alphabetic names for atomic groups and +lookaround assertions, for example, (*pla:...) and (*atomic:...). These are +characterized by a lower case letter following (* and to simplify coding for +this, the character tables created by pcre2_maketables() were updated to add a +new "is lower case letter" bit. At the same time, the now unused "is +hexadecimal digit" bit was removed. The default tables in +src/pcre2_chartables.c.dist are updated. + +8. Implement the new Perl "script run" features (*script_run:...) and +(*atomic_script_run:...) aka (*sr:...) and (*asr:...). + +9. Fixed two typos in change 22 for 10.21, which added special handling for +ranges such as a-z in EBCDIC environments. The original code probably never +worked, though there were no bug reports. + +10. Implement PCRE2_COPY_MATCHED_SUBJECT for pcre2_match() (including JIT via +pcre2_match()) and pcre2_dfa_match(), but *not* the pcre2_jit_match() fast +path. Also, when a match fails, set the subject field in the match data to NULL +for tidiness - none of the substring extractors should reference this after +match failure. + +11. If a pattern started with a subroutine call that had a quantifier with a +minimum of zero, an incorrect "match must start with this character" could be +recorded. 
Example: /(?&xxx)*ABC(?XYZ)/ would (incorrectly) expect 'A' to +be the first character of a match. + +12. The heap limit checking code in pcre2_dfa_match() could suffer from +overflow if the heap limit was set very large. This could cause incorrect "heap +limit exceeded" errors. + +13. Add "kibibytes" to the heap limit output from pcre2test -C to make the +units clear. + +14. Add a call to pcre2_jit_free_unused_memory() in pcre2grep, for tidiness. + +15. Updated the VMS-specific code in pcre2test on the advice of a VMS user. + +16. Removed the unnecessary inclusion of stdint.h (or inttypes.h) from +pcre2_internal.h as it is now included by pcre2.h. Also, change 17 for 10.32 +below was unnecessarily complicated, as inttypes.h is a Standard C header, +which is defined to be a superset of stdint.h. Instead of conditionally +including stdint.h or inttypes.h, pcre2.h now unconditionally includes +inttypes.h. This supports environments that do not have stdint.h but do have +inttypes.h, which are known to exist. A note in the autotools documentation +says (November 2018) that there are none known that are the other way round. + +17. Added --disable-percent-zt to "configure" (and equivalent to CMake) to +forcibly disable the use of %zu and %td in formatting strings because there is +at least one version of VMS that claims to be C99 but does not support these +modifiers. + +18. Added --disable-pcre2grep-callout-fork, which restricts the callout support +in pcre2grep to the inbuilt echo facility. This may be useful in environments +that do not support fork(). + +19. Fix two instances of <= 0 being applied to unsigned integers (the VMS +compiler complains). + +20. Added "fork" support for VMS to pcre2grep, for running an external program +via a string callout. + +21. Improve MAP_JIT flag usage on MacOS. Patch by Rich Siegel. + +22. If a pattern started with (*MARK), (*COMMIT), (*PRUNE), (*SKIP), or (*THEN) +followed by ^ it was not recognized as anchored. + +23. 
The RunGrepTest script used to cut out the test of NUL characters for +Solaris and MacOS as printf and sed can't handle them. It seems that the *BSD +systems can't either. I've inverted the test so that only those OS that are +known to work (currently only Linux) try to run this test. + +24. Some tests in RunGrepTest appended to testtrygrep from two different file +descriptors instead of redirecting stderr to stdout. This worked on Linux, but +it was reported not to on other systems, causing the tests to fail. + +25. In the RunTest script, make the test for stack setting use the same value +for the stack as it needs for -bigstack. + +26. Insert a cast in pcre2_dfa_match.c to suppress a compiler warning. + +26. With PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL set, escape sequences such as \s +which are valid in character classes, but not as the end of ranges, were being +treated as literals. An example is [_-\s] (but not [\s-_] because that gave an +error at the *start* of a range). Now an "invalid range" error is given +independently of PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL. + +27. Related to 26 above, PCRE2_BAD_ESCAPE_IS_LITERAL was affecting known escape +sequences such as \eX when they appeared invalidly in a character class. Now +the option applies only to unrecognized or malformed escape sequences. + +28. Fix word boundary in JIT compiler. Patch by Mike Munday. + +29. The pcre2_dfa_match() function was incorrectly handling conditional version +tests such as (?(VERSION>=0)...) when the version test was true. Incorrect +processing or a crash could result. + +30. When PCRE2_UTF is set, allow non-ASCII letters and decimal digits in group +names, as Perl does. There was a small bug in this new code, found by +ClusterFuzz 12950, fixed before release. + +31. Implemented PCRE2_EXTRA_ALT_BSUX to support ECMAScript 6's \u{hhh} +construct. + +32. Compile \p{Any} to be the same as . in DOTALL mode, so that it benefits +from auto-anchoring if \p{Any}* starts a pattern. + +33. 
Compile invalid UTF check in JIT test when only pcre32 is enabled. + +34. For some time now, CMake has been warning about the setting of policy +CMP0026 to "OLD" in CMakeLists.txt, and hinting that the feature might be +removed in a future version. A request for CMake expertise on the list produced +no result, so I have now hacked CMakeLists.txt along the lines of some changes +I found on the Internet. The new code no longer needs the policy setting, and +it appears to work fine on Linux. + +35. Setting --enable-jit=auto for an out-of-tree build failed because the +source directory wasn't in the search path for AC_TRY_COMPILE always. Patch +from Ross Burton. + +36. Disable SSE2 JIT optimizations in x86 CPUs when SSE2 is not available. +Patch by Guillem Jover. + +37. Changed expressions such as 1<<10 to 1u<<10 in many places because compiler +warnings were reported. + +38. Using the clang compiler with sanitizing options causes runtime complaints +about truncation for statements such as x = ~x when x is an 8-bit value; it +seems to compute ~x as a 32-bit value. Changing such statements to x = 255 ^ x +gets rid of the warnings. There were also two missing casts in pcre2test. + + +Version 10.32 10-September-2018 +------------------------------- + +1. When matching using the REG_STARTEND feature of the POSIX API with a +non-zero starting offset, unset capturing groups with lower numbers than a +group that did capture something were not being correctly returned as "unset" +(that is, with offset values of -1). + +2. When matching using the POSIX API, pcre2test used to omit listing unset +groups altogether. Now it shows those that come before any actual captures as +"<unset>", as happens for non-POSIX matching. + +3. Running "pcre2test -C" always stated "\R matches CR, LF, or CRLF only", +whatever the build configuration was. It now correctly says "\R matches all +Unicode newlines" in the default case when --enable-bsr-anycrlf has not been +specified. 
Similarly, running "pcre2test -C bsr" never produced the result +ANY. + +4. Matching the pattern /(*UTF)\C[^\v]+\x80/ against an 8-bit string containing +multi-code-unit characters caused bad behaviour and possibly a crash. This +issue was fixed for other kinds of repeat in release 10.20 by change 19, but +repeating character classes were overlooked. + +5. pcre2grep now supports the inclusion of binary zeros in patterns that are +read from files via the -f option. + +6. A small fix to pcre2grep to avoid compiler warnings for -Wformat-overflow=2. + +7. Added --enable-jit=auto support to configure.ac. + +8. Added some dummy variables to the heapframe structure in 16-bit and 32-bit +modes for the benefit of m68k, where pointers can be 16-bit aligned. The +dummies force 32-bit alignment and this ensures that the structure is a +multiple of PCRE2_SIZE, a requirement that is tested at compile time. In other +architectures, alignment requirements take care of this automatically. + +9. When returning an error from pcre2_pattern_convert(), ensure the error +offset is set to zero for early errors. + +10. A number of patches for Windows support from Daniel Richard G: + + (a) List of error numbers in Runtest.bat corrected (it was not the same as in + Runtest). + + (b) pcre2grep snprintf() workaround as used elsewhere in the tree. + + (c) Support for non-C99 snprintf() that returns -1 in the overflow case. + +11. Minor tidy of pcre2_dfa_match() code. + +12. Refactored pcre2_dfa_match() so that the internal recursive calls no longer +use the stack for local workspace and local ovectors. Instead, an initial block +of stack is reserved, but if this is insufficient, heap memory is used. The +heap limit parameter now applies to pcre2_dfa_match(). + +13. If a "find limits" test of DFA matching in pcre2test resulted in too many +matches for the ovector, no matches were displayed. + +14. Removed an occurrence of ctrl/Z from test 6 because Windows treats it as +EOF.
The test looks to have come from a fuzzer. + +15. If PCRE2 was built with a default match limit a lot greater than the +default default of 10 000 000, some JIT tests of the match limit no longer +failed. All such tests now set 10 000 000 as the upper limit. + +16. Another Windows related patch for pcregrep to ensure that WIN32 is +undefined under Cygwin. + +17. Test for the presence of stdint.h and inttypes.h in configure and CMake and +include whichever exists (stdint preferred) instead of unconditionally +including stdint. This makes life easier for old and non-standard systems. + +18. Further changes to improve portability, especially to old and or non- +standard systems: + + (a) Put all printf arguments in RunGrepTest into single, not double, quotes, + and use \0 not \x00 for binary zero. + + (b) Avoid the use of C++ (i.e. BCPL) // comments. + + (c) Parameterize the use of %zu in pcre2test to make it like %td. For both of + these now, if using MSVC or a standard C before C99, %lu is used with a + cast if necessary. + +19. Applied a contributed patch to CMakeLists.txt to increase the stack size +when linking pcre2test with MSVC. This gets rid of a stack overflow error in +the standard set of tests. + +20. Output a warning in pcre2test when ignoring the "altglobal" modifier when +it is given with the "replace" modifier. + +21. In both pcre2test and pcre2_substitute(), with global matching, a pattern +that matched an empty string, but never at the starting match offset, was not +handled in a Perl-compatible way. The pattern /(a(*:1))(?>b)(*SKIP:1)x|.*/ matched against "abc", where the *SKIP +shouldn't find a MARK (because is in an atomic group), but it did. + +26. 
Upgraded the perltest.sh script: (1) #pattern lines can now be used to set +a list of modifiers for all subsequent patterns - only those that the script +recognizes are meaningful; (2) #subject lines can be used to set or unset a +default "mark" modifier; (3) Unsupported #command lines give a warning when +they are ignored; (4) Mark data is output only if the "mark" modifier is +present. + +27. (*ACCEPT:ARG), (*FAIL:ARG), and (*COMMIT:ARG) are now supported. + +28. A (*MARK) name was not being passed back for positive assertions that were +terminated by (*ACCEPT). + +29. Add support for \N{U+dddd}, but only in Unicode mode. + +30. Add support for (?^) for unsetting all imnsx options. + +31. The PCRE2_EXTENDED (/x) option only ever discarded space characters whose +code point was less than 256 and that were recognized by the lookup table +generated by pcre2_maketables(), which uses isspace() to identify white space. +Now, when Unicode support is compiled, PCRE2_EXTENDED also discards U+0085, +U+200E, U+200F, U+2028, and U+2029, which are additional characters defined by +Unicode as "Pattern White Space". This makes PCRE2 compatible with Perl. + +32. In certain circumstances, option settings within patterns were not being +correctly processed. For example, the pattern /((?i)A)(?m)B/ incorrectly +matched "ab". (The (?m) setting lost the fact that (?i) should be reset at the +end of its group during the parse process, but without another setting such as +(?m) the compile phase got it right.) This bug was introduced by the +refactoring in release 10.23. + +33. PCRE2 uses bcopy() if available when memmove() is not, and it used just to +define memmove() as a function call to bcopy(). This hasn't been tested for a +long time because in pcre2test the result of memmove() was being used, whereas +bcopy() doesn't return a result. This feature is now refactored always to call +an emulation function when there is no memmove(). The emulation makes use of +bcopy() when available.
+ +34. When serializing a pattern, set the memctl, executable_jit, and tables +fields (that is, all the fields that contain pointers) to zeros so that the +result of serializing is always the same. These fields are re-set when the +pattern is deserialized. + +35. In a pattern such as /[^\x{100}-\x{ffff}]*[\x80-\xff]/ which has a repeated +negative class with no characters less than 0x100 followed by a positive class +with only characters less than 0x100, the first class was incorrectly being +auto-possessified, causing incorrect match failures. + +36. Removed the character type bit ctype_meta, which dates from PCRE1 and is +not used in PCRE2. + +37. Tidied up unnecessarily complicated macros used in the escapes table. + +38. Since 10.21, the new testoutput8-16-4 file has accidentally been omitted +from distribution tarballs, owing to a typo in Makefile.am which had +testoutput8-16-3 twice. Now fixed. + +39. If the only branch in a conditional subpattern was anchored, the whole +subpattern was treated as anchored, when it should not have been, since the +assumed empty second branch cannot be anchored. Demonstrated by test patterns +such as /(?(1)^())b/ or /(?(?=^))b/. + +40. A repeated conditional subpattern that could match an empty string was +always assumed to be unanchored. Now it is checked just like any other +repeated conditional subpattern, and can be found to be anchored if the minimum +quantifier is one or more. I can't see much use for a repeated anchored +pattern, but the behaviour is now consistent. + +41. Minor addition to pcre2_jit_compile.c to avoid static analyzer complaint +(for an event that could never occur but you had to have external information +to know that). + +42.
If before the first match in a file that was being searched by pcre2grep +there was a line that was sufficiently long to cause the input buffer to be +expanded, the variable holding the location of the end of the previous match +was being adjusted incorrectly, and could cause an overflow warning from a code +sanitizer. However, as the value is used only to print pending "after" lines +when the next match is reached (and there are no such lines in this case) this +bug could do no damage. + + +Version 10.31 12-February-2018 +------------------------------ + +1. Fix typo (missing ]) in VMS code in pcre2test.c. + +2. Replace the replicated code for matching extended Unicode grapheme sequences +(which got a lot more complicated by change 10.30/49) by a single subroutine +that is called by both pcre2_match() and pcre2_dfa_match(). + +3. Add idempotent guard to pcre2_internal.h. + +4. Add new pcre2_config() options: PCRE2_CONFIG_NEVER_BACKSLASH_C and +PCRE2_CONFIG_COMPILED_WIDTHS. + +5. Cut out \C tests in the JIT regression tests when NEVER_BACKSLASH_C is +defined (e.g. by --enable-never-backslash-C). + +6. Defined public names for all the pcre2_compile() error numbers, and used +the public names in pcre2_convert.c. + +7. Fixed a small memory leak in pcre2test (convert contexts). + +8. Added two casts to compile.c and one to match.c to avoid compiler warnings. + +9. Added code to pcre2grep when compiled under VMS to set the symbol +PCRE2GREP_RC to the exit status, because VMS does not distinguish between +exit(0) and exit(1). + +10. Added the -LM (list modifiers) option to pcre2test. Also made -C complain +about a bad option only if the following argument item does not start with a +hyphen. + +11. pcre2grep was truncating components of file names to 128 characters when +processing files with the -r option, and also (some very odd code) truncating +path names to 512 characters. 
There is now a check on the absolute length of +full path file names, which may be up to 2047 characters long. + +12. When an assertion contained (*ACCEPT) it caused all open capturing groups +to be closed (as for a non-assertion ACCEPT), which was wrong and could lead to +misbehaviour for subsequent references to groups that started outside the +assertion. ACCEPT in an assertion now closes only those groups that were +started within that assertion. Fixes oss-fuzz issues 3852 and 3891. + +13. Multiline matching in pcre2grep was misbehaving if the pattern matched +within a line, and then matched again at the end of the line and over into +subsequent lines. Behaviour was different with and without colouring, and +sometimes context lines were incorrectly printed and/or line endings were lost. +All these issues should now be fixed. + +14. If --line-buffered was specified for pcre2grep when input was from a +compressed file (.gz or .bz2) a segfault occurred. (Line buffering should be +ignored for compressed files.) + +15. Although pcre2_jit_match checks whether the pattern is compiled +in a given mode, it was also expected that at least one mode is available. +This is fixed and pcre2_jit_match returns with PCRE2_ERROR_JIT_BADOPTION +when the pattern is not optimized by JIT at all. + +16. The line number and related variables such as match counts in pcre2grep +were all int variables, causing overflow when files with more than 2147483647 +lines were processed (assuming 32-bit ints). They have all been changed to +unsigned long ints. + +17. If a backreference with a minimum repeat count of zero was first in a +pattern, apart from assertions, an incorrect first matching character could be +recorded. For example, for the pattern /(?=(a))\1?b/, "b" was incorrectly set +as the first character of a match. + +18. Characters in a leading positive assertion are considered for recording a +first character of a match when the rest of the pattern does not provide one. 
+However, a character in a non-assertive group within a leading assertion such +as in the pattern /(?=(a))\1?b/ caused this process to fail. This was an +infelicity rather than an outright bug, because it did not affect the result of +a match, just its speed. (In fact, in this case, the starting 'a' was +subsequently picked up in the study.) + +19. A minor tidy in pcre2_match(): making all PCRE2_ERROR_ returns use "return" +instead of "RRETURN" saves unwinding the backtracks in these cases (only one +didn't). + +20. Allocate a single callout block on the stack at the start of pcre2_match() +and set its never-changing fields once only. Do the same for pcre2_dfa_match(). + +21. Save the extra compile options (set in the compile context) with the +compiled pattern (they were not previously saved), add PCRE2_INFO_EXTRAOPTIONS +to retrieve them, and update pcre2test to show them. + +22. Added PCRE2_CALLOUT_STARTMATCH and PCRE2_CALLOUT_BACKTRACK bits to a new +field callout_flags in callout blocks. The bits are set by pcre2_match(), but +not by JIT or pcre2_dfa_match(). Their settings are shown in pcre2test callouts +if the callout_extra subject modifier is set. These bits are provided to help +with tracking how a backtracking match is proceeding. + +23. Updated the pcre2demo.c demonstration program, which was missing the extra +code for -g that handles the case when \K in an assertion causes the match to +end at the original start point. Also arranged for it to detect when \K causes +the end of a match to be before its start. + +24. Similar to 23 above, strange things (including loops) could happen in +pcre2grep when \K was used in an assertion when --colour was used or in +multiline mode. The "end at original start point" bug is fixed, and if the end +point is found to be before the start point, they are swapped. + +25. 
When PCRE2_FIRSTLINE without PCRE2_NO_START_OPTIMIZE was used in non-JIT +matching (both pcre2_match() and pcre2_dfa_match()) and the matched string +started with the first code unit of a newline sequence, matching failed because +it was not tried at the newline. + +26. Code for giving up a non-partial match after failing to find a starting +code unit anywhere in the subject was missing when searching for one of a +number of code units (the bitmap case) in both pcre2_match() and +pcre2_dfa_match(). This was a missing optimization rather than a bug. + +27. Tidied up the ACROSSCHAR macro to be like FORWARDCHAR and BACKCHAR, using a +pointer argument rather than a code unit value. This should not have affected +the generated code. + +28. The JIT compiler has been updated. + +29. Avoid pointer overflow for unset captures in pcre2_substring_list_get(). +This could not actually cause a crash because it was always used in a memcpy() +call with zero length. + +30. Some internal structures have a variable-length ovector[] as their last +element. Their actual memory is obtained dynamically, giving an ovector of +appropriate length. However, they are defined in the structure as +ovector[NUMBER], where NUMBER is large so that array bound checkers don't +grumble. The value of NUMBER was 10000, but a fuzzer exceeded 5000 capturing +groups, making the ovector larger than this. The number has been increased to +131072, which allows for the maximum number of captures (65535) plus the +overall match. This fixes oss-fuzz issue 5415. + +31. Auto-possessification at the end of a capturing group was dependent on what +follows the group (e.g. /(a+)b/ would auto-possessify the a+) but this caused +incorrect behaviour when the group was called recursively from elsewhere in the +pattern where something different might follow. This bug is an unforeseen +consequence of change #1 for 10.30 - the implementation of backtracking into +recursions.
Iterators at the ends of capturing groups are no longer considered +for auto-possessification if the pattern contains any recursions. Fixes +Bugzilla #2232. + + +Version 10.30 14-August-2017 +---------------------------- + +1. The main interpreter, pcre2_match(), has been refactored into a new version +that does not use recursive function calls (and therefore the stack) for +remembering backtracking positions. This makes --disable-stack-for-recursion a +NOOP. The new implementation allows backtracking into recursive group calls in +patterns, making it more compatible with Perl, and also fixes some other +hard-to-do issues such as #1887 in Bugzilla. The code is also cleaner because +the old code had a number of fudges to try to reduce stack usage. It seems to +run no slower than the old code. + +A number of bugs in the refactored code were subsequently fixed during testing +before release, but after the code was made available in the repository. These +bugs were never in fully released code, but are noted here for the record. + + (a) If a pattern had fewer capturing parentheses than the ovector supplied in + the match data block, a memory error (detectable by ASAN) occurred after + a match, because the external block was being set from non-existent + internal ovector fields. Fixes oss-fuzz issue 781. + + (b) A pattern with very many capturing parentheses (when the internal frame + size was greater than the initial frame vector on the stack) caused a + crash. A vector on the heap is now set up at the start of matching if the + vector on the stack is not big enough to handle at least 10 frames. + Fixes oss-fuzz issue 783. + + (c) Handling of (*VERB)s in recursions was wrong in some cases. + + (d) Captures in negative assertions that were used as conditions were not + happening if the assertion matched via (*ACCEPT). + + (e) Mark values were not being passed out of recursions. 
+ + (f) Refactor some code in do_callout() to avoid picky compiler warnings about + negative indices. Fixes oss-fuzz issue 1454. + + (g) Similarly refactor the way the variable length ovector is addressed for + similar reasons. Fixes oss-fuzz issue 1465. + +2. Now that pcre2_match() no longer uses recursive function calls (see above), +the "match limit recursion" value seems misnamed. It still exists, and limits +the depth of tree that is searched. To avoid future confusion, it has been +renamed as "depth limit" in all relevant places (--with-depth-limit, +(*LIMIT_DEPTH), pcre2_set_depth_limit(), etc) but the old names are still +available for backwards compatibility. + +3. Hardened pcre2test so as to reduce the number of bugs reported by fuzzers: + + (a) Check for malloc failures when getting memory for the ovector (POSIX) or + the match data block (non-POSIX). + +4. In the 32-bit library in non-UTF mode, an attempt to find a Unicode property +for a character with a code point greater than 0x10ffff (the Unicode maximum) +caused a crash. + +5. If a lookbehind assertion that contained a back reference to a group +appearing later in the pattern was compiled with the PCRE2_ANCHORED option, +undefined actions (often a segmentation fault) could occur, depending on what +other options were set. An example assertion is (?" should be ">=" in opcode check in pcre2_auto_possess.c. + (b) Added some casts to avoid "suspicious implicit sign extension". + (c) Resource leaks in pcre2test in rare error cases. + (d) Avoid warning for never-use case OP_TABLE_LENGTH which is just a fudge + for checking at compile time that tables are the right size. + (e) Add missing "fall through" comment. + +29. Implemented PCRE2_EXTENDED_MORE and related /xx and (?xx) features. + +30. Implement (?n: for PCRE2_NO_AUTO_CAPTURE, because Perl now has this. + +31. If more than one of "push", "pushcopy", or "pushtablescopy" were set in +pcre2test, a crash could occur. + +32. 
Make -bigstack in RunTest allocate a 64MiB stack (instead of 16MiB) so +that all the tests can run with clang's sanitizing options. + +33. Implement extra compile options in the compile context and add the first +one: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES. + +34. Implement newline type PCRE2_NEWLINE_NUL. + +35. A lookbehind assertion that had a zero-length branch caused undefined +behaviour when processed by pcre2_dfa_match(). This is oss-fuzz issue 1859. + +36. The match limit value now also applies to pcre2_dfa_match() as there are +patterns that can use up a lot of resources without necessarily recursing very +deeply. (Compare item 10.23/36.) This should fix oss-fuzz #1761. + +37. Implement PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL. + +38. Fix returned offsets from regexec() when REG_STARTEND is used with a +starting offset greater than zero. + +39. Implement REG_PEND (GNU extension) for the POSIX wrapper. + +40. Implement the subject_literal modifier in pcre2test, and allow jitstack on +pattern lines. + +41. Implement PCRE2_LITERAL and use it to support REG_NOSPEC. + +42. Implement PCRE2_EXTRA_MATCH_LINE and PCRE2_EXTRA_MATCH_WORD for the benefit +of pcre2grep. + +43. Re-implement pcre2grep's -F, -w, and -x options using PCRE2_LITERAL, +PCRE2_EXTRA_MATCH_WORD, and PCRE2_EXTRA_MATCH_LINE. This fixes two bugs: + + (a) The -F option did not work for fixed strings containing \E. + (b) The -w option did not work for patterns with multiple branches. + +44. Added configuration options for the SELinux compatible execmem allocator in +JIT. + +45. Increased the limit for searching for a "must be present" code unit in +subjects from 1000 to 2000 for 8-bit searches, since they use memchr() and are +much faster. + +46. Arrange for anchored patterns to record and use "first code unit" data, +because this can give a fast "no match" without searching for a "required code +unit". Previously only non-anchored patterns did this. + +47. 
Upgraded the Unicode tables from Unicode 8.0.0 to Unicode 10.0.0. + +48. Add the callout_no_where modifier to pcre2test. + +49. Update extended grapheme breaking rules to the latest set that are in +Unicode Standard Annex #29. + +50. Added experimental foreign pattern conversion facilities +(pcre2_pattern_convert() and friends). + +51. Change the macro FWRITE, used in pcre2grep, to FWRITE_IGNORE because FWRITE +is defined in a system header in cygwin. Also modified some of the #ifdefs in +pcre2grep related to Windows and Cygwin support. + +52. Change 3(g) for 10.23 was a bit too zealous. If a hyphen that follows a +character class is the last character in the class, Perl does not give a +warning. PCRE2 now also treats this as a literal. + +53. Related to 52, though PCRE2 was throwing an error for [[:digit:]-X] it was +not doing so for [\d-X] (and similar escapes), as is documented. + +54. Fixed a MIPS issue in the JIT compiler reported by Joshua Kinard. + +55. Fixed a "maybe uninitialized" warning for class_uchardata in \p handling in +pcre2_compile() which could never actually trigger (code should have been cut +out when Unicode support is disabled). + + +Version 10.23 14-February-2017 +------------------------------ + +1. Extended pcre2test with the utf8_input modifier so that it is able to +generate all possible 16-bit and 32-bit code unit values in non-UTF modes. + +2. In any wide-character mode (8-bit UTF or any 16-bit or 32-bit mode), without +PCRE2_UCP set, a negative character type such as \D in a positive class should +cause all characters greater than 255 to match, whatever else is in the class. +There was a bug that caused this not to happen if a Unicode property item was +added to such a class, for example [\D\P{Nd}] or [\W\pL]. + +3. There has been a major re-factoring of the pcre2_compile.c file. Most syntax +checking is now done in the pre-pass that identifies capturing groups. This has +reduced the amount of duplication and made the code tidier. 
While doing this, +some minor bugs and Perl incompatibilities were fixed, including: + + (a) \Q\E in the middle of a quantifier such as A+\Q\E+ is now ignored instead + of giving an invalid quantifier error. + + (b) {0} can now be used after a group in a lookbehind assertion; previously + this caused an "assertion is not fixed length" error. + + (c) Perl always treats (?(DEFINE) as a "define" group, even if a group with + the name "DEFINE" exists. PCRE2 now does likewise. + + (d) A recursion condition test such as (?(R2)...) must now refer to an + existing subpattern. + + (e) A conditional recursion test such as (?(R)...) misbehaved if there was a + group whose name began with "R". + + (f) When testing zero-terminated patterns under valgrind, the terminating + zero is now marked "no access". This catches bugs that would otherwise + show up only with non-zero-terminated patterns. + + (g) A hyphen appearing immediately after a POSIX character class (for example + /[[:ascii:]-z]/) now generates an error. Perl does accept this as a + literal, but gives a warning, so it seems best to fail it in PCRE. + + (h) An empty \Q\E sequence may appear after a callout that precedes an + assertion condition (it is, of course, ignored). + +One effect of the refactoring is that some error numbers and messages have +changed, and the pattern offset given for compiling errors is not always the +right-most character that has been read. In particular, for a variable-length +lookbehind assertion it now points to the start of the assertion. Another +change is that when a callout appears before a group, the "length of next +pattern item" that is passed now just gives the length of the opening +parenthesis item, not the length of the whole group. A length of zero is now +given only for a callout at the end of the pattern. Automatic callouts are no +longer inserted before and after explicit callouts in the pattern. 
+ +A number of bugs in the refactored code were subsequently fixed during testing +before release, but after the code was made available in the repository. Many +of the bugs were discovered by fuzzing testing. Several of them were related to +the change from assuming a zero-terminated pattern (which previously had +required non-zero terminated strings to be copied). These bugs were never in +fully released code, but are noted here for the record. + + (a) An overall recursion such as (?0) inside a lookbehind assertion was not + being diagnosed as an error. + + (b) In utf mode, the length of a *MARK (or other verb) name was being checked + in characters instead of code units, which could lead to bad code being + compiled, leading to unpredictable behaviour. + + (c) In extended /x mode, characters whose code was greater than 255 caused + a lookup outside one of the global tables. A similar bug existed for wide + characters in *VERB names. + + (d) The amount of memory needed for a compiled pattern was miscalculated if a + lookbehind contained more than one toplevel branch and the first branch + was of length zero. + + (e) In UTF-8 or UTF-16 modes with PCRE2_EXTENDED (/x) set and a non-zero- + terminated pattern, if a # comment ran on to the end of the pattern, one + or more code units past the end were being read. + + (f) An unterminated repeat at the end of a non-zero-terminated pattern (e.g. + "{2,2") could cause reading beyond the pattern. + + (g) When reading a callout string, if the end delimiter was at the end of the + pattern one further code unit was read. + + (h) An unterminated number after \g' could cause reading beyond the pattern. + + (i) An insufficient memory size was being computed for compiling with + PCRE2_AUTO_CALLOUT. + + (j) A conditional group with an assertion condition used more memory than was + allowed for it during parsing, so too many of them could therefore + overrun a buffer. 
+ + (k) If parsing a pattern exactly filled the buffer, the internal test for + overrun did not check when the final META_END item was added. + + (l) If a lookbehind contained a subroutine call, and the called group + contained an option setting such as (?s), and the PCRE2_ANCHORED option + was set, unpredictable behaviour could occur. The underlying bug was + incorrect code and insufficient checking while searching for the end of + the called subroutine in the parsed pattern. + + (m) Quantifiers following (*VERB)s were not being diagnosed as errors. + + (n) The use of \Q...\E in a (*VERB) name when PCRE2_ALT_VERBNAMES and + PCRE2_AUTO_CALLOUT were both specified caused undetermined behaviour. + + (o) If \Q was preceded by a quantified item, and the following \E was + followed by '?' or '+', and there was at least one literal character + between them, an internal error "unexpected repeat" occurred (example: + /.+\QX\E+/). + + (p) A buffer overflow could occur while sorting the names in the group name + list (depending on the order in which the names were seen). + + (q) A conditional group that started with a callout was not doing the right + check for a following assertion, leading to compiling bad code. Example: + /(?(C'XX))?!XX/ + + (r) If a character whose code point was greater than 0xffff appeared within + a lookbehind that was within another lookbehind, the calculation of the + lookbehind length went wrong and could provoke an internal error. + + (t) The sequence \E- or \Q\E- after a POSIX class in a character class caused + an internal error. Now the hyphen is treated as a literal. + +4. Back references are now permitted in lookbehind assertions when there are +no duplicated group numbers (that is, (?| has not been used), and, if the +reference is by name, there is only one group of that name. The referenced +group must, of course be of fixed length. + +5. 
pcre2test has been upgraded so that, when run under valgrind with valgrind +support enabled, reading past the end of the pattern is detected, both when +compiling and during callout processing. + +6. \g{+<number>} (e.g. \g{+2} ) is now supported. It is a "forward back +reference" and can be useful in repetitions (compare \g{-<number>} ). Perl does +not recognize this syntax. + +7. Automatic callouts are no longer generated before and after callouts in the +pattern. + +8. When pcre2test was outputting information from a callout, the caret indicator +for the current position in the subject line was incorrect if it was after an +escape sequence for a character whose code point was greater than \x{ff}. + +9. Change 19 for 10.22 had a typo (PCRE_STATIC_RUNTIME should be +PCRE2_STATIC_RUNTIME). Fix from David Gaussmann. + +10. Added --max-buffer-size to pcre2grep, to allow for automatic buffer +expansion when long lines are encountered. Original patch by Dmitry +Cherniachenko. + +11. If pcre2grep was compiled with JIT support, but the library was compiled +without it (something that neither ./configure nor CMake allow, but it can be +done by editing config.h), pcre2grep was giving a JIT error. Now it detects +this situation and does not try to use JIT. + +12. Added some "const" qualifiers to variables in pcre2grep. + +13. Added Dmitry Cherniachenko's patch for colouring output in Windows +(untested by me). Also, look for GREP_COLOUR or GREP_COLOR if the environment +variables PCRE2GREP_COLOUR and PCRE2GREP_COLOR are not found. + +14. Add the -t (grand total) option to pcre2grep. + +15. A number of bugs have been mended relating to match start-up optimizations +when the first thing in a pattern is a positive lookahead. These all applied +only when PCRE2_NO_START_OPTIMIZE was *not* set: + + (a) A pattern such as (?=.*X)X$ was incorrectly optimized as if it needed + both an initial 'X' and a following 'X'.
+ (b) Some patterns starting with an assertion that started with .* were + incorrectly optimized as having to match at the start of the subject or + after a newline. There are cases where this is not true, for example, + (?=.*[A-Z])(?=.{8,16})(?!.*[\s]) matches after the start in lines that + start with spaces. Starting .* in an assertion is no longer taken as an + indication of matching at the start (or after a newline). + +16. The "offset" modifier in pcre2test was not being ignored (as documented) +when the POSIX API was in use. + +17. Added --enable-fuzz-support to "configure", causing a non-installed +library containing a test function that can be called by fuzzers to be +compiled. A non-installed binary to run the test function locally, called +pcre2fuzzcheck is also compiled. + +18. A pattern with PCRE2_DOTALL (/s) set but not PCRE2_NO_DOTSTAR_ANCHOR, and +which started with .* inside a positive lookahead was incorrectly being +compiled as implicitly anchored. + +19. Removed all instances of "register" declarations, as they are considered +obsolete these days and in any case had become very haphazard. + +20. Add strerror() to pcre2test for failed file opening. + +21. Make pcre2test -C list valgrind support when it is enabled. + +22. Add the use_length modifier to pcre2test. + +23. Fix an off-by-one bug in pcre2test for the list of names for 'get' and +'copy' modifiers. + +24. Add PCRE2_CALL_CONVENTION into the prototype declarations in pcre2.h as it +is apparently needed there as well as in the function definitions. (Why did +nobody ask for this in PCRE1?) + +25. Change the _PCRE2_H and _PCRE2_UCP_H guard macros in the header files to +PCRE2_H_IDEMPOTENT_GUARD and PCRE2_UCP_H_IDEMPOTENT_GUARD to be more standard +compliant and unique. + +26. pcre2-config --libs-posix was listing -lpcre2posix instead of +-lpcre2-posix. Also, the CMake build process was building the library with the +wrong name. + +27.
In pcre2test, give some offset information for errors in hex patterns. +This uses the C99 formatting sequence %td, except for MSVC which doesn't +support it - %lu is used instead. + +28. Implemented pcre2_code_copy_with_tables(), and added pushtablescopy to +pcre2test for testing it. + +29. Fix small memory leak in pcre2test. + +30. Fix out-of-bounds read for partial matching of /./ against an empty string +when the newline type is CRLF. + +31. Fix a bug in pcre2test that caused a crash when a locale was set either in +the current pattern or a previous one and a wide character was matched. + +32. The appearance of \p, \P, or \X in a substitution string when +PCRE2_SUBSTITUTE_EXTENDED was set caused a segmentation fault (NULL +dereference). + +33. If the starting offset was specified as greater than the subject length in +a call to pcre2_substitute() an out-of-bounds memory reference could occur. + +34. When PCRE2 was compiled to use the heap instead of the stack for recursive +calls to match(), a repeated minimizing caseless back reference, or a +maximizing one where the two cases had different numbers of code units, +followed by a caseful back reference, could lose the caselessness of the first +repeated back reference (example: /(Z)(a)\2{1,2}?(?-i)\1X/i should match ZaAAZX +but didn't). + +35. When a pattern is too complicated, PCRE2 gives up trying to find a minimum +matching length and just records zero. Typically this happens when there are +too many nested or recursive back references. If the limit was reached in +certain recursive cases it failed to be triggered and an internal error could +be the result. + +36. The pcre2_dfa_match() function now takes note of the recursion limit for +the internal recursive calls that are used for lookrounds and recursions within +the pattern. + +37. 
More refactoring has got rid of the internal could_be_empty_branch() +function (around 400 lines of code, including comments) by keeping track of +could-be-emptiness as the pattern is compiled instead of scanning compiled +groups. (This would have been much harder before the refactoring of #3 above.) +This lifts a restriction on the number of branches in a group (more than about +1100 would give "pattern is too complicated"). + +38. Add the "-ac" command line option to pcre2test as a synonym for "-pattern +auto_callout". + +39. In a library with Unicode support, incorrect data was compiled for a +pattern with PCRE2_UCP set without PCRE2_UTF if a class required all wide +characters to match (for example, /[\s[:^ascii:]]/). + +40. The callout_error modifier has been added to pcre2test to make it possible +to return PCRE2_ERROR_CALLOUT from a callout. + +41. A minor change to pcre2grep: colour reset is now "<esc>[0m" instead of +"<esc>[00m". + +42. The limit in the auto-possessification code that was intended to catch +overly-complicated patterns and not spend too much time auto-possessifying was +being reset too often, resulting in very long compile times for some patterns. +Now such patterns are no longer completely auto-possessified. + +43. Applied Jason Hood's revised patch for RunTest.bat. + +44. Added a new Windows script RunGrepTest.bat, courtesy of Jason Hood. + +45. Minor cosmetic fix to pcre2test: move a variable that is not used under +Windows into the "not Windows" code. + +46. 
Applied Jason Hood's patches to upgrade pcre2grep under Windows and tidy +some of the code: + + * normalised the Windows condition by ensuring WIN32 is defined; + * enables the callout feature under Windows; + * adds globbing (Microsoft's implementation expands quoted args), + using a tweaked opendirectory; + * implements the is_*_tty functions for Windows; + * --color=always will write the ANSI sequences to file; + * add sequences 4 (underline works on Win10) and 5 (blink as bright + background, relatively standard on DOS/Win); + * remove the (char *) casts for the now-const strings; + * remove GREP_COLOUR (grep's command line allowed the 'u', but not + the environment), parsing GREP_COLORS instead; + * uses the current colour if not set, rather than black; + * add print_match for the undefined case; + * fixes a typo. + +In addition, colour settings containing anything other than digits and +semicolon are ignored, and the colour controls are no longer output for empty +strings. + +47. Detecting patterns that are too large inside the length-measuring loop +saves processing ridiculously long patterns to their end. + +48. Ignore PCRE2_CASELESS when processing \h, \H, \v, and \V in classes as it +just wastes time. In the UTF case it can also produce redundant entries in +XCLASS lists caused by characters with multiple other cases and pairs of +characters in the same "not-x" sublists. + +49. A pattern such as /(?=(a\K))/ can report the end of the match being before +its start; pcre2test was not handling this correctly when using the POSIX +interface (it was OK with the native interface). + +50. In pcre2grep, ignore all JIT compile errors. This means that pcre2grep will +continue to work, falling back to interpretation if anything goes wrong with +JIT. + +51. Applied patches from Christian Persch to configure.ac to make use of the +AC_USE_SYSTEM_EXTENSIONS macro and to test for functions used by the JIT +modules. + +52. 
Minor fixes to pcre2grep from Jason Hood: + * fixed some spacing; + * Windows doesn't usually use single quotes, so I've added a define + to use appropriate quotes [in an example]; + * LC_ALL was displayed as "LCC_ALL"; + * numbers 11, 12 & 13 should end in "th"; + * use double quotes in usage message. + +53. When autopossessifying, skip empty branches without recursion, to reduce +stack usage for the benefit of clang with -fsanitize-address, which uses huge +stack frames. Example pattern: /X?(R||){3335}/. Fixes oss-fuzz issue 553. + +54. A pattern with very many explicit back references to a group that is a long +way from the start of the pattern could take a long time to compile because +searching for the referenced group in order to find the minimum length was +being done repeatedly. Now up to 128 group minimum lengths are cached and the +attempt to find a minimum length is abandoned if there is a back reference to a +group whose number is greater than 128. (In that case, the pattern is so +complicated that this optimization probably isn't worth it.) This fixes +oss-fuzz issue 557. + +55. Issue 32 for 10.22 below was not correctly fixed. If pcre2grep in multiline +mode with --only-matching matched several lines, it restarted scanning at the +next line instead of moving on to the end of the matched string, which can be +several lines after the start. + +56. Applied Jason Hood's new patch for RunGrepTest.bat that updates it in line +with updates to the non-Windows version. + + + +Version 10.22 29-July-2016 +-------------------------- + +1. Applied Jason Hood's patches to RunTest.bat and testdata/wintestoutput3 +to fix problems with running the tests under Windows. + +2. Implemented a facility for quoting literal characters within hexadecimal +patterns in pcre2test, to make it easier to create patterns with just a few +non-printing characters. + +3. Binary zeros are not supported in pcre2test input files. It now detects them +and gives an error. + +4. 
Updated the valgrind parameters in RunTest: (a) changed smc-check=all to +smc-check=all-non-file; (b) changed obj:* in the suppression file to obj:??? so +that it matches only unknown objects. + +5. Updated the maintenance script maint/ManyConfigTests to make it easier to +select individual groups of tests. + +6. When the POSIX wrapper function regcomp() is called, the REG_NOSUB option +used to set PCRE2_NO_AUTO_CAPTURE when calling pcre2_compile(). However, this +disables the use of back references (and subroutine calls), which are supported +by other implementations of regcomp() with REG_NOSUB. Therefore, REG_NOSUB no +longer causes PCRE2_NO_AUTO_CAPTURE to be set, though it still ignores nmatch +and pmatch when regexec() is called. + +7. Because of 6 above, pcre2test has been modified with a new modifier called +posix_nosub, to call regcomp() with REG_NOSUB. Previously the no_auto_capture +modifier had this effect. That option is now ignored when the POSIX API is in +use. + +8. Minor tidies to the pcre2demo.c sample program, including more comments +about its 8-bit-ness. + +9. Detect unmatched closing parentheses and give the error in the pre-scan +instead of later. Previously the pre-scan carried on and could give a +misleading incorrect error message. For example, /(?J)(?'a'))(?'a')/ gave a +message about invalid duplicate group names. + +10. It has happened that pcre2test was accidentally linked with another POSIX +regex library instead of libpcre2-posix. In this situation, a call to regcomp() +(in the other library) may succeed, returning zero, but of course putting its +own data into the regex_t block. In one example the re_pcre2_code field was +left as NULL, which made pcre2test think it had not got a compiled POSIX regex, +so it treated the next line as another pattern line, resulting in a confusing +error message. A check has been added to pcre2test to see if the data returned +from a successful call of regcomp() are valid for PCRE2's regcomp(). 
If they +are not, an error message is output and the pcre2test run is abandoned. The +message points out the possibility of a mis-linking. Hopefully this will avoid +some head-scratching the next time this happens. + +11. A pattern such as /(?<=((?C)0))/, which has a callout inside a lookbehind +assertion, caused pcre2test to output a very large number of spaces when the +callout was taken, making the program appear to loop. + +12. A pattern that included (*ACCEPT) in the middle of a sufficiently deeply +nested set of parentheses of sufficient size caused an overflow of the +compiling workspace (which was diagnosed, but of course is not desirable). + +13. Detect missing closing parentheses during the pre-pass for group +identification. + +14. Changed some integer variable types and put in a number of casts, following +a report of compiler warnings from Visual Studio 2013 and a few tests with +gcc's -Wconversion (which still throws up a lot). + +15. Implemented pcre2_code_copy(), and added pushcopy and #popcopy to pcre2test +for testing it. + +16. Change 66 for 10.21 introduced the use of snprintf() in PCRE2's version of +regerror(). When the error buffer is too small, my version of snprintf() puts a +binary zero in the final byte. Bug #1801 seems to show that other versions do +not do this, leading to bad output from pcre2test when it was checking for +buffer overflow. It no longer assumes a binary zero at the end of a too-small +regerror() buffer. + +17. Fixed typo ("&&" for "&") in pcre2_study(). Fortunately, this could not +actually affect anything, by sheer luck. + +18. Two minor fixes for MSVC compilation: (a) removal of apparently incorrect +"const" qualifiers in pcre2test and (b) defining snprintf as _snprintf for +older MSVC compilers. This has been done both in src/pcre2_internal.h for most +of the library, and also in src/pcre2posix.c, which no longer includes +pcre2_internal.h (see 24 below). + +19. 
Applied Chris Wilson's patch (Bugzilla #1681) to CMakeLists.txt for MSVC +static compilation. Subsequently applied Chris Wilson's second patch, putting +the first patch under a new option instead of being unconditional when +PCRE_STATIC is set. + +20. Updated pcre2grep to set stdout as binary when run under Windows, so as not +to convert \r\n at the ends of reflected lines into \r\r\n. This required +ensuring that other output that is written to stdout (e.g. file names) uses the +appropriate line terminator: \r\n for Windows, \n otherwise. + +21. When a line is too long for pcre2grep's internal buffer, show the maximum +length in the error message. + +22. Added support for string callouts to pcre2grep (Zoltan's patch with PH +additions). + +23. RunTest.bat was missing a "set type" line for test 22. + +24. The pcre2posix.c file was including pcre2_internal.h, and using some +"private" knowledge of the data structures. This is unnecessary; the code has +been re-factored and no longer includes pcre2_internal.h. + +25. A race condition in JIT reported by Mozilla is fixed. + +26. Minor code refactor to avoid "array subscript is below array bounds" +compiler warning. + +27. Minor code refactor to avoid "left shift of negative number" warning. + +28. Add a bit more sanity checking to pcre2_serialize_decode() and document +that it expects trusted data. + +29. Fix typo in pcre2_jit_test.c + +30. Due to an oversight, pcre2grep was not making use of JIT when available. +This is now fixed. + +31. The RunGrepTest script is updated to use the valgrind suppressions file +when testing with JIT under valgrind (compare 10.21/51 below). The suppressions +file is updated so that it is now the same as for PCRE1: it suppresses the +Memcheck warnings Addr16 and Cond in unknown objects (that is, JIT-compiled +code). Also changed smc-check=all to smc-check=all-non-file as was done for +RunTest (see 4 above). + +32. Implemented the PCRE2_NO_JIT option for pcre2_match(). + +33. 
Fix typo that gave a compiler error when JIT is not supported. + +34. Fix comment describing the returns from find_fixedlength(). + +35. Fix potential negative index in pcre2test. + +36. Calls to pcre2_get_error_message() with error numbers that are never +returned by PCRE2 functions were returning empty strings. Now the error code +PCRE2_ERROR_BADDATA is returned. A facility has been added to pcre2test to +show the texts for given error numbers (i.e. to call pcre2_get_error_message() +and display what it returns) and a few representative error codes are now +checked in RunTest. + +37. Added "&& !defined(__INTEL_COMPILER)" to the test for __GNUC__ in +pcre2_match.c, in anticipation that this is needed for the same reason it was +recently added to pcrecpp.cc in PCRE1. + +38. Using -o with -M in pcre2grep could cause unnecessary repeated output when +the match extended over a line boundary, as it tried to find more matches "on +the same line" - but it was already over the end. + +39. Allow \C in lookbehinds and DFA matching in UTF-32 mode (by converting it +to the same code as '.' when PCRE2_DOTALL is set). + +40. Fix two clang compiler warnings in pcre2test when only one code unit width +is supported. + +41. Upgrade RunTest to automatically re-run test 2 with a large (64MiB) stack +if it fails when running the interpreter with a 16MiB stack (and if changing +the stack size via pcre2test is possible). This avoids having to manually set a +large stack size when testing with clang. + +42. Fix register overwrite in JIT when SSE2 acceleration is enabled. + +43. Detect integer overflow in pcre2test pattern and data repetition counts. + +44. In pcre2test, ignore "allcaptures" after DFA matching. + +45. Fix unaligned accesses on x86. Patch by Marc Mutz. + +46. Fix some more clang compiler warnings. + + +Version 10.21 12-January-2016 +----------------------------- + +1. Improve matching speed of patterns starting with + or * in JIT. + +2. 
Use memchr() to find the first character in an unanchored match in 8-bit +mode in the interpreter. This gives a significant speed improvement. + +3. Removed a redundant copy of the opcode_possessify table in the +pcre2_auto_possessify.c source. + +4. Fix typos in dftables.c for z/OS. + +5. Change 36 for 10.20 broke the handling of [[:>:]] and [[:<:]] in that +processing them could involve a buffer overflow if the following character was +an opening parenthesis. + +6. Change 36 for 10.20 also introduced a bug in processing this pattern: +/((?x)(*:0))#(?'/. Specifically: if a setting of (?x) was followed by a (*MARK) +setting (which (*:0) is), then (?x) did not get unset at the end of its group +during the scan for named groups, and hence the external # was incorrectly +treated as a comment and the invalid (?' at the end of the pattern was not +diagnosed. This caused a buffer overflow during the real compile. This bug was +discovered by Karl Skomski with the LLVM fuzzer. + +7. Moved the pcre2_find_bracket() function from src/pcre2_compile.c into its +own source module to avoid a circular dependency between src/pcre2_compile.c +and src/pcre2_study.c + +8. A callout with a string argument containing an opening square bracket, for +example /(?C$[$)(?<]/, was incorrectly processed and could provoke a buffer +overflow. This bug was discovered by Karl Skomski with the LLVM fuzzer. + +9. The handling of callouts during the pre-pass for named group identification +has been tightened up. + +10. The quantifier {1} can be ignored, whether greedy, non-greedy, or +possessive. This is a very minor optimization. + +11. A possessively repeated conditional group that could match an empty string, +for example, /(?(R))*+/, was incorrectly compiled. + +12. The Unicode tables have been updated to Unicode 8.0.0 (thanks to Christian +Persch). + +13. An empty comment (?#) in a pattern was incorrectly processed and could +provoke a buffer overflow. 
This bug was discovered by Karl Skomski with the +LLVM fuzzer. + +14. Fix infinite recursion in the JIT compiler when certain patterns such as +/(?:|a|){100}x/ are analysed. + +15. Some patterns with character classes involving [: and \\ were incorrectly +compiled and could cause reading from uninitialized memory or an incorrect +error diagnosis. Examples are: /[[:\\](?<[::]/ and /[[:\\](?'abc')[a:]. The +first of these bugs was discovered by Karl Skomski with the LLVM fuzzer. + +16. Pathological patterns containing many nested occurrences of [: caused +pcre2_compile() to run for a very long time. This bug was found by the LLVM +fuzzer. + +17. A missing closing parenthesis for a callout with a string argument was not +being diagnosed, possibly leading to a buffer overflow. This bug was found by +the LLVM fuzzer. + +18. A conditional group with only one branch has an implicit empty alternative +branch and must therefore be treated as potentially matching an empty string. + +19. If (?R was followed by - or + incorrect behaviour happened instead of a +diagnostic. This bug was discovered by Karl Skomski with the LLVM fuzzer. + +20. Another bug that was introduced by change 36 for 10.20: conditional groups +whose condition was an assertion preceded by an explicit callout with a string +argument might be incorrectly processed, especially if the string contained \Q. +This bug was discovered by Karl Skomski with the LLVM fuzzer. + +21. Compiling PCRE2 with the sanitize options of clang showed up a number of +very pedantic coding infelicities and a buffer overflow while checking a UTF-8 +string if the final multi-byte UTF-8 character was truncated. + +22. For Perl compatibility in EBCDIC environments, ranges such as a-z in a +class, where both values are literal letters in the same case, omit the +non-letter EBCDIC code points within the range. + +23. Finding the minimum matching length of complex patterns with back +references and/or recursions can take a long time. 
There is now a cut-off that +gives up trying to find a minimum length when things get too complex. + +24. An optimization has been added that speeds up finding the minimum matching +length for patterns containing repeated capturing groups or recursions. + +25. If a pattern contained a back reference to a group whose number was +duplicated as a result of appearing in a (?|...) group, the computation of the +minimum matching length gave a wrong result, which could cause incorrect "no +match" errors. For such patterns, a minimum matching length cannot at present +be computed. + +26. Added a check for integer overflow in conditions (?(<digits>) and +(?(R<digits>). This omission was discovered by Karl Skomski with the LLVM +fuzzer. + +27. Fixed an issue when \p{Any} inside an xclass did not read the current +character. + +28. If pcre2grep was given the -q option with -c or -l, or when handling a +binary file, it incorrectly wrote output to stdout. + +29. The JIT compiler did not restore the control verb head in case of *THEN +control verbs. This issue was found by Karl Skomski with a custom LLVM fuzzer. + +30. The way recursive references such as (?3) are compiled has been re-written +because the old way was the cause of many issues. Now, conversion of the group +number into a pattern offset does not happen until the pattern has been +completely compiled. This does mean that detection of all infinitely looping +recursions is postponed till match time. In the past, some easy ones were +detected at compile time. This re-writing was done in response to yet another +bug found by the LLVM fuzzer. + +31. A test for a back reference to a non-existent group was missing for items +such as \987. This caused incorrect code to be compiled. This issue was found +by Karl Skomski with a custom LLVM fuzzer. + +32. Error messages for syntax errors following \g and \k were giving inaccurate +offsets in the pattern. + +33. Improve the performance of starting single character repetitions in JIT. + +34. 
(*LIMIT_MATCH=<not-a-number>) now gives an error instead of setting the value to 0. + +35. Error messages for syntax errors in *LIMIT_MATCH and *LIMIT_RECURSION now +give the right offset instead of zero. + +36. The JIT compiler should not check repeats after a {0,1} repeat byte code. +This issue was found by Karl Skomski with a custom LLVM fuzzer. + +37. The JIT compiler should restore the control chain for empty possessive +repeats. This issue was found by Karl Skomski with a custom LLVM fuzzer. + +38. A bug which was introduced by the single character repetition optimization +was fixed. + +39. Match limit check added to recursion. This issue was found by Karl Skomski +with a custom LLVM fuzzer. + +40. Arrange for the UTF check in pcre2_match() and pcre2_dfa_match() to look +only at the part of the subject that is relevant when the starting offset is +non-zero. + +41. Improve first character match in JIT with SSE2 on x86. + +42. Fix two assertion failures in JIT. These issues were found by Karl Skomski +with a custom LLVM fuzzer. + +43. Correct the setting of CMAKE_C_FLAGS in CMakeLists.txt (patch from Roy Ivy +III). + +44. Fix bug in RunTest.bat for new test 14, and adjust the script for the added +test (there are now 20 in total). + +45. Fixed a corner case of range optimization in JIT. + +46. Add the ${*MARK} facility to pcre2_substitute(). + +47. Modifier lists in pcre2test were splitting at spaces without the required +commas. + +48. Implemented PCRE2_ALT_VERBNAMES. + +49. Fixed two issues in JIT. These were found by Karl Skomski with a custom +LLVM fuzzer. + +50. The pcre2test program has been extended by adding the #newline_default +command. This has made it possible to run the standard tests when PCRE2 is +compiled with either CR or CRLF as the default newline convention. As part of +this work, the new command was added to several test files and the testing +scripts were modified. 
The pcre2grep tests can now also be run when there is no +LF in the default newline convention. + +51. The RunTest script has been modified so that, when JIT is used and valgrind +is specified, a valgrind suppressions file is set up to ignore "Invalid read of +size 16" errors because these are false positives when the hardware supports +the SSE2 instruction set. + +52. It is now possible to have comment lines amid the subject strings in +pcre2test (and perltest.sh) input. + +53. Implemented PCRE2_USE_OFFSET_LIMIT and pcre2_set_offset_limit(). + +54. Add the null_context modifier to pcre2test so that calling pcre2_compile() +and the matching functions with NULL contexts can be tested. + +55. Implemented PCRE2_SUBSTITUTE_EXTENDED. + +56. In a character class such as [\W\p{Any}] where both a negative-type escape +("not a word character") and a property escape were present, the property +escape was being ignored. + +57. Fixed integer overflow for patterns whose minimum matching length is very, +very large. + +58. Implemented --never-backslash-C. + +59. Change 55 above introduced a bug by which certain patterns provoked the +erroneous error "\ at end of pattern". + +60. The special sequences [[:<:]] and [[:>:]] gave rise to incorrect compiling +errors or other strange effects if compiled in UCP mode. Found with libFuzzer +and AddressSanitizer. + +61. Whitespace at the end of a pcre2test pattern line caused a spurious error +message if there were only single-character modifiers. It should be ignored. + +62. The use of PCRE2_NO_AUTO_CAPTURE could cause incorrect compilation results +or segmentation errors for some patterns. Found with libFuzzer and +AddressSanitizer. + +63. Very long names in (*MARK) or (*THEN) etc. items could provoke a buffer +overflow. + +64. Improve error message for overly-complicated patterns. + +65. Implemented an optional replication feature for patterns in pcre2test, to +make it easier to test long repetitive patterns. 
The tests for 63 above are +converted to use the new feature. + +66. In the POSIX wrapper, if regerror() was given too small a buffer, it could +misbehave. + +67. In pcre2_substitute() in UTF mode, the UTF validity check on the +replacement string was happening before the length setting when the replacement +string was zero-terminated. + +68. In pcre2_substitute() in UTF mode, PCRE2_NO_UTF_CHECK can be set for the +second and subsequent calls to pcre2_match(). + +69. There was no check for integer overflow for a replacement group number in +pcre2_substitute(). An added check for a number greater than the largest group +number in the pattern means this is not now needed. + +70. The PCRE2-specific VERSION condition didn't work correctly if only one +digit was given after the decimal point, or if more than two digits were given. +It now works with one or two digits, and gives a compile time error if more are +given. + +71. In pcre2_substitute() there was the possibility of reading one code unit +beyond the end of the replacement string. + +72. The code for checking a subject's UTF-32 validity for a pattern with a +lookbehind involved an out-of-bounds pointer, which could potentially cause +trouble in some environments. + +73. The maximum lookbehind length was incorrectly calculated for patterns such +as /(?<=(a)(?-1))x/ which have a recursion within a backreference. + +74. Give an error if a lookbehind assertion is longer than 65535 code units. + +75. Give an error in pcre2_substitute() if a match ends before it starts (as a +result of the use of \K). + +76. Check the length of subpattern names and the names in (*MARK:xx) etc. +dynamically to avoid the possibility of integer overflow. + +77. Implement pcre2_set_max_pattern_length() so that programs can restrict the +size of patterns that they are prepared to handle. + +78. (*NO_AUTO_POSSESS) was not working. + +79. 
Adding group information caching improves the speed of compiling when +checking whether a group has a fixed length and/or could match an empty string, +especially when recursion or subroutine calls are involved. However, this +cannot be used when (?| is present in the pattern because the same number may +be used for groups of different sizes. To catch runaway patterns in this +situation, counts have been introduced to the functions that scan for empty +branches or compute fixed lengths. + +80. Allow for the possibility of the size of the nest_save structure not being +a factor of the size of the compiling workspace (it currently is). + +81. Check for integer overflow in minimum length calculation and cap it at +65535. + +82. Small optimizations in code for finding the minimum matching length. + +83. Lock out configuring for EBCDIC with non-8-bit libraries. + +84. Test for error code <= 0 in regerror(). + +85. Check for too many replacements (more than INT_MAX) in pcre2_substitute(). + +86. Avoid the possibility of computing with an out-of-bounds pointer (though +not dereferencing it) while handling lookbehind assertions. + +87. Failure to get memory for the match data in regcomp() is now given as a +regcomp() error instead of waiting for regexec() to pick it up. + +88. In pcre2_substitute(), ensure that CRLF is not split when it is a valid +newline sequence. + +89. Paranoid check in regcomp() for bad error code from pcre2_compile(). + +90. Run test 8 (internal offsets and code sizes) for link sizes 3 and 4 as well +as for link size 2. + +91. Document that JIT has a limit on pattern size, and give more information +about JIT compile failures in pcre2test. + +92. Implement PCRE2_INFO_HASBACKSLASHC. + +93. Re-arrange valgrind support code in pcre2test to avoid spurious reports +with JIT (possibly caused by SSE2?). + +94. Support offset_limit in JIT. + +95. 
A sequence such as [[:punct:]b] that is, a POSIX character class followed +by a single ASCII character in a class item, was incorrectly compiled in UCP +mode. The POSIX class got lost, but only if the single character followed it. + +96. [:punct:] in UCP mode was matching some characters in the range 128-255 +that should not have been matched. + +97. If [:^ascii:] or [:^xdigit:] are present in a non-negated class, all +characters with code points greater than 255 are in the class. When a Unicode +property was also in the class (if PCRE2_UCP is set, escapes such as \w are +turned into Unicode properties), wide characters were not correctly handled, +and could fail to match. + +98. In pcre2test, make the "startoffset" modifier a synonym of "offset", +because it sets the "startoffset" parameter for pcre2_match(). + +99. If PCRE2_AUTO_CALLOUT was set on a pattern that had a (?# comment between +an item and its qualifier (for example, A(?#comment)?B) pcre2_compile() +misbehaved. This bug was found by the LLVM fuzzer. + +100. The error for an invalid UTF pattern string always gave the code unit +offset as zero instead of where the invalidity was found. + +101. Further to 97 above, negated classes such as [^[:^ascii:]\d] were also not +working correctly in UCP mode. + +102. Similar to 99 above, if an isolated \E was present between an item and its +qualifier when PCRE2_AUTO_CALLOUT was set, pcre2_compile() misbehaved. This bug +was found by the LLVM fuzzer. + +103. The POSIX wrapper function regexec() crashed if the option REG_STARTEND +was set when the pmatch argument was NULL. It now returns REG_INVARG. + +104. Allow for up to 32-bit numbers in the ordin() function in pcre2grep. + +105. An empty \Q\E sequence between an item and its qualifier caused +pcre2_compile() to misbehave when auto callouts were enabled. This bug +was found by the LLVM fuzzer. + +106. 
If both PCRE2_ALT_VERBNAMES and PCRE2_EXTENDED were set, and a (*MARK) or +other verb "name" ended with whitespace immediately before the closing +parenthesis, pcre2_compile() misbehaved. Example: /(*:abc )/, but only when +both those options were set. + +107. In a number of places pcre2_compile() was not handling NULL characters +correctly, and pcre2test with the "bincode" modifier was not always correctly +displaying fields containing NULLS: + + (a) Within /x extended #-comments + (b) Within the "name" part of (*MARK) and other *verbs + (c) Within the text argument of a callout + +108. If a pattern that was compiled with PCRE2_EXTENDED started with white +space or a #-type comment that was followed by (?-x), which turns off +PCRE2_EXTENDED, and there was no subsequent (?x) to turn it on again, +pcre2_compile() assumed that (?-x) applied to the whole pattern and +consequently mis-compiled it. This bug was found by the LLVM fuzzer. The fix +for this bug means that a setting of any of the (?imsxJU) options at the start +of a pattern is no longer transferred to the options that are returned by +PCRE2_INFO_ALLOPTIONS. In fact, this was an anachronism that should have +changed when the effects of those options were all moved to compile time. + +109. An escaped closing parenthesis in the "name" part of a (*verb) when +PCRE2_ALT_VERBNAMES was set caused pcre2_compile() to malfunction. This bug +was found by the LLVM fuzzer. + +110. Implemented PCRE2_SUBSTITUTE_UNSET_EMPTY, and updated pcre2test to make it +possible to test it. + +111. "Harden" pcre2test against ridiculously large values in modifiers and +command line arguments. + +112. Implemented PCRE2_SUBSTITUTE_UNKNOWN_UNSET and PCRE2_SUBSTITUTE_OVERFLOW_ +LENGTH. + +113. Fix printing of *MARK names that contain binary zeroes in pcre2test. + + +Version 10.20 30-June-2015 +-------------------------- + +1. Callouts with string arguments have been added. + +2. Assertion code generator in JIT has been optimized. + +3. 
The invalid pattern (?(?C) has a missing assertion condition at the end. The +pcre2_compile() function read past the end of the input before diagnosing an +error. This bug was discovered by the LLVM fuzzer. + +4. Implemented pcre2_callout_enumerate(). + +5. Fix JIT compilation of conditional blocks whose assertion is converted to +(*FAIL). E.g: /(?(?!))/. + +6. The pattern /(?(?!)^)/ caused references to random memory. This bug was +discovered by the LLVM fuzzer. + +7. The assertion (?!) is optimized to (*FAIL). This was not handled correctly +when this assertion was used as a condition, for example (?(?!)a|b). In +pcre2_match() it worked by luck; in pcre2_dfa_match() it gave an incorrect +error about an unsupported item. + +8. For some types of pattern, for example /Z*(|d*){216}/, the auto- +possessification code could take exponential time to complete. A recursion +depth limit of 1000 has been imposed to limit the resources used by this +optimization. This infelicity was discovered by the LLVM fuzzer. + +9. A pattern such as /(*UTF)[\S\V\H]/, which contains a negated special class +such as \S in non-UCP mode, explicit wide characters (> 255) can be ignored +because \S ensures they are all in the class. The code for doing this was +interacting badly with the code for computing the amount of space needed to +compile the pattern, leading to a buffer overflow. This bug was discovered by +the LLVM fuzzer. + +10. A pattern such as /((?2)+)((?1))/ which has mutual recursion nested inside +other kinds of group caused stack overflow at compile time. This bug was +discovered by the LLVM fuzzer. + +11. A pattern such as /(?1)(?#?'){8}(a)/ which had a parenthesized comment +between a subroutine call and its quantifier was incorrectly compiled, leading +to buffer overflow or other errors. This bug was discovered by the LLVM fuzzer. + +12. The illegal pattern /(?(?.*!.*)?)/ was not being diagnosed as missing an +assertion after (?(. 
The code was failing to check the character after (?(?< +for the ! or = that would indicate a lookbehind assertion. This bug was +discovered by the LLVM fuzzer. + +13. A pattern such as /X((?2)()*+){2}+/ which has a possessive quantifier with +a fixed maximum following a group that contains a subroutine reference was +incorrectly compiled and could trigger buffer overflow. This bug was discovered +by the LLVM fuzzer. + +14. Negative relative recursive references such as (?-7) to non-existent +subpatterns were not being diagnosed and could lead to unpredictable behaviour. +This bug was discovered by the LLVM fuzzer. + +15. The bug fixed in 14 was due to an integer variable that was unsigned when +it should have been signed. Some other "int" variables, having been checked, +have either been changed to uint32_t or commented as "must be signed". + +16. A mutual recursion within a lookbehind assertion such as (?<=((?2))((?1))) +caused a stack overflow instead of the diagnosis of a non-fixed length +lookbehind assertion. This bug was discovered by the LLVM fuzzer. + +17. The use of \K in a positive lookbehind assertion in a non-anchored pattern +(e.g. /(?<=\Ka)/) could make pcre2grep loop. + +18. There was a similar problem to 17 in pcre2test for global matches, though +the code there did catch the loop. + +19. If a greedy quantified \X was preceded by \C in UTF mode (e.g. \C\X*), +and a subsequent item in the pattern caused a non-match, backtracking over the +repeated \X did not stop, but carried on past the start of the subject, causing +reference to random memory and/or a segfault. There were also some other cases +where backtracking after \C could crash. This set of bugs was discovered by the +LLVM fuzzer. + +20. The function for finding the minimum length of a matching string could take +a very long time if mutual recursion was present many times in a pattern, for +example, /((?2){73}(?2))((?1))/. A better mutual recursion detection method has +been implemented. 
This infelicity was discovered by the LLVM fuzzer. + +21. Implemented PCRE2_NEVER_BACKSLASH_C. + +22. The feature for string replication in pcre2test could read from freed +memory if the replication required a buffer to be extended, and it was not +working properly in 16-bit and 32-bit modes. This issue was discovered by a +fuzzer: see http://lcamtuf.coredump.cx/afl/. + +23. Added the PCRE2_ALT_CIRCUMFLEX option. + +24. Adjust the treatment of \8 and \9 to be the same as the current Perl +behaviour. + +25. Static linking against the PCRE2 library using the pkg-config module was +failing on missing pthread symbols. + +26. If a group that contained a recursive back reference also contained a +forward reference subroutine call followed by a non-forward-reference +subroutine call, for example /.((?2)(?R)\1)()/, pcre2_compile() failed to +compile correct code, leading to undefined behaviour or an internally detected +error. This bug was discovered by the LLVM fuzzer. + +27. Quantification of certain items (e.g. atomic back references) could cause +incorrect code to be compiled when recursive forward references were involved. +For example, in this pattern: /(?1)()((((((\1++))\x85)+)|))/. This bug was +discovered by the LLVM fuzzer. + +28. A repeated conditional group whose condition was a reference by name caused +a buffer overflow if there was more than one group with the given name. This +bug was discovered by the LLVM fuzzer. + +29. A recursive back reference by name within a group that had the same name as +another group caused a buffer overflow. For example: /(?J)(?'d'(?'d'\g{d}))/. +This bug was discovered by the LLVM fuzzer. + +30. A forward reference by name to a group whose number is the same as the +current group, for example in this pattern: /(?|(\k'Pm')|(?'Pm'))/, caused a +buffer overflow at compile time. This bug was discovered by the LLVM fuzzer. + +31. 
Fix -fsanitize=undefined warnings for left shifts of 1 by 31 (it treats 1 +as an int; fixed by writing it as 1u). + +32. Fix pcre2grep compile when -std=c99 is used with gcc, though it still gives +a warning for "fileno" unless -std=gnu99 is used. + +33. A lookbehind assertion within a set of mutually recursive subpatterns could +provoke a buffer overflow. This bug was discovered by the LLVM fuzzer. + +34. Give an error for an empty subpattern name such as (?''). + +35. Make pcre2test give an error if a pattern that follows #forbid_utf contains +\P, \p, or \X. + +36. The way named subpatterns are handled has been refactored. There is now a +pre-pass over the regex which does nothing other than identify named +subpatterns and count the total captures. This means that information about +named patterns is known before the rest of the compile. In particular, it means +that forward references can be checked as they are encountered. Previously, the +code for handling forward references was contorted and led to several errors in +computing the memory requirements for some patterns, leading to buffer +overflows. + +37. There was no check for integer overflow in subroutine calls such as (?123). + +38. The table entry for \l in EBCDIC environments was incorrect, leading to its +being treated as a literal 'l' instead of causing an error. + +39. If a non-capturing group containing a conditional group that could match +an empty string was repeated, it was not identified as matching an empty string +itself. For example: /^(?:(?(1)x|)+)+$()/. + +40. In an EBCDIC environment, pcretest was mishandling the escape sequences +\a and \e in test subject lines. + +41. In an EBCDIC environment, \a in a pattern was converted to the ASCII +instead of the EBCDIC value. + +42. The handling of \c in an EBCDIC environment has been revised so that it is +now compatible with the specification in Perl's perlebcdic page. + +43. Single character repetition in JIT has been improved.
20-30% speedup +was achieved on certain patterns. + +44. The EBCDIC character 0x41 is a non-breaking space, equivalent to 0xa0 in +ASCII/Unicode. This has now been added to the list of characters that are +recognized as white space in EBCDIC. + +45. When PCRE2 was compiled without Unicode support, the use of \p and \P gave +an error (correctly) when used outside a class, but did not give an error +within a class. + +46. \h within a class was incorrectly compiled in EBCDIC environments. + +47. JIT should return with error when the compiled pattern requires +more stack space than the maximum. + +48. Fixed a memory leak in pcre2grep when a locale is set. + + +Version 10.10 06-March-2015 +--------------------------- + +1. When a pattern is compiled, it remembers the highest back reference so that +when matching, if the ovector is too small, extra memory can be obtained to +use instead. A conditional subpattern whose condition is a check on a capture +having happened, such as, for example in the pattern /^(?:(a)|b)(?(1)A|B)/, is +another kind of back reference, but it was not setting the highest +backreference number. This mattered only if pcre2_match() was called with an +ovector that was too small to hold the capture, and there was no other kind of +back reference (a situation which is probably quite rare). The effect of the +bug was that the condition was always treated as FALSE when the capture could +not be consulted, leading to incorrect behaviour by pcre2_match(). This bug +has been fixed. + +2. Functions for serialization and deserialization of sets of compiled patterns +have been added. + +3. The value that is returned by PCRE2_INFO_SIZE has been corrected to remove +excess code units at the end of the data block that may occasionally occur if +the code for calculating the size over-estimates. This change stops the +serialization code copying uninitialized data, to which valgrind objects.
The +documentation of PCRE2_INFO_SIZE was incorrect in stating that the size did not +include the general overhead. This has been corrected. + +4. All code units in every slot in the table of group names are now set, again +in order to avoid accessing uninitialized data when serializing. + +5. The (*NO_JIT) feature is implemented. + +6. If a bug that caused pcre2_compile() to use more memory than allocated was +triggered when using valgrind, the code in (3) above passed a stupidly large +value to valgrind. This caused a crash instead of an "internal error" return. + +7. A reference to a duplicated named group (either a back reference or a test +for being set in a conditional) that occurred in a part of the pattern where +PCRE2_DUPNAMES was not set caused the amount of memory needed for the pattern +to be incorrectly calculated, leading to overwriting. + +8. A mutually recursive set of back references such as (\2)(\1) caused a +segfault at compile time (while trying to find the minimum matching length). +The infinite loop is now broken (with the minimum length unset, that is, zero). + +9. If an assertion that was used as a condition was quantified with a minimum +of zero, matching went wrong. In particular, if the whole group had unlimited +repetition and could match an empty string, a segfault was likely. The pattern +(?(?=0)?)+ is an example that caused this. Perl allows assertions to be +quantified, but not if they are being used as conditions, so the above pattern +is faulted by Perl. PCRE2 has now been changed so that it also rejects such +patterns. + +10. The error message for an invalid quantifier has been changed from "nothing +to repeat" to "quantifier does not follow a repeatable item". + +11. If a bad UTF string is compiled with NO_UTF_CHECK, it may succeed, but +scanning the compiled pattern in subsequent auto-possessification can get out +of step and lead to an unknown opcode. Previously this could have caused an +infinite loop. 
Now it generates an "internal error" error. This is a tidyup, +not a bug fix; passing bad UTF with NO_UTF_CHECK is documented as having an +undefined outcome. + +12. A UTF pattern containing a "not" match of a non-ASCII character and a +subroutine reference could loop at compile time. Example: /[^\xff]((?1))/. + +13. The locale test (RunTest 3) has been upgraded. It now checks that a locale +that is found in the output of "locale -a" can actually be set by pcre2test +before it is accepted. Previously, in an environment where a locale was listed +but would not set (an example does exist), the test would "pass" without +actually doing anything. Also the fr_CA locale has been added to the list of +locales that can be used. + +14. Fixed a bug in pcre2_substitute(). If a replacement string ended in a +capturing group number without parentheses, the last character was incorrectly +literally included at the end of the replacement string. + +15. A possessive capturing group such as (a)*+ with a minimum repeat of zero +failed to allow the zero-repeat case if pcre2_match() was called with an +ovector too small to capture the group. + +16. Improved error message in pcre2test when setting the stack size (-S) fails. + +17. Fixed two bugs in CMakeLists.txt: (1) Some lines had got lost in the +transfer from PCRE1, meaning that CMake configuration failed if "build tests" +was selected. (2) The file src/pcre2_serialize.c had not been added to the list +of PCRE2 sources, which caused a failure to build pcre2test. + +18. Fixed typo in pcre2_serialize.c (DECL instead of DEFN) that causes problems +only on Windows. + +19. Use binary input when reading back saved serialized patterns in pcre2test. + +20. Added RunTest.bat for running the tests under Windows. + +21. "make distclean" was not removing config.h, a file that may be created for +use with CMake. + +22. 
A pattern such as "((?2){0,1999}())?", which has a group containing a +forward reference repeated a large (but limited) number of times within a +repeated outer group that has a zero minimum quantifier, caused incorrect code +to be compiled, leading to the error "internal error: previously-checked +referenced subpattern not found" when an incorrect memory address was read. +This bug was reported as "heap overflow", discovered by Kai Lu of Fortinet's +FortiGuard Labs. (Added 24-March-2015: CVE-2015-2325 was given to this.) + +23. A pattern such as "((?+1)(\1))/" containing a forward reference subroutine +call within a group that also contained a recursive back reference caused +incorrect code to be compiled. This bug was reported as "heap overflow", +discovered by Kai Lu of Fortinet's FortiGuard Labs. (Added 24-March-2015: +CVE-2015-2326 was given to this.) + +24. Computing the size of the JIT read-only data in advance has been a source +of various issues, and new ones still appear, unfortunately. To fix +existing and future issues, size computation is eliminated from the code, +and replaced by on-demand memory allocation. + +25. A pattern such as /(?i)[A-`]/, where characters in the other case are +adjacent to the end of the range, and the range contained characters with more +than one other case, caused incorrect behaviour when compiled in UTF mode. In +that example, the range a-j was left out of the class. + + +Version 10.00 05-January-2015 +----------------------------- + +Version 10.00 is the first release of PCRE2, a revised API for the PCRE +library. Changes prior to 10.00 are logged in the ChangeLog file for the old +API, up to item 20 for release 8.36. + +The code of the library was heavily revised as part of the new API +implementation. Details of each and every modification were not individually +logged. In addition to the API changes, the following changes were made.
They +are either new functionality, or bug fixes and other noticeable changes of +behaviour that were implemented after the code had been forked. + +1. Including Unicode support at build time is now enabled by default, but it +can optionally be disabled. It is not enabled by default at run time (no +change). + +2. The test program, now called pcre2test, was re-specified and almost +completely re-written. Its input is not compatible with input for pcretest. + +3. Patterns may start with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) to set the +PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART options for every subject line that is +matched by that pattern. + +4. For the benefit of those who use PCRE2 via some other application, that is, +not writing the function calls themselves, it is possible to check the PCRE2 +version by matching a pattern such as /(?(VERSION>=10)yes|no)/ against a +string such as "yesno". + +5. There are case-equivalent Unicode characters whose encodings use different +numbers of code units in UTF-8. U+023A and U+2C65 are one example. (It is +theoretically possible for this to happen in UTF-16 too.) If a backreference to +a group containing one of these characters was greedily repeated, and during +the match a backtrack occurred, the subject might be backtracked by the wrong +number of code units. For example, if /^(\x{23a})\1*(.)/ is matched caselessly +(and in UTF-8 mode) against "\x{23a}\x{2c65}\x{2c65}\x{2c65}", group 2 should +capture the final character, which is the three bytes E2, B1, and A5 in UTF-8. +Incorrect backtracking meant that group 2 captured only the last two bytes. +This bug has been fixed; the new code is slower, but it is used only when the +strings matched by the repetition are not all the same length. + +6. A pattern such as /()a/ was not setting the "first character must be 'a'" +information. This applied to any pattern with a group that matched no +characters, for example: /(?:(?=.)|(? 
0) + { + $line = 0; + $file = shift @ARGV; + + open (IN, $file) || die "Failed to open $file\n"; + + while () + { + $count = 0; + $line++; + if (/^\s*$/) + { + printf "Empty line $line of $file\n"; + $yield = 1; + } + elsif (/^\./) + { + if (!/^\.\s*$| + ^\.B\s+\S| + ^\.TH\s\S| + ^\.SH\s\S| + ^\.SS\s\S| + ^\.TP(?:\s?\d+)?\s*$| + ^\.SM\s*$| + ^\.br\s*$| + ^\.rs\s*$| + ^\.sp\s*$| + ^\.nf\s*$| + ^\.fi\s*$| + ^\.P\s*$| + ^\.PP\s*$| + ^\.\\"(?:\ HREF)?\s*$| + ^\.\\"\sHTML\s\s*$| + ^\.\\"\sHTML\s<\/a>\s*$| + ^\.\\"\s<\/a>\s*$| + ^\.\\"\sJOINSH\s*$| + ^\.\\"\sJOIN\s*$/x + ) + { + printf "Bad control line $line of $file\n"; + $yield = 1; + } + } + elsif (/\\[^ef]|\\f[^IBP]/) + { + printf "Bad backslash in line $line of $file\n"; + $yield = 1; + } + while (/\\f[BI]/g) + { + $count++; + } + while (/\\fP/g) + { + $count--; + } + if ($count != 0) + { + printf "Mismatching formatting in line $line of $file\n"; + $yield = 1; + } + } + + close(IN); + } + +exit $yield; +# End diff --git a/pcre2/CleanTxt b/pcre2/CleanTxt new file mode 100755 index 000000000..1f42519c8 --- /dev/null +++ b/pcre2/CleanTxt @@ -0,0 +1,113 @@ +#! /usr/bin/perl -w + +# Script to take the output of nroff -man and remove all the backspacing and +# the page footers and the screen commands etc so that it is more usefully +# readable online. In fact, in the latest nroff, intermediate footers don't +# seem to be generated any more. + +$blankcount = 0; +$lastwascut = 0; +$firstheader = 1; + +# Input on STDIN; output to STDOUT. + +while () + { + s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m" + s/.\x8//g; # Remove "char, backspace" + + # Handle header lines. Retain only the first one we encounter, but remove + # the blank line that follows. Any others (e.g. at end of document) and the + # following blank line are dropped. 
+ + if (/^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/) + { + if ($firstheader) + { + $firstheader = 0; + print; + $lastprinted = $_; + $lastwascut = 0; + } + $_=; # Remove a blank that follows + next; + } + + # Count runs of empty lines + + if (/^\s*$/) + { + $blankcount++; + $lastwascut = 0; + next; + } + + # If a chunk of lines has been cut out (page footer) and the next line + # has a different indentation, put back one blank line. + + if ($lastwascut && $blankcount < 1 && defined($lastprinted)) + { + ($a) = $lastprinted =~ /^(\s*)/; + ($b) = $_ =~ /^(\s*)/; + $blankcount++ if ($a ne $b); + } + + # We get here only when we have a non-blank line in hand. If it was preceded + # by 3 or more blank lines, read the next 3 lines and see if they are blank. + # If so, remove all 7 lines, and remember that we have just done a cut. + + if ($blankcount >= 3) + { + for ($i = 0; $i < 3; $i++) + { + $next[$i] = ; + $next[$i] = "" if !defined $next[$i]; + $next[$i] =~ s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m" + $next[$i] =~ s/.\x8//g; # Remove "char, backspace" + } + + # Cut out chunks of the form <3 blanks><3 blanks> + + if ($next[0] =~ /^\s*$/ && + $next[1] =~ /^\s*$/ && + $next[2] =~ /^\s*$/) + { + $blankcount -= 3; + $lastwascut = 1; + } + + # Otherwise output the saved blanks, the current, and the next three + # lines. Remember the last printed line. + + else + { + for ($i = 0; $i < $blankcount; $i++) { print "\n"; } + print; + for ($i = 0; $i < 3; $i++) + { + $next[$i] =~ s/.\x8//g; + print $next[$i]; + $lastprinted = $_; + } + $lastwascut = 0; + $blankcount = 0; + } + } + + # This non-blank line is not preceded by 3 or more blank lines. Output + # any blanks there are, and the line. Remember it. Force two blank lines + # before headings. 
+ + else + { + $blankcount = 2 if /^\S/ && !/^Last updated/ && !/^Copyright/ && + defined($lastprinted); + for ($i = 0; $i < $blankcount; $i++) { print "\n"; } + print; + $lastprinted = $_; + $lastwascut = 0; + $blankcount = 0; + } + } + +# End diff --git a/pcre2/Detrail b/pcre2/Detrail new file mode 100755 index 000000000..1c5c7e9ca --- /dev/null +++ b/pcre2/Detrail @@ -0,0 +1,35 @@ +#!/usr/bin/perl + +# This is a script for removing trailing whitespace from lines in files that +# are listed on the command line. + +# This subroutine does the work for one file. + +sub detrail { +my($file) = $_[0]; +my($changed) = 0; +open(IN, "$file") || die "Can't open $file for input"; +@lines = ; +close(IN); +foreach (@lines) + { + if (/\s+\n$/) + { + s/\s+\n$/\n/; + $changed = 1; + } + } +if ($changed) + { + open(OUT, ">$file") || die "Can't open $file for output"; + print OUT @lines; + close(OUT); + } +} + +# This is the main program + +$, = ""; # Output field separator +for ($i = 0; $i < @ARGV; $i++) { &detrail($ARGV[$i]); } + +# End diff --git a/pcre2/HACKING b/pcre2/HACKING new file mode 100644 index 000000000..20faf8f47 --- /dev/null +++ b/pcre2/HACKING @@ -0,0 +1,830 @@ +Technical Notes about PCRE2 +--------------------------- + +These are very rough technical notes that record potentially useful information +about PCRE2 internals. PCRE2 is a library based on the original PCRE library, +but with a revised (and incompatible) API. To avoid confusion, the original +library is referred to as PCRE1 below. For information about testing PCRE2, see +the pcre2test documentation and the comment at the head of the RunTest file. + +PCRE1 releases were up to 8.3x when PCRE2 was developed, and later bug fix +releases remain in the 8.xx series. PCRE2 releases started at 10.00 to avoid +confusion with PCRE1. + + +Historical note 1 +----------------- + +Many years ago I implemented some regular expression functions to an algorithm +suggested by Martin Richards. 
The rather simple patterns were not Unix-like in +form, and were quite restricted in what they could do by comparison with Perl. +The interesting part about the algorithm was that the amount of space required +to hold the compiled form of an expression was known in advance. The code to +apply an expression did not operate by backtracking, as the original Henry +Spencer code and current PCRE2 and Perl code does, but instead checked all +possibilities simultaneously by keeping a list of current states and checking +all of them as it advanced through the subject string. In the terminology of +Jeffrey Friedl's book, it was a "DFA algorithm", though it was not a +traditional Finite State Machine (FSM). When the pattern was all used up, all +remaining states were possible matches, and the one matching the longest subset +of the subject string was chosen. This did not necessarily maximize the +individual wild portions of the pattern, as is expected in Unix and Perl-style +regular expressions. + + +Historical note 2 +----------------- + +By contrast, the code originally written by Henry Spencer (which was +subsequently heavily modified for Perl) compiles the expression twice: once in +a dummy mode in order to find out how much store will be needed, and then for +real. (The Perl version probably doesn't do this any more; I'm talking about +the original library.) The execution function operates by backtracking and +maximizing (or, optionally, minimizing, in Perl) the amount of the subject that +matches individual wild portions of the pattern. This is an "NFA algorithm" in +Friedl's terminology. + + +OK, here's the real stuff +------------------------- + +For the set of functions that formed the original PCRE1 library in 1997 (which +are unrelated to those mentioned above), I tried at first to invent an +algorithm that used an amount of store bounded by a multiple of the number of +characters in the pattern, to save on compiling time. 
However, because of the +greater complexity in Perl regular expressions, I couldn't do this, even though +the then current Perl 5.004 patterns were much simpler than those supported +nowadays. In any case, a first pass through the pattern is helpful for other +reasons. + + +Support for 16-bit and 32-bit data strings +------------------------------------------- + +The PCRE2 library can be compiled in any combination of 8-bit, 16-bit or 32-bit +modes, creating up to three different libraries. In the description that +follows, the word "short" is used for a 16-bit data quantity, and the phrase +"code unit" is used for a quantity that is a byte in 8-bit mode, a short in +16-bit mode and a 32-bit word in 32-bit mode. The names of PCRE2 functions are +given in generic form, without the _8, _16, or _32 suffix. + + +Computing the memory requirement: how it was +-------------------------------------------- + +Up to and including release 6.7, PCRE1 worked by running a very degenerate +first pass to calculate a maximum memory requirement, and then a second pass to +do the real compile - which might use a bit less than the predicted amount of +memory. The idea was that this would turn out faster than the Henry Spencer +code because the first pass is degenerate and the second pass can just store +stuff straight into memory, which it knows is big enough. + + +Computing the memory requirement: how it is +------------------------------------------- + +By the time I was working on a potential 6.8 release, the degenerate first pass +had become very complicated and hard to maintain. Indeed one of the early +things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then +I had a flash of inspiration as to how I could run the real compile function in +a "fake" mode that enables it to compute how much memory it would need, while +in most cases only ever using a small amount of working memory, and without too +many tests of the mode that might slow it down. 
So I refactored the compiling +functions to work this way. This got rid of about 600 lines of source and made +further maintenance and development easier. As this was such a major change, I +never released 6.8, instead upping the number to 7.0 (other quite major changes +were also present in the 7.0 release). + +A side effect of this work was that the previous limit of 200 on the nesting +depth of parentheses was removed. However, there was a downside: compiling ran +more slowly than before (30% or more, depending on the pattern) because it now +did a full analysis of the pattern. My hope was that this would not be a big +issue, and in the event, nobody has commented on it. + +At release 8.34, a limit on the nesting depth of parentheses was re-introduced +(default 250, settable at build time) so as to put a limit on the amount of +system stack used by the compile function, which uses recursive function calls +for nested parenthesized groups. This is a safety feature for environments with +small stacks where the patterns are provided by users. + + +Yet another pattern scan +------------------------ + +History repeated itself for PCRE2 release 10.20. A number of bugs relating to +named subpatterns had been discovered by fuzzers. Most of these were related to +the handling of forward references when it was not known if the named group was +unique. (References to non-unique names use a different opcode and more +memory.) The use of duplicate group numbers (the (?| facility) also caused +issues. + +To get around these problems I adopted a new approach by adding a third pass +over the pattern (really a "pre-pass"), which did nothing other than identify +all the named subpatterns and their corresponding group numbers. This means +that the actual compile (both the memory-computing dummy run and the real +compile) has full knowledge of group names and numbers throughout. Several +dozen lines of messy code were eliminated, though the new pre-pass was not +short. 
In particular, parsing and skipping over [] classes is complicated. + +While working on 10.22 I realized that I could simplify yet again by moving +more of the parsing into the pre-pass, thus avoiding doing it in two places, so +after 10.22 was released, the code underwent yet another big refactoring. This +is how it is from 10.23 onwards: + +The function called parse_regex() scans the pattern characters, parsing them +into literal data and meta characters. It converts escapes such as \x{123} +into literals, handles \Q...\E, and skips over comments and non-significant +white space. The result of the scanning is put into a vector of 32-bit unsigned +integers. Values less than 0x80000000 are literal data. Higher values represent +meta-characters. The top 16-bits of such values identify the meta-character, +and these are given names such as META_CAPTURE. The lower 16-bits are available +for data, for example, the capturing group number. The only situation in which +literal data values greater than 0x7fffffff can appear is when the 32-bit +library is running in non-UTF mode. This is handled by having a special +meta-character that is followed by the 32-bit data value. + +The size of the parsed pattern vector, when auto-callouts are not enabled, is +bounded by the length of the pattern (with one exception). The code is written +so that each item in the pattern uses no more vector elements than the number +of code units in the item itself. The exception is the aforementioned large +32-bit number handling. For this reason, 32-bit non-UTF patterns are scanned in +advance to check for such values. When auto-callouts are enabled, the generous +assumption is made that there will be a callout for each pattern code unit +(which of course is only actually true if all code units are literals) plus one +at the end. There is a default parsed pattern vector on the system stack, but +if this is not big enough, heap memory is used. 
+ +As before, the actual compiling function is run twice, the first time to +determine the amount of memory needed for the final compiled pattern. It +now processes the parsed pattern vector, not the pattern itself, although some +of the parsed items refer to strings in the pattern - for example, group +names. As escapes and comments have already been processed, the code is a bit +simpler than before. + +Most errors can be diagnosed during the parsing scan. For those that cannot +(for example, "lookbehind assertion is not fixed length"), the parsed code +contains offsets into the pattern so that the actual compiling code can +report where errors are. + + +The elements of the parsed pattern vector +----------------------------------------- + +The word "offset" below means a code unit offset into the pattern. When +PCRE2_SIZE (which is usually size_t) is no bigger than uint32_t, an offset is +stored in a single parsed pattern element. Otherwise (typically on 64-bit +systems) it occupies two elements. The following meta items occupy just one +element, with no data: + +META_ACCEPT (*ACCEPT) +META_ASTERISK * +META_ASTERISK_PLUS *+ +META_ASTERISK_QUERY *? +META_ATOMIC (?> start of atomic group +META_CIRCUMFLEX ^ metacharacter +META_CLASS [ start of non-empty class +META_CLASS_EMPTY [] empty class - only with PCRE2_ALLOW_EMPTY_CLASS +META_CLASS_EMPTY_NOT [^] negative empty class - ditto +META_CLASS_END ] end of non-empty class +META_CLASS_NOT [^ start non-empty negative class +META_COMMIT (*COMMIT) +META_COND_ASSERT (?(?assertion) +META_DOLLAR $ metacharacter +META_DOT . metacharacter +META_END End of pattern (this value is 0x80000000) +META_FAIL (*FAIL) +META_KET ) closing parenthesis +META_LOOKAHEAD (?= start of lookahead +META_LOOKAHEAD_NA (*napla: start of non-atomic lookahead +META_LOOKAHEADNOT (?! start of negative lookahead +META_NOCAPTURE (?: no capture parens +META_PLUS + +META_PLUS_PLUS ++ +META_PLUS_QUERY +? +META_PRUNE (*PRUNE) - no argument +META_QUERY ? 
+META_QUERY_PLUS ?+ +META_QUERY_QUERY ?? +META_RANGE_ESCAPED hyphen in class range with at least one escape +META_RANGE_LITERAL hyphen in class range defined literally +META_SKIP (*SKIP) - no argument +META_THEN (*THEN) - no argument + +The two RANGE values occur only in character classes. They are positioned +between two literals that define the start and end of the range. In an EBCDIC +environment it is necessary to know whether either of the range values was +specified as an escape. In an ASCII/Unicode environment the distinction is not +relevant. + +The following have data in the lower 16 bits, and may be followed by other data +elements: + +META_ALT | alternation +META_BACKREF back reference +META_CAPTURE start of capturing group +META_ESCAPE non-literal escape sequence +META_RECURSE recursion call + +If the data for META_ALT is non-zero, it is inside a lookbehind, and the data +is the length of its branch, for which OP_REVERSE must be generated. + +META_BACKREF, META_CAPTURE, and META_RECURSE have the capture group number as +their data in the lower 16 bits of the element. + +META_BACKREF is followed by an offset if the back reference group number is 10 +or more. The offsets of the first occurrences of references to groups whose +numbers are less than 10 are put in cb->small_ref_offset[] (only the first +occurrence is useful). On 64-bit systems this avoids using more than two parsed +pattern elements for items such as \3. The offset is used when an error occurs +because the reference is to a non-existent group. + +META_RECURSE is always followed by an offset, for use in error messages. + +META_ESCAPE has an ESC_xxx value as its data. For ESC_P and ESC_p, the next +element contains the 16-bit type and data property values, packed together. +ESC_g and ESC_k are used only for named references - numerical ones are turned +into META_RECURSE or META_BACKREF as appropriate. ESC_g and ESC_k are followed +by a length and an offset into the pattern to specify the name. 
+ +The following have one data item that follows in the next vector element: + +META_BIGVALUE Next is a literal >= META_END +META_OPTIONS (?i) and friends (data is new option bits) +META_POSIX POSIX class item (data identifies the class) +META_POSIX_NEG negative POSIX class item (ditto) + +The following are followed by a length element, then a number of character code +values (which should match with the length): + +META_MARK (*MARK:xxxx) +META_COMMIT_ARG (*COMMIT:xxxx) +META_PRUNE_ARG (*PRUNE:xxx) +META_SKIP_ARG (*SKIP:xxxx) +META_THEN_ARG (*THEN:xxxx) + +The following are followed by a length element, then an offset in the pattern +that identifies the name: + +META_COND_NAME (?(<name>) or (?('name') or (?(name) +META_COND_RNAME (?(R&name) +META_COND_RNUMBER (?(Rdigits) +META_RECURSE_BYNAME (?&name) +META_BACKREF_BYNAME \k'name' + +META_COND_RNUMBER is used for names that start with R and continue with digits, +because this is an ambiguous case. It could be a back reference to a group with +that name, or it could be a recursion test on a numbered group. + +This one is followed by an offset, for use in error messages, then a number: + +META_COND_NUMBER (?([+-]digits) + +The following is followed just by an offset, for use in error messages: + +META_COND_DEFINE (?(DEFINE) + +The following are also followed just by an offset, but also the lower 16 bits +of the main word contain the length of the first branch of the lookbehind +group; this is used when generating OP_REVERSE for that branch. + +META_LOOKBEHIND (?<= start of lookbehind +META_LOOKBEHIND_NA (*naplb: start of non-atomic lookbehind +META_LOOKBEHINDNOT (?<! start of negative lookbehind + +The following is followed by three elements: the first is 0 for '>' and 1 for '>='; +the next two are the major and minor numbers: + +META_COND_VERSION (?(VERSION<op>x.y) + +Callouts are converted into one of two items: + +META_CALLOUT_NUMBER (?C with numerical argument +META_CALLOUT_STRING (?C with string argument + +In both cases, the next two elements contain the offset and length of the next +item in the pattern. 
Then there is either one callout number, or a length and +an offset for the string argument. The length includes both delimiters. + + +Traditional matching function +----------------------------- + +The "traditional", and original, matching function is called pcre2_match(), and +it implements an NFA algorithm, similar to the original Henry Spencer algorithm +and the way that Perl works. This is not surprising, since it is intended to be +as compatible with Perl as possible. This is the function most users of PCRE2 +will use most of the time. If PCRE2 is compiled with just-in-time (JIT) +support, and studying a compiled pattern with JIT is successful, the JIT code +is run instead of the normal pcre2_match() code, but the result is the same. + + +Supplementary matching function +------------------------------- + +There is also a supplementary matching function called pcre2_dfa_match(). This +implements a DFA matching algorithm that searches simultaneously for all +possible matches that start at one point in the subject string. (Going back to +my roots: see Historical Note 1 above.) This function interprets the same +compiled pattern data as pcre2_match(); however, not all the facilities are +available, and those that are do not always work in quite the same way. See the +user documentation for details. + +The algorithm that is used for pcre2_dfa_match() is not a traditional FSM, +because it may have a number of states active at one time. More work would be +needed at compile time to produce a traditional FSM where only one state is +ever active at once. I believe some other regex matchers work this way. JIT +support is not available for this kind of matching. + + +Changeable options +------------------ + +The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL, and +others) may be changed in the middle of patterns by items such as (?i). Their +processing is handled entirely at compile time by generating different opcodes +for the different settings. 
The runtime functions do not need to keep track of +an option's state. + +PCRE2_DUPNAMES, PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE +are tracked and processed during the parsing pre-pass. The others are handled +from META_OPTIONS items during the main compile phase. + + +Format of compiled patterns +--------------------------- + +The compiled form of a pattern is a vector of unsigned code units (bytes in +8-bit mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing +items of variable length. The first code unit in an item contains an opcode, +and the length of the item is either implicit in the opcode or contained in the +data that follows it. + +In many cases listed below, LINK_SIZE data values are specified for offsets +within the compiled pattern. LINK_SIZE always specifies a number of bytes. The +default value for LINK_SIZE is 2, except for the 32-bit library, where it can +only be 4. The 8-bit library can be compiled to use 3-byte or 4-byte values, +and the 16-bit library can be compiled to use 4-byte values, though this +impairs performance. Specifying a LINK_SIZE larger than 2 for these libraries is +necessary only when patterns whose compiled length is greater than 65535 code +units are going to be processed. When a LINK_SIZE value uses more than one code +unit, the most significant unit is first. + +In this description, we assume the "normal" compilation options. Data values +that are counts (e.g. quantifiers) are always two bytes long in 8-bit mode +(most significant byte first), and one code unit in 16-bit and 32-bit modes. 
+ + +Opcodes with no following data +------------------------------ + +These items are all just one unit long: + + OP_END end of pattern + OP_ANY match any one character other than newline + OP_ALLANY match any one character, including newline + OP_ANYBYTE match any single code unit, even in UTF-8/16 mode + OP_SOD match start of data: \A + OP_SOM start of match (subject + offset): \G + OP_SET_SOM set start of match (\K) + OP_CIRC ^ (start of data) + OP_CIRCM ^ multiline mode (start of data or after newline) + OP_NOT_WORD_BOUNDARY \B + OP_WORD_BOUNDARY \b + OP_NOT_DIGIT \D + OP_DIGIT \d + OP_NOT_HSPACE \H + OP_HSPACE \h + OP_NOT_WHITESPACE \S + OP_WHITESPACE \s + OP_NOT_VSPACE \V + OP_VSPACE \v + OP_NOT_WORDCHAR \W + OP_WORDCHAR \w + OP_EODN match end of data or newline at end: \Z + OP_EOD match end of data: \z + OP_DOLL $ (end of data, or before final newline) + OP_DOLLM $ multiline mode (end of data or before newline) + OP_EXTUNI match an extended Unicode grapheme cluster + OP_ANYNL match any Unicode newline sequence + + OP_ASSERT_ACCEPT ) + OP_ACCEPT ) These are Perl 5.10's "backtracking control + OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing + OP_FAIL ) parentheses, it may be preceded by one or more + OP_PRUNE ) OP_CLOSE, each followed by a number that + OP_SKIP ) indicates which parentheses must be closed. + OP_THEN ) + +OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion. +This ends the assertion, not the entire pattern match. The assertion (?!) is +always optimized to OP_FAIL. + +OP_ALLANY is used for '.' when PCRE2_DOTALL is set. It is also used for \C in +non-UTF modes and in UTF-32 mode (since one code unit still equals one +character). Another use is for [^] when empty classes are permitted +(PCRE2_ALLOW_EMPTY_CLASS is set). + + +Backtracking control verbs +-------------------------- + +Verbs with no arguments generate opcodes with no following data (as listed +in the section above). 
+ +(*MARK:NAME) generates OP_MARK followed by the mark name, preceded by a +length in one code unit, and followed by a binary zero. The name length is +limited by the size of the code unit. + +(*ACCEPT:NAME) and (*FAIL:NAME) are compiled as (*MARK:NAME)(*ACCEPT) and +(*MARK:NAME)(*FAIL) respectively. + +For (*COMMIT:NAME), (*PRUNE:NAME), (*SKIP:NAME), and (*THEN:NAME), the opcodes +OP_COMMIT_ARG, OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the +name following in the same format as for OP_MARK. + + +Matching literal characters +--------------------------- + +The OP_CHAR opcode is followed by a single character that is to be matched +casefully. For caseless matching of characters that have at most two +case-equivalent code points, OP_CHARI is used. In UTF-8 or UTF-16 modes, the +character may be more than one code unit long. In UTF-32 mode, characters are +always exactly one code unit long. + +If there is only one character in a character class, OP_CHAR or OP_CHARI is +used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is, +for something like [^a]). + +Caseless matching (positive or negative) of characters that have more than two +case-equivalent code points (which is possible only in UTF mode) is handled by +compiling a Unicode property item (see below), with the pseudo-property +PT_CLIST. The value of this property is an offset in a vector called +"ucd_caseless_sets" which identifies the start of a short list of equivalent +characters, terminated by the value NOTACHAR (0xffffffff). 
+ + +Repeating single characters +--------------------------- + +The common repeats (*, +, ?), when applied to a single character, use the +following opcodes, which come in caseful and caseless versions: + + Caseful Caseless + OP_STAR OP_STARI + OP_MINSTAR OP_MINSTARI + OP_POSSTAR OP_POSSTARI + OP_PLUS OP_PLUSI + OP_MINPLUS OP_MINPLUSI + OP_POSPLUS OP_POSPLUSI + OP_QUERY OP_QUERYI + OP_MINQUERY OP_MINQUERYI + OP_POSQUERY OP_POSQUERYI + +Each opcode is followed by the character that is to be repeated. In ASCII or +UTF-32 modes, these are two-code-unit items; in UTF-8 or UTF-16 modes, the +length is variable. Those with "MIN" in their names are the minimizing +versions. Those with "POS" in their names are possessive versions. Other kinds +of repeat make use of these opcodes: + + Caseful Caseless + OP_UPTO OP_UPTOI + OP_MINUPTO OP_MINUPTOI + OP_POSUPTO OP_POSUPTOI + OP_EXACT OP_EXACTI + +Each of these is followed by a count and then the repeated character. The count +is two bytes long in 8-bit mode (most significant byte first), or one code unit +in 16-bit and 32-bit modes. + +OP_UPTO matches from 0 to the given number. A repeat with a non-zero minimum +and a fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or +OP_MINUPTO or OP_POSUPTO). + +Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI, +etc.) are used for repeated, negated, single-character classes such as [^a]*. +The normal single-character opcodes (OP_STAR, etc.) are used for repeated +positive single-character classes. + + +Repeating character types +------------------------- + +Repeats of things like \d are done exactly as for single characters, except +that instead of a character, the opcode for the type (e.g. OP_DIGIT) is stored +in the next code unit. 
The opcodes are: + + OP_TYPESTAR + OP_TYPEMINSTAR + OP_TYPEPOSSTAR + OP_TYPEPLUS + OP_TYPEMINPLUS + OP_TYPEPOSPLUS + OP_TYPEQUERY + OP_TYPEMINQUERY + OP_TYPEPOSQUERY + OP_TYPEUPTO + OP_TYPEMINUPTO + OP_TYPEPOSUPTO + OP_TYPEEXACT + + +Match by Unicode property +------------------------- + +OP_PROP and OP_NOTPROP are used for positive and negative matches of a +character by testing its Unicode property (the \p and \P escape sequences). +Each is followed by two code units that encode the desired property as a type +and a value. The types are a set of #defines of the form PT_xxx, and the values +are enumerations of the form ucp_xx, defined in the pcre2_ucp.h source file. +The value is relevant only for PT_GC (General Category), PT_PC (Particular +Category), PT_SC (Script), and the pseudo-property PT_CLIST, which is used to +identify a list of case-equivalent characters when there are three or more. + +Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by +three code units: OP_PROP or OP_NOTPROP, and then the desired property type and +value. + + +Character classes +----------------- + +If there is only one character in a class, OP_CHAR or OP_CHARI is used for a +positive class, and OP_NOT or OP_NOTI for a negative one (that is, for +something like [^a]), except when caselessly matching a character that has more +than two case-equivalent code points (which can happen only in UTF mode). In +this case a Unicode property item is used, as described above in "Matching +literal characters". + +A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated, +negated, single-character classes. The normal single-character opcodes +(OP_STAR, etc.) are used for repeated positive single-character classes. + +When there is more than one character in a class, and all the code points are +less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a +negative one. 
In either case, the opcode is followed by a 32-byte (16-short, +8-word) bit map containing a 1 bit for every character that is acceptable. The +bits are counted from the least significant end of each unit. In caseless mode, +bits for both cases are set. + +The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8 and +16-bit and 32-bit modes, subject characters with values greater than 255 can be +handled correctly. For OP_CLASS they do not match, whereas for OP_NCLASS they +do. + +For classes containing characters with values greater than 255 or that contain +\p or \P, OP_XCLASS is used. It optionally uses a bit map if any acceptable +code points are less than 256, followed by a list of pairs (for a range) and/or +single characters and/or properties. In caseless mode, all equivalent +characters are explicitly listed. + +OP_XCLASS is followed by a LINK_SIZE value containing the total length of the +opcode and its data. This is followed by a code unit containing flag bits: +XCL_NOT indicates that this is a negative class, and XCL_MAP indicates that a +bit map is present. There follows the bit map, if XCL_MAP is set, and then a +sequence of items coded as follows: + + XCL_END marks the end of the list + XCL_SINGLE one character follows + XCL_RANGE two characters follow + XCL_PROP a Unicode property (type, value) follows + XCL_NOTPROP a Unicode property (type, value) follows + +If a range starts with a code point less than 256 and ends with one greater +than 255, it is split into two ranges, with characters less than 256 being +indicated in the bit map, and the rest with XCL_RANGE. + +When XCL_NOT is set, the bit map, if present, contains bits for characters that +are allowed (exactly as for OP_NCLASS), but the list of items that follow it +specifies characters and properties that are not allowed. 
+ + +Back references +--------------- + +OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the +reference number when the reference is to a unique capturing group (either by +number or by name). When named groups are used, there may be more than one +group with the same name. In this case, a reference to such a group by name +generates OP_DNREF or OP_DNREFI. These are followed by two counts: the index +(not the byte offset) in the group name table of the first entry for the +required name, followed by the number of groups with the same name. The +matching code can then search for the first one that is set. + + +Repeating character classes and back references +----------------------------------------------- + +Single-character classes are handled specially (see above). This section +applies to other classes and also to back references. In both cases, the repeat +information follows the base item. The matching code looks at the following +opcode to see if it is one of these: + + OP_CRSTAR + OP_CRMINSTAR + OP_CRPOSSTAR + OP_CRPLUS + OP_CRMINPLUS + OP_CRPOSPLUS + OP_CRQUERY + OP_CRMINQUERY + OP_CRPOSQUERY + OP_CRRANGE + OP_CRMINRANGE + OP_CRPOSRANGE + +All but the last three are single-code-unit items, with no data. The range +opcodes are followed by the minimum and maximum repeat counts. + + +Brackets and alternation +------------------------ + +A pair of non-capturing round brackets is wrapped round each expression at +compile time, so alternation always happens in the context of brackets. + +[Note for North Americans: "bracket" to some English speakers, including +myself, can be round, square, curly, or pointy. Hence this usage rather than +"parentheses".] + +Non-capturing brackets use the opcode OP_BRA, capturing brackets use OP_CBRA. A +bracket opcode is followed by a LINK_SIZE value which gives the offset to the +next alternative OP_ALT or, if there aren't any branches, to the terminating +opcode. 
Each OP_ALT is followed by a LINK_SIZE value giving the offset to the +next one, or to the final opcode. For capturing brackets, the bracket number is +a count that immediately follows the offset. + +There are several opcodes that mark the end of a subpattern group. OP_KET is +used for subpatterns that do not repeat indefinitely, OP_KETRMIN and +OP_KETRMAX are used for indefinite repetitions, minimally or maximally +respectively, and OP_KETRPOS for possessive repetitions (see below for more +details). All four are followed by a LINK_SIZE value giving (as a positive +number) the offset back to the matching bracket opcode. + +If a subpattern is quantified such that it is permitted to match zero times, it +is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are +single-unit opcodes that tell the matcher that skipping the following +subpattern entirely is a valid match. In the case of the first two, not +skipping the pattern is also valid (greedy and non-greedy). The third is used +when a pattern has the quantifier {0,0}. It cannot be entirely discarded, +because it may be called as a subroutine from elsewhere in the pattern. + +A subpattern with an indefinite maximum repetition is replicated in the +compiled data its minimum number of times (or once with OP_BRAZERO if the +minimum is zero), with the final copy terminating with OP_KETRMIN or OP_KETRMAX +as appropriate. + +A subpattern with a bounded maximum repetition is replicated in a nested +fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO +before each replication after the minimum, so that, for example, (abc){2,5} is +compiled as (abc)(abc)((abc)((abc)(abc)?)?)?, except that each bracketed group +has the same number. + +When a repeated subpattern has an unbounded upper limit, it is checked to see +whether it could match an empty string. If this is the case, the opcode in the +final replication is changed to OP_SBRA or OP_SCBRA. 
This tells the matcher +that it needs to check for matching an empty string when it hits OP_KETRMIN or +OP_KETRMAX, and if so, to break the loop. + + +Possessive brackets +------------------- + +When a repeated group (capturing or non-capturing) is marked as possessive by +the "+" notation, e.g. (abc)++, different opcodes are used. Their names all +have POS on the end, e.g. OP_BRAPOS instead of OP_BRA and OP_SCBRAPOS instead +of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum +repetition is zero, the group is preceded by OP_BRAPOSZERO. + + +Once-only (atomic) groups +------------------------- + +These are just like other subpatterns, but they start with the opcode OP_ONCE. +The check for matching an empty string in an unbounded repeat is handled +entirely at runtime, so there is just this one opcode for atomic groups. + + +Assertions +---------- + +Forward assertions are also just like other subpatterns, but starting with one +of the opcodes OP_ASSERT, OP_ASSERT_NA (non-atomic assertion), or +OP_ASSERT_NOT. Backward assertions use the opcodes OP_ASSERTBACK, +OP_ASSERTBACK_NA, and OP_ASSERTBACK_NOT, and the first opcode inside the +assertion is OP_REVERSE, followed by a count of the number of characters to +move back the pointer in the subject string. In ASCII or UTF-32 mode, the count +is also the number of code units, but in UTF-8/16 mode each character may +occupy more than one code unit. A separate count is present in each alternative +of a lookbehind assertion, allowing each branch to have a different (but fixed) +length. + + +Conditional subpatterns +----------------------- + +These are like other subpatterns, but they start with the opcode OP_COND, or +OP_SCOND for one that might match an empty string in an unbounded repeat. 
+ +If the condition is a back reference, this is stored at the start of the +subpattern using the opcode OP_CREF followed by a count containing the +reference number, provided that the reference is to a unique capturing group. +If the reference was by name and there is more than one group with that name, +OP_DNCREF is used instead. It is followed by two counts: the index in the group +names table, and the number of groups with the same name. This allows the +matcher to check if any group with the given name is set. + +If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of +group x" (coded as "(?(Rx)"), the group number is stored at the start of the +subpattern using the opcode OP_RREF (with a value of RREF_ANY (0xffff) for "the +whole pattern") or OP_DNRREF (with data as for OP_DNCREF). + +For a DEFINE condition, OP_FALSE is used (with no associated data). During +compilation, however, a DEFINE condition is coded as OP_DEFINE so that, when +the conditional group is complete, there can be a check to ensure that it +contains only one top-level branch. Once this has happened, the opcode is +changed to OP_FALSE, so the matcher never sees OP_DEFINE. + +There is a special PCRE2-specific condition of the form (VERSION[>]=x.y), which +tests the PCRE2 version number. This compiles into one of the opcodes OP_TRUE +or OP_FALSE. + +If a condition is not a back reference, recursion test, DEFINE, or VERSION, it +must start with a parenthesized atomic assertion, whose opcode normally +immediately follows OP_COND or OP_SCOND. However, if automatic callouts are +enabled, a callout is inserted immediately before the assertion. It is also +possible to insert a manual callout at this point. Only assertion conditions +may have callouts preceding the condition. + +A condition that is the negative assertion (?!) is optimized to OP_FAIL in all +parts of the pattern, so this is another opcode that may appear as a condition. +It is treated the same as OP_FALSE. 
+ + +Recursion +--------- + +Recursion either matches the current pattern, or some subexpression. The opcode +OP_RECURSE is followed by a LINK_SIZE value that is the offset to the starting +bracket from the start of the whole pattern. OP_RECURSE is also used for +"subroutine" calls, even though they are not strictly a recursion. Up till +release 10.30 recursions were treated as atomic groups, making them +incompatible with Perl (but PCRE had them well before Perl did). From 10.30, +backtracking into recursions is supported. + +Repeated recursions used to be wrapped inside OP_ONCE brackets, which not only +forced no backtracking, but also allowed repetition to be handled as for other +bracketed groups. From 10.30 onwards, repeated recursions are duplicated for +their minimum repetitions, and then wrapped in non-capturing brackets for the +remainder. For example, (?1){3} is treated as (?1)(?1)(?1), and (?1){2,4} is +treated as (?1)(?1)(?:(?1)){0,2}. + + +Callouts +-------- + +A callout may have either a numerical argument or a string argument. These use +OP_CALLOUT or OP_CALLOUT_STR, respectively. In each case these are followed by +two LINK_SIZE values giving the offset in the pattern string to the start of +the following item, and another count giving the length of this item. These +values make it possible for pcre2test to output useful tracing information +using callouts. + +In the case of a numeric callout, after these two values there is a single code +unit containing the callout number, in the range 0-255, with 255 being used for +callouts that are automatically inserted as a result of the PCRE2_AUTO_CALLOUT +option. 
Thus, this opcode item is of fixed length: + + [OP_CALLOUT] [PATTERN_OFFSET] [PATTERN_LENGTH] [NUMBER] + +For callouts with string arguments, OP_CALLOUT_STR has three more data items: +a LINK_SIZE value giving the complete length of the entire opcode item, a +LINK_SIZE item containing the offset within the pattern string to the start of +the string argument, and the string itself, preceded by its starting delimiter +and followed by a binary zero. When a callout function is called, a pointer to +the actual string is passed, but the delimiter can be accessed as string[-1] if +the application needs it. In the 8-bit library, the callout in /X(?C'abc')Y/ is +compiled as the following bytes (decimal numbers represent binary values): + + [OP_CALLOUT_STR] [0] [10] [0] [1] [0] [14] [0] [5] ['] [a] [b] [c] [0] + -------- ------- -------- ------- + | | | | + ------- LINK_SIZE items ------ + +Opcode table checking +--------------------- + +The last opcode that is defined in pcre2_internal.h is OP_TABLE_LENGTH. This is +not a real opcode, but is used to check at compile time that tables indexed by +opcode are the correct length, in order to catch updating errors. + +Philip Hazel +12 July 2019 diff --git a/pcre2/INSTALL b/pcre2/INSTALL new file mode 100644 index 000000000..8865734f8 --- /dev/null +++ b/pcre2/INSTALL @@ -0,0 +1,368 @@ +Installation Instructions +************************* + + Copyright (C) 1994-1996, 1999-2002, 2004-2016 Free Software +Foundation, Inc. + + Copying and distribution of this file, with or without modification, +are permitted in any medium without royalty provided the copyright +notice and this notice are preserved. This file is offered as-is, +without warranty of any kind. + +Basic Installation +================== + + Briefly, the shell command './configure && make && make install' +should configure, build, and install this package. 
The following +more-detailed instructions are generic; see the 'README' file for +instructions specific to this package. Some packages provide this +'INSTALL' file but do not implement all of the features documented +below. The lack of an optional feature in a given package is not +necessarily a bug. More recommendations for GNU packages can be found +in *note Makefile Conventions: (standards)Makefile Conventions. + + The 'configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. It uses +those values to create a 'Makefile' in each directory of the package. +It may also create one or more '.h' files containing system-dependent +definitions. Finally, it creates a shell script 'config.status' that +you can run in the future to recreate the current configuration, and a +file 'config.log' containing compiler output (useful mainly for +debugging 'configure'). + + It can also use an optional file (typically called 'config.cache' and +enabled with '--cache-file=config.cache' or simply '-C') that saves the +results of its tests to speed up reconfiguring. Caching is disabled by +default to prevent problems with accidental use of stale cache files. + + If you need to do unusual things to compile the package, please try +to figure out how 'configure' could check whether to do them, and mail +diffs or instructions to the address given in the 'README' so they can +be considered for the next release. If you are using the cache, and at +some point 'config.cache' contains results you don't want to keep, you +may remove or edit it. + + The file 'configure.ac' (or 'configure.in') is used to create +'configure' by a program called 'autoconf'. You need 'configure.ac' if +you want to change it or regenerate 'configure' using a newer version of +'autoconf'. + + The simplest way to compile this package is: + + 1. 
'cd' to the directory containing the package's source code and type + './configure' to configure the package for your system. + + Running 'configure' might take a while. While running, it prints + some messages telling which features it is checking for. + + 2. Type 'make' to compile the package. + + 3. Optionally, type 'make check' to run any self-tests that come with + the package, generally using the just-built uninstalled binaries. + + 4. Type 'make install' to install the programs and any data files and + documentation. When installing into a prefix owned by root, it is + recommended that the package be configured and built as a regular + user, and only the 'make install' phase executed with root + privileges. + + 5. Optionally, type 'make installcheck' to repeat any self-tests, but + this time using the binaries in their final installed location. + This target does not install anything. Running this target as a + regular user, particularly if the prior 'make install' required + root privileges, verifies that the installation completed + correctly. + + 6. You can remove the program binaries and object files from the + source code directory by typing 'make clean'. To also remove the + files that 'configure' created (so you can compile the package for + a different kind of computer), type 'make distclean'. There is + also a 'make maintainer-clean' target, but that is intended mainly + for the package's developers. If you use it, you may have to get + all sorts of other programs in order to regenerate files that came + with the distribution. + + 7. Often, you can also type 'make uninstall' to remove the installed + files again. In practice, not all packages have tested that + uninstallation works correctly, even though it is required by the + GNU Coding Standards. + + 8. 
Some packages, particularly those that use Automake, provide 'make + distcheck', which can be used by developers to test that all other + targets like 'make install' and 'make uninstall' work correctly. + This target is generally not run by end users. + +Compilers and Options +===================== + + Some systems require unusual options for compilation or linking that +the 'configure' script does not know about. Run './configure --help' +for details on some of the pertinent environment variables. + + You can give 'configure' initial values for configuration parameters +by setting variables in the command line or in the environment. Here is +an example: + + ./configure CC=c99 CFLAGS=-g LIBS=-lposix + + *Note Defining Variables::, for more details. + +Compiling For Multiple Architectures +==================================== + + You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. To do this, you can use GNU 'make'. 'cd' to the +directory where you want the object files and executables to go and run +the 'configure' script. 'configure' automatically checks for the source +code in the directory that 'configure' is in and in '..'. This is known +as a "VPATH" build. + + With a non-GNU 'make', it is safer to compile the package for one +architecture at a time in the source code directory. After you have +installed the package for one architecture, use 'make distclean' before +reconfiguring for another architecture. + + On MacOS X 10.5 and later systems, you can create libraries and +executables that work on multiple system types--known as "fat" or +"universal" binaries--by specifying multiple '-arch' options to the +compiler but only a single '-arch' option to the preprocessor. 
Like +this: + + ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ + CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ + CPP="gcc -E" CXXCPP="g++ -E" + + This is not guaranteed to produce working output in all cases, you +may have to build one architecture at a time and combine the results +using the 'lipo' tool if you have problems. + +Installation Names +================== + + By default, 'make install' installs the package's commands under +'/usr/local/bin', include files under '/usr/local/include', etc. You +can specify an installation prefix other than '/usr/local' by giving +'configure' the option '--prefix=PREFIX', where PREFIX must be an +absolute file name. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. If you +pass the option '--exec-prefix=PREFIX' to 'configure', the package uses +PREFIX as the prefix for installing programs and libraries. +Documentation and other data files still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like '--bindir=DIR' to specify different values for particular +kinds of files. Run 'configure --help' for a list of the directories +you can set and what kinds of files go in them. In general, the default +for these options is expressed in terms of '${prefix}', so that +specifying just '--prefix' will affect all of the other directory +specifications that were not explicitly provided. + + The most portable way to affect installation locations is to pass the +correct locations to 'configure'; however, many packages provide one or +both of the following shortcuts of passing variable assignments to the +'make install' command line to change installation locations without +having to reconfigure or recompile. + + The first method involves providing an override variable for each +affected directory. 
For example, 'make install +prefix=/alternate/directory' will choose an alternate location for all +directory configuration variables that were expressed in terms of +'${prefix}'. Any directories that were specified during 'configure', +but not in terms of '${prefix}', must each be overridden at install time +for the entire installation to be relocated. The approach of makefile +variable overrides for each directory variable is required by the GNU +Coding Standards, and ideally causes no recompilation. However, some +platforms have known limitations with the semantics of shared libraries +that end up requiring recompilation when using this method, particularly +noticeable in packages that use GNU Libtool. + + The second method involves providing the 'DESTDIR' variable. For +example, 'make install DESTDIR=/alternate/directory' will prepend +'/alternate/directory' before all installation names. The approach of +'DESTDIR' overrides is not required by the GNU Coding Standards, and +does not work on platforms that have drive letters. On the other hand, +it does better at avoiding recompilation issues, and works well even +when some directory options were not specified in terms of '${prefix}' +at 'configure' time. + +Optional Features +================= + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving 'configure' the +option '--program-prefix=PREFIX' or '--program-suffix=SUFFIX'. + + Some packages pay attention to '--enable-FEATURE' options to +'configure', where FEATURE indicates an optional part of the package. +They may also pay attention to '--with-PACKAGE' options, where PACKAGE +is something like 'gnu-as' or 'x' (for the X Window System). The +'README' should mention any '--enable-' and '--with-' options that the +package recognizes. 
+ + For packages that use the X Window System, 'configure' can usually +find the X include and library files automatically, but if it doesn't, +you can use the 'configure' options '--x-includes=DIR' and +'--x-libraries=DIR' to specify their locations. + + Some packages offer the ability to configure how verbose the +execution of 'make' will be. For these packages, running './configure +--enable-silent-rules' sets the default to minimal output, which can be +overridden with 'make V=1'; while running './configure +--disable-silent-rules' sets the default to verbose, which can be +overridden with 'make V=0'. + +Particular systems +================== + + On HP-UX, the default C compiler is not ANSI C compatible. If GNU CC +is not installed, it is recommended to use the following options in +order to use an ANSI C compiler: + + ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" + +and if that doesn't work, install pre-built binaries of GCC for HP-UX. + + HP-UX 'make' updates targets which have the same time stamps as their +prerequisites, which makes it generally unusable when shipped generated +files such as 'configure' are involved. Use GNU 'make' instead. + + On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot +parse its '<wchar.h>' header file. The option '-nodtk' can be used as a +workaround. If GNU CC is not installed, it is therefore recommended to +try + + ./configure CC="cc" + +and if that doesn't work, try + + ./configure CC="cc -nodtk" + + On Solaris, don't put '/usr/ucb' early in your 'PATH'. This +directory contains several dysfunctional programs; working variants of +these programs are available in '/usr/bin'. So, if you need '/usr/ucb' +in your 'PATH', put it _after_ '/usr/bin'. + + On Haiku, software installed for all users goes in '/boot/common', +not '/usr/local'.
It is recommended to use the following options: + + ./configure --prefix=/boot/common + +Specifying the System Type +========================== + + There may be some features 'configure' cannot figure out +automatically, but needs to determine by the type of machine the package +will run on. Usually, assuming the package is built to be run on the +_same_ architectures, 'configure' can figure that out, but if it prints +a message saying it cannot guess the machine type, give it the +'--build=TYPE' option. TYPE can either be a short name for the system +type, such as 'sun4', or a canonical name which has the form: + + CPU-COMPANY-SYSTEM + +where SYSTEM can have one of these forms: + + OS + KERNEL-OS + + See the file 'config.sub' for the possible values of each field. If +'config.sub' isn't included in this package, then this package doesn't +need to know the machine type. + + If you are _building_ compiler tools for cross-compiling, you should +use the option '--target=TYPE' to select the type of system they will +produce code for. + + If you want to _use_ a cross compiler, that generates code for a +platform different from the build platform, you should specify the +"host" platform (i.e., that on which the generated programs will +eventually be run) with '--host=TYPE'. + +Sharing Defaults +================ + + If you want to set default values for 'configure' scripts to share, +you can create a site shell script called 'config.site' that gives +default values for variables like 'CC', 'cache_file', and 'prefix'. +'configure' looks for 'PREFIX/share/config.site' if it exists, then +'PREFIX/etc/config.site' if it exists. Or, you can set the +'CONFIG_SITE' environment variable to the location of the site script. +A warning: not all 'configure' scripts look for a site script. + +Defining Variables +================== + + Variables not defined in a site shell script can be set in the +environment passed to 'configure'. 
However, some packages may run +configure again during the build, and the customized values of these +variables may be lost. In order to avoid this problem, you should set +them in the 'configure' command line, using 'VAR=value'. For example: + + ./configure CC=/usr/local2/bin/gcc + +causes the specified 'gcc' to be used as the C compiler (unless it is +overridden in the site shell script). + +Unfortunately, this technique does not work for 'CONFIG_SHELL' due to an +Autoconf limitation. Until the limitation is lifted, you can use this +workaround: + + CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash + +'configure' Invocation +====================== + + 'configure' recognizes the following options to control how it +operates. + +'--help' +'-h' + Print a summary of all of the options to 'configure', and exit. + +'--help=short' +'--help=recursive' + Print a summary of the options unique to this package's + 'configure', and exit. The 'short' variant lists options used only + in the top level, while the 'recursive' variant lists options also + present in any nested packages. + +'--version' +'-V' + Print the version of Autoconf used to generate the 'configure' + script, and exit. + +'--cache-file=FILE' + Enable the cache: use and save the results of the tests in FILE, + traditionally 'config.cache'. FILE defaults to '/dev/null' to + disable caching. + +'--config-cache' +'-C' + Alias for '--cache-file=config.cache'. + +'--quiet' +'--silent' +'-q' + Do not print messages saying which checks are being made. To + suppress all normal output, redirect it to '/dev/null' (any error + messages will still be shown). + +'--srcdir=DIR' + Look for the package's source code in directory DIR. Usually + 'configure' can determine that directory automatically. + +'--prefix=DIR' + Use DIR as the installation prefix. *note Installation Names:: for + more details, including other options available for fine-tuning the + installation locations. 
+ +'--no-create' +'-n' + Run the configure checks, but stop before creating any output + files. + +'configure' also accepts some other, not widely useful, options. Run +'configure --help' for more details. diff --git a/pcre2/LICENCE b/pcre2/LICENCE index 1568be3a1..155d07312 100644 --- a/pcre2/LICENCE +++ b/pcre2/LICENCE @@ -20,8 +20,8 @@ THE BASIC LIBRARY FUNCTIONS --------------------------- Written by: Philip Hazel -Email local part: ph10 -Email domain: cam.ac.uk +Email local part: Philip.Hazel +Email domain: gmail.com University of Cambridge Computing Service, Cambridge, England. diff --git a/pcre2/NEWS b/pcre2/NEWS new file mode 100644 index 000000000..de797e7b5 --- /dev/null +++ b/pcre2/NEWS @@ -0,0 +1,347 @@ +News about PCRE2 releases +------------------------- + + +Version 10.36 04-December-2020 +------------------------------ + +Again, mainly bug fixes and tidies. The only enhancements are the addition of +GNU grep's -m (aka --max-count) option to pcre2grep, and also unifying the +handling of substitution strings for both -O and callouts in pcre2grep, with +the addition of $x{...} and $o{...} to allow for characters whose code points +are greater than 255 in Unicode mode. + +NOTE: there is an outstanding issue with JIT support for MacOS on arm64 +hardware. For details, please see Bugzilla issue #2618. + + +Version 10.35 15-April-2020 +--------------------------- + +Bugfixes, tidies, and a few new enhancements. + +1. Capturing groups that contain recursive backreferences to themselves are no +longer automatically atomic, because the restriction is no longer necessary +as a result of the 10.30 restructuring. + +2. Several new options for pcre2_substitute(). + +3. When Unicode is supported and PCRE2_UCP is set without PCRE2_UTF, Unicode +character properties are used for upper/lower case computations on characters +whose code points are greater than 127. + +4. 
The character tables (for low-valued characters) can now more easily be +saved and restored in binary. + +5. Updated to Unicode 13.0.0. + + +Version 10.34 21-November-2019 +------------------------------ + +Another release with a few enhancements as well as bugfixes and tidies. The +main new features are: + +1. There is now some support for matching in invalid UTF strings. + +2. Non-atomic positive lookarounds are implemented in the pcre2_match() +interpreter, but not in JIT. + +3. Added two new functions: pcre2_get_match_data_size() and +pcre2_maketables_free(). + +4. Upgraded to Unicode 12.1.0. + + +Version 10.33 16-April-2019 +--------------------------- + +Yet more bugfixes, tidies, and a few enhancements, summarized here (see +ChangeLog for the full list): + +1. Callouts from pcre2_substitute() are now available. + +2. The POSIX functions are now all called pcre2_regcomp() etc., with wrapper +functions that use the standard POSIX names. However, in pcre2posix.h the POSIX +names are defined as macros. This should help avoid linking with the wrong +library in some environments, while still exporting the POSIX names for +pre-existing programs that use them. + +3. Some new options: + + (a) PCRE2_EXTRA_ESCAPED_CR_IS_LF makes \r behave as \n. + + (b) PCRE2_EXTRA_ALT_BSUX enables support for ECMAScript 6's \u{hh...} + construct. + + (c) PCRE2_COPY_MATCHED_SUBJECT causes a copy of a matched subject to be + made, instead of just remembering a pointer. + +4. Some new Perl features: + + (a) Perl 5.28's experimental alphabetic names for atomic groups and + lookaround assertions, for example, (*pla:...) and (*atomic:...). + + (b) The new Perl "script run" features (*script_run:...) and + (*atomic_script_run:...) aka (*sr:...) and (*asr:...). + + (c) When PCRE2_UTF is set, allow non-ASCII letters and decimal digits in + capture group names. + +5. --disable-percent-zt disables the use of %zu and %td in formatting strings +in pcre2test. 
They were already automatically disabled for VC and older C +compilers. + +6. Some changes related to callouts in pcre2grep: + + (a) Support for running an external program under VMS has been added, in + addition to Windows and fork() support. + + (b) --disable-pcre2grep-callout-fork restricts the callout support + to the inbuilt echo facility. + + +Version 10.32 10-September-2018 +------------------------------- + +This is another mainly bugfix and tidying release with a few minor +enhancements. These are the main ones: + +1. pcre2grep now supports the inclusion of binary zeros in patterns that are +read from files via the -f option. + +2. ./configure now supports --enable-jit=auto, which automatically enables JIT +if the hardware supports it. + +3. In pcre2_dfa_match(), internal recursive calls no longer use the stack for +local workspace and local ovectors. Instead, an initial block of stack is +reserved, but if this is insufficient, heap memory is used. The heap limit +parameter now applies to pcre2_dfa_match(). + +4. Updated to Unicode version 11.0.0. + +5. (*ACCEPT:ARG), (*FAIL:ARG), and (*COMMIT:ARG) are now supported. + +6. Added support for \N{U+dddd}, but only in Unicode mode. + +7. Added support for (?^) to unset all imnsx options. + + +Version 10.31 12-February-2018 +------------------------------ + +This is mainly a bugfix and tidying release (see ChangeLog for full details). +However, there are some minor enhancements. + +1. New pcre2_config() options: PCRE2_CONFIG_NEVER_BACKSLASH_C and +PCRE2_CONFIG_COMPILED_WIDTHS. + +2. New pcre2_pattern_info() option PCRE2_INFO_EXTRAOPTIONS to retrieve the +extra compile time options. + +3. There are now public names for all the pcre2_compile() error numbers. + +4. Added PCRE2_CALLOUT_STARTMATCH and PCRE2_CALLOUT_BACKTRACK bits to a new +field callout_flags in callout blocks.
+ + +Version 10.30 14-August-2017 +---------------------------- + +The full list of changes that includes bugfixes and tidies is, as always, in +ChangeLog. These are the most important new features: + +1. The main interpreter, pcre2_match(), has been refactored into a new version +that does not use recursive function calls (and therefore the system stack) for +remembering backtracking positions. This makes --disable-stack-for-recursion a +NOOP. The new implementation allows backtracking into recursive group calls in +patterns, making it more compatible with Perl, and also fixes some other +previously hard-to-do issues. For patterns that have a lot of backtracking, the +heap is now used, and there is an explicit limit on the amount, settable by +pcre2_set_heap_limit() or (*LIMIT_HEAP=xxx). The "recursion limit" is retained, +but is renamed as "depth limit" (though the old names remain for +compatibility). + +There is also a change in the way callouts from pcre2_match() are handled. The +offset_vector field in the callout block is no longer a pointer to the +actual ovector that was passed to the matching function in the match data +block. Instead it points to an internal ovector of a size large enough to hold +all possible captured substrings in the pattern. + +2. The new option PCRE2_ENDANCHORED insists that a pattern match must end at +the end of the subject. + +3. The new option PCRE2_EXTENDED_MORE implements Perl's /xx feature, and +pcre2test is upgraded to support it. Setting within the pattern by (?xx) is +also supported. + +4. (?n) can be used to set PCRE2_NO_AUTO_CAPTURE, because Perl now has this. + +5. Additional compile options in the compile context are now available, and the +first two are: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES and +PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL. + +6. The newline type PCRE2_NEWLINE_NUL is now available. + +7. 
The match limit value now also applies to pcre2_dfa_match() as there are +patterns that can use up a lot of resources without necessarily recursing very +deeply. + +8. The option REG_PEND (a GNU extension) is now available for the POSIX +wrapper. Also there is a new option PCRE2_LITERAL which is used to support +REG_NOSPEC. + +9. PCRE2_EXTRA_MATCH_LINE and PCRE2_EXTRA_MATCH_WORD are implemented for the +benefit of pcre2grep, and pcre2grep's -F, -w, and -x options are re-implemented +using PCRE2_LITERAL, PCRE2_EXTRA_MATCH_WORD, and PCRE2_EXTRA_MATCH_LINE. This +is tidier and also fixes some bugs. + +10. The Unicode tables are upgraded from Unicode 8.0.0 to Unicode 10.0.0. + +11. There are some experimental functions for converting foreign patterns +(globs and POSIX patterns) into PCRE2 patterns. + + +Version 10.23 14-February-2017 +------------------------------ + +1. ChangeLog has the details of a lot of bug fixes and tidies. + +2. There has been a major re-factoring of the pcre2_compile.c file. Most syntax +checking is now done in the pre-pass that identifies capturing groups. This has +reduced the amount of duplication and made the code tidier. While doing this, +some minor bugs and Perl incompatibilities were fixed (see ChangeLog for +details.) + +3. Back references are now permitted in lookbehind assertions when there are +no duplicated group numbers (that is, (?| has not been used), and, if the +reference is by name, there is only one group of that name. The referenced +group must, of course be of fixed length. + +4. \g{+<number>} (e.g. \g{+2} ) is now supported. It is a "forward back +reference" and can be useful in repetitions (compare \g{-<number>} ). Perl does +not recognize this syntax. + +5. pcre2grep now automatically expands its buffer up to a maximum set by +--max-buffer-size. + +6. The -t option (grand total) has been added to pcre2grep. + +7.
A new function called pcre2_code_copy_with_tables() exists to copy a +compiled pattern along with a private copy of the character tables that it +uses. + +8. A user supplied a number of patches to upgrade pcre2grep under Windows and +tidy the code. + +9. Several updates have been made to pcre2test and test scripts (see +ChangeLog). + + +Version 10.22 29-July-2016 +-------------------------- + +1. ChangeLog has the details of a number of bug fixes. + +2. The POSIX wrapper function regcomp() did not used to support back references +and subroutine calls if called with the REG_NOSUB option. It now does. + +3. A new function, pcre2_code_copy(), is added, to make a copy of a compiled +pattern. + +4. Support for string callouts is added to pcre2grep. + +5. Added the PCRE2_NO_JIT option to pcre2_match(). + +6. The pcre2_get_error_message() function now returns with a negative error +code if the error number it is given is unknown. + +7. Several updates have been made to pcre2test and test scripts (see +ChangeLog). + + +Version 10.21 12-January-2016 +----------------------------- + +1. Many bugs have been fixed. A large number of them were provoked only by very +strange pattern input, and were discovered by fuzzers. Some others were +discovered by code auditing. See ChangeLog for details. + +2. The Unicode tables have been updated to Unicode version 8.0.0. + +3. For Perl compatibility in EBCDIC environments, ranges such as a-z in a +class, where both values are literal letters in the same case, omit the +non-letter EBCDIC code points within the range. + +4. There have been a number of enhancements to the pcre2_substitute() function, +giving more flexibility to replacement facilities. It is now also possible to +cause the function to return the needed buffer size if the one given is too +small. + +5. The PCRE2_ALT_VERBNAMES option causes the "name" parts of special verbs such +as (*THEN:name) to be processed for backslashes and to take note of +PCRE2_EXTENDED. + +6.
PCRE2_INFO_HASBACKSLASHC makes it possible for a client to find out if a +pattern uses \C, and --never-backslash-C makes it possible to compile a version +of PCRE2 in which the use of \C is always forbidden. + +7. A limit to the length of pattern that can be handled can now be set by +calling pcre2_set_max_pattern_length(). + +8. When matching an unanchored pattern, a match can be required to begin within +a given number of code units after the start of the subject by calling +pcre2_set_offset_limit(). + +9. The pcre2test program has been extended to test new facilities, and it can +now run the tests when LF on its own is not a valid newline sequence. + +10. The RunTest script has also been updated to enable more tests to be run. + +11. There have been some minor performance enhancements. + + +Version 10.20 30-June-2015 +-------------------------- + +1. Callouts with string arguments and the pcre2_callout_enumerate() function +have been implemented. + +2. The PCRE2_NEVER_BACKSLASH_C option, which locks out the use of \C, is added. + +3. The PCRE2_ALT_CIRCUMFLEX option lets ^ match after a newline at the end of a +subject in multiline mode. + +4. The way named subpatterns are handled has been refactored. The previous +approach had several bugs. + +5. The handling of \c in EBCDIC environments has been changed to conform to the +perlebcdic document. This is an incompatible change. + +6. Bugs have been mended, many of them discovered by fuzzers. + + +Version 10.10 06-March-2015 +--------------------------- + +1. Serialization and de-serialization functions have been added to the API, +making it possible to save and restore sets of compiled patterns, though +restoration must be done in the same environment that was used for compilation. + +2. The (*NO_JIT) feature has been added; this makes it possible for a pattern +creator to specify that JIT is not to be used. + +3. A number of bugs have been fixed.
In particular, bugs that caused building +on Windows using CMake to fail have been mended. + + +Version 10.00 05-January-2015 +----------------------------- + +Version 10.00 is the first release of PCRE2, a revised API for the PCRE +library. Changes prior to 10.00 are logged in the ChangeLog file for the old +API, up to item 20 for release 8.36. New programs are recommended to use the +new library. Programs that use the original (PCRE1) API will need changing +before linking with the new library. + +**** diff --git a/pcre2/NON-AUTOTOOLS-BUILD b/pcre2/NON-AUTOTOOLS-BUILD new file mode 100644 index 000000000..a73c058bb --- /dev/null +++ b/pcre2/NON-AUTOTOOLS-BUILD @@ -0,0 +1,406 @@ +Building PCRE2 without using autotools +-------------------------------------- + +This document contains the following sections: + + General + Generic instructions for the PCRE2 C library + Stack size in Windows environments + Linking programs in Windows environments + Calling conventions in Windows environments + Comments about Win32 builds + Building PCRE2 on Windows with CMake + Building PCRE2 on Windows with Visual Studio + Testing with RunTest.bat + Building PCRE2 on native z/OS and z/VM + + +GENERAL + +The basic PCRE2 library consists entirely of code written in Standard C, and so +should compile successfully on any system that has a Standard C compiler and +library. + +The PCRE2 distribution includes a "configure" file for use by the +configure/make (autotools) build system, as found in many Unix-like +environments. The README file contains information about the options for +"configure". + +There is also support for CMake, which some users prefer, especially in Windows +environments, though it can also be run in Unix-like environments. See the +section entitled "Building PCRE2 on Windows with CMake" below. + +Versions of src/config.h and src/pcre2.h are distributed in the PCRE2 tarballs +under the names src/config.h.generic and src/pcre2.h.generic. 
These are +provided for those who build PCRE2 without using "configure" or CMake. If you +use "configure" or CMake, the .generic versions are not used. + + +GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARY + +The following are generic instructions for building the PCRE2 C library "by +hand". If you are going to use CMake, this section does not apply to you; you +can skip ahead to the CMake section. + + (1) Copy or rename the file src/config.h.generic as src/config.h, and edit the + macro settings that it contains to whatever is appropriate for your + environment. In particular, you can alter the definition of the NEWLINE + macro to specify what character(s) you want to be interpreted as line + terminators by default. + + When you subsequently compile any of the PCRE2 modules, you must specify + -DHAVE_CONFIG_H to your compiler so that src/config.h is included in the + sources. + + An alternative approach is not to edit src/config.h, but to use -D on the + compiler command line to make any changes that you need to the + configuration options. In this case -DHAVE_CONFIG_H must not be set. + + NOTE: There have been occasions when the way in which certain parameters + in src/config.h are used has changed between releases. (In the + configure/make world, this is handled automatically.) When upgrading to a + new release, you are strongly advised to review src/config.h.generic + before re-using what you had previously. + + Note also that the src/config.h.generic file is created from a config.h + that was generated by Autotools, which automatically includes settings of + a number of macros that are not actually used by PCRE2 (for example, + HAVE_MEMORY_H). + + (2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h. + + (3) EITHER: + Copy or rename file src/pcre2_chartables.c.dist as + src/pcre2_chartables.c. 
+ + OR: + Compile src/pcre2_dftables.c as a stand-alone program (using + -DHAVE_CONFIG_H if you have set up src/config.h), and then run it with + the single argument "src/pcre2_chartables.c". This generates a set of + standard character tables and writes them to that file. The tables are + generated using the default C locale for your system. If you want to use + a locale that is specified by LC_xxx environment variables, add the -L + option to the pcre2_dftables command. You must use this method if you + are building on a system that uses EBCDIC code. + + The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can + specify alternative tables at run time. + + (4) For an 8-bit library, compile the following source files from the src + directory, setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also + set -DHAVE_CONFIG_H if you have set up src/config.h with your + configuration, or else use other -D settings to change the configuration + as required. + + pcre2_auto_possess.c + pcre2_chartables.c + pcre2_compile.c + pcre2_config.c + pcre2_context.c + pcre2_convert.c + pcre2_dfa_match.c + pcre2_error.c + pcre2_extuni.c + pcre2_find_bracket.c + pcre2_jit_compile.c + pcre2_maketables.c + pcre2_match.c + pcre2_match_data.c + pcre2_newline.c + pcre2_ord2utf.c + pcre2_pattern_info.c + pcre2_script_run.c + pcre2_serialize.c + pcre2_string_utils.c + pcre2_study.c + pcre2_substitute.c + pcre2_substring.c + pcre2_tables.c + pcre2_ucd.c + pcre2_valid_utf.c + pcre2_xclass.c + + Make sure that you include -I. in the compiler command (or equivalent for + an unusual compiler) so that all included PCRE2 header files are first + sought in the src directory under the current directory. Otherwise you run + the risk of picking up a previously-installed file from somewhere else. + + Note that you must compile pcre2_jit_compile.c, even if you have not + defined SUPPORT_JIT in src/config.h, because when JIT support is not + configured, dummy functions are compiled. 
When JIT support IS configured, + pcre2_jit_compile.c #includes other files from the sljit subdirectory, + all of whose names begin with "sljit". It also #includes + src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile + these yourself. + + Note also that the pcre2_fuzzsupport.c file contains special code that is + useful to those who want to run fuzzing tests on the PCRE2 library. Unless + you are doing that, you can ignore it. + + (5) Now link all the compiled code into an object library in whichever form + your system keeps such libraries. This is the basic PCRE2 C 8-bit library. + If your system has static and shared libraries, you may have to do this + once for each type. + + (6) If you want to build a 16-bit library or 32-bit library (as well as, or + instead of the 8-bit library) just supply 16 or 32 as the value of + -DPCRE2_CODE_UNIT_WIDTH when you are compiling. + + (7) If you want to build the POSIX wrapper functions (which apply only to the + 8-bit library), ensure that you have the src/pcre2posix.h file and then + compile src/pcre2posix.c. Link the result (on its own) as the pcre2posix + library. + + (8) The pcre2test program can be linked with any combination of the 8-bit, + 16-bit and 32-bit libraries (depending on what you selected in + src/config.h). Compile src/pcre2test.c; don't forget -DHAVE_CONFIG_H if + necessary, but do NOT define PCRE2_CODE_UNIT_WIDTH. Then link with the + appropriate library/ies. If you compiled an 8-bit library, pcre2test also + needs the pcre2posix wrapper library. + + (9) Run pcre2test on the testinput files in the testdata directory, and check + that the output matches the corresponding testoutput files. There are + comments about what each test does in the section entitled "Testing PCRE2" + in the README file. If you compiled more than one of the 8-bit, 16-bit and + 32-bit libraries, you need to run pcre2test with the -16 option to do + 16-bit tests and with the -32 option to do 32-bit tests. 
+ + Some tests are relevant only when certain build-time options are selected. + For example, test 4 is for Unicode support, and will not run if you have + built PCRE2 without it. See the comments at the start of each testinput + file. If you have a suitable Unix-like shell, the RunTest script will run + the appropriate tests for you. The command "RunTest list" will output a + list of all the tests. + + Note that the supplied files are in Unix format, with just LF characters + as line terminators. You may need to edit them to change this if your + system uses a different convention. + +(10) If you have built PCRE2 with SUPPORT_JIT, the JIT features can be tested + by running pcre2test with the -jit option. This is done automatically by + the RunTest script. You might also like to build and run the freestanding + JIT test program, src/pcre2_jit_test.c. + +(11) If you want to use the pcre2grep command, compile and link + src/pcre2grep.c; it uses only the basic 8-bit PCRE2 library (it does not + need the pcre2posix library). If you have built the PCRE2 library with JIT + support by defining SUPPORT_JIT in src/config.h, you can also define + SUPPORT_PCRE2GREP_JIT, which causes pcre2grep to make use of JIT (unless + it is run with --no-jit). If you define SUPPORT_PCRE2GREP_JIT without + defining SUPPORT_JIT, pcre2grep does not try to make use of JIT. + + +STACK SIZE IN WINDOWS ENVIRONMENTS + +Prior to release 10.30 the default system stack size of 1MiB in some Windows +environments caused issues with some tests. This should no longer be the case +for 10.30 and later releases. + + +LINKING PROGRAMS IN WINDOWS ENVIRONMENTS + +If you want to statically link a program against a PCRE2 library in the form of +a non-dll .a file, you must define PCRE2_STATIC before including src/pcre2.h. + + +CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS + +It is possible to compile programs to use different calling conventions using +MSVC. 
Search the web for "calling conventions" for more information. To make it +easier to change the calling convention for the exported functions in the +PCRE2 library, the macro PCRE2_CALL_CONVENTION is present in all the external +definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is +not set, it defaults to empty; the default calling convention is then used +(which is what is wanted most of the time). + + +COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE2 ON WINDOWS WITH CMAKE") + +There are two ways of building PCRE2 using the "configure, make, make install" +paradigm on Windows systems: using MinGW or using Cygwin. These are not at all +the same thing; they are completely different from each other. There is also +support for building using CMake, which some users find a more straightforward +way of building PCRE2 under Windows. + +The MinGW home page (http://www.mingw.org/) says this: + + MinGW: A collection of freely available and freely distributable Windows + specific header files and import libraries combined with GNU toolsets that + allow one to produce native Windows programs that do not rely on any + 3rd-party C runtime DLLs. + +The Cygwin home page (http://www.cygwin.com/) says this: + + Cygwin is a Linux-like environment for Windows. It consists of two parts: + + . A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing + substantial Linux API functionality + + . A collection of tools which provide Linux look and feel. + +On both MinGW and Cygwin, PCRE2 should build correctly using: + + ./configure && make && make install + +This should create two libraries called libpcre2-8 and libpcre2-posix. These +are independent libraries: when you link with libpcre2-posix you must also link +with libpcre2-8, which contains the basic functions. + +Using Cygwin's compiler generates libraries and executables that depend on +cygwin1.dll. 
If a library that is generated this way is distributed, +cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL +licence, this forces not only PCRE2 to be under the GPL, but also the entire +application. A distributor who wants to keep their own code proprietary must +purchase an appropriate Cygwin licence. + +MinGW has no such restrictions. The MinGW compiler generates a library or +executable that can run standalone on Windows without any third party dll or +licensing issues. + +But there is more complication: + +If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is +to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a +front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's +gcc and MinGW's gcc). So, a user can: + +. Build native binaries by using MinGW or by getting Cygwin and using + -mno-cygwin. + +. Build binaries that depend on cygwin1.dll by using Cygwin with the normal + compiler flags. + +The test files that are supplied with PCRE2 are in UNIX format, with LF +characters as line terminators. Unless your PCRE2 library uses a default +newline option that includes LF as a valid newline, it may be necessary to +change the line terminators in the test files to get some of the tests to work. + + +BUILDING PCRE2 ON WINDOWS WITH CMAKE + +CMake is an alternative configuration facility that can be used instead of +"configure". CMake creates project files (make files, solution files, etc.) +tailored to numerous development environments, including Visual Studio, +Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no +spaces in the names for your CMake installation and your PCRE2 source and build +directories. + +The following instructions were contributed by a PCRE1 user, but they should +also work for PCRE2. If they are not followed exactly, errors may occur. 
In the +event that errors do occur, it is recommended that you delete the CMake cache +before attempting to repeat the CMake build process. In the CMake GUI, the +cache can be deleted by selecting "File > Delete Cache". + +1. Install the latest CMake version available from http://www.cmake.org/, and + ensure that cmake\bin is on your path. + +2. Unzip (retaining folder structure) the PCRE2 source tree into a source + directory such as C:\pcre2. You should ensure your local date and time + is not earlier than the file dates in your source dir if the release is + very new. + +3. Create a new, empty build directory, preferably a subdirectory of the + source dir. For example, C:\pcre2\pcre2-xx\build. + +4. Run cmake-gui from the Shell environment of your build tool, for example, + Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try + to start CMake from the Windows Start menu, as this can lead to errors. + +5. Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and + build directories, respectively. + +6. Hit the "Configure" button. + +7. Select the particular IDE / build tool that you are using (Visual + Studio, MSYS makefiles, MinGW makefiles, etc.) + +8. The GUI will then list several configuration options. This is where + you can disable Unicode support or select other PCRE2 optional features. + +9. Hit "Configure" again. The adjacent "Generate" button should now be + active. + +10. Hit "Generate". + +11. The build directory should now contain a usable build system, be it a + solution file for Visual Studio, makefiles for MinGW, etc. Exit from + cmake-gui and use the generated build system with your compiler or IDE. + E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2 + solution, select the desired configuration (Debug, or Release, etc.) and + build the ALL_BUILD project. + +12. 
If during configuration with cmake-gui you've elected to build the test + programs, you can execute them by building the test project. E.g., for + MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The + most recent build configuration is targeted by the tests. A summary of + test results is presented. Complete test output is subsequently + available for review in Testing\Temporary under your build dir. + + +BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO + +The code currently cannot be compiled without a stdint.h header, which is +available only in relatively recent versions of Visual Studio. However, this +portable and permissively-licensed implementation of the header worked without +issue: + + http://www.azillionmonkeys.com/qed/pstdint.h + +Just rename it and drop it into the top level of the build tree. + + +TESTING WITH RUNTEST.BAT + +If configured with CMake, building the test project ("make test" or building +RUN_TESTS in Visual Studio) creates (and runs) pcre2_test.bat (and depending +on your configuration options, possibly other test programs) in the build +directory. The pcre2_test.bat script runs RunTest.bat with correct source and +exe paths. + +For manual testing with RunTest.bat, provided the build dir is a subdirectory +of the source directory: Open a command shell window. Chdir to the location +of your pcre2test.exe and pcre2grep.exe programs. Call RunTest.bat with +"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate. + +To run only a particular test with RunTest.Bat provide a test number argument. + +Otherwise: + +1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe + have been created. + +2. Edit RunTest.bat to identify the full or relative location of + the pcre2 source (in which the testdata folder resides), e.g.: + + set srcdir=C:\pcre2\pcre2-10.00 + +3. In a Windows command environment, chdir to the location of your bat and + exe programs. + +4. Run RunTest.bat. 
#!/bin/sh

# Script to prepare the files for building a PCRE2 release. It does some
# processing of the documentation, detrails files, and creates pcre2.h.generic
# and config.h.generic (for use by builders who can't run ./configure).
# NOTE(review): no command in this script generates pcre2.h.generic or
# config.h.generic; confirm where those files are actually produced.

# You must run this script before running "make dist". If its first argument
# is "doc", it stops after preparing the documentation. There are no other
# arguments. The script makes use of the following files:

# 132html     A Perl script that converts a .1 or .3 man page into HTML. It
#             "knows" the relevant troff constructs that are used in the PCRE2
#             man pages.

# CheckMan    A Perl script that checks man pages for typos in the mark up.

# CleanTxt    A Perl script that cleans up the output of "nroff -man" by
#             removing backspaces and other redundant text so as to produce
#             a readable .txt file.

# Detrail     A Perl script that removes trailing spaces from files.

# doc/index.html.src
#             A file that is copied as index.html into the doc/html directory
#             when the HTML documentation is built. It works like this so that
#             doc/html can be deleted and re-created from scratch.

# README & NON-AUTOTOOLS-BUILD
#             These files are copied into the doc/html directory, with .txt
#             extensions so that they can be hyperlinked from the HTML
#             documentation, because some people just go to the HTML without
#             looking for text files.


# First, sort out the documentation. Remove pcre2demo.3 first because it won't
# pass the markup check (it is created below, using markup that none of the
# other pages use).

# Every path below is relative to doc/, so stop at once if it cannot be
# entered; otherwise the rm/cp commands would run in the wrong directory.
cd doc || exit 1
echo Processing documentation

/bin/rm -f pcre2demo.3

# Check the remaining man pages

perl ../CheckMan *.1 *.3 || exit 1

# Make Text form of the documentation. It needs some mangling to make it
# tidy for online reading. Concatenate all the .3 stuff, but omit the
# individual function pages.

# The here-document (re)creates pcre2.txt with its banner; the loop below
# appends the cleaned-up text of each man page to it.
cat <<End >pcre2.txt
-----------------------------------------------------------------------------
This file contains a concatenation of the PCRE2 man pages, converted to plain
text format for ease of searching with a text editor, or for use on systems
that do not have a man page processor. The small individual files that give
synopses of each function in the library have not been included. Neither has
the pcre2demo program. There are separate text files for the pcre2grep and
pcre2test commands.
-----------------------------------------------------------------------------


End

echo "Making pcre2.txt"
for file in pcre2 pcre2api pcre2build pcre2callout pcre2compat pcre2jit \
            pcre2limits pcre2matching pcre2partial pcre2pattern pcre2perform \
            pcre2posix pcre2sample pcre2serialize pcre2syntax \
            pcre2unicode ; do
  echo " Processing $file.3"
  nroff -c -man "$file.3" >"$file.rawtxt"
  perl ../CleanTxt <"$file.rawtxt" >>pcre2.txt
  /bin/rm "$file.rawtxt"
  echo "------------------------------------------------------------------------------" >>pcre2.txt
  # Two spacer lines follow every page except pcre2sample.
  if [ "$file" != "pcre2sample" ] ; then
    echo " " >>pcre2.txt
    echo " " >>pcre2.txt
  fi
done

# The three commands
for file in pcre2test pcre2grep pcre2-config ; do
  echo Making $file.txt
  nroff -c -man "$file.1" >"$file.rawtxt"
  perl ../CleanTxt <"$file.rawtxt" >"$file.txt"
  /bin/rm "$file.rawtxt"
done


# Make pcre2demo.3 from the pcre2demo.c source file

# The Perl code writes through its own OUT handle; the shell redirection of
# stdout to the same file receives no output. The "END" delimiter is quoted so
# that the shell passes the Perl text through without expanding anything.
echo "Making pcre2demo.3"
perl <<"END" >pcre2demo.3
  open(IN, "../src/pcre2demo.c") || die "Failed to open src/pcre2demo.c\n";
  open(OUT, ">pcre2demo.3") || die "Failed to open pcre2demo.3\n";
  print OUT ".\\\" Start example.\n" .
            ".de EX\n" .
            ". nr mE \\\\n(.f\n" .
            ". nf\n" .
            ". nh\n" .
            ". ft CW\n" .
            "..\n" .
            ".\n" .
            ".\n" .
            ".\\\" End example.\n" .
            ".de EE\n" .
            ". ft \\\\n(mE\n" .
            ". fi\n" .
            ". hy \\\\n(HY\n" .
            "..\n" .
            ".\n" .
            ".EX\n" ;
  while (<IN>)
    {
    s/\\/\\e/g;
    print OUT;
    }
  print OUT ".EE\n";
  close(IN);
  close(OUT);
END
if [ $? != 0 ] ; then exit 1; fi


# Make HTML form of the documentation.

echo "Making HTML documentation"
# Create html/ if it is missing so that the directory really can be deleted
# and rebuilt from scratch; -f keeps rm quiet when it is already empty.
mkdir -p html
/bin/rm -f html/*
cp index.html.src html/index.html
cp ../README html/README.txt
cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt

for file in *.1 ; do
  base=$(basename "$file" .1)
  echo " Making $base.html"
  # Stop on a conversion failure, as the .3 loop below does.
  perl ../132html -toc "$base" <"$file" >"html/$base.html" || exit 1
done

# Exclude table of contents for function summaries (pages whose names contain
# an underscore). Also exclude them for small pages that have only one section.

for file in *.3 ; do
  base=$(basename "$file" .3)
  toc=-toc
  case "$base" in
    *_*) toc="" ;;
    pcre2sample|pcre2compat|pcre2limits|pcre2unicode) toc="" ;;
  esac
  echo " Making $base.html"
  # $toc is deliberately unquoted: when empty it must vanish from the argument
  # list rather than be passed to 132html as an empty argument.
  perl ../132html $toc "$base" <"$file" >"html/$base.html" || exit 1
done

# End of documentation processing; stop if only documentation required.

cd ..
echo Documentation done
if [ "$1" = "doc" ] ; then exit; fi

# These files are detrailed; do not detrail the test data because there may be
# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF
# line endings and the detrail script removes all trailing white space. The
# configure files are also omitted from the detrailing.

files="\
  Makefile.am \
  configure.ac \
  README \
  LICENCE \
  COPYING \
  AUTHORS \
  NEWS \
  NON-AUTOTOOLS-BUILD \
  INSTALL \
  132html \
  CleanTxt \
  Detrail \
  ChangeLog \
  CMakeLists.txt \
  RunGrepTest \
  RunTest \
  pcre2-config.in \
  perltest.sh \
  libpcre2-8.pc.in \
  libpcre2-16.pc.in \
  libpcre2-32.pc.in \
  libpcre2-posix.pc.in \
  src/pcre2_dftables.c \
  src/pcre2.h.in \
  src/pcre2_auto_possess.c \
  src/pcre2_compile.c \
  src/pcre2_config.c \
  src/pcre2_context.c \
  src/pcre2_convert.c \
  src/pcre2_dfa_match.c \
  src/pcre2_error.c \
  src/pcre2_extuni.c \
  src/pcre2_find_bracket.c \
  src/pcre2_internal.h \
  src/pcre2_intmodedep.h \
  src/pcre2_jit_compile.c \
  src/pcre2_jit_match.c \
  src/pcre2_jit_misc.c \
  src/pcre2_jit_test.c \
  src/pcre2_maketables.c \
  src/pcre2_match.c \
  src/pcre2_match_data.c \
  src/pcre2_newline.c \
  src/pcre2_ord2utf.c \
  src/pcre2_pattern_info.c \
  src/pcre2_printint.c \
  src/pcre2_string_utils.c \
  src/pcre2_study.c \
  src/pcre2_substring.c \
  src/pcre2_tables.c \
  src/pcre2_ucd.c \
  src/pcre2_ucp.h \
  src/pcre2_valid_utf.c \
  src/pcre2_xclass.c \
  src/pcre2demo.c \
  src/pcre2grep.c \
  src/pcre2posix.c \
  src/pcre2posix.h \
  src/pcre2test.c"

echo Detrailing
# $files is left unquoted on purpose so that it splits into one argument per
# file name.
perl ./Detrail $files doc/p* doc/html/*

echo Done

#End
There are three sets of +functions, one for the 8-bit library, which processes strings of bytes, one for +the 16-bit library, which processes strings of 16-bit values, and one for the +32-bit library, which processes strings of 32-bit values. Unlike PCRE1, there +are no C++ wrappers. + +The distribution does contain a set of C wrapper functions for the 8-bit +library that are based on the POSIX regular expression API (see the pcre2posix +man page). These are built into a library called libpcre2-posix. Note that this +just provides a POSIX calling interface to PCRE2; the regular expressions +themselves still follow Perl syntax and semantics. The POSIX API is restricted, +and does not give full access to all of PCRE2's facilities. + +The header file for the POSIX-style functions is called pcre2posix.h. The +official POSIX name is regex.h, but I did not want to risk possible problems +with existing files of that name by distributing it that way. To use PCRE2 with +an existing program that uses the POSIX API, pcre2posix.h will have to be +renamed or pointed at by a link (or the program modified, of course). See the +pcre2posix documentation for more details. + + +Documentation for PCRE2 +----------------------- + +If you install PCRE2 in the normal way on a Unix-like system, you will end up +with a set of man pages whose names all start with "pcre2". The one that is +just called "pcre2" lists all the others. In addition to these man pages, the +PCRE2 documentation is supplied in two other forms: + + 1. There are files called doc/pcre2.txt, doc/pcre2grep.txt, and + doc/pcre2test.txt in the source distribution. The first of these is a + concatenation of the text forms of all the section 3 man pages except the + listing of pcre2demo.c and those that summarize individual functions. The + other two are the text forms of the section 1 man pages for the pcre2grep + and pcre2test commands. These text forms are provided for ease of scanning + with text editors or similar tools. 
They are installed in + <prefix>/share/doc/pcre2, where <prefix> is the installation prefix + (defaulting to /usr/local). + + 2. A set of files containing all the documentation in HTML form, hyperlinked + in various ways, and rooted in a file called index.html, is distributed in + doc/html and installed in <prefix>/share/doc/pcre2/html. + + +Building PCRE2 on non-Unix-like systems +--------------------------------------- + +For a non-Unix-like system, please read the file NON-AUTOTOOLS-BUILD, though if +your system supports the use of "configure" and "make" you may be able to build +PCRE2 using autotools in the same way as for many Unix-like systems. + +PCRE2 can also be configured using CMake, which can be run in various ways +(command line, GUI, etc). This creates Makefiles, solution files, etc. The file +NON-AUTOTOOLS-BUILD has information about CMake. + +PCRE2 has been compiled on many different operating systems. It should be +straightforward to build PCRE2 on any system that has a Standard C compiler and +library, because it uses only Standard C functions. + + +Building PCRE2 without using autotools +-------------------------------------- + +The use of autotools (in particular, libtool) is problematic in some +environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD +file for ways of building PCRE2 without using autotools. + + +Building PCRE2 using autotools +------------------------------ + +The following instructions assume the use of the widely used "configure; make; +make install" (autotools) process. + +To build PCRE2 on a system that supports autotools, first run the "configure" +command from the PCRE2 distribution directory, with your current directory set +to the directory where you want the files to be created. This command is a +standard GNU "autoconf" configuration script, for which generic instructions +are supplied in the file INSTALL. 
+ +Most commonly, people build PCRE2 within its own distribution directory, and in +this case, on many systems, just running "./configure" is sufficient. However, +the usual methods of changing standard defaults are available. For example: + +CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local + +This command specifies that the C compiler should be run with the flags '-O2 +-Wall' instead of the default, and that "make install" should install PCRE2 +under /opt/local instead of the default /usr/local. + +If you want to build in a different directory, just run "configure" with that +directory as current. For example, suppose you have unpacked the PCRE2 source +into /source/pcre2/pcre2-xxx, but you want to build it in +/build/pcre2/pcre2-xxx: + +cd /build/pcre2/pcre2-xxx +/source/pcre2/pcre2-xxx/configure + +PCRE2 is written in C and is normally compiled as a C library. However, it is +possible to build it as a C++ library, though the provided building apparatus +does not have any features to support this. + +There are some optional features that can be included or omitted from the PCRE2 +library. They are also documented in the pcre2build man page. + +. By default, both shared and static libraries are built. You can change this + by adding one of these options to the "configure" command: + + --disable-shared + --disable-static + + (See also "Shared libraries on Unix-like systems" below.) + +. By default, only the 8-bit library is built. If you add --enable-pcre2-16 to + the "configure" command, the 16-bit library is also built. If you add + --enable-pcre2-32 to the "configure" command, the 32-bit library is also + built. If you want only the 16-bit or 32-bit library, use --disable-pcre2-8 + to disable building the 8-bit library. + +. If you want to include support for just-in-time (JIT) compiling, which can + give large performance improvements on certain platforms, add --enable-jit to + the "configure" command. 
This support is available only for certain hardware + architectures. If you try to enable it on an unsupported architecture, there + will be a compile time error. If in doubt, use --enable-jit=auto, which + enables JIT only if the current hardware is supported. + +. If you are enabling JIT under SELinux environment you may also want to add + --enable-jit-sealloc, which enables the use of an executable memory allocator + that is compatible with SELinux. Warning: this allocator is experimental! + It does not support fork() operation and may crash when no disk space is + available. This option has no effect if JIT is disabled. + +. If you do not want to make use of the default support for UTF-8 Unicode + character strings in the 8-bit library, UTF-16 Unicode character strings in + the 16-bit library, or UTF-32 Unicode character strings in the 32-bit + library, you can add --disable-unicode to the "configure" command. This + reduces the size of the libraries. It is not possible to configure one + library with Unicode support, and another without, in the same configuration. + It is also not possible to use --enable-ebcdic (see below) with Unicode + support, so if this option is set, you must also use --disable-unicode. + + When Unicode support is available, the use of a UTF encoding still has to be + enabled by setting the PCRE2_UTF option at run time or starting a pattern + with (*UTF). When PCRE2 is compiled with Unicode support, its input can only + either be ASCII or UTF-8/16/32, even when running on EBCDIC platforms. + + As well as supporting UTF strings, Unicode support includes support for the + \P, \p, and \X sequences that recognize Unicode character properties. + However, only the basic two-letter properties such as Lu are supported. + Escape sequences such as \d and \w in patterns do not by default make use of + Unicode properties, but can be made to do so by setting the PCRE2_UCP option + or starting a pattern with (*UCP). + +. 
You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any + of the preceding, or any of the Unicode newline sequences, or the NUL (zero) + character as indicating the end of a line. Whatever you specify at build time + is the default; the caller of PCRE2 can change the selection at run time. The + default newline indicator is a single LF character (the Unix standard). You + can specify the default newline indicator by adding --enable-newline-is-cr, + --enable-newline-is-lf, --enable-newline-is-crlf, + --enable-newline-is-anycrlf, --enable-newline-is-any, or + --enable-newline-is-nul to the "configure" command, respectively. + +. By default, the sequence \R in a pattern matches any Unicode line ending + sequence. This is independent of the option specifying what PCRE2 considers + to be the end of a line (see above). However, the caller of PCRE2 can + restrict \R to match only CR, LF, or CRLF. You can make this the default by + adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R"). + +. In a pattern, the escape sequence \C matches a single code unit, even in a + UTF mode. This can be dangerous because it breaks up multi-code-unit + characters. You can build PCRE2 with the use of \C permanently locked out by + adding --enable-never-backslash-C (note the upper case C) to the "configure" + command. When \C is allowed by the library, individual applications can lock + it out by calling pcre2_compile() with the PCRE2_NEVER_BACKSLASH_C option. + +. PCRE2 has a counter that limits the depth of nesting of parentheses in a + pattern. This limits the amount of system stack that a pattern uses when it + is compiled. The default is 250, but you can change it by setting, for + example, + + --with-parens-nest-limit=500 + +. PCRE2 has a counter that can be set to limit the amount of computing resource + it uses when matching a pattern. If the limit is exceeded during a match, the + match fails. The default is ten million. 
You can change the default by + setting, for example, + + --with-match-limit=500000 + + on the "configure" command. This is just the default; individual calls to + pcre2_match() or pcre2_dfa_match() can supply their own value. There is more + discussion in the pcre2api man page (search for pcre2_set_match_limit). + +. There is a separate counter that limits the depth of nested backtracking + (pcre2_match()) or nested function calls (pcre2_dfa_match()) during a + matching process, which indirectly limits the amount of heap memory that is + used, and in the case of pcre2_dfa_match() the amount of stack as well. This + counter also has a default of ten million, which is essentially "unlimited". + You can change the default by setting, for example, + + --with-match-limit-depth=5000 + + There is more discussion in the pcre2api man page (search for + pcre2_set_depth_limit). + +. You can also set an explicit limit on the amount of heap memory used by + the pcre2_match() and pcre2_dfa_match() interpreters: + + --with-heap-limit=500 + + The units are kibibytes (units of 1024 bytes). This limit does not apply when + the JIT optimization (which has its own memory control features) is used. + There is more discussion on the pcre2api man page (search for + pcre2_set_heap_limit). + +. In the 8-bit library, the default maximum compiled pattern size is around + 64 kibibytes. You can increase this by adding --with-link-size=3 to the + "configure" command. PCRE2 then uses three bytes instead of two for offsets + to different parts of the compiled pattern. In the 16-bit library, + --with-link-size=3 is the same as --with-link-size=4, which (in both + libraries) uses four-byte offsets. Increasing the internal link size reduces + performance in the 8-bit and 16-bit libraries. In the 32-bit library, the + link size setting is ignored, as 4-byte offsets are always used. + +. 
For speed, PCRE2 uses four tables for manipulating and identifying characters + whose code point values are less than 256. By default, it uses a set of + tables for ASCII encoding that is part of the distribution. If you specify + + --enable-rebuild-chartables + + a program called pcre2_dftables is compiled and run in the default C locale + when you obey "make". It builds a source file called pcre2_chartables.c. If + you do not specify this option, pcre2_chartables.c is created as a copy of + pcre2_chartables.c.dist. See "Character tables" below for further + information. + +. It is possible to compile PCRE2 for use on systems that use EBCDIC as their + character code (as opposed to ASCII/Unicode) by specifying + + --enable-ebcdic --disable-unicode + + This automatically implies --enable-rebuild-chartables (see above). However, + when PCRE2 is built this way, it always operates in EBCDIC. It cannot support + both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25, + which specifies that the code value for the EBCDIC NL character is 0x25 + instead of the default 0x15. + +. If you specify --enable-debug, additional debugging code is included in the + build. This option is intended for use by the PCRE2 maintainers. + +. In environments where valgrind is installed, if you specify + + --enable-valgrind + + PCRE2 will use valgrind annotations to mark certain memory regions as + unaddressable. This allows it to detect invalid memory accesses, and is + mostly useful for debugging PCRE2 itself. + +. In environments where the gcc compiler is used and lcov is installed, if you + specify + + --enable-coverage + + the build process implements a code coverage report for the test suite. The + report is generated by running "make coverage". If ccache is installed on + your system, it must be disabled when building PCRE2 for coverage reporting. + You can do this by setting the environment variable CCACHE_DISABLE=1 before + running "make" to build PCRE2. 
There is more information about coverage + reporting in the "pcre2build" documentation. + +. When JIT support is enabled, pcre2grep automatically makes use of it, unless + you add --disable-pcre2grep-jit to the "configure" command. + +. There is support for calling external programs during matching in the + pcre2grep command, using PCRE2's callout facility with string arguments. This + support can be disabled by adding --disable-pcre2grep-callout to the + "configure" command. There are two kinds of callout: one that generates + output from inbuilt code, and another that calls an external program. The + latter has special support for Windows and VMS; otherwise it assumes the + existence of the fork() function. This facility can be disabled by adding + --disable-pcre2grep-callout-fork to the "configure" command. + +. The pcre2grep program currently supports only 8-bit data files, and so + requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use + libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by + specifying one or both of + + --enable-pcre2grep-libz + --enable-pcre2grep-libbz2 + + Of course, the relevant libraries must be installed on your system. + +. The default starting size (in bytes) of the internal buffer used by pcre2grep + can be set by, for example: + + --with-pcre2grep-bufsize=51200 + + The value must be a plain integer. The default is 20480. The amount of memory + used by pcre2grep is actually three times this number, to allow for "before" + and "after" lines. If very long lines are encountered, the buffer is + automatically enlarged, up to a fixed maximum size. + +. The default maximum size of pcre2grep's internal buffer can be set by, for + example: + + --with-pcre2grep-max-bufsize=2097152 + + The default is either 1048576 or the value of --with-pcre2grep-bufsize, + whichever is the larger. + +. 
It is possible to compile pcre2test so that it links with the libreadline + or libedit libraries, by specifying, respectively, + + --enable-pcre2test-libreadline or --enable-pcre2test-libedit + + If this is done, when pcre2test's input is from a terminal, it reads it using + the readline() function. This provides line-editing and history facilities. + Note that libreadline is GPL-licenced, so if you distribute a binary of + pcre2test linked in this way, there may be licensing issues. These can be + avoided by linking with libedit (which has a BSD licence) instead. + + Enabling libreadline causes the -lreadline option to be added to the + pcre2test build. In many operating environments with a system-installed + readline library this is sufficient. However, in some environments (e.g. if + an unmodified distribution version of readline is in use), it may be + necessary to specify something like LIBS="-lncurses" as well. This is + because, to quote the readline INSTALL, "Readline uses the termcap functions, + but does not link with the termcap or curses library itself, allowing + applications which link with readline the to choose an appropriate library." + If you get error messages about missing functions tgetstr, tgetent, tputs, + tgetflag, or tgoto, this is the problem, and linking with the ncurses library + should fix it. + +. The C99 standard defines formatting modifiers z and t for size_t and + ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in + environments other than Microsoft Visual Studio when __STDC_VERSION__ is + defined and has a value greater than or equal to 199901L (indicating C99). + However, there is at least one environment that claims to be C99 but does not + support these modifiers. If --disable-percent-zt is specified, no use is made + of the z or t modifiers. Instead of %td or %zu, %lu is used, with a cast for + size_t values. + +.
There is a special option called --enable-fuzz-support for use by people who + want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit + library. If set, it causes an extra library called libpcre2-fuzzsupport.a to + be built, but not installed. This contains a single function called + LLVMFuzzerTestOneInput() whose arguments are a pointer to a string and the + length of the string. When called, this function tries to compile the string + as a pattern, and if that succeeds, to match it. This is done both with no + options and with some random options bits that are generated from the string. + Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to + be created. This is normally run under valgrind or used when PCRE2 is + compiled with address sanitizing enabled. It calls the fuzzing function and + outputs information about what it is doing. The input strings are specified by + arguments: if an argument starts with "=" the rest of it is a literal input + string. Otherwise, it is assumed to be a file name, and the contents of the + file are the test string. + +. Releases before 10.30 could be compiled with --disable-stack-for-recursion, + which caused pcre2_match() to use individual blocks on the heap for + backtracking instead of recursive function calls (which use the stack). This + is now obsolete since pcre2_match() was refactored always to use the heap (in + a much more efficient way than before). This option is retained for backwards + compatibility, but has no effect other than to output a warning. + +The "configure" script builds the following files for the basic C library: + +. Makefile the makefile that builds the library +. src/config.h build-time configuration options for the library +. src/pcre2.h the public PCRE2 header file +. pcre2-config script that shows the building settings such as CFLAGS + that were set for "configure" +. libpcre2-8.pc ) +. libpcre2-16.pc ) data for the pkg-config command +. libpcre2-32.pc ) +.
libpcre2-posix.pc ) +. libtool script that builds shared and/or static libraries + +Versions of config.h and pcre2.h are distributed in the src directory of PCRE2 +tarballs under the names config.h.generic and pcre2.h.generic. These are +provided for those who have to build PCRE2 without using "configure" or CMake. +If you use "configure" or CMake, the .generic versions are not used. + +The "configure" script also creates config.status, which is an executable +script that can be run to recreate the configuration, and config.log, which +contains compiler output from tests that "configure" runs. + +Once "configure" has run, you can run "make". This builds whichever of the +libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test +program called pcre2test. If you enabled JIT support with --enable-jit, another +test program called pcre2_jit_test is built as well. If the 8-bit library is +built, libpcre2-posix and the pcre2grep command are also built. Running +"make" with the -j option may speed up compilation on multiprocessor systems. + +The command "make check" runs all the appropriate tests. Details of the PCRE2 +tests are given below in a separate section of this document. The -j option of +"make" can also be used when running the tests. + +You can use "make install" to install PCRE2 into live directories on your +system. 
The following are installed (file names are all relative to the + <prefix> that is set when "configure" is run): + + Commands (bin): + pcre2test + pcre2grep (if 8-bit support is enabled) + pcre2-config + + Libraries (lib): + libpcre2-8 (if 8-bit support is enabled) + libpcre2-16 (if 16-bit support is enabled) + libpcre2-32 (if 32-bit support is enabled) + libpcre2-posix (if 8-bit support is enabled) + + Configuration information (lib/pkgconfig): + libpcre2-8.pc + libpcre2-16.pc + libpcre2-32.pc + libpcre2-posix.pc + + Header files (include): + pcre2.h + pcre2posix.h + + Man pages (share/man/man{1,3}): + pcre2grep.1 + pcre2test.1 + pcre2-config.1 + pcre2.3 + pcre2*.3 (lots more pages, all starting "pcre2") + + HTML documentation (share/doc/pcre2/html): + index.html + *.html (lots more pages, hyperlinked from index.html) + + Text file documentation (share/doc/pcre2): + AUTHORS + COPYING + ChangeLog + LICENCE + NEWS + README + pcre2.txt (a concatenation of the man(3) pages) + pcre2test.txt the pcre2test man page + pcre2grep.txt the pcre2grep man page + pcre2-config.txt the pcre2-config man page + +If you want to remove PCRE2 from your system, you can run "make uninstall". +This removes all the files that "make install" installed. However, it does not +remove any directories, because these are often shared with other programs. + + +Retrieving configuration information +------------------------------------ + +Running "make install" installs the command pcre2-config, which can be used to +recall information about the PCRE2 configuration and installation. For example: + + pcre2-config --version + +prints the version number, and + + pcre2-config --libs8 + +outputs information about where the 8-bit library is installed. This command +can be included in makefiles for programs that use PCRE2, saving the programmer +from having to remember too many details. Run pcre2-config with no arguments to +obtain a list of possible arguments.
+ +The pkg-config command is another system for saving and retrieving information +about installed libraries. Instead of separate commands for each library, a +single command is used. For example: + + pkg-config --libs libpcre2-16 + +The data is held in *.pc files that are installed in a directory called +<prefix>/lib/pkgconfig. + + +Shared libraries +---------------- + +The default distribution builds PCRE2 as shared libraries and static libraries, +as long as the operating system supports shared libraries. Shared library +support relies on the "libtool" script which is built as part of the +"configure" process. + +The libtool script is used to compile and link both shared and static +libraries. They are placed in a subdirectory called .libs when they are newly +built. The programs pcre2test and pcre2grep are built to use these uninstalled +libraries (by means of wrapper scripts in the case of shared libraries). When +you use "make install" to install shared libraries, pcre2grep and pcre2test are +automatically re-built to use the newly installed shared libraries before being +installed themselves. However, the versions left in the build directory still +use the uninstalled libraries. + +To build PCRE2 using static libraries only you must use --disable-shared when +configuring it. For example: + +./configure --prefix=/usr/gnu --disable-shared + +Then run "make" in the usual way. Similarly, you can use --disable-static to +build only shared libraries. + + +Cross-compiling using autotools +------------------------------- + +You can specify CC and CFLAGS in the normal way to the "configure" command, in +order to cross-compile PCRE2 for some other host. However, you should NOT +specify --enable-rebuild-chartables, because if you do, the pcre2_dftables.c +source file is compiled and run on the local host, in order to generate the +inbuilt character tables (the pcre2_chartables.c file).
This will probably not +work, because pcre2_dftables.c needs to be compiled with the local compiler, +not the cross compiler. + +When --enable-rebuild-chartables is not specified, pcre2_chartables.c is +created by making a copy of pcre2_chartables.c.dist, which is a default set of +tables that assumes ASCII code. Cross-compiling with the default tables should +not be a problem. + +If you need to modify the character tables when cross-compiling, you should +move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by +hand and run it on the local host to make a new version of +pcre2_chartables.c.dist. See the pcre2build section "Creating character tables +at build time" for more details. + + +Making new tarballs +------------------- + +The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and +zip formats. The command "make distcheck" does the same, but then does a trial +build of the new distribution to ensure that it works. + +If you have modified any of the man page sources in the doc directory, you +should first run the PrepareRelease script before making a distribution. This +script creates the .txt and HTML forms of the documentation from the man pages. + + +Testing PCRE2 +------------- + +To test the basic PCRE2 library on a Unix-like system, run the RunTest script. +There is another script called RunGrepTest that tests the pcre2grep command. +When JIT support is enabled, a third test program called pcre2_jit_test is +built. Both the scripts and all the program tests are run if you obey "make +check". For other environments, see the instructions in NON-AUTOTOOLS-BUILD. + +The RunTest script runs the pcre2test test program (which is documented in its +own man page) on each of the relevant testinput files in the testdata +directory, and compares the output with the contents of the corresponding +testoutput files. RunTest uses a file called testtry to hold the main output +from pcre2test. 
Other files whose names begin with "test" are used as working +files in some tests. + +Some tests are relevant only when certain build-time options were selected. For +example, the tests for UTF-8/16/32 features are run only when Unicode support +is available. RunTest outputs a comment when it skips a test. + +Many (but not all) of the tests that are not skipped are run twice if JIT +support is available. On the second run, JIT compilation is forced. This +testing can be suppressed by putting "nojit" on the RunTest command line. + +The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit +libraries that are enabled. If you want to run just one set of tests, call +RunTest with either the -8, -16 or -32 option. + +If valgrind is installed, you can run the tests under it by putting "valgrind" +on the RunTest command line. To run pcre2test on just one or more specific test +files, give their numbers as arguments to RunTest, for example: + + RunTest 2 7 11 + +You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the +end), or a number preceded by ~ to exclude a test. For example: + + RunTest 3-15 ~10 + +This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests +except test 13. Whatever order the arguments are in, the tests are always run +in numerical order. + +You can also call RunTest with the single argument "list" to cause it to output +a list of tests. + +The test sequence starts with "test 0", which is a special test that has no +input file, and whose output is not checked. This is because it will be +different on different hardware and with different configurations. The test +exists in order to exercise some of pcre2test's code that would not otherwise +be run. + +Tests 1 and 2 can always be run, as they expect only plain text strings (not +UTF) and make no use of Unicode properties. The first test file can be fed +directly into the perltest.sh script to check that Perl gives the same results.
+The only difference you should see is in the first few lines, where the Perl +version is given instead of the PCRE2 version. The second set of tests check +auxiliary functions, error detection, and run-time flags that are specific to +PCRE2. It also uses the debugging flags to check some of the internals of +pcre2_compile(). + +If you build PCRE2 with a locale setting that is not the standard C locale, the +character tables may be different (see next paragraph). In some cases, this may +cause failures in the second set of tests. For example, in a locale where the +isprint() function yields TRUE for characters in the range 128-255, the use of +[:isascii:] inside a character class defines a different set of characters, and +this shows up in this test as a difference in the compiled code, which is being +listed for checking. For example, where the comparison test output contains +[\x00-\x7f] the test might contain [\x00-\xff], and similarly in some other +cases. This is not a bug in PCRE2. + +Test 3 checks pcre2_maketables(), the facility for building a set of character +tables for a specific locale and using them instead of the default tables. The +script uses the "locale" command to check for the availability of the "fr_FR", +"french", or "fr" locale, and uses the first one that it finds. If the "locale" +command fails, or if its output doesn't include "fr_FR", "french", or "fr" in +the list of available locales, the third test cannot be run, and a comment is +output to say why. If running this test produces an error like this: + + ** Failed to set locale "fr_FR" + +it means that the given locale is not available on your system, despite being +listed by "locale". This does not mean that PCRE2 is broken. There are three +alternative output files for the third test, because three different versions +of the French locale have been encountered. The test passes if its output +matches any one of them. 
+ +Tests 4 and 5 check UTF and Unicode property support, test 4 being compatible +with the perltest.sh script, and test 5 checking PCRE2-specific things. + +Tests 6 and 7 check the pcre2_dfa_match() alternative matching function, in +non-UTF mode and UTF-mode with Unicode property support, respectively. + +Test 8 checks some internal offsets and code size features, but it is run only +when Unicode support is enabled. The output is different in 8-bit, 16-bit, and +32-bit modes and for different link sizes, so there are different output files +for each mode and link size. + +Tests 9 and 10 are run only in 8-bit mode, and tests 11 and 12 are run only in +16-bit and 32-bit modes. These are tests that generate different output in +8-bit mode. Each pair are for general cases and Unicode support, respectively. + +Test 13 checks the handling of non-UTF characters greater than 255 by +pcre2_dfa_match() in 16-bit and 32-bit modes. + +Test 14 contains some special UTF and UCP tests that give different output for +different code unit widths. + +Test 15 contains a number of tests that must not be run with JIT. They check, +among other non-JIT things, the match-limiting features of the interpretive +matcher. + +Test 16 is run only when JIT support is not available. It checks that an +attempt to use JIT has the expected behaviour. + +Test 17 is run only when JIT support is available. It checks JIT complete and +partial modes, match-limiting under JIT, and other JIT-specific features. + +Tests 18 and 19 are run only in 8-bit mode. They check the POSIX interface to +the 8-bit library, without and with Unicode support, respectively. + +Test 20 checks the serialization functions by writing a set of compiled +patterns to a file, and then reloading and checking them. + +Tests 21 and 22 test \C support when the use of \C is not locked out, without +and with UTF support, respectively. Test 23 tests \C when it is locked out.
+ +Tests 24 and 25 test the experimental pattern conversion functions, without and +with UTF support, respectively. + + +Character tables +---------------- + +For speed, PCRE2 uses four tables for manipulating and identifying characters +whose code point values are less than 256. By default, a set of tables that is +built into the library is used. The pcre2_maketables() function can be called +by an application to create a new set of tables in the current locale. These are +passed to PCRE2 by calling pcre2_set_character_tables() to put a pointer into a +compile context. + +The source file called pcre2_chartables.c contains the default set of tables. +By default, this is created as a copy of pcre2_chartables.c.dist, which +contains tables for ASCII coding. However, if --enable-rebuild-chartables is +specified for ./configure, a new version of pcre2_chartables.c is built by the +program pcre2_dftables (compiled from pcre2_dftables.c), which uses the ANSI C +character handling functions such as isalnum(), isalpha(), isupper(), +islower(), etc. to build the table sources. This means that the default C +locale that is set for your system will control the contents of these default +tables. You can change the default tables by editing pcre2_chartables.c and +then re-building PCRE2. If you do this, you should take care to ensure that the +file does not get automatically re-generated. The best way to do this is to +move pcre2_chartables.c.dist out of the way and replace it with your customized +tables. + +When the pcre2_dftables program is run as a result of specifying +--enable-rebuild-chartables, it uses the default C locale that is set on your +system. It does not pay attention to the LC_xxx environment variables. In other +words, it uses the system's default locale rather than whatever the compiling +user happens to have set.
If you really do want to build a source set of +character tables in a locale that is specified by the LC_xxx variables, you can +run the pcre2_dftables program by hand with the -L option. For example: + + ./pcre2_dftables -L pcre2_chartables.c.special + +The second argument names the file where the source code for the tables is +written. The first two 256-byte tables provide lower casing and case flipping +functions, respectively. The next table consists of a number of 32-byte bit +maps which identify certain character classes such as digits, "word" +characters, white space, etc. These are used when building 32-byte bit maps +that represent character classes for code points less than 256. The final +256-byte table has bits indicating various character types, as follows: + + 1 white space character + 2 letter + 4 lower case letter + 8 decimal digit + 16 alphanumeric or '_' + +You can also specify -b (with or without -L) when running pcre2_dftables. This +causes the tables to be written in binary instead of as source code. A set of +binary tables can be loaded into memory by an application and passed to +pcre2_compile() in the same way as tables created dynamically by calling +pcre2_maketables(). The tables are just a string of bytes, independent of +hardware characteristics such as endianness. This means they can be bundled +with an application that runs in different environments, to ensure consistent +behaviour. + +See also the pcre2build section "Creating character tables at build time". + + +File manifest +------------- + +The distribution should contain the files listed below. 
+ +(A) Source files for the PCRE2 library functions and their headers are found in + the src directory: + + src/pcre2_dftables.c auxiliary program for building pcre2_chartables.c + when --enable-rebuild-chartables is specified + + src/pcre2_chartables.c.dist a default set of character tables that assume + ASCII coding; unless --enable-rebuild-chartables is + specified, used by copying to pcre2_chartables.c + + src/pcre2posix.c ) + src/pcre2_auto_possess.c ) + src/pcre2_compile.c ) + src/pcre2_config.c ) + src/pcre2_context.c ) + src/pcre2_convert.c ) + src/pcre2_dfa_match.c ) + src/pcre2_error.c ) + src/pcre2_extuni.c ) + src/pcre2_find_bracket.c ) + src/pcre2_jit_compile.c ) + src/pcre2_jit_match.c ) sources for the functions in the library, + src/pcre2_jit_misc.c ) and some internal functions that they use + src/pcre2_maketables.c ) + src/pcre2_match.c ) + src/pcre2_match_data.c ) + src/pcre2_newline.c ) + src/pcre2_ord2utf.c ) + src/pcre2_pattern_info.c ) + src/pcre2_script_run.c ) + src/pcre2_serialize.c ) + src/pcre2_string_utils.c ) + src/pcre2_study.c ) + src/pcre2_substitute.c ) + src/pcre2_substring.c ) + src/pcre2_tables.c ) + src/pcre2_ucd.c ) + src/pcre2_valid_utf.c ) + src/pcre2_xclass.c ) + + src/pcre2_printint.c debugging function that is used by pcre2test, + src/pcre2_fuzzsupport.c function for (optional) fuzzing support + + src/config.h.in template for config.h, when built by "configure" + src/pcre2.h.in template for pcre2.h when built by "configure" + src/pcre2posix.h header for the external POSIX wrapper API + src/pcre2_internal.h header for internal use + src/pcre2_intmodedep.h a mode-specific internal header + src/pcre2_ucp.h header for Unicode property handling + + sljit/* source files for the JIT compiler + +(B) Source files for programs that use PCRE2: + + src/pcre2demo.c simple demonstration of coding calls to PCRE2 + src/pcre2grep.c source of a grep utility that uses PCRE2 + src/pcre2test.c comprehensive test program + src/pcre2_jit_test.c 
JIT test program + +(C) Auxiliary files: + + 132html script to turn "man" pages into HTML + AUTHORS information about the author of PCRE2 + ChangeLog log of changes to the code + CleanTxt script to clean nroff output for txt man pages + Detrail script to remove trailing spaces + HACKING some notes about the internals of PCRE2 + INSTALL generic installation instructions + LICENCE conditions for the use of PCRE2 + COPYING the same, using GNU's standard name + Makefile.in ) template for Unix Makefile, which is built by + ) "configure" + Makefile.am ) the automake input that was used to create + ) Makefile.in + NEWS important changes in this release + NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools + PrepareRelease script to make preparations for "make dist" + README this file + RunTest a Unix shell script for running tests + RunGrepTest a Unix shell script for pcre2grep tests + aclocal.m4 m4 macros (generated by "aclocal") + config.guess ) files used by libtool, + config.sub ) used only when building a shared library + configure a configuring shell script (built by autoconf) + configure.ac ) the autoconf input that was used to build + ) "configure" and config.h + depcomp ) script to find program dependencies, generated by + ) automake + doc/*.3 man page sources for PCRE2 + doc/*.1 man page sources for pcre2grep and pcre2test + doc/index.html.src the base HTML page + doc/html/* HTML documentation + doc/pcre2.txt plain text version of the man pages + doc/pcre2test.txt plain text documentation of test program + install-sh a shell script for installing files + libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config + libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config + libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config + libpcre2-posix.pc.in template for libpcre2-posix.pc for pkg-config + ltmain.sh file used to build a libtool script + missing ) common stub for a few missing GNU programs while + ) installing, generated by 
automake + mkinstalldirs script for making install directories + perltest.sh Script for running a Perl test program + pcre2-config.in source of script which retains PCRE2 information + testdata/testinput* test data for main library tests + testdata/testoutput* expected test results + testdata/grep* input and output for pcre2grep tests + testdata/* other supporting test files + +(D) Auxiliary files for cmake support + + cmake/COPYING-CMAKE-SCRIPTS + cmake/FindPackageHandleStandardArgs.cmake + cmake/FindEditline.cmake + cmake/FindReadline.cmake + CMakeLists.txt + config-cmake.h.in + +(E) Auxiliary files for building PCRE2 "by hand" + + src/pcre2.h.generic ) a version of the public PCRE2 header file + ) for use in non-"configure" environments + src/config.h.generic ) a version of config.h for use in non-"configure" + ) environments + +Philip Hazel +Email local part: Philip.Hazel +Email domain: gmail.com +Last updated: 04 December 2020 diff --git a/pcre2/aclocal.m4 b/pcre2/aclocal.m4 new file mode 100644 index 000000000..a1b8aed63 --- /dev/null +++ b/pcre2/aclocal.m4 @@ -0,0 +1,1548 @@ +# generated automatically by aclocal 1.16.2 -*- Autoconf -*- + +# Copyright (C) 1996-2020 Free Software Foundation, Inc. + +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])]) +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],, +[m4_warning([this file was generated for autoconf 2.69. 
+You have another version of autoconf. It may work, but is not guaranteed to. +If you have problems, you may need to regenerate the build system entirely. +To do so, use the procedure documented by the package, typically 'autoreconf'.])]) + +# pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*- +# serial 11 (pkg-config-0.29.1) + +dnl Copyright © 2004 Scott James Remnant . +dnl Copyright © 2012-2015 Dan Nicholson +dnl +dnl This program is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU General Public License as published by +dnl the Free Software Foundation; either version 2 of the License, or +dnl (at your option) any later version. +dnl +dnl This program is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl General Public License for more details. +dnl +dnl You should have received a copy of the GNU General Public License +dnl along with this program; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +dnl 02111-1307, USA. +dnl +dnl As a special exception to the GNU General Public License, if you +dnl distribute this file as part of a program that contains a +dnl configuration script generated by Autoconf, you may include it under +dnl the same distribution terms that you use for the rest of that +dnl program. + +dnl PKG_PREREQ(MIN-VERSION) +dnl ----------------------- +dnl Since: 0.29 +dnl +dnl Verify that the version of the pkg-config macros are at least +dnl MIN-VERSION. Unlike PKG_PROG_PKG_CONFIG, which checks the user's +dnl installed version of pkg-config, this checks the developer's version +dnl of pkg.m4 when generating configure. 
+dnl +dnl To ensure that this macro is defined, also add: +dnl m4_ifndef([PKG_PREREQ], +dnl [m4_fatal([must install pkg-config 0.29 or later before running autoconf/autogen])]) +dnl +dnl See the "Since" comment for each macro you use to see what version +dnl of the macros you require. +m4_defun([PKG_PREREQ], +[m4_define([PKG_MACROS_VERSION], [0.29.1]) +m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1, + [m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])]) +])dnl PKG_PREREQ + +dnl PKG_PROG_PKG_CONFIG([MIN-VERSION]) +dnl ---------------------------------- +dnl Since: 0.16 +dnl +dnl Search for the pkg-config tool and set the PKG_CONFIG variable to +dnl the first one found in the path. Checks that the version of pkg-config +dnl found is at least MIN-VERSION. If MIN-VERSION is not specified, 0.9.0 is +dnl used since that's the first version where most current features of +dnl pkg-config existed. +dnl +dnl The path search is skipped when ac_cv_env_PKG_CONFIG_set is "set", i.e. +dnl when PKG_CONFIG was already supplied. If the version check fails, +dnl PKG_CONFIG is reset to the empty string. +AC_DEFUN([PKG_PROG_PKG_CONFIG], +[m4_pattern_forbid([^_?PKG_[A-Z_]+$]) +m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$]) +m4_pattern_allow([^PKG_CONFIG_(DISABLE_UNINSTALLED|TOP_BUILD_DIR|DEBUG_SPEW)$]) +AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility]) +AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path]) +AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path]) + +if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then + AC_PATH_TOOL([PKG_CONFIG], [pkg-config]) +fi +if test -n "$PKG_CONFIG"; then + _pkg_min_version=m4_default([$1], [0.9.0]) + AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version]) + if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + PKG_CONFIG="" + fi +fi[]dnl +])dnl PKG_PROG_PKG_CONFIG + +dnl PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +dnl ------------------------------------------------------------------- +dnl Since: 0.18 +dnl
+dnl Check to see whether a particular set of modules exists. Similar to +dnl PKG_CHECK_MODULES(), but does not set variables or print errors. +dnl +dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG]) +dnl only at the first occurrence in configure.ac, so if the first place +dnl it's called might be skipped (such as if it is within an "if"), you +dnl have to call PKG_CHECK_EXISTS manually. +AC_DEFUN([PKG_CHECK_EXISTS], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +if test -n "$PKG_CONFIG" && \ + AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then + m4_default([$2], [:]) +m4_ifvaln([$3], [else + $3])dnl +fi]) + +dnl _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES]) +dnl --------------------------------------------- +dnl Internal wrapper calling pkg-config via PKG_CONFIG and setting +dnl pkg_failed based on the result. +dnl +dnl If VARIABLE is already non-empty its value is copied to pkg_cv_VARIABLE +dnl and pkg-config is not run. If PKG_CONFIG is empty, pkg_failed is set to +dnl untried; if the modules are missing or the query fails, it is set to yes. +m4_define([_PKG_CONFIG], +[if test -n "$$1"; then + pkg_cv_[]$1="$$1" + elif test -n "$PKG_CONFIG"; then + PKG_CHECK_EXISTS([$3], + [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes ], + [pkg_failed=yes]) + else + pkg_failed=untried +fi[]dnl +])dnl _PKG_CONFIG + +dnl _PKG_SHORT_ERRORS_SUPPORTED +dnl --------------------------- +dnl Internal check to see if pkg-config supports short errors.
+AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG]) +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi[]dnl +])dnl _PKG_SHORT_ERRORS_SUPPORTED + + +dnl PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], +dnl [ACTION-IF-NOT-FOUND]) +dnl -------------------------------------------------------------- +dnl Since: 0.4.0 +dnl +dnl Queries pkg-config for the --cflags and --libs of MODULES. On success, +dnl sets VARIABLE-PREFIX_CFLAGS and VARIABLE-PREFIX_LIBS and runs +dnl ACTION-IF-FOUND. If the modules are not found, or pkg-config itself is +dnl unavailable, runs ACTION-IF-NOT-FOUND; when that argument is omitted, +dnl configure stops with an error message instead. +dnl +dnl Note that if there is a possibility the first call to +dnl PKG_CHECK_MODULES might not happen, you should be sure to include an +dnl explicit call to PKG_PROG_PKG_CONFIG in your configure.ac. +AC_DEFUN([PKG_CHECK_MODULES], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl +AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl + +pkg_failed=no +AC_MSG_CHECKING([for $1]) + +_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2]) +_PKG_CONFIG([$1][_LIBS], [libs], [$2]) + +m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS +and $1[]_LIBS to avoid the need to call pkg-config. +See the pkg-config man page for more details.]) + +if test $pkg_failed = yes; then + AC_MSG_RESULT([no]) + _PKG_SHORT_ERRORS_SUPPORTED + if test $_pkg_short_errors_supported = yes; then + $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1` + else + $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD + + m4_default([$4], [AC_MSG_ERROR( +[Package requirements ($2) were not met: + +$$1_PKG_ERRORS + +Consider adjusting the PKG_CONFIG_PATH environment variable if you +installed software in a non-standard prefix. + +_PKG_TEXT])[]dnl + ]) +elif test $pkg_failed = untried; then + AC_MSG_RESULT([no]) + m4_default([$4], [AC_MSG_FAILURE( +[The pkg-config script could not be found or is too old.
Make sure it +is in your PATH or set the PKG_CONFIG environment variable to the full +path to pkg-config. + +_PKG_TEXT + +To get pkg-config, see .])[]dnl + ]) +else + $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS + $1[]_LIBS=$pkg_cv_[]$1[]_LIBS + AC_MSG_RESULT([yes]) + $3 +fi[]dnl +])dnl PKG_CHECK_MODULES + + +dnl PKG_CHECK_MODULES_STATIC(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], +dnl [ACTION-IF-NOT-FOUND]) +dnl --------------------------------------------------------------------- +dnl Since: 0.29 +dnl +dnl Checks for existence of MODULES and gathers its build flags with +dnl static libraries enabled. Sets VARIABLE-PREFIX_CFLAGS from --cflags +dnl and VARIABLE-PREFIX_LIBS from --libs. +dnl +dnl Note that if there is a possibility the first call to +dnl PKG_CHECK_MODULES_STATIC might not happen, you should be sure to +dnl include an explicit call to PKG_PROG_PKG_CONFIG in your +dnl configure.ac. +AC_DEFUN([PKG_CHECK_MODULES_STATIC], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +_save_PKG_CONFIG=$PKG_CONFIG +PKG_CONFIG="$PKG_CONFIG --static" +PKG_CHECK_MODULES($@) +PKG_CONFIG=$_save_PKG_CONFIG[]dnl +])dnl PKG_CHECK_MODULES_STATIC + + +dnl PKG_INSTALLDIR([DIRECTORY]) +dnl ------------------------- +dnl Since: 0.27 +dnl +dnl Substitutes the variable pkgconfigdir as the location where a module +dnl should install pkg-config .pc files. By default the directory is +dnl $libdir/pkgconfig, but the default can be changed by passing +dnl DIRECTORY. The user can override through the --with-pkgconfigdir +dnl parameter.
+AC_DEFUN([PKG_INSTALLDIR], +[m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])]) +m4_pushdef([pkg_description], + [pkg-config installation directory @<:@]pkg_default[@:>@]) +AC_ARG_WITH([pkgconfigdir], + [AS_HELP_STRING([--with-pkgconfigdir], pkg_description)],, + [with_pkgconfigdir=]pkg_default) +AC_SUBST([pkgconfigdir], [$with_pkgconfigdir]) +m4_popdef([pkg_default]) +m4_popdef([pkg_description]) +])dnl PKG_INSTALLDIR + + +dnl PKG_NOARCH_INSTALLDIR([DIRECTORY]) +dnl -------------------------------- +dnl Since: 0.27 +dnl +dnl Substitutes the variable noarch_pkgconfigdir as the location where a +dnl module should install arch-independent pkg-config .pc files. By +dnl default the directory is $datadir/pkgconfig, but the default can be +dnl changed by passing DIRECTORY. The user can override through the +dnl --with-noarch-pkgconfigdir parameter. +AC_DEFUN([PKG_NOARCH_INSTALLDIR], +[m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])]) +m4_pushdef([pkg_description], + [pkg-config arch-independent installation directory @<:@]pkg_default[@:>@]) +AC_ARG_WITH([noarch-pkgconfigdir], + [AS_HELP_STRING([--with-noarch-pkgconfigdir], pkg_description)],, + [with_noarch_pkgconfigdir=]pkg_default) +AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir]) +m4_popdef([pkg_default]) +m4_popdef([pkg_description]) +])dnl PKG_NOARCH_INSTALLDIR + + +dnl PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE, +dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +dnl ------------------------------------------- +dnl Since: 0.28 +dnl +dnl Retrieves the value of the pkg-config variable for the given module. 
+AC_DEFUN([PKG_CHECK_VAR], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl + +_PKG_CONFIG([$1], [variable="][$3]["], [$2]) +AS_VAR_COPY([$1], [pkg_cv_][$1]) + +AS_VAR_IF([$1], [""], [$5], [$4])dnl +])dnl PKG_CHECK_VAR + +dnl PKG_WITH_MODULES(VARIABLE-PREFIX, MODULES, +dnl [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND], +dnl [DESCRIPTION], [DEFAULT]) +dnl ------------------------------------------ +dnl +dnl Prepare a "--with-" configure option using the lowercase +dnl [VARIABLE-PREFIX] name, merging the behaviour of AC_ARG_WITH and +dnl PKG_CHECK_MODULES in a single macro. +AC_DEFUN([PKG_WITH_MODULES], +[ +m4_pushdef([with_arg], m4_tolower([$1])) + +m4_pushdef([description], + [m4_default([$5], [build with ]with_arg[ support])]) + +m4_pushdef([def_arg], [m4_default([$6], [auto])]) +m4_pushdef([def_action_if_found], [AS_TR_SH([with_]with_arg)=yes]) +m4_pushdef([def_action_if_not_found], [AS_TR_SH([with_]with_arg)=no]) + +m4_case(def_arg, + [yes],[m4_pushdef([with_without], [--without-]with_arg)], + [m4_pushdef([with_without],[--with-]with_arg)]) + +AC_ARG_WITH(with_arg, + AS_HELP_STRING(with_without, description[ @<:@default=]def_arg[@:>@]),, + [AS_TR_SH([with_]with_arg)=def_arg]) + +AS_CASE([$AS_TR_SH([with_]with_arg)], + [yes],[PKG_CHECK_MODULES([$1],[$2],$3,$4)], + [auto],[PKG_CHECK_MODULES([$1],[$2], + [m4_n([def_action_if_found]) $3], + [m4_n([def_action_if_not_found]) $4])]) + +m4_popdef([with_arg]) +m4_popdef([description]) +m4_popdef([def_arg]) + +])dnl PKG_WITH_MODULES + +dnl PKG_HAVE_WITH_MODULES(VARIABLE-PREFIX, MODULES, +dnl [DESCRIPTION], [DEFAULT]) +dnl ----------------------------------------------- +dnl +dnl Convenience macro to trigger AM_CONDITIONAL after PKG_WITH_MODULES +dnl check._[VARIABLE-PREFIX] is exported as make variable. 
+AC_DEFUN([PKG_HAVE_WITH_MODULES], +[ +PKG_WITH_MODULES([$1],[$2],,,[$3],[$4]) + +AM_CONDITIONAL([HAVE_][$1], + [test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"]) +])dnl PKG_HAVE_WITH_MODULES + +dnl PKG_HAVE_DEFINE_WITH_MODULES(VARIABLE-PREFIX, MODULES, +dnl [DESCRIPTION], [DEFAULT]) +dnl ------------------------------------------------------ +dnl +dnl Convenience macro to run AM_CONDITIONAL and AC_DEFINE after +dnl PKG_WITH_MODULES check. HAVE_[VARIABLE-PREFIX] is exported as make +dnl and preprocessor variable. +AC_DEFUN([PKG_HAVE_DEFINE_WITH_MODULES], +[ +PKG_HAVE_WITH_MODULES([$1],[$2],[$3],[$4]) + +AS_IF([test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"], + [AC_DEFINE([HAVE_][$1], 1, [Enable ]m4_tolower([$1])[ support])]) +])dnl PKG_HAVE_DEFINE_WITH_MODULES + +# Copyright (C) 2002-2020 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_AUTOMAKE_VERSION(VERSION) +# ---------------------------- +# Automake X.Y traces this macro to ensure aclocal.m4 has been +# generated from the m4 files accompanying Automake X.Y. +# (This private macro should not be called outside this file.) +AC_DEFUN([AM_AUTOMAKE_VERSION], +[am__api_version='1.16' +dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to +dnl require some minimum version. Point them to the right macro. +m4_if([$1], [1.16.2], [], + [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl +]) + +# _AM_AUTOCONF_VERSION(VERSION) +# ----------------------------- +# aclocal traces this macro to find the Autoconf version. +# This is a private macro too. Using m4_define simplifies +# the logic in aclocal, which can simply ignore this definition. 
+m4_define([_AM_AUTOCONF_VERSION], []) + +# AM_SET_CURRENT_AUTOMAKE_VERSION +# ------------------------------- +# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. +# This function is AC_REQUIREd by AM_INIT_AUTOMAKE. +AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], +[AM_AUTOMAKE_VERSION([1.16.2])dnl +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) + +# Copyright (C) 2011-2020 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_AR([ACT-IF-FAIL]) +# ------------------------- +# Try to determine the archiver interface, and trigger the ar-lib wrapper +# if it is needed. If the detection of archiver interface fails, run +# ACT-IF-FAIL (default is to abort configure with a proper error message). +AC_DEFUN([AM_PROG_AR], +[AC_BEFORE([$0], [LT_INIT])dnl +AC_BEFORE([$0], [AC_PROG_LIBTOOL])dnl +AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([ar-lib])dnl +AC_CHECK_TOOLS([AR], [ar lib "link -lib"], [false]) +: ${AR=ar} + +AC_CACHE_CHECK([the archiver ($AR) interface], [am_cv_ar_interface], + [AC_LANG_PUSH([C]) + am_cv_ar_interface=ar + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[int some_variable = 0;]])], + [am_ar_try='$AR cru libconftest.a conftest.$ac_objext >&AS_MESSAGE_LOG_FD' + AC_TRY_EVAL([am_ar_try]) + if test "$ac_status" -eq 0; then + am_cv_ar_interface=ar + else + am_ar_try='$AR -NOLOGO -OUT:conftest.lib conftest.$ac_objext >&AS_MESSAGE_LOG_FD' + AC_TRY_EVAL([am_ar_try]) + if test "$ac_status" -eq 0; then + am_cv_ar_interface=lib + else + am_cv_ar_interface=unknown + fi + fi + rm -f conftest.lib libconftest.a + ]) + AC_LANG_POP([C])]) + +case $am_cv_ar_interface in +ar) + ;; +lib) + # Microsoft lib, so override with the ar-lib wrapper script. 
+ # FIXME: It is wrong to rewrite AR. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__AR in this case, + # and then we could set am__AR="$am_aux_dir/ar-lib \$(AR)" or something + # similar. + AR="$am_aux_dir/ar-lib $AR" + ;; +unknown) + m4_default([$1], + [AC_MSG_ERROR([could not determine $AR interface])]) + ;; +esac +AC_SUBST([AR])dnl +]) + +# AM_AUX_DIR_EXPAND -*- Autoconf -*- + +# Copyright (C) 2001-2020 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets +# $ac_aux_dir to '$srcdir/foo'. In other projects, it is set to +# '$srcdir', '$srcdir/..', or '$srcdir/../..'. +# +# Of course, Automake must honor this variable whenever it calls a +# tool from the auxiliary directory. The problem is that $srcdir (and +# therefore $ac_aux_dir as well) can be either absolute or relative, +# depending on how configure is run. This is pretty annoying, since +# it makes $ac_aux_dir quite unusable in subdirectories: in the top +# source directory, any form will work fine, but in subdirectories a +# relative path needs to be adjusted first. +# +# $ac_aux_dir/missing +# fails when called from a subdirectory if $ac_aux_dir is relative +# $top_srcdir/$ac_aux_dir/missing +# fails if $ac_aux_dir is absolute, +# fails when called from a subdirectory in a VPATH build with +# a relative $ac_aux_dir +# +# The reason of the latter failure is that $top_srcdir and $ac_aux_dir +# are both prefixed by $srcdir. In an in-source build this is usually +# harmless because $srcdir is '.', but things will broke when you +# start a VPATH build or use an absolute $srcdir. 
+# +# So we could use something similar to $top_srcdir/$ac_aux_dir/missing, +# iff we strip the leading $srcdir from $ac_aux_dir. That would be: +# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` +# and then we would define $MISSING as +# MISSING="\${SHELL} $am_aux_dir/missing" +# This will work as long as MISSING is not called from configure, because +# unfortunately $(top_srcdir) has no meaning in configure. +# However there are other variables, like CC, which are often used in +# configure, and could therefore not use this "fixed" $ac_aux_dir. +# +# Another solution, used here, is to always expand $ac_aux_dir to an +# absolute PATH. The drawback is that using absolute paths prevent a +# configured tree to be moved without reconfiguration. + +AC_DEFUN([AM_AUX_DIR_EXPAND], +[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl +# Expand $ac_aux_dir to an absolute path. +am_aux_dir=`cd "$ac_aux_dir" && pwd` +]) + +# AM_CONDITIONAL -*- Autoconf -*- + +# Copyright (C) 1997-2020 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_CONDITIONAL(NAME, SHELL-CONDITION) +# ------------------------------------- +# Define a conditional. +AC_DEFUN([AM_CONDITIONAL], +[AC_PREREQ([2.52])dnl + m4_if([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], + [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl +AC_SUBST([$1_TRUE])dnl +AC_SUBST([$1_FALSE])dnl +_AM_SUBST_NOTMAKE([$1_TRUE])dnl +_AM_SUBST_NOTMAKE([$1_FALSE])dnl +m4_define([_AM_COND_VALUE_$1], [$2])dnl +if $2; then + $1_TRUE= + $1_FALSE='#' +else + $1_TRUE='#' + $1_FALSE= +fi +AC_CONFIG_COMMANDS_PRE( +[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then + AC_MSG_ERROR([[conditional "$1" was never defined. 
+Usually this means the macro was only invoked conditionally.]]) +fi])]) + +# Copyright (C) 1999-2020 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + + +# There are a few dirty hacks below to avoid letting 'AC_PROG_CC' be +# written in clear, in which case automake, when reading aclocal.m4, +# will think it sees a *use*, and therefore will trigger all it's +# C support machinery. Also note that it means that autoscan, seeing +# CC etc. in the Makefile, will ask for an AC_PROG_CC use... + + +# _AM_DEPENDENCIES(NAME) +# ---------------------- +# See how the compiler implements dependency checking. +# NAME is "CC", "CXX", "OBJC", "OBJCXX", "UPC", or "GJC". +# We try a few techniques and use that to set a single cache variable. +# +# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was +# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular +# dependency, and given that the user is not expected to run this macro, +# just rely on AC_PROG_CC. +AC_DEFUN([_AM_DEPENDENCIES], +[AC_REQUIRE([AM_SET_DEPDIR])dnl +AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl +AC_REQUIRE([AM_MAKE_INCLUDE])dnl +AC_REQUIRE([AM_DEP_TRACK])dnl + +m4_if([$1], [CC], [depcc="$CC" am_compiler_list=], + [$1], [CXX], [depcc="$CXX" am_compiler_list=], + [$1], [OBJC], [depcc="$OBJC" am_compiler_list='gcc3 gcc'], + [$1], [OBJCXX], [depcc="$OBJCXX" am_compiler_list='gcc3 gcc'], + [$1], [UPC], [depcc="$UPC" am_compiler_list=], + [$1], [GCJ], [depcc="$GCJ" am_compiler_list='gcc3 gcc'], + [depcc="$$1" am_compiler_list=]) + +AC_CACHE_CHECK([dependency style of $depcc], + [am_cv_$1_dependencies_compiler_type], +[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. 
For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_$1_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` + fi + am__universal=false + m4_case([$1], [CC], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac], + [CXX], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac]) + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. 
It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_$1_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_$1_dependencies_compiler_type=none +fi +]) +AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) +AM_CONDITIONAL([am__fastdep$1], [ + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) +]) + + +# AM_SET_DEPDIR +# ------------- +# Choose a directory name for dependency files. +# This macro is AC_REQUIREd in _AM_DEPENDENCIES. +AC_DEFUN([AM_SET_DEPDIR], +[AC_REQUIRE([AM_SET_LEADING_DOT])dnl +AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl +]) + + +# AM_DEP_TRACK +# ------------ +AC_DEFUN([AM_DEP_TRACK], +[AC_ARG_ENABLE([dependency-tracking], [dnl +AS_HELP_STRING( + [--enable-dependency-tracking], + [do not reject slow dependency extractors]) +AS_HELP_STRING( + [--disable-dependency-tracking], + [speeds up one-time build])]) +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi +AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) +AC_SUBST([AMDEPBACKSLASH])dnl +_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl +AC_SUBST([am__nodep])dnl +_AM_SUBST_NOTMAKE([am__nodep])dnl +]) + +# Generate code to set up dependency tracking. -*- Autoconf -*- + +# Copyright (C) 1999-2020 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_OUTPUT_DEPENDENCY_COMMANDS +# ------------------------------ +AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], +[{ + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + # TODO: see whether this extra hack can be removed once we start + # requiring Autoconf 2.70 or later. 
+ AS_CASE([$CONFIG_FILES], + [*\'*], [eval set x "$CONFIG_FILES"], + [*], [set x $CONFIG_FILES]) + shift + # Used to flag and report bootstrapping failures. + am_rc=0 + for am_mf + do + # Strip MF so we end up with the name of the file. + am_mf=`AS_ECHO(["$am_mf"]) | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile which includes + # dependency-tracking related rules and includes. + # Grep'ing the whole file directly is not great: AIX grep has a line + # limit of 2048, but all sed's we know have understand at least 4000. + sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \ + || continue + am_dirpart=`AS_DIRNAME(["$am_mf"])` + am_filepart=`AS_BASENAME(["$am_mf"])` + AM_RUN_LOG([cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles]) || am_rc=$? + done + if test $am_rc -ne 0; then + AC_MSG_FAILURE([Something went wrong bootstrapping makefile fragments + for automatic dependency tracking. If GNU make was not used, consider + re-running the configure script with MAKE="gmake" (or whatever is + necessary). You can also try re-running configure with the + '--disable-dependency-tracking' option to at least be able to build + the package (albeit without support for automatic dependency tracking).]) + fi + AS_UNSET([am_dirpart]) + AS_UNSET([am_filepart]) + AS_UNSET([am_mf]) + AS_UNSET([am_rc]) + rm -f conftest-deps.mk +} +])# _AM_OUTPUT_DEPENDENCY_COMMANDS + + +# AM_OUTPUT_DEPENDENCY_COMMANDS +# ----------------------------- +# This macro should only be invoked once -- use via AC_REQUIRE. +# +# This code is only required when automatic dependency tracking is enabled. +# This creates each '.Po' and '.Plo' makefile fragment that we'll need in +# order to bootstrap the dependency handling code. 
+AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], +[AC_CONFIG_COMMANDS([depfiles], + [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], + [AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"])]) + +# Do all the work for Automake. -*- Autoconf -*- + +# Copyright (C) 1996-2020 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This macro actually does too much. Some checks are only needed if +# your package does certain things. But this isn't really a big deal. + +dnl Redefine AC_PROG_CC to automatically invoke _AM_PROG_CC_C_O. +m4_define([AC_PROG_CC], +m4_defn([AC_PROG_CC]) +[_AM_PROG_CC_C_O +]) + +# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) +# AM_INIT_AUTOMAKE([OPTIONS]) +# ----------------------------------------------- +# The call with PACKAGE and VERSION arguments is the old style +# call (pre autoconf-2.50), which is being phased out. PACKAGE +# and VERSION should now be passed to AC_INIT and removed from +# the call to AM_INIT_AUTOMAKE. +# We support both call styles for the transition. After +# the next Automake release, Autoconf can make the AC_INIT +# arguments mandatory, and then we can depend on a new Autoconf +# release and drop the old call support. +AC_DEFUN([AM_INIT_AUTOMAKE], +[AC_PREREQ([2.65])dnl +dnl Autoconf wants to disallow AM_ names. We explicitly allow +dnl the ones we care about. +m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl +AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl +AC_REQUIRE([AC_PROG_INSTALL])dnl +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." 
+ AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi +AC_SUBST([CYGPATH_W]) + +# Define the identity of the package. +dnl Distinguish between old-style and new-style calls. +m4_ifval([$2], +[AC_DIAGNOSE([obsolete], + [$0: two- and three-arguments forms are deprecated.]) +m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl + AC_SUBST([PACKAGE], [$1])dnl + AC_SUBST([VERSION], [$2])], +[_AM_SET_OPTIONS([$1])dnl +dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT. +m4_if( + m4_ifdef([AC_PACKAGE_NAME], [ok]):m4_ifdef([AC_PACKAGE_VERSION], [ok]), + [ok:ok],, + [m4_fatal([AC_INIT should be called with package and version arguments])])dnl + AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl + AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl + +_AM_IF_OPTION([no-define],, +[AC_DEFINE_UNQUOTED([PACKAGE], ["$PACKAGE"], [Name of package]) + AC_DEFINE_UNQUOTED([VERSION], ["$VERSION"], [Version number of package])])dnl + +# Some tools Automake needs. +AC_REQUIRE([AM_SANITY_CHECK])dnl +AC_REQUIRE([AC_ARG_PROGRAM])dnl +AM_MISSING_PROG([ACLOCAL], [aclocal-${am__api_version}]) +AM_MISSING_PROG([AUTOCONF], [autoconf]) +AM_MISSING_PROG([AUTOMAKE], [automake-${am__api_version}]) +AM_MISSING_PROG([AUTOHEADER], [autoheader]) +AM_MISSING_PROG([MAKEINFO], [makeinfo]) +AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl +AC_REQUIRE([AC_PROG_MKDIR_P])dnl +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. For more background, see: +# +# +AC_SUBST([mkdir_p], ['$(MKDIR_P)']) +# We need awk for the "check" target (and possibly the TAP driver). 
The +# system "awk" is bad on some platforms. +AC_REQUIRE([AC_PROG_AWK])dnl +AC_REQUIRE([AC_PROG_MAKE_SET])dnl +AC_REQUIRE([AM_SET_LEADING_DOT])dnl +_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], + [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], + [_AM_PROG_TAR([v7])])]) +_AM_IF_OPTION([no-dependencies],, +[AC_PROVIDE_IFELSE([AC_PROG_CC], + [_AM_DEPENDENCIES([CC])], + [m4_define([AC_PROG_CC], + m4_defn([AC_PROG_CC])[_AM_DEPENDENCIES([CC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_CXX], + [_AM_DEPENDENCIES([CXX])], + [m4_define([AC_PROG_CXX], + m4_defn([AC_PROG_CXX])[_AM_DEPENDENCIES([CXX])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJC], + [_AM_DEPENDENCIES([OBJC])], + [m4_define([AC_PROG_OBJC], + m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJCXX], + [_AM_DEPENDENCIES([OBJCXX])], + [m4_define([AC_PROG_OBJCXX], + m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl +]) +AC_REQUIRE([AM_SILENT_RULES])dnl +dnl The testsuite driver may need to know about EXEEXT, so add the +dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This +dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below. +AC_CONFIG_COMMANDS_PRE(dnl +[m4_provide_if([_AM_COMPILER_EXEEXT], + [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. +# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. +if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! + +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present. 
This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. + +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: . + +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + AC_MSG_ERROR([Your 'rm' program is bad, sorry.]) + fi +fi +dnl The trailing newline in this macro's definition is deliberate, for +dnl backward compatibility and to allow trailing 'dnl'-style comments +dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841. +]) + +dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not +dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further +dnl mangled by Autoconf and run in a shell conditional statement. +m4_define([_AC_COMPILER_EXEEXT], +m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])]) + +# When config.status generates a header, we must update the stamp-h file. +# This file resides in the same directory as the config header +# that is generated. The stamp files are numbered to have different names. + +# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the +# loop where config.status creates the headers, so we can generate +# our stamp files there. +AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK], +[# Compute $1's index in $config_headers. 
+_am_arg=$1 +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count]) + +# Copyright (C) 2001-2020 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_SH +# ------------------ +# Define $install_sh. +AC_DEFUN([AM_PROG_INSTALL_SH], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +if test x"${install_sh+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi +AC_SUBST([install_sh])]) + +# Copyright (C) 2003-2020 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# Check whether the underlying file-system supports filenames +# with a leading dot. For instance MS-DOS doesn't. +AC_DEFUN([AM_SET_LEADING_DOT], +[rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null +AC_SUBST([am__leading_dot])]) + +# Check to see how 'make' treats includes. -*- Autoconf -*- + +# Copyright (C) 2001-2020 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MAKE_INCLUDE() +# ----------------- +# Check whether make has an 'include' directive that can support all +# the idioms we need for our automatic dependency tracking code. 
+AC_DEFUN([AM_MAKE_INCLUDE], +[AC_MSG_CHECKING([whether ${MAKE-make} supports the include directive]) +cat > confinc.mk << 'END' +am__doit: + @echo this is the am__doit target >confinc.out +.PHONY: am__doit +END +am__include="#" +am__quote= +# BSD make does it like this. +echo '.include "confinc.mk" # ignored' > confmf.BSD +# Other make implementations (GNU, Solaris 10, AIX) do it like this. +echo 'include confinc.mk # ignored' > confmf.GNU +_am_result=no +for s in GNU BSD; do + AM_RUN_LOG([${MAKE-make} -f confmf.$s && cat confinc.out]) + AS_CASE([$?:`cat confinc.out 2>/dev/null`], + ['0:this is the am__doit target'], + [AS_CASE([$s], + [BSD], [am__include='.include' am__quote='"'], + [am__include='include' am__quote=''])]) + if test "$am__include" != "#"; then + _am_result="yes ($s style)" + break + fi +done +rm -f confinc.* confmf.* +AC_MSG_RESULT([${_am_result}]) +AC_SUBST([am__include])]) +AC_SUBST([am__quote])]) + +# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- + +# Copyright (C) 1997-2020 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MISSING_PROG(NAME, PROGRAM) +# ------------------------------ +AC_DEFUN([AM_MISSING_PROG], +[AC_REQUIRE([AM_MISSING_HAS_RUN]) +$1=${$1-"${am_missing_run}$2"} +AC_SUBST($1)]) + +# AM_MISSING_HAS_RUN +# ------------------ +# Define MISSING if not defined so far and test if it is modern enough. +# If it is, set am_missing_run to use it, otherwise, to nothing. 
+# MISSING defaults to running the 'missing' script from $am_aux_dir through
+# ${SHELL}; am_missing_run is left empty when that script cannot be used.
+AC_DEFUN([AM_MISSING_HAS_RUN],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+AC_REQUIRE_AUX_FILE([missing])dnl
+if test x"${MISSING+set}" != xset; then
+dnl Quote the script path when the directory name contains a blank.
+dnl NOTE(review): both alternatives of the first pattern read the same in
+dnl this copy; presumably one of them originally matched a TAB -- confirm.
+  case $am_aux_dir in
+  *\ * | *\ *)
+    MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;;
+  *)
+    MISSING="\${SHELL} $am_aux_dir/missing" ;;
+  esac
+fi
+# Use eval to expand $SHELL
+if eval "$MISSING --is-lightweight"; then
+dnl The trailing blank lets AM_MISSING_PROG append the program name directly.
+  am_missing_run="$MISSING "
+else
+  am_missing_run=
+  AC_MSG_WARN(['missing' script is too old or missing])
+fi
+])
+
+# Helper functions for option handling. -*- Autoconf -*-
+
+# Copyright (C) 2001-2020 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_MANGLE_OPTION(NAME)
+# -----------------------
+# Expand to _AM_OPTION_ followed by NAME, in which every character that is
+# not a letter, a digit or an underscore has been replaced by an underscore.
+AC_DEFUN([_AM_MANGLE_OPTION],
+[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
+
+# _AM_SET_OPTION(NAME)
+# --------------------
+# Set option NAME. Presently that only means defining a flag for this option.
+# The flag is the mangled option name, defined to 1.
+AC_DEFUN([_AM_SET_OPTION],
+[m4_define(_AM_MANGLE_OPTION([$1]), [1])])
+
+# _AM_SET_OPTIONS(OPTIONS)
+# ------------------------
+# OPTIONS is a space-separated list of Automake options.
+# Each word of the list is handed to _AM_SET_OPTION in turn.
+AC_DEFUN([_AM_SET_OPTIONS],
+[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
+
+# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET])
+# -------------------------------------------
+# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
+# "Set" is decided by m4_ifset on the mangled option name.
+AC_DEFUN([_AM_IF_OPTION],
+[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
+
+# Copyright (C) 1999-2020 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_PROG_CC_C_O
+# ---------------
+# Like AC_PROG_CC_C_O, but changed for automake. We rewrite AC_PROG_CC
+# to automatically call this.
+# The verdict is cached in am_cv_prog_cc_c_o. When the check fails, CC is
+# rewritten so that the compiler runs through $am_aux_dir/compile.
+AC_DEFUN([_AM_PROG_CC_C_O],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+AC_REQUIRE_AUX_FILE([compile])dnl
+AC_LANG_PUSH([C])dnl
+dnl The probe compiles an empty C program to conftest2 with -c and -o given
+dnl together, and repeats the compilation once over the existing object file.
+AC_CACHE_CHECK(
+  [whether $CC understands -c and -o together],
+  [am_cv_prog_cc_c_o],
+  [AC_LANG_CONFTEST([AC_LANG_PROGRAM([])])
+  # Make sure it works both with $CC and with simple cc.
+  # Following AC_PROG_CC_C_O, we do the test twice because some
+  # compilers refuse to overwrite an existing .o file with -o,
+  # though they will create one.
+  am_cv_prog_cc_c_o=yes
+  for am_i in 1 2; do
+    if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \
+         && test -f conftest2.$ac_objext; then
+      : OK
+    else
+      am_cv_prog_cc_c_o=no
+      break
+    fi
+  done
+  rm -f core conftest*
+  unset am_i])
+if test "$am_cv_prog_cc_c_o" != yes; then
+   # Losing compiler, so override with the script.
+   # FIXME: It is wrong to rewrite CC.
+   # But if we don't then we get into trouble of one sort or another.
+   # A longer-term fix would be to have automake use am__CC in this case,
+   # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
+   CC="$am_aux_dir/compile $CC"
+fi
+AC_LANG_POP([C])])
+
+# For backward compatibility.
+# The public name expands at most once and only requires AC_PROG_CC.
+AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
+
+# Copyright (C) 2001-2020 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_RUN_LOG(COMMAND)
+# -------------------
+# Run COMMAND, save the exit status in ac_status, and log it.
+# (This has been adapted from Autoconf's _AC_RUN_LOG macro.)
+# COMMAND runs in a subshell with stdout and stderr sent to the log. The
+# closing '(exit $ac_status)' makes the brace group return COMMAND's status,
+# so the expansion can be used directly as the condition of an 'if'.
+AC_DEFUN([AM_RUN_LOG],
+[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD
+   ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
+   (exit $ac_status); }])
+
+# Check to make sure that the build environment is sane. -*- Autoconf -*-
+
+# Copyright (C) 1996-2020 Free Software Foundation, Inc.
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_SANITY_CHECK +# --------------- +AC_DEFUN([AM_SANITY_CHECK], +[AC_MSG_CHECKING([whether build environment is sane]) +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. +am_lf=' +' +case `pwd` in + *[[\\\"\#\$\&\'\`$am_lf]]*) + AC_MSG_ERROR([unsafe absolute working directory name]);; +esac +case $srcdir in + *[[\\\"\#\$\&\'\`$am_lf\ \ ]]*) + AC_MSG_ERROR([unsafe srcdir value: '$srcdir']);; +esac + +# Do 'set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + am_has_slept=no + for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$[*]" = "X"; then + # -L didn't work. + set X `ls -t "$srcdir/configure" conftest.file` + fi + if test "$[*]" != "X $srcdir/configure conftest.file" \ + && test "$[*]" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken + alias in your environment]) + fi + if test "$[2]" = conftest.file || test $am_try -eq 2; then + break + fi + # Just in case. + sleep 1 + am_has_slept=yes + done + test "$[2]" = conftest.file + ) +then + # Ok. + : +else + AC_MSG_ERROR([newly created file is older than distributed files! 
+Check your system clock]) +fi +AC_MSG_RESULT([yes]) +# If we didn't sleep, we still need to ensure time stamps of config.status and +# generated files are strictly newer. +am_sleep_pid= +if grep 'slept: no' conftest.file >/dev/null 2>&1; then + ( sleep 1 ) & + am_sleep_pid=$! +fi +AC_CONFIG_COMMANDS_PRE( + [AC_MSG_CHECKING([that generated files are newer than configure]) + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. + wait $am_sleep_pid 2>/dev/null + fi + AC_MSG_RESULT([done])]) +rm -f conftest.file +]) + +# Copyright (C) 2009-2020 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_SILENT_RULES([DEFAULT]) +# -------------------------- +# Enable less verbose build rules; with the default set to DEFAULT +# ("yes" being less verbose, "no" or empty being verbose). +AC_DEFUN([AM_SILENT_RULES], +[AC_ARG_ENABLE([silent-rules], [dnl +AS_HELP_STRING( + [--enable-silent-rules], + [less verbose build output (undo: "make V=1")]) +AS_HELP_STRING( + [--disable-silent-rules], + [verbose build output (undo: "make V=0")])dnl +]) +case $enable_silent_rules in @%:@ ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=m4_if([$1], [yes], [0], [1]);; +esac +dnl +dnl A few 'make' implementations (e.g., NonStop OS and NextStep) +dnl do not support nested variable expansions. +dnl See automake bug#9928 and bug#10237. 
+am_make=${MAKE-make} +AC_CACHE_CHECK([whether $am_make supports nested variables], + [am_cv_make_support_nested_variables], + [if AS_ECHO([['TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit']]) | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi]) +if test $am_cv_make_support_nested_variables = yes; then + dnl Using '$V' instead of '$(V)' breaks IRIX make. + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AC_SUBST([AM_V])dnl +AM_SUBST_NOTMAKE([AM_V])dnl +AC_SUBST([AM_DEFAULT_V])dnl +AM_SUBST_NOTMAKE([AM_DEFAULT_V])dnl +AC_SUBST([AM_DEFAULT_VERBOSITY])dnl +AM_BACKSLASH='\' +AC_SUBST([AM_BACKSLASH])dnl +_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl +]) + +# Copyright (C) 2001-2020 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_STRIP +# --------------------- +# One issue with vendor 'install' (even GNU) is that you can't +# specify the program used to strip binaries. This is especially +# annoying in cross-compiling environments, where the build's strip +# is unlikely to handle the host's binaries. +# Fortunately install-sh will honor a STRIPPROG variable, so we +# always use install-sh in "make install-strip", and initialize +# STRIPPROG with the value of the STRIP variable (set by the user). +AC_DEFUN([AM_PROG_INSTALL_STRIP], +[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +# Installed binaries are usually stripped using 'strip' when the user +# run "make install-strip". However 'strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the 'STRIP' environment variable to overrule this program. 
+dnl Don't test for $cross_compiling = yes, because it might be 'maybe'. +if test "$cross_compiling" != no; then + AC_CHECK_TOOL([STRIP], [strip], :) +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" +AC_SUBST([INSTALL_STRIP_PROGRAM])]) + +# Copyright (C) 2006-2020 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_SUBST_NOTMAKE(VARIABLE) +# --------------------------- +# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in. +# This macro is traced by Automake. +AC_DEFUN([_AM_SUBST_NOTMAKE]) + +# AM_SUBST_NOTMAKE(VARIABLE) +# -------------------------- +# Public sister of _AM_SUBST_NOTMAKE. +AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)]) + +# Check how to create a tarball. -*- Autoconf -*- + +# Copyright (C) 2004-2020 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_PROG_TAR(FORMAT) +# -------------------- +# Check how to create a tarball in format FORMAT. +# FORMAT should be one of 'v7', 'ustar', or 'pax'. +# +# Substitute a variable $(am__tar) that is a command +# writing to stdout a FORMAT-tarball containing the directory +# $tardir. +# tardir=directory && $(am__tar) > result.tar +# +# Substitute a variable $(am__untar) that extract such +# a tarball read from stdin. +# $(am__untar) < result.tar +# +AC_DEFUN([_AM_PROG_TAR], +[# Always define AMTAR for backward compatibility. Yes, it's still used +# in the wild :-( We should find a proper way to deprecate it ... +AC_SUBST([AMTAR], ['$${TAR-tar}']) + +# We'll loop over all known methods to create a tar archive until one works. 
+_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none' + +m4_if([$1], [v7], + [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'], + + [m4_case([$1], + [ustar], + [# The POSIX 1988 'ustar' format is defined with fixed-size fields. + # There is notably a 21 bits limit for the UID and the GID. In fact, + # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343 + # and bug#13588). + am_max_uid=2097151 # 2^21 - 1 + am_max_gid=$am_max_uid + # The $UID and $GID variables are not portable, so we need to resort + # to the POSIX-mandated id(1) utility. Errors in the 'id' calls + # below are definitely unexpected, so allow the users to see them + # (that is, avoid stderr redirection). + am_uid=`id -u || echo unknown` + am_gid=`id -g || echo unknown` + AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format]) + if test $am_uid -le $am_max_uid; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + _am_tools=none + fi + AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format]) + if test $am_gid -le $am_max_gid; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + _am_tools=none + fi], + + [pax], + [], + + [m4_fatal([Unknown tar format])]) + + AC_MSG_CHECKING([how to create a $1 tar archive]) + + # Go ahead even if we have the value already cached. We do so because we + # need to set the values for the 'am__tar' and 'am__untar' variables. + _am_tools=${am_cv_prog_tar_$1-$_am_tools} + + for _am_tool in $_am_tools; do + case $_am_tool in + gnutar) + for _am_tar in tar gnutar gtar; do + AM_RUN_LOG([$_am_tar --version]) && break + done + am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"' + am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"' + am__untar="$_am_tar -xf -" + ;; + plaintar) + # Must skip GNU tar: if it does not support --format= it doesn't create + # ustar tarball either. 
+ (tar --version) >/dev/null 2>&1 && continue + am__tar='tar chf - "$$tardir"' + am__tar_='tar chf - "$tardir"' + am__untar='tar xf -' + ;; + pax) + am__tar='pax -L -x $1 -w "$$tardir"' + am__tar_='pax -L -x $1 -w "$tardir"' + am__untar='pax -r' + ;; + cpio) + am__tar='find "$$tardir" -print | cpio -o -H $1 -L' + am__tar_='find "$tardir" -print | cpio -o -H $1 -L' + am__untar='cpio -i -H $1 -d' + ;; + none) + am__tar=false + am__tar_=false + am__untar=false + ;; + esac + + # If the value was cached, stop now. We just wanted to have am__tar + # and am__untar set. + test -n "${am_cv_prog_tar_$1}" && break + + # tar/untar a dummy directory, and stop if the command works. + rm -rf conftest.dir + mkdir conftest.dir + echo GrepMe > conftest.dir/file + AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar]) + rm -rf conftest.dir + if test -s conftest.tar; then + AM_RUN_LOG([$am__untar /dev/null 2>&1 && break + fi + done + rm -rf conftest.dir + + AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool]) + AC_MSG_RESULT([$am_cv_prog_tar_$1])]) + +AC_SUBST([am__tar]) +AC_SUBST([am__untar]) +]) # _AM_PROG_TAR + +m4_include([m4/ax_pthread.m4]) +m4_include([m4/libtool.m4]) +m4_include([m4/ltoptions.m4]) +m4_include([m4/ltsugar.m4]) +m4_include([m4/ltversion.m4]) +m4_include([m4/lt~obsolete.m4]) +m4_include([m4/pcre2_visibility.m4]) diff --git a/pcre2/ar-lib b/pcre2/ar-lib new file mode 100755 index 000000000..1e9388e2a --- /dev/null +++ b/pcre2/ar-lib @@ -0,0 +1,271 @@ +#! /bin/sh +# Wrapper for Microsoft lib.exe + +me=ar-lib +scriptversion=2019-07-04.01; # UTC + +# Copyright (C) 2010-2020 Free Software Foundation, Inc. +# Written by Peter Rosin . +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. 
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# This file is maintained in Automake, please report
+# bugs to <bug-automake@gnu.org> or send patches to
+# <automake-patches@gnu.org>.
+
+
+# func_error message
+# Print MESSAGE on stderr, prefixed with the script name held in $me, and
+# terminate the script with exit status 1.
+func_error ()
+{
+  echo "$me: $1" 1>&2
+  exit 1
+}
+
+# Conversion flavour used by func_file_conv; empty until the first absolute
+# file name is converted, then one of mingw, cygwin or wine.
+file_conv=
+
+# func_file_conv build_file
+# Convert a $build file to $host form and store it in $file
+# Currently only supports Windows hosts.
+# Names that are not absolute, and names starting with two slashes, are
+# stored in $file unchanged.
+func_file_conv ()
+{
+  file=$1
+  case $file in
+    / | /[!/]*) # absolute file, and not a UNC file
+      if test -z "$file_conv"; then
+        # lazily determine how to convert abs files
+        # (the answer is kept in $file_conv, so uname runs at most once)
+        case `uname -s` in
+          MINGW*)
+            file_conv=mingw
+            ;;
+          CYGWIN* | MSYS*)
+            file_conv=cygwin
+            ;;
+          *)
+            file_conv=wine
+            ;;
+        esac
+      fi
+      case $file_conv in
+        mingw)
+          # Echo the name through cmd, then let sed strip the surrounding
+          # quotes and the padding blank.  Presumably the MinGW runtime
+          # rewrites the path while passing it to cmd -- TODO confirm.
+          file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
+          ;;
+        cygwin | msys)
+          # Fall back to the unconverted name if cygpath fails.
+          file=`cygpath -m "$file" || echo "$file"`
+          ;;
+        wine)
+          # Fall back to the unconverted name if winepath fails.
+          file=`winepath -w "$file" || echo "$file"`
+          ;;
+      esac
+      ;;
+  esac
+}
+
+# func_at_file at_file operation archive
+# Iterate over all members in AT_FILE performing OPERATION on ARCHIVE
+# for each of them.
+# When interpreting the content of the @FILE, do NOT use func_file_conv,
+# since the user would need to supply preconverted file names to
+# binutils ar, at least for MinGW.
+func_at_file () +{ + operation=$2 + archive=$3 + at_file_contents=`cat "$1"` + eval set x "$at_file_contents" + shift + + for member + do + $AR -NOLOGO $operation:"$member" "$archive" || exit $? + done +} + +case $1 in + '') + func_error "no command. Try '$0 --help' for more information." + ;; + -h | --h*) + cat <_FOUND variable. +# The package is found if all variables listed are TRUE. +# Example: +# +# FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibXml2 DEFAULT_MSG LIBXML2_LIBRARIES LIBXML2_INCLUDE_DIR) +# +# LibXml2 is considered to be found, if both LIBXML2_LIBRARIES and +# LIBXML2_INCLUDE_DIR are valid. Then also LIBXML2_FOUND is set to TRUE. +# If it is not found and REQUIRED was used, it fails with FATAL_ERROR, +# independent whether QUIET was used or not. +# If it is found, the location is reported using the VAR1 argument, so +# here a message "Found LibXml2: /usr/lib/libxml2.so" will be printed out. +# If the second argument is DEFAULT_MSG, the message in the failure case will +# be "Could NOT find LibXml2", if you don't like this message you can specify +# your own custom failure message there. 
+
+# Note: this is a macro, so _FAIL_MESSAGE, _NAME_UPPER and the resulting
+# <UPPERCASED_NAME>_FOUND variable are all set in the caller's scope.
+macro(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FAIL_MSG _VAR1)
+
+  # Pick the text reported when the package turns out to be missing.
+  if(NOT "${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
+    set(_FAIL_MESSAGE "${_FAIL_MSG}")
+  elseif(${_NAME}_FIND_REQUIRED)
+    set(_FAIL_MESSAGE "Could not find REQUIRED package ${_NAME}")
+  else()
+    set(_FAIL_MESSAGE "Could not find OPTIONAL package ${_NAME}")
+  endif()
+
+  string(TOUPPER ${_NAME} _NAME_UPPER)
+
+  # The package counts as found only if every listed variable is true.
+  set(${_NAME_UPPER}_FOUND TRUE)
+  foreach(_CURRENT_VAR ${_VAR1} ${ARGN})
+    if(NOT ${_CURRENT_VAR})
+      set(${_NAME_UPPER}_FOUND FALSE)
+    endif()
+  endforeach()
+
+  # Report the outcome: QUIET silences the status lines, REQUIRED turns a
+  # miss into a fatal error regardless of QUIET.
+  if(${_NAME_UPPER}_FOUND)
+    if(NOT ${_NAME}_FIND_QUIETLY)
+      message(STATUS "Found ${_NAME}: ${${_VAR1}}")
+    endif()
+  elseif(${_NAME}_FIND_REQUIRED)
+    message(FATAL_ERROR "${_FAIL_MESSAGE}")
+  elseif(NOT ${_NAME}_FIND_QUIETLY)
+    message(STATUS "${_FAIL_MESSAGE}")
+  endif()
+endmacro()
diff --git a/pcre2/cmake/FindReadline.cmake b/pcre2/cmake/FindReadline.cmake
new file mode 100644
index 000000000..1d4cc5584
--- /dev/null
+++ b/pcre2/cmake/FindReadline.cmake
@@ -0,0 +1,29 @@
+# from http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/FindReadline.cmake
+# http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/COPYING-CMAKE-SCRIPTS
+# --> BSD licensed
+#
+# GNU Readline library finder
+if(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
+  set(READLINE_FOUND TRUE)
+else()
+  find_path(READLINE_INCLUDE_DIR readline/readline.h
+    /usr/include/readline
+  )
+
+# 2008-04-22 The next clause used to read like this:
+#
+#  FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
+#  FIND_LIBRARY(NCURSES_LIBRARY NAMES ncurses )
+#  include(FindPackageHandleStandardArgs)
+#  FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG NCURSES_LIBRARY READLINE_INCLUDE_DIR READLINE_LIBRARY )
+#
+# I was advised to modify it such that it will find an ncurses library if
+# required, but not if one was explicitly given, that is, it allows the
+# default to be overridden. PH
+
+  find_library(READLINE_LIBRARY NAMES readline)
+  include(FindPackageHandleStandardArgs)
+  find_package_handle_standard_args(Readline DEFAULT_MSG READLINE_INCLUDE_DIR READLINE_LIBRARY)
+
+  mark_as_advanced(READLINE_INCLUDE_DIR READLINE_LIBRARY)
+endif()
diff --git a/pcre2/compile b/pcre2/compile
new file mode 100755
index 000000000..23fcba011
--- /dev/null
+++ b/pcre2/compile
@@ -0,0 +1,348 @@
+#! /bin/sh
+# Wrapper for compilers which do not understand '-c -o'.
+
+scriptversion=2018-03-07.03; # UTC
+
+# Copyright (C) 1999-2020 Free Software Foundation, Inc.
+# Written by Tom Tromey <tromey@cygnus.com>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+ +# This file is maintained in Automake, please report +# bugs to or send patches to +# . + +nl=' +' + +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent tools from complaining about whitespace usage. +IFS=" "" $nl" + +file_conv= + +# func_file_conv build_file lazy +# Convert a $build file to $host form and store it in $file +# Currently only supports Windows hosts. If the determined conversion +# type is listed in (the comma separated) LAZY, no conversion will +# take place. +func_file_conv () +{ + file=$1 + case $file in + / | /[!/]*) # absolute file, and not a UNC file + if test -z "$file_conv"; then + # lazily determine how to convert abs files + case `uname -s` in + MINGW*) + file_conv=mingw + ;; + CYGWIN* | MSYS*) + file_conv=cygwin + ;; + *) + file_conv=wine + ;; + esac + fi + case $file_conv/,$2, in + *,$file_conv,*) + ;; + mingw/*) + file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'` + ;; + cygwin/* | msys/*) + file=`cygpath -m "$file" || echo "$file"` + ;; + wine/*) + file=`winepath -w "$file" || echo "$file"` + ;; + esac + ;; + esac +} + +# func_cl_dashL linkdir +# Make cl look for libraries in LINKDIR +func_cl_dashL () +{ + func_file_conv "$1" + if test -z "$lib_path"; then + lib_path=$file + else + lib_path="$lib_path;$file" + fi + linker_opts="$linker_opts -LIBPATH:$file" +} + +# func_cl_dashl library +# Do a library search-path lookup for cl +func_cl_dashl () +{ + lib=$1 + found=no + save_IFS=$IFS + IFS=';' + for dir in $lib_path $LIB + do + IFS=$save_IFS + if $shared && test -f "$dir/$lib.dll.lib"; then + found=yes + lib=$dir/$lib.dll.lib + break + fi + if test -f "$dir/$lib.lib"; then + found=yes + lib=$dir/$lib.lib + break + fi + if test -f "$dir/lib$lib.a"; then + found=yes + lib=$dir/lib$lib.a + break + fi + done + IFS=$save_IFS + + if test "$found" != yes; then + lib=$lib.lib + fi +} + +# func_cl_wrapper cl arg... 
+# Adjust compile command to suit cl +func_cl_wrapper () +{ + # Assume a capable shell + lib_path= + shared=: + linker_opts= + for arg + do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. + eat=1 + case $2 in + *.o | *.[oO][bB][jJ]) + func_file_conv "$2" + set x "$@" -Fo"$file" + shift + ;; + *) + func_file_conv "$2" + set x "$@" -Fe"$file" + shift + ;; + esac + ;; + -I) + eat=1 + func_file_conv "$2" mingw + set x "$@" -I"$file" + shift + ;; + -I*) + func_file_conv "${1#-I}" mingw + set x "$@" -I"$file" + shift + ;; + -l) + eat=1 + func_cl_dashl "$2" + set x "$@" "$lib" + shift + ;; + -l*) + func_cl_dashl "${1#-l}" + set x "$@" "$lib" + shift + ;; + -L) + eat=1 + func_cl_dashL "$2" + ;; + -L*) + func_cl_dashL "${1#-L}" + ;; + -static) + shared=false + ;; + -Wl,*) + arg=${1#-Wl,} + save_ifs="$IFS"; IFS=',' + for flag in $arg; do + IFS="$save_ifs" + linker_opts="$linker_opts $flag" + done + IFS="$save_ifs" + ;; + -Xlinker) + eat=1 + linker_opts="$linker_opts $2" + ;; + -*) + set x "$@" "$1" + shift + ;; + *.cc | *.CC | *.cxx | *.CXX | *.[cC]++) + func_file_conv "$1" + set x "$@" -Tp"$file" + shift + ;; + *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO]) + func_file_conv "$1" mingw + set x "$@" "$file" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift + done + if test -n "$linker_opts"; then + linker_opts="-link$linker_opts" + fi + exec "$@" $linker_opts + exit 1 +} + +eat= + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: compile [--help] [--version] PROGRAM [ARGS] + +Wrapper for compilers which do not understand '-c -o'. +Remove '-o dest.o' from ARGS, run PROGRAM with the remaining +arguments, and rename the output as expected. + +If you are trying to build a whole package this is not the +right script to run: please start by reading the file 'INSTALL'. 
+ +Report bugs to . +EOF + exit $? + ;; + -v | --v*) + echo "compile $scriptversion" + exit $? + ;; + cl | *[/\\]cl | cl.exe | *[/\\]cl.exe | \ + icl | *[/\\]icl | icl.exe | *[/\\]icl.exe ) + func_cl_wrapper "$@" # Doesn't return... + ;; +esac + +ofile= +cfile= + +for arg +do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. + # So we strip '-o arg' only if arg is an object. + eat=1 + case $2 in + *.o | *.obj) + ofile=$2 + ;; + *) + set x "$@" -o "$2" + shift + ;; + esac + ;; + *.c) + cfile=$1 + set x "$@" "$1" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift +done + +if test -z "$ofile" || test -z "$cfile"; then + # If no '-o' option was seen then we might have been invoked from a + # pattern rule where we don't need one. That is ok -- this is a + # normal compilation that the losing compiler can handle. If no + # '.c' file was seen then we are probably linking. That is also + # ok. + exec "$@" +fi + +# Name of file we expect compiler to create. +cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'` + +# Create the lock directory. +# Note: use '[/\\:.-]' here to ensure that we don't use the same name +# that we are using for the .o file. Also, base the name on the expected +# object file name, since that is what matters with a parallel build. +lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d +while true; do + if mkdir "$lockdir" >/dev/null 2>&1; then + break + fi + sleep 1 +done +# FIXME: race condition here if user kills between mkdir and trap. +trap "rmdir '$lockdir'; exit 1" 1 2 15 + +# Run the compile. +"$@" +ret=$? 
+ +if test -f "$cofile"; then + test "$cofile" = "$ofile" || mv "$cofile" "$ofile" +elif test -f "${cofile}bj"; then + test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile" +fi + +rmdir "$lockdir" +exit $ret + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/pcre2/config.guess b/pcre2/config.guess new file mode 100755 index 000000000..45001cfec --- /dev/null +++ b/pcre2/config.guess @@ -0,0 +1,1667 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# Copyright 1992-2020 Free Software Foundation, Inc. + +timestamp='2020-01-01' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see . +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). +# +# Originally written by Per Bothner; maintained since 2000 by Ben Elliston. 
+# +# You can get the latest version of this script from: +# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess +# +# Please send patches to . + + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] + +Output the configuration name of the system \`$me' is run on. + +Options: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.guess ($timestamp) + +Originally written by Per Bothner. +Copyright 1992-2020 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + * ) + break ;; + esac +done + +if test $# != 0; then + echo "$me: too many arguments$help" >&2 + exit 1 +fi + +# CC_FOR_BUILD -- compiler used by this script. Note that the use of a +# compiler to aid in system detection is discouraged as it requires +# temporary files to be created and, as you can see below, it is a +# headache to deal with in a portable fashion. + +# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still +# use `HOST_CC' if defined, but it is deprecated. + +# Portable tmp directory creation inspired by the Autoconf team. 
+ +tmp= +# shellcheck disable=SC2172 +trap 'test -z "$tmp" || rm -fr "$tmp"' 0 1 2 13 15 + +set_cc_for_build() { + # prevent multiple calls if $tmp is already set + test "$tmp" && return 0 + : "${TMPDIR=/tmp}" + # shellcheck disable=SC2039 + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } + dummy=$tmp/dummy + case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in + ,,) echo "int x;" > "$dummy.c" + for driver in cc gcc c89 c99 ; do + if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then + CC_FOR_BUILD="$driver" + break + fi + done + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; + esac +} + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. +# (ghazi@noc.rutgers.edu 1994-08-24) +if test -f /.attbin/uname ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +case "$UNAME_SYSTEM" in +Linux|GNU|GNU/*) + # If the system lacks a compiler, then just pick glibc. + # We could probably try harder. + LIBC=gnu + + set_cc_for_build + cat <<-EOF > "$dummy.c" + #include + #if defined(__UCLIBC__) + LIBC=uclibc + #elif defined(__dietlibc__) + LIBC=dietlibc + #else + LIBC=gnu + #endif + EOF + eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`" + + # If ldd exists, use it to detect musl libc. 
+ if command -v ldd >/dev/null && \ + ldd --version 2>&1 | grep -q ^musl + then + LIBC=musl + fi + ;; +esac + +# Note: order is significant - the case branches are not exclusive. + +case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". + sysctl="sysctl -n hw.machine_arch" + UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ + "/sbin/$sysctl" 2>/dev/null || \ + "/usr/sbin/$sysctl" 2>/dev/null || \ + echo unknown)` + case "$UNAME_MACHINE_ARCH" in + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; + earmv*) + arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'` + endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'` + machine="${arch}${endian}"-unknown + ;; + *) machine="$UNAME_MACHINE_ARCH"-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently (or will in the future) and ABI. + case "$UNAME_MACHINE_ARCH" in + earm*) + os=netbsdelf + ;; + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ELF__ + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? + os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # Determine ABI tags. 
+ case "$UNAME_MACHINE_ARCH" in + earm*) + expr='s/^earmv[0-9]/-eabi/;s/eb$//' + abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"` + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. + case "$UNAME_VERSION" in + Debian*) + release='-gnu' + ;; + *) + release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. + echo "$machine-${os}${release}${abi-}" + exit ;; + *:Bitrig:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` + echo "$UNAME_MACHINE_ARCH"-unknown-bitrig"$UNAME_RELEASE" + exit ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo "$UNAME_MACHINE_ARCH"-unknown-openbsd"$UNAME_RELEASE" + exit ;; + *:LibertyBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` + echo "$UNAME_MACHINE_ARCH"-unknown-libertybsd"$UNAME_RELEASE" + exit ;; + *:MidnightBSD:*:*) + echo "$UNAME_MACHINE"-unknown-midnightbsd"$UNAME_RELEASE" + exit ;; + *:ekkoBSD:*:*) + echo "$UNAME_MACHINE"-unknown-ekkobsd"$UNAME_RELEASE" + exit ;; + *:SolidBSD:*:*) + echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE" + exit ;; + *:OS108:*:*) + echo "$UNAME_MACHINE"-unknown-os108_"$UNAME_RELEASE" + exit ;; + macppc:MirBSD:*:*) + echo powerpc-unknown-mirbsd"$UNAME_RELEASE" + exit ;; + *:MirBSD:*:*) + echo "$UNAME_MACHINE"-unknown-mirbsd"$UNAME_RELEASE" + exit ;; + *:Sortix:*:*) + echo "$UNAME_MACHINE"-unknown-sortix + exit ;; + *:Twizzler:*:*) + echo "$UNAME_MACHINE"-unknown-twizzler + exit ;; + *:Redox:*:*) + echo "$UNAME_MACHINE"-unknown-redox + exit ;; + mips:OSF1:*.*) + echo mips-dec-osf1 + exit ;; + alpha:OSF1:*:*) + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | 
awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. + ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case "$ALPHA_CPU_TYPE" in + "EV4 (21064)") + UNAME_MACHINE=alpha ;; + "EV4.5 (21064)") + UNAME_MACHINE=alpha ;; + "LCA4 (21066/21068)") + UNAME_MACHINE=alpha ;; + "EV5 (21164)") + UNAME_MACHINE=alphaev5 ;; + "EV5.6 (21164A)") + UNAME_MACHINE=alphaev56 ;; + "EV5.6 (21164PC)") + UNAME_MACHINE=alphapca56 ;; + "EV5.7 (21164PC)") + UNAME_MACHINE=alphapca57 ;; + "EV6 (21264)") + UNAME_MACHINE=alphaev6 ;; + "EV6.7 (21264A)") + UNAME_MACHINE=alphaev67 ;; + "EV6.8CB (21264C)") + UNAME_MACHINE=alphaev68 ;; + "EV6.8AL (21264B)") + UNAME_MACHINE=alphaev68 ;; + "EV6.8CX (21264D)") + UNAME_MACHINE=alphaev68 ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE=alphaev69 ;; + "EV7 (21364)") + UNAME_MACHINE=alphaev7 ;; + "EV7.9 (21364A)") + UNAME_MACHINE=alphaev79 ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. + echo "$UNAME_MACHINE"-dec-osf"`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`" + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + exitcode=$? 
+ trap '' 0 + exit $exitcode ;; + Amiga*:UNIX_System_V:4.0:*) + echo m68k-unknown-sysv4 + exit ;; + *:[Aa]miga[Oo][Ss]:*:*) + echo "$UNAME_MACHINE"-unknown-amigaos + exit ;; + *:[Mm]orph[Oo][Ss]:*:*) + echo "$UNAME_MACHINE"-unknown-morphos + exit ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; + *:OS400:*:*) + echo powerpc-ibm-os400 + exit ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix"$UNAME_RELEASE" + exit ;; + arm*:riscos:*:*|arm*:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + echo hppa1.1-hitachi-hiuxmpp + exit ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. + if test "`(/bin/universe) 2>/dev/null`" = att ; then + echo pyramid-pyramid-sysv3 + else + echo pyramid-pyramid-bsd + fi + exit ;; + NILE*:*:*:dcosx) + echo pyramid-pyramid-svr4 + exit ;; + DRS?6000:unix:4.0:6*) + echo sparc-icl-nx6 + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) echo sparc-icl-nx7; exit ;; + esac ;; + s390x:SunOS:*:*) + echo "$UNAME_MACHINE"-ibm-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`" + exit ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" + exit ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`" + exit ;; + i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) + echo i386-pc-auroraux"$UNAME_RELEASE" + exit ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) + set_cc_for_build + SUN_ARCH=i386 + # If there is a compiler, see if it is configured for 64-bit objects. + # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. + # This test works for both compilers. 
+ if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + SUN_ARCH=x86_64 + fi + fi + echo "$SUN_ARCH"-pc-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" + exit ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + echo sparc-sun-solaris3"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" + exit ;; + sun4*:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. + echo sparc-sun-sunos"`echo "$UNAME_RELEASE"|sed -e 's/-/_/'`" + exit ;; + sun3*:SunOS:*:*) + echo m68k-sun-sunos"$UNAME_RELEASE" + exit ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3 + case "`/bin/arch`" in + sun3) + echo m68k-sun-sunos"$UNAME_RELEASE" + ;; + sun4) + echo sparc-sun-sunos"$UNAME_RELEASE" + ;; + esac + exit ;; + aushp:SunOS:*:*) + echo sparc-auspex-sunos"$UNAME_RELEASE" + exit ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. 
+ atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint"$UNAME_RELEASE" + exit ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint"$UNAME_RELEASE" + exit ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + echo m68k-atari-mint"$UNAME_RELEASE" + exit ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + echo m68k-milan-mint"$UNAME_RELEASE" + exit ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + echo m68k-hades-mint"$UNAME_RELEASE" + exit ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + echo m68k-unknown-mint"$UNAME_RELEASE" + exit ;; + m68k:machten:*:*) + echo m68k-apple-machten"$UNAME_RELEASE" + exit ;; + powerpc:machten:*:*) + echo powerpc-apple-machten"$UNAME_RELEASE" + exit ;; + RISC*:Mach:*:*) + echo mips-dec-mach_bsd4.3 + exit ;; + RISC*:ULTRIX:*:*) + echo mips-dec-ultrix"$UNAME_RELEASE" + exit ;; + VAX*:ULTRIX*:*:*) + echo vax-dec-ultrix"$UNAME_RELEASE" + exit ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + echo clipper-intergraph-clix"$UNAME_RELEASE" + exit ;; + mips:*:*:UMIPS | mips:*:*:RISCos) + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" +#ifdef __cplusplus +#include /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && + dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` && + SYSTEM_NAME=`"$dummy" "$dummyarg"` && + { echo "$SYSTEM_NAME"; exit; } + echo mips-mips-riscos"$UNAME_RELEASE" + exit ;; + Motorola:PowerMAX_OS:*:*) + echo 
powerpc-motorola-powermax + exit ;; + Motorola:*:4.3:PL8-*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:Power_UNIX:*:*) + echo powerpc-harris-powerunix + exit ;; + m88k:CX/UX:7*:*) + echo m88k-harris-cxux7 + exit ;; + m88k:*:4*:R4*) + echo m88k-motorola-sysv4 + exit ;; + m88k:*:3*:R3*) + echo m88k-motorola-sysv3 + exit ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if [ "$UNAME_PROCESSOR" = mc88100 ] || [ "$UNAME_PROCESSOR" = mc88110 ] + then + if [ "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx ] || \ + [ "$TARGET_BINARY_INTERFACE"x = x ] + then + echo m88k-dg-dgux"$UNAME_RELEASE" + else + echo m88k-dg-dguxbcs"$UNAME_RELEASE" + fi + else + echo i586-dg-dgux"$UNAME_RELEASE" + fi + exit ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + echo m88k-dolphin-sysv3 + exit ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + echo m88k-motorola-sysv3 + exit ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + echo m88k-tektronix-sysv3 + exit ;; + Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) + echo m68k-tektronix-bsd + exit ;; + *:IRIX*:*:*) + echo mips-sgi-irix"`echo "$UNAME_RELEASE"|sed -e 's/-/_/g'`" + exit ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
+ echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + echo i386-ibm-aix + exit ;; + ia64:AIX:*:*) + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" + fi + echo "$UNAME_MACHINE"-ibm-aix"$IBM_REV" + exit ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" + #include + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` + then + echo "$SYSTEM_NAME" + else + echo rs6000-ibm-aix3.2.5 + fi + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + echo rs6000-ibm-aix3.2.4 + else + echo rs6000-ibm-aix3.2 + fi + exit ;; + *:AIX:*:[4567]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if [ -x /usr/bin/lslpp ] ; then + IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | + awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` + else + IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" + fi + echo "$IBM_ARCH"-ibm-aix"$IBM_REV" + exit ;; + *:AIX:*:*) + echo rs6000-ibm-aix + exit ;; + ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*) + echo romp-ibm-bsd4.4 + exit ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + echo romp-ibm-bsd"$UNAME_RELEASE" # 4.3 with uname added to + exit ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + echo rs6000-bull-bosx + exit ;; + DPX/2?00:B.O.S.:*:*) + echo m68k-bull-sysv3 + exit ;; + 9000/[34]??:4.3bsd:1.*:*) + echo m68k-hp-bsd + exit ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + echo m68k-hp-bsd4.4 + exit ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'` + case "$UNAME_MACHINE" in + 9000/31?) HP_ARCH=m68000 ;; + 9000/[34]??) 
HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if [ -x /usr/bin/getconf ]; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "$sc_cpu_version" in + 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 + 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "$sc_kernel_bits" in + 32) HP_ARCH=hppa2.0n ;; + 64) HP_ARCH=hppa2.0w ;; + '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 + esac ;; + esac + fi + if [ "$HP_ARCH" = "" ]; then + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" + + #define _HPUX_SOURCE + #include + #include + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; + esac + if [ "$HP_ARCH" = hppa2.0w ] + then + set_cc_for_build + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. 
GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | + grep -q __LP64__ + then + HP_ARCH=hppa2.0w + else + HP_ARCH=hppa64 + fi + fi + echo "$HP_ARCH"-hp-hpux"$HPUX_REV" + exit ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'` + echo ia64-hp-hpux"$HPUX_REV" + exit ;; + 3050*:HI-UX:*:*) + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" + #include + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. */ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` && + { echo "$SYSTEM_NAME"; exit; } + echo unknown-hitachi-hiuxwe2 + exit ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*) + echo hppa1.1-hp-bsd + exit ;; + 9000/8??:4.3bsd:*:*) + echo hppa1.0-hp-bsd + exit ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*) + echo hppa1.1-hp-osf + exit ;; + hp8??:OSF1:*:*) + echo hppa1.0-hp-osf + exit ;; + i*86:OSF1:*:*) + if [ -x /usr/sbin/sysversion ] ; then + echo "$UNAME_MACHINE"-unknown-osf1mk + else + echo "$UNAME_MACHINE"-unknown-osf1 + fi + exit ;; + parisc*:Lites*:*:*) + echo hppa1.1-hp-lites + exit ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd + exit ;; + C2*:ConvexOS:*:* | 
convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd + exit ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd + exit ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd + exit ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*[A-Z]90:*:*:*) + echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*TS:*:*:*) + echo t90-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + *:UNICOS/mp:*:*) + echo craynv-cray-unicosmp"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo "$UNAME_MACHINE"-pc-bsdi"$UNAME_RELEASE" + exit ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi"$UNAME_RELEASE" + exit ;; + *:BSD/OS:*:*) + echo "$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE" + exit ;; + 
arm:FreeBSD:*:*) + UNAME_PROCESSOR=`uname -p` + set_cc_for_build + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + echo "${UNAME_PROCESSOR}"-unknown-freebsd"`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`"-gnueabi + else + echo "${UNAME_PROCESSOR}"-unknown-freebsd"`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`"-gnueabihf + fi + exit ;; + *:FreeBSD:*:*) + UNAME_PROCESSOR=`/usr/bin/uname -p` + case "$UNAME_PROCESSOR" in + amd64) + UNAME_PROCESSOR=x86_64 ;; + i386) + UNAME_PROCESSOR=i586 ;; + esac + echo "$UNAME_PROCESSOR"-unknown-freebsd"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`" + exit ;; + i*:CYGWIN*:*) + echo "$UNAME_MACHINE"-pc-cygwin + exit ;; + *:MINGW64*:*) + echo "$UNAME_MACHINE"-pc-mingw64 + exit ;; + *:MINGW*:*) + echo "$UNAME_MACHINE"-pc-mingw32 + exit ;; + *:MSYS*:*) + echo "$UNAME_MACHINE"-pc-msys + exit ;; + i*:PW*:*) + echo "$UNAME_MACHINE"-pc-pw32 + exit ;; + *:Interix*:*) + case "$UNAME_MACHINE" in + x86) + echo i586-pc-interix"$UNAME_RELEASE" + exit ;; + authenticamd | genuineintel | EM64T) + echo x86_64-unknown-interix"$UNAME_RELEASE" + exit ;; + IA64) + echo ia64-unknown-interix"$UNAME_RELEASE" + exit ;; + esac ;; + i*:UWIN*:*) + echo "$UNAME_MACHINE"-pc-uwin + exit ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + echo x86_64-pc-cygwin + exit ;; + prep*:SunOS:5.*:*) + echo powerpcle-unknown-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" + exit ;; + *:GNU:*:*) + # the GNU system + echo "`echo "$UNAME_MACHINE"|sed -e 's,[-/].*$,,'`-unknown-$LIBC`echo "$UNAME_RELEASE"|sed -e 's,/.*$,,'`" + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + echo "$UNAME_MACHINE-unknown-`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`-$LIBC" + exit ;; + *:Minix:*:*) + echo "$UNAME_MACHINE"-unknown-minix + exit ;; + aarch64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + aarch64_be:Linux:*:*) + UNAME_MACHINE=aarch64_be + 
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" = 0 ; then LIBC=gnulibc1 ; fi + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + arc:Linux:*:* | arceb:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + arm*:Linux:*:*) + set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + else + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabi + else + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabihf + fi + fi + exit ;; + avr32*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + cris:Linux:*:*) + echo "$UNAME_MACHINE"-axis-linux-"$LIBC" + exit ;; + crisv32:Linux:*:*) + echo "$UNAME_MACHINE"-axis-linux-"$LIBC" + exit ;; + e2k:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + frv:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + hexagon:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + i*86:Linux:*:*) + echo "$UNAME_MACHINE"-pc-linux-"$LIBC" + exit ;; + ia64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + k1om:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + m32r*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + m68*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + mips:Linux:*:* | mips64:Linux:*:*) + set_cc_for_build + IS_GLIBC=0 + test x"${LIBC}" = xgnu && IS_GLIBC=1 + sed 's/^ //' << EOF > 
"$dummy.c" + #undef CPU + #undef mips + #undef mipsel + #undef mips64 + #undef mips64el + #if ${IS_GLIBC} && defined(_ABI64) + LIBCABI=gnuabi64 + #else + #if ${IS_GLIBC} && defined(_ABIN32) + LIBCABI=gnuabin32 + #else + LIBCABI=${LIBC} + #endif + #endif + + #if ${IS_GLIBC} && defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 + CPU=mipsisa64r6 + #else + #if ${IS_GLIBC} && !defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 + CPU=mipsisa32r6 + #else + #if defined(__mips64) + CPU=mips64 + #else + CPU=mips + #endif + #endif + #endif + + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + MIPS_ENDIAN=el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + MIPS_ENDIAN= + #else + MIPS_ENDIAN= + #endif + #endif +EOF + eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI'`" + test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; } + ;; + mips64el:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + openrisc*:Linux:*:*) + echo or1k-unknown-linux-"$LIBC" + exit ;; + or32:Linux:*:* | or1k*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + padre:Linux:*:*) + echo sparc-unknown-linux-"$LIBC" + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-"$LIBC" + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) echo hppa1.1-unknown-linux-"$LIBC" ;; + PA8*) echo hppa2.0-unknown-linux-"$LIBC" ;; + *) echo hppa-unknown-linux-"$LIBC" ;; + esac + exit ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-"$LIBC" + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-"$LIBC" + exit ;; + ppc64le:Linux:*:*) + echo powerpc64le-unknown-linux-"$LIBC" + exit ;; + ppcle:Linux:*:*) + echo powerpcle-unknown-linux-"$LIBC" + exit ;; + riscv32:Linux:*:* | riscv64:Linux:*:*) + echo 
"$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo "$UNAME_MACHINE"-ibm-linux-"$LIBC" + exit ;; + sh64*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + sh*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + tile*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + vax:Linux:*:*) + echo "$UNAME_MACHINE"-dec-linux-"$LIBC" + exit ;; + x86_64:Linux:*:*) + echo "$UNAME_MACHINE"-pc-linux-"$LIBC" + exit ;; + xtensa*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. + echo i386-sequent-sysv4 + exit ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. + echo "$UNAME_MACHINE"-pc-sysv4.2uw"$UNAME_VERSION" + exit ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. 
+ echo "$UNAME_MACHINE"-pc-os2-emx + exit ;; + i*86:XTS-300:*:STOP) + echo "$UNAME_MACHINE"-unknown-stop + exit ;; + i*86:atheos:*:*) + echo "$UNAME_MACHINE"-unknown-atheos + exit ;; + i*86:syllable:*:*) + echo "$UNAME_MACHINE"-pc-syllable + exit ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) + echo i386-unknown-lynxos"$UNAME_RELEASE" + exit ;; + i*86:*DOS:*:*) + echo "$UNAME_MACHINE"-pc-msdosdjgpp + exit ;; + i*86:*:4.*:*) + UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + echo "$UNAME_MACHINE"-univel-sysv"$UNAME_REL" + else + echo "$UNAME_MACHINE"-pc-sysv"$UNAME_REL" + fi + exit ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. + case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; + *Pent*|*Celeron) UNAME_MACHINE=i686 ;; + esac + echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}" + exit ;; + i*86:*:3.2:*) + if test -f /usr/options/cb.name; then + UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ + && UNAME_MACHINE=i586 + (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 + echo "$UNAME_MACHINE"-pc-sco"$UNAME_REL" + else + echo "$UNAME_MACHINE"-pc-sysv32 + fi + exit ;; + pc:*:*:*) + # Left here for compatibility: + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i586. + # Note: whatever this is, it MUST be the same as what config.sub + # prints for the "djgpp" host, or else GDB configure will decide that + # this is a cross-build. 
+ echo i586-pc-msdosdjgpp + exit ;; + Intel:Mach:3*:*) + echo i386-pc-mach3 + exit ;; + paragon:*:*:*) + echo i860-intel-osf1 + exit ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + echo i860-stardent-sysv"$UNAME_RELEASE" # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered. + echo i860-unknown-sysv"$UNAME_RELEASE" # Unknown i860-SVR4 + fi + exit ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + echo m68010-convergent-sysv + exit ;; + mc68k:UNIX:SYSTEM5:3.51m) + echo m68k-convergent-sysv + exit ;; + M680?0:D-NIX:5.3:*) + echo m68k-diab-dnix + exit ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + NCR*:*:4.2:* | MPRAS*:*:4.2:*) + OS_REL='.3' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + echo m68k-unknown-lynxos"$UNAME_RELEASE" + exit ;; + mc68030:UNIX_System_V:4.*:*) + echo m68k-atari-sysv4 + exit ;; + 
TSUNAMI:LynxOS:2.*:*) + echo sparc-unknown-lynxos"$UNAME_RELEASE" + exit ;; + rs6000:LynxOS:2.*:*) + echo rs6000-unknown-lynxos"$UNAME_RELEASE" + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) + echo powerpc-unknown-lynxos"$UNAME_RELEASE" + exit ;; + SM[BE]S:UNIX_SV:*:*) + echo mips-dde-sysv"$UNAME_RELEASE" + exit ;; + RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + RM*:SINIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + *:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + echo "$UNAME_MACHINE"-sni-sysv4 + else + echo ns32k-sni-sysv + fi + exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + echo i586-unisys-sysv4 + exit ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes . + # How about differentiating between stratus architectures? -djm + echo hppa1.1-stratus-sysv4 + exit ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + echo i860-stratus-sysv4 + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo "$UNAME_MACHINE"-stratus-vos + exit ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + echo hppa1.1-stratus-vos + exit ;; + mc68*:A/UX:*:*) + echo m68k-apple-aux"$UNAME_RELEASE" + exit ;; + news*:NEWS-OS:6*:*) + echo mips-sony-newsos6 + exit ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then + echo mips-nec-sysv"$UNAME_RELEASE" + else + echo mips-unknown-sysv"$UNAME_RELEASE" + fi + exit ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. + echo i586-pc-beos + exit ;; + BePC:Haiku:*:*) # Haiku running on Intel PC compatible. 
+ echo i586-pc-haiku + exit ;; + x86_64:Haiku:*:*) + echo x86_64-unknown-haiku + exit ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux"$UNAME_RELEASE" + exit ;; + SX-5:SUPER-UX:*:*) + echo sx5-nec-superux"$UNAME_RELEASE" + exit ;; + SX-6:SUPER-UX:*:*) + echo sx6-nec-superux"$UNAME_RELEASE" + exit ;; + SX-7:SUPER-UX:*:*) + echo sx7-nec-superux"$UNAME_RELEASE" + exit ;; + SX-8:SUPER-UX:*:*) + echo sx8-nec-superux"$UNAME_RELEASE" + exit ;; + SX-8R:SUPER-UX:*:*) + echo sx8r-nec-superux"$UNAME_RELEASE" + exit ;; + SX-ACE:SUPER-UX:*:*) + echo sxace-nec-superux"$UNAME_RELEASE" + exit ;; + Power*:Rhapsody:*:*) + echo powerpc-apple-rhapsody"$UNAME_RELEASE" + exit ;; + *:Rhapsody:*:*) + echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE" + exit ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` + case $UNAME_PROCESSOR in + unknown) UNAME_PROCESSOR=powerpc ;; + esac + if command -v xcode-select > /dev/null 2> /dev/null && \ + ! xcode-select --print-path > /dev/null 2> /dev/null ; then + # Avoid executing cc if there is no toolchain installed as + # cc will be a stub that puts up a graphical alert + # prompting the user to install developer tools. 
+ CC_FOR_BUILD=no_compiler_found + else + set_cc_for_build + fi + if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + case $UNAME_PROCESSOR in + i386) UNAME_PROCESSOR=x86_64 ;; + powerpc) UNAME_PROCESSOR=powerpc64 ;; + esac + fi + # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc + if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_PPC >/dev/null + then + UNAME_PROCESSOR=powerpc + fi + elif test "$UNAME_PROCESSOR" = i386 ; then + # uname -m returns i386 or x86_64 + UNAME_PROCESSOR=$UNAME_MACHINE + fi + echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE" + exit ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = x86; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + echo "$UNAME_PROCESSOR"-"$UNAME_MACHINE"-nto-qnx"$UNAME_RELEASE" + exit ;; + *:QNX:*:4*) + echo i386-pc-qnx + exit ;; + NEO-*:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSE-*:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSR-*:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSV-*:NONSTOP_KERNEL:*:*) + echo nsv-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSX-*:NONSTOP_KERNEL:*:*) + echo nsx-tandem-nsk"$UNAME_RELEASE" + exit ;; + *:NonStop-UX:*:*) + echo mips-compaq-nonstopux + exit ;; + BS2000:POSIX*:*:*) + echo bs2000-siemens-sysv + exit ;; + DS/*:UNIX_System_V:*:*) + echo "$UNAME_MACHINE"-"$UNAME_SYSTEM"-"$UNAME_RELEASE" + exit ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. 
+ # shellcheck disable=SC2154 + if test "$cputype" = 386; then + UNAME_MACHINE=i386 + else + UNAME_MACHINE="$cputype" + fi + echo "$UNAME_MACHINE"-unknown-plan9 + exit ;; + *:TOPS-10:*:*) + echo pdp10-unknown-tops10 + exit ;; + *:TENEX:*:*) + echo pdp10-unknown-tenex + exit ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + echo pdp10-dec-tops20 + exit ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + echo pdp10-xkl-tops20 + exit ;; + *:TOPS-20:*:*) + echo pdp10-unknown-tops20 + exit ;; + *:ITS:*:*) + echo pdp10-unknown-its + exit ;; + SEI:*:*:SEIUX) + echo mips-sei-seiux"$UNAME_RELEASE" + exit ;; + *:DragonFly:*:*) + echo "$UNAME_MACHINE"-unknown-dragonfly"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`" + exit ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case "$UNAME_MACHINE" in + A*) echo alpha-dec-vms ; exit ;; + I*) echo ia64-dec-vms ; exit ;; + V*) echo vax-dec-vms ; exit ;; + esac ;; + *:XENIX:*:SysV) + echo i386-pc-xenix + exit ;; + i*86:skyos:*:*) + echo "$UNAME_MACHINE"-pc-skyos"`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'`" + exit ;; + i*86:rdos:*:*) + echo "$UNAME_MACHINE"-pc-rdos + exit ;; + i*86:AROS:*:*) + echo "$UNAME_MACHINE"-pc-aros + exit ;; + x86_64:VMkernel:*:*) + echo "$UNAME_MACHINE"-unknown-esx + exit ;; + amd64:Isilon\ OneFS:*:*) + echo x86_64-unknown-onefs + exit ;; + *:Unleashed:*:*) + echo "$UNAME_MACHINE"-unknown-unleashed"$UNAME_RELEASE" + exit ;; +esac + +# No uname command or uname output not recognized. +set_cc_for_build +cat > "$dummy.c" < +#include +#endif +#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) +#if defined (vax) || defined (__vax) || defined (__vax__) || defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) +#include +#if defined(_SIZE_T_) || defined(SIGLOST) +#include +#endif +#endif +#endif +main () +{ +#if defined (sony) +#if defined (MIPSEB) + /* BFD wants "bsd" instead of "newsos". 
Perhaps BFD should be changed, + I don't know.... */ + printf ("mips-sony-bsd\n"); exit (0); +#else +#include + printf ("m68k-sony-newsos%s\n", +#ifdef NEWSOS4 + "4" +#else + "" +#endif + ); exit (0); +#endif +#endif + +#if defined (NeXT) +#if !defined (__ARCHITECTURE__) +#define __ARCHITECTURE__ "m68k" +#endif + int version; + version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); + exit (0); +#endif + +#if defined (MULTIMAX) || defined (n16) +#if defined (UMAXV) + printf ("ns32k-encore-sysv\n"); exit (0); +#else +#if defined (CMU) + printf ("ns32k-encore-mach\n"); exit (0); +#else + printf ("ns32k-encore-bsd\n"); exit (0); +#endif +#endif +#endif + +#if defined (__386BSD__) + printf ("i386-pc-bsd\n"); exit (0); +#endif + +#if defined (sequent) +#if defined (i386) + printf ("i386-sequent-dynix\n"); exit (0); +#endif +#if defined (ns32000) + printf ("ns32k-sequent-dynix\n"); exit (0); +#endif +#endif + +#if defined (_SEQUENT_) + struct utsname un; + + uname(&un); + if (strncmp(un.version, "V2", 2) == 0) { + printf ("i386-sequent-ptx2\n"); exit (0); + } + if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ + printf ("i386-sequent-ptx1\n"); exit (0); + } + printf ("i386-sequent-ptx\n"); exit (0); +#endif + +#if defined (vax) +#if !defined (ultrix) +#include +#if defined (BSD) +#if BSD == 43 + printf ("vax-dec-bsd4.3\n"); exit (0); +#else +#if BSD == 199006 + printf ("vax-dec-bsd4.3reno\n"); exit (0); +#else + printf ("vax-dec-bsd\n"); exit (0); +#endif +#endif +#else + printf ("vax-dec-bsd\n"); exit (0); +#endif +#else +#if defined(_SIZE_T_) || defined(SIGLOST) + struct utsname un; + uname (&un); + printf ("vax-dec-ultrix%s\n", un.release); exit (0); +#else + printf ("vax-dec-ultrix\n"); exit (0); +#endif +#endif +#endif +#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) +#if defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) +#if defined(_SIZE_T_) || defined(SIGLOST) + struct utsname *un; + uname (&un); + printf ("mips-dec-ultrix%s\n", un.release); exit (0); +#else + printf ("mips-dec-ultrix\n"); exit (0); +#endif +#endif +#endif + +#if defined (alliant) && defined (i860) + printf ("i860-alliant-bsd\n"); exit (0); +#endif + + exit (1); +} +EOF + +$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + +# Apollos put the system type in the environment. +test -d /usr/apollo && { echo "$ISP-apollo-$SYSTYPE"; exit; } + +echo "$0: unable to guess system type" >&2 + +case "$UNAME_MACHINE:$UNAME_SYSTEM" in + mips:Linux | mips64:Linux) + # If we got here on MIPS GNU/Linux, output extra information. 
+ cat >&2 <&2 </dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = "$UNAME_MACHINE" +UNAME_RELEASE = "$UNAME_RELEASE" +UNAME_SYSTEM = "$UNAME_SYSTEM" +UNAME_VERSION = "$UNAME_VERSION" +EOF + +exit 1 + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/pcre2/config.sub b/pcre2/config.sub new file mode 100755 index 000000000..f02d43ad5 --- /dev/null +++ b/pcre2/config.sub @@ -0,0 +1,1793 @@ +#! /bin/sh +# Configuration validation subroutine script. +# Copyright 1992-2020 Free Software Foundation, Inc. + +timestamp='2020-01-01' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see . 
+# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). + + +# Please send patches to . +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1. +# Otherwise, we print the canonical config type on stdout and succeed. + +# You can get the latest version of this script from: +# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. + +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS + +Canonicalize a configuration name. + +Options: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." 
+ +version="\ +GNU config.sub ($timestamp) + +Copyright 1992-2020 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + + *local*) + # First pass through any local machine types. + echo "$1" + exit ;; + + * ) + break ;; + esac +done + +case $# in + 0) echo "$me: missing argument$help" >&2 + exit 1;; + 1) ;; + *) echo "$me: too many arguments$help" >&2 + exit 1;; +esac + +# Split fields of configuration type +# shellcheck disable=SC2162 +IFS="-" read field1 field2 field3 field4 <&2 + exit 1 + ;; + *-*-*-*) + basic_machine=$field1-$field2 + os=$field3-$field4 + ;; + *-*-*) + # Ambiguous whether COMPANY is present, or skipped and KERNEL-OS is two + # parts + maybe_os=$field2-$field3 + case $maybe_os in + nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc \ + | linux-newlib* | linux-musl* | linux-uclibc* | uclinux-uclibc* \ + | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \ + | netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \ + | storm-chaos* | os2-emx* | rtmk-nova*) + basic_machine=$field1 + os=$maybe_os + ;; + android-linux) + basic_machine=$field1-unknown + os=linux-android + ;; + *) + basic_machine=$field1-$field2 + os=$field3 + ;; + esac + ;; + *-*) + # A lone config we happen to match not fitting any pattern + case $field1-$field2 in + decstation-3100) + basic_machine=mips-dec + os= + ;; + *-*) + # Second component is usually, but not always the OS + case $field2 in + # Prevent following 
clause from handling this valid os + sun*os*) + basic_machine=$field1 + os=$field2 + ;; + # Manufacturers + dec* | mips* | sequent* | encore* | pc533* | sgi* | sony* \ + | att* | 7300* | 3300* | delta* | motorola* | sun[234]* \ + | unicom* | ibm* | next | hp | isi* | apollo | altos* \ + | convergent* | ncr* | news | 32* | 3600* | 3100* \ + | hitachi* | c[123]* | convex* | sun | crds | omron* | dg \ + | ultra | tti* | harris | dolphin | highlevel | gould \ + | cbm | ns | masscomp | apple | axis | knuth | cray \ + | microblaze* | sim | cisco \ + | oki | wec | wrs | winbond) + basic_machine=$field1-$field2 + os= + ;; + *) + basic_machine=$field1 + os=$field2 + ;; + esac + ;; + esac + ;; + *) + # Convert single-component short-hands not valid as part of + # multi-component configurations. + case $field1 in + 386bsd) + basic_machine=i386-pc + os=bsd + ;; + a29khif) + basic_machine=a29k-amd + os=udi + ;; + adobe68k) + basic_machine=m68010-adobe + os=scout + ;; + alliant) + basic_machine=fx80-alliant + os= + ;; + altos | altos3068) + basic_machine=m68k-altos + os= + ;; + am29k) + basic_machine=a29k-none + os=bsd + ;; + amdahl) + basic_machine=580-amdahl + os=sysv + ;; + amiga) + basic_machine=m68k-unknown + os= + ;; + amigaos | amigados) + basic_machine=m68k-unknown + os=amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + os=sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + os=sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + os=bsd + ;; + aros) + basic_machine=i386-pc + os=aros + ;; + aux) + basic_machine=m68k-apple + os=aux + ;; + balance) + basic_machine=ns32k-sequent + os=dynix + ;; + blackfin) + basic_machine=bfin-unknown + os=linux + ;; + cegcc) + basic_machine=arm-unknown + os=cegcc + ;; + convex-c1) + basic_machine=c1-convex + os=bsd + ;; + convex-c2) + basic_machine=c2-convex + os=bsd + ;; + convex-c32) + basic_machine=c32-convex + os=bsd + ;; + convex-c34) + basic_machine=c34-convex + os=bsd + ;; + convex-c38) + basic_machine=c38-convex + 
os=bsd + ;; + cray) + basic_machine=j90-cray + os=unicos + ;; + crds | unos) + basic_machine=m68k-crds + os= + ;; + da30) + basic_machine=m68k-da30 + os= + ;; + decstation | pmax | pmin | dec3100 | decstatn) + basic_machine=mips-dec + os= + ;; + delta88) + basic_machine=m88k-motorola + os=sysv3 + ;; + dicos) + basic_machine=i686-pc + os=dicos + ;; + djgpp) + basic_machine=i586-pc + os=msdosdjgpp + ;; + ebmon29k) + basic_machine=a29k-amd + os=ebmon + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + os=ose + ;; + gmicro) + basic_machine=tron-gmicro + os=sysv + ;; + go32) + basic_machine=i386-pc + os=go32 + ;; + h8300hms) + basic_machine=h8300-hitachi + os=hms + ;; + h8300xray) + basic_machine=h8300-hitachi + os=xray + ;; + h8500hms) + basic_machine=h8500-hitachi + os=hms + ;; + harris) + basic_machine=m88k-harris + os=sysv3 + ;; + hp300 | hp300hpux) + basic_machine=m68k-hp + os=hpux + ;; + hp300bsd) + basic_machine=m68k-hp + os=bsd + ;; + hppaosf) + basic_machine=hppa1.1-hp + os=osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=proelf + ;; + i386mach) + basic_machine=i386-mach + os=mach + ;; + isi68 | isi) + basic_machine=m68k-isi + os=sysv + ;; + m68knommu) + basic_machine=m68k-unknown + os=linux + ;; + magnum | m3230) + basic_machine=mips-mips + os=sysv + ;; + merlin) + basic_machine=ns32k-utek + os=sysv + ;; + mingw64) + basic_machine=x86_64-pc + os=mingw64 + ;; + mingw32) + basic_machine=i686-pc + os=mingw32 + ;; + mingw32ce) + basic_machine=arm-unknown + os=mingw32ce + ;; + monitor) + basic_machine=m68k-rom68k + os=coff + ;; + morphos) + basic_machine=powerpc-unknown + os=morphos + ;; + moxiebox) + basic_machine=moxie-unknown + os=moxiebox + ;; + msdos) + basic_machine=i386-pc + os=msdos + ;; + msys) + basic_machine=i686-pc + os=msys + ;; + mvs) + basic_machine=i370-ibm + os=mvs + ;; + nacl) + basic_machine=le32-unknown + os=nacl + ;; + ncr3000) + basic_machine=i486-ncr + os=sysv4 + ;; + netbsd386) + basic_machine=i386-pc + os=netbsd + 
;; + netwinder) + basic_machine=armv4l-rebel + os=linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + os=newsos + ;; + news1000) + basic_machine=m68030-sony + os=newsos + ;; + necv70) + basic_machine=v70-nec + os=sysv + ;; + nh3000) + basic_machine=m68k-harris + os=cxux + ;; + nh[45]000) + basic_machine=m88k-harris + os=cxux + ;; + nindy960) + basic_machine=i960-intel + os=nindy + ;; + mon960) + basic_machine=i960-intel + os=mon960 + ;; + nonstopux) + basic_machine=mips-compaq + os=nonstopux + ;; + os400) + basic_machine=powerpc-ibm + os=os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=ose + ;; + os68k) + basic_machine=m68k-none + os=os68k + ;; + paragon) + basic_machine=i860-intel + os=osf + ;; + parisc) + basic_machine=hppa-unknown + os=linux + ;; + pw32) + basic_machine=i586-unknown + os=pw32 + ;; + rdos | rdos64) + basic_machine=x86_64-pc + os=rdos + ;; + rdos32) + basic_machine=i386-pc + os=rdos + ;; + rom68k) + basic_machine=m68k-rom68k + os=coff + ;; + sa29200) + basic_machine=a29k-amd + os=udi + ;; + sei) + basic_machine=mips-sei + os=seiux + ;; + sequent) + basic_machine=i386-sequent + os= + ;; + sps7) + basic_machine=m68k-bull + os=sysv2 + ;; + st2000) + basic_machine=m68k-tandem + os= + ;; + stratus) + basic_machine=i860-stratus + os=sysv4 + ;; + sun2) + basic_machine=m68000-sun + os= + ;; + sun2os3) + basic_machine=m68000-sun + os=sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + os=sunos4 + ;; + sun3) + basic_machine=m68k-sun + os= + ;; + sun3os3) + basic_machine=m68k-sun + os=sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + os=sunos4 + ;; + sun4) + basic_machine=sparc-sun + os= + ;; + sun4os3) + basic_machine=sparc-sun + os=sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + os=sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + os=solaris2 + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + os= + ;; + sv1) + basic_machine=sv1-cray + os=unicos + ;; + symmetry) + basic_machine=i386-sequent + 
os=dynix + ;; + t3e) + basic_machine=alphaev5-cray + os=unicos + ;; + t90) + basic_machine=t90-cray + os=unicos + ;; + toad1) + basic_machine=pdp10-xkl + os=tops20 + ;; + tpf) + basic_machine=s390x-ibm + os=tpf + ;; + udi29k) + basic_machine=a29k-amd + os=udi + ;; + ultra3) + basic_machine=a29k-nyu + os=sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + os=none + ;; + vaxv) + basic_machine=vax-dec + os=sysv + ;; + vms) + basic_machine=vax-dec + os=vms + ;; + vsta) + basic_machine=i386-pc + os=vsta + ;; + vxworks960) + basic_machine=i960-wrs + os=vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + os=vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + os=vxworks + ;; + xbox) + basic_machine=i686-pc + os=mingw32 + ;; + ymp) + basic_machine=ymp-cray + os=unicos + ;; + *) + basic_machine=$1 + os= + ;; + esac + ;; +esac + +# Decode 1-component or ad-hoc basic machines +case $basic_machine in + # Here we handle the default manufacturer of certain CPU types. It is in + # some cases the only manufacturer, in others, it is the most popular. + w89k) + cpu=hppa1.1 + vendor=winbond + ;; + op50n) + cpu=hppa1.1 + vendor=oki + ;; + op60c) + cpu=hppa1.1 + vendor=oki + ;; + ibm*) + cpu=i370 + vendor=ibm + ;; + orion105) + cpu=clipper + vendor=highlevel + ;; + mac | mpw | mac-mpw) + cpu=m68k + vendor=apple + ;; + pmac | pmac-mpw) + cpu=powerpc + vendor=apple + ;; + + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. 
+ 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + cpu=m68000 + vendor=att + ;; + 3b*) + cpu=we32k + vendor=att + ;; + bluegene*) + cpu=powerpc + vendor=ibm + os=cnk + ;; + decsystem10* | dec10*) + cpu=pdp10 + vendor=dec + os=tops10 + ;; + decsystem20* | dec20*) + cpu=pdp10 + vendor=dec + os=tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + cpu=m68k + vendor=motorola + ;; + dpx2*) + cpu=m68k + vendor=bull + os=sysv3 + ;; + encore | umax | mmax) + cpu=ns32k + vendor=encore + ;; + elxsi) + cpu=elxsi + vendor=elxsi + os=${os:-bsd} + ;; + fx2800) + cpu=i860 + vendor=alliant + ;; + genix) + cpu=ns32k + vendor=ns + ;; + h3050r* | hiux*) + cpu=hppa1.1 + vendor=hitachi + os=hiuxwe2 + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + cpu=hppa1.0 + vendor=hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + cpu=m68000 + vendor=hp + ;; + hp9k3[2-9][0-9]) + cpu=m68k + vendor=hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + cpu=hppa1.0 + vendor=hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + cpu=hppa1.1 + vendor=hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + cpu=hppa1.1 + vendor=hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + cpu=hppa1.1 + vendor=hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + cpu=hppa1.1 + vendor=hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + cpu=hppa1.0 + vendor=hp + ;; + i*86v32) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + os=sysv32 + ;; + i*86v4*) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + os=sysv4 + ;; + i*86v) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + os=sysv + ;; + i*86sol2) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + os=solaris2 + ;; + j90 | j90-cray) + cpu=j90 + vendor=cray + os=${os:-unicos} + ;; + iris | iris4d) + cpu=mips + vendor=sgi + case $os in + irix*) + ;; + *) + os=irix4 + ;; + esac + ;; + miniframe) + cpu=m68000 + vendor=convergent + ;; + *mint | mint[0-9]* | 
*MiNT | *MiNT[0-9]*) + cpu=m68k + vendor=atari + os=mint + ;; + news-3600 | risc-news) + cpu=mips + vendor=sony + os=newsos + ;; + next | m*-next) + cpu=m68k + vendor=next + case $os in + openstep*) + ;; + nextstep*) + ;; + ns2*) + os=nextstep2 + ;; + *) + os=nextstep3 + ;; + esac + ;; + np1) + cpu=np1 + vendor=gould + ;; + op50n-* | op60c-*) + cpu=hppa1.1 + vendor=oki + os=proelf + ;; + pa-hitachi) + cpu=hppa1.1 + vendor=hitachi + os=hiuxwe2 + ;; + pbd) + cpu=sparc + vendor=tti + ;; + pbb) + cpu=m68k + vendor=tti + ;; + pc532) + cpu=ns32k + vendor=pc532 + ;; + pn) + cpu=pn + vendor=gould + ;; + power) + cpu=power + vendor=ibm + ;; + ps2) + cpu=i386 + vendor=ibm + ;; + rm[46]00) + cpu=mips + vendor=siemens + ;; + rtpc | rtpc-*) + cpu=romp + vendor=ibm + ;; + sde) + cpu=mipsisa32 + vendor=sde + os=${os:-elf} + ;; + simso-wrs) + cpu=sparclite + vendor=wrs + os=vxworks + ;; + tower | tower-32) + cpu=m68k + vendor=ncr + ;; + vpp*|vx|vx-*) + cpu=f301 + vendor=fujitsu + ;; + w65) + cpu=w65 + vendor=wdc + ;; + w89k-*) + cpu=hppa1.1 + vendor=winbond + os=proelf + ;; + none) + cpu=none + vendor=none + ;; + leon|leon[3-9]) + cpu=sparc + vendor=$basic_machine + ;; + leon-*|leon[3-9]-*) + cpu=sparc + vendor=`echo "$basic_machine" | sed 's/-.*//'` + ;; + + *-*) + # shellcheck disable=SC2162 + IFS="-" read cpu vendor <&2 + exit 1 + ;; + esac + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. +case $vendor in + digital*) + vendor=dec + ;; + commodore*) + vendor=cbm + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if [ x$os != x ] +then +case $os in + # First match some system type aliases that might get confused + # with valid system types. + # solaris* is a basic system type, with this one exception. 
+ auroraux) + os=auroraux + ;; + bluegene*) + os=cnk + ;; + solaris1 | solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; + solaris) + os=solaris2 + ;; + unixware*) + os=sysv4.2uw + ;; + gnu/linux*) + os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` + ;; + # es1800 is here to avoid being matched by es* (a different OS) + es1800*) + os=ose + ;; + # Some version numbers need modification + chorusos*) + os=chorusos + ;; + isc) + os=isc2.2 + ;; + sco6) + os=sco5v6 + ;; + sco5) + os=sco3.2v5 + ;; + sco4) + os=sco3.2v4 + ;; + sco3.2.[4-9]*) + os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` + ;; + sco3.2v[4-9]* | sco5v6*) + # Don't forget version if it is 3.2v4 or newer. + ;; + scout) + # Don't match below + ;; + sco*) + os=sco3.2v2 + ;; + psos*) + os=psos + ;; + # Now accept the basic system types. + # The portable systems comes first. + # Each alternative MUST end in a * to match a version number. + # sysv* is not here because it comes later, after sysvr4. + gnu* | bsd* | mach* | minix* | genix* | ultrix* | irix* \ + | *vms* | esix* | aix* | cnk* | sunos | sunos[34]*\ + | hpux* | unos* | osf* | luna* | dgux* | auroraux* | solaris* \ + | sym* | kopensolaris* | plan9* \ + | amigaos* | amigados* | msdos* | newsos* | unicos* | aof* \ + | aos* | aros* | cloudabi* | sortix* | twizzler* \ + | nindy* | vxsim* | vxworks* | ebmon* | hms* | mvs* \ + | clix* | riscos* | uniplus* | iris* | isc* | rtu* | xenix* \ + | knetbsd* | mirbsd* | netbsd* \ + | bitrig* | openbsd* | solidbsd* | libertybsd* | os108* \ + | ekkobsd* | kfreebsd* | freebsd* | riscix* | lynxos* \ + | bosx* | nextstep* | cxux* | aout* | elf* | oabi* \ + | ptx* | coff* | ecoff* | winnt* | domain* | vsta* \ + | udi* | eabi* | lites* | ieee* | go32* | aux* | hcos* \ + | chorusrdb* | cegcc* | glidix* \ + | cygwin* | msys* | pe* | moss* | proelf* | rtems* \ + | midipix* | mingw32* | mingw64* | linux-gnu* | linux-android* \ + | linux-newlib* | linux-musl* | linux-uclibc* \ + | uxpv* | beos* | mpeix* | udk* | 
moxiebox* \ + | interix* | uwin* | mks* | rhapsody* | darwin* \ + | openstep* | oskit* | conix* | pw32* | nonstopux* \ + | storm-chaos* | tops10* | tenex* | tops20* | its* \ + | os2* | vos* | palmos* | uclinux* | nucleus* \ + | morphos* | superux* | rtmk* | windiss* \ + | powermax* | dnix* | nx6 | nx7 | sei* | dragonfly* \ + | skyos* | haiku* | rdos* | toppers* | drops* | es* \ + | onefs* | tirtos* | phoenix* | fuchsia* | redox* | bme* \ + | midnightbsd* | amdhsa* | unleashed* | emscripten* | wasi* \ + | nsk* | powerunix) + # Remember, each alternative MUST END IN *, to match a version number. + ;; + qnx*) + case $cpu in + x86 | i*86) + ;; + *) + os=nto-$os + ;; + esac + ;; + hiux*) + os=hiuxwe2 + ;; + nto-qnx*) + ;; + nto*) + os=`echo $os | sed -e 's|nto|nto-qnx|'` + ;; + sim | xray | os68k* | v88r* \ + | windows* | osx | abug | netware* | os9* \ + | macos* | mpw* | magic* | mmixware* | mon960* | lnews*) + ;; + linux-dietlibc) + os=linux-dietlibc + ;; + linux*) + os=`echo $os | sed -e 's|linux|linux-gnu|'` + ;; + lynx*178) + os=lynxos178 + ;; + lynx*5) + os=lynxos5 + ;; + lynx*) + os=lynxos + ;; + mac*) + os=`echo "$os" | sed -e 's|mac|macos|'` + ;; + opened*) + os=openedition + ;; + os400*) + os=os400 + ;; + sunos5*) + os=`echo "$os" | sed -e 's|sunos5|solaris2|'` + ;; + sunos6*) + os=`echo "$os" | sed -e 's|sunos6|solaris3|'` + ;; + wince*) + os=wince + ;; + utek*) + os=bsd + ;; + dynix*) + os=bsd + ;; + acis*) + os=aos + ;; + atheos*) + os=atheos + ;; + syllable*) + os=syllable + ;; + 386bsd) + os=bsd + ;; + ctix* | uts*) + os=sysv + ;; + nova*) + os=rtmk-nova + ;; + ns2) + os=nextstep2 + ;; + # Preserve the version number of sinix5. + sinix5.*) + os=`echo $os | sed -e 's|sinix|sysv|'` + ;; + sinix*) + os=sysv4 + ;; + tpf*) + os=tpf + ;; + triton*) + os=sysv3 + ;; + oss*) + os=sysv3 + ;; + svr4*) + os=sysv4 + ;; + svr3) + os=sysv3 + ;; + sysvr4) + os=sysv4 + ;; + # This must come after sysvr4. 
+ sysv*) + ;; + ose*) + os=ose + ;; + *mint | mint[0-9]* | *MiNT | MiNT[0-9]*) + os=mint + ;; + zvmoe) + os=zvmoe + ;; + dicos*) + os=dicos + ;; + pikeos*) + # Until real need of OS specific support for + # particular features comes up, bare metal + # configurations are quite functional. + case $cpu in + arm*) + os=eabi + ;; + *) + os=elf + ;; + esac + ;; + nacl*) + ;; + ios) + ;; + none) + ;; + *-eabi) + ;; + *) + echo Invalid configuration \`"$1"\': system \`"$os"\' not recognized 1>&2 + exit 1 + ;; +esac +else + +# Here we handle the default operating systems that come with various machines. +# The value should be what the vendor currently ships out the door with their +# machine or put another way, the most popular os provided with the machine. + +# Note that if you're going to try to match "-MANUFACTURER" here (say, +# "-sun"), then you have to tell the case statement up towards the top +# that MANUFACTURER isn't an operating system. Otherwise, code above +# will signal an error saying that MANUFACTURER isn't an operating +# system, and we'll never get to this point. + +case $cpu-$vendor in + score-*) + os=elf + ;; + spu-*) + os=elf + ;; + *-acorn) + os=riscix1.2 + ;; + arm*-rebel) + os=linux + ;; + arm*-semi) + os=aout + ;; + c4x-* | tic4x-*) + os=coff + ;; + c8051-*) + os=elf + ;; + clipper-intergraph) + os=clix + ;; + hexagon-*) + os=elf + ;; + tic54x-*) + os=coff + ;; + tic55x-*) + os=coff + ;; + tic6x-*) + os=coff + ;; + # This must come before the *-dec entry. + pdp10-*) + os=tops20 + ;; + pdp11-*) + os=none + ;; + *-dec | vax-*) + os=ultrix4.2 + ;; + m68*-apollo) + os=domain + ;; + i386-sun) + os=sunos4.0.2 + ;; + m68000-sun) + os=sunos3 + ;; + m68*-cisco) + os=aout + ;; + mep-*) + os=elf + ;; + mips*-cisco) + os=elf + ;; + mips*-*) + os=elf + ;; + or32-*) + os=coff + ;; + *-tti) # must be before sparc entry or we get the wrong os. 
+ os=sysv3 + ;; + sparc-* | *-sun) + os=sunos4.1.1 + ;; + pru-*) + os=elf + ;; + *-be) + os=beos + ;; + *-ibm) + os=aix + ;; + *-knuth) + os=mmixware + ;; + *-wec) + os=proelf + ;; + *-winbond) + os=proelf + ;; + *-oki) + os=proelf + ;; + *-hp) + os=hpux + ;; + *-hitachi) + os=hiux + ;; + i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) + os=sysv + ;; + *-cbm) + os=amigaos + ;; + *-dg) + os=dgux + ;; + *-dolphin) + os=sysv3 + ;; + m68k-ccur) + os=rtu + ;; + m88k-omron*) + os=luna + ;; + *-next) + os=nextstep + ;; + *-sequent) + os=ptx + ;; + *-crds) + os=unos + ;; + *-ns) + os=genix + ;; + i370-*) + os=mvs + ;; + *-gould) + os=sysv + ;; + *-highlevel) + os=bsd + ;; + *-encore) + os=bsd + ;; + *-sgi) + os=irix + ;; + *-siemens) + os=sysv4 + ;; + *-masscomp) + os=rtu + ;; + f30[01]-fujitsu | f700-fujitsu) + os=uxpv + ;; + *-rom68k) + os=coff + ;; + *-*bug) + os=coff + ;; + *-apple) + os=macos + ;; + *-atari*) + os=mint + ;; + *-wrs) + os=vxworks + ;; + *) + os=none + ;; +esac +fi + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. 
+case $vendor in + unknown) + case $os in + riscix*) + vendor=acorn + ;; + sunos*) + vendor=sun + ;; + cnk*|-aix*) + vendor=ibm + ;; + beos*) + vendor=be + ;; + hpux*) + vendor=hp + ;; + mpeix*) + vendor=hp + ;; + hiux*) + vendor=hitachi + ;; + unos*) + vendor=crds + ;; + dgux*) + vendor=dg + ;; + luna*) + vendor=omron + ;; + genix*) + vendor=ns + ;; + clix*) + vendor=intergraph + ;; + mvs* | opened*) + vendor=ibm + ;; + os400*) + vendor=ibm + ;; + ptx*) + vendor=sequent + ;; + tpf*) + vendor=ibm + ;; + vxsim* | vxworks* | windiss*) + vendor=wrs + ;; + aux*) + vendor=apple + ;; + hms*) + vendor=hitachi + ;; + mpw* | macos*) + vendor=apple + ;; + *mint | mint[0-9]* | *MiNT | MiNT[0-9]*) + vendor=atari + ;; + vos*) + vendor=stratus + ;; + esac + ;; +esac + +echo "$cpu-$vendor-$os" +exit + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/pcre2/configure.ac b/pcre2/configure.ac index d74c76a87..af26f0b8f 100644 --- a/pcre2/configure.ac +++ b/pcre2/configure.ac @@ -1,13 +1,1117 @@ +dnl Process this file with autoconf to produce a configure script. + +dnl NOTE FOR MAINTAINERS: Do not use minor version numbers 08 or 09 because +dnl the leading zeros may cause them to be treated as invalid octal constants +dnl if a PCRE2 user writes code that uses PCRE2_MINOR as a number. There is now +dnl a check further down that throws an error if 08 or 09 are used. + +dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might +dnl be defined as -RC2, for example. For real releases, it should be empty. 
+ m4_define(pcre2_major, [10]) -m4_define(pcre2_minor, [35]) +m4_define(pcre2_minor, [36]) m4_define(pcre2_prerelease, []) -m4_define(pcre2_date, [2020-05-09]) +m4_define(pcre2_date, [2020-12-04]) # Libtool shared library interface versions (current:revision:age) -m4_define(libpcre2_8_version, [10:0:10]) -m4_define(libpcre2_16_version, [10:0:10]) -m4_define(libpcre2_32_version, [10:0:10]) +m4_define(libpcre2_8_version, [10:1:10]) +m4_define(libpcre2_16_version, [10:1:10]) +m4_define(libpcre2_32_version, [10:1:10]) m4_define(libpcre2_posix_version, [2:3:0]) # NOTE: The CMakeLists.txt file searches for the above variables in the first # 50 lines of this file. Please update that if the variables above are moved. + +AC_PREREQ(2.57) +AC_INIT(PCRE2, pcre2_major.pcre2_minor[]pcre2_prerelease, , pcre2) +AC_CONFIG_SRCDIR([src/pcre2.h.in]) +AM_INIT_AUTOMAKE([dist-bzip2 dist-zip]) +m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) +AC_CONFIG_HEADERS(src/config.h) + +# This was added at the suggestion of libtoolize (03-Jan-10) +AC_CONFIG_MACRO_DIR([m4]) + +# The default CFLAGS in Autoconf are "-g -O2" for gcc and just "-g" for any +# other compiler. There doesn't seem to be a standard way of getting rid of the +# -g (which I don't think is needed for a production library). This fudge seems +# to achieve the necessary. First, we remember the externally set values of +# CFLAGS. Then call the AC_PROG_CC macro to find the compiler - if CFLAGS is +# not set, it will be set to Autoconf's defaults. Afterwards, if the original +# values were not set, remove the -g from the Autoconf defaults. 
+ +remember_set_CFLAGS="$CFLAGS" + +AC_PROG_CC +AM_PROG_CC_C_O +AC_USE_SYSTEM_EXTENSIONS + +if test "x$remember_set_CFLAGS" = "x" +then + if test "$CFLAGS" = "-g -O2" + then + CFLAGS="-O2" + elif test "$CFLAGS" = "-g" + then + CFLAGS="" + fi +fi + +# This is a new thing required to stop a warning from automake 1.12 +m4_ifdef([AM_PROG_AR], [AM_PROG_AR]) + +# Check for a 64-bit integer type +AC_TYPE_INT64_T + +AC_PROG_INSTALL +AC_LIBTOOL_WIN32_DLL +LT_INIT +AC_PROG_LN_S + +# Check for GCC visibility feature + +PCRE2_VISIBILITY + +# Check for Clang __attribute__((uninitialized)) feature + +AC_MSG_CHECKING([for __attribute__((uninitialized))]) +AC_LANG_PUSH([C]) +tmp_CFLAGS=$CFLAGS +CFLAGS="$CFLAGS -Werror" +AC_COMPILE_IFELSE([AC_LANG_PROGRAM(, + [[char buf[128] __attribute__((uninitialized));(void)buf]])], + [pcre2_cc_cv_attribute_uninitialized=yes], + [pcre2_cc_cv_attribute_uninitialized=no]) +AC_MSG_RESULT([$pcre2_cc_cv_attribute_uninitialized]) +if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then + AC_DEFINE([HAVE_ATTRIBUTE_UNINITIALIZED], 1, [Define this if your compiler + supports __attribute__((uninitialized))]) +fi +CFLAGS=$tmp_CFLAGS +AC_LANG_POP([C]) + +# Versioning + +PCRE2_MAJOR="pcre2_major" +PCRE2_MINOR="pcre2_minor" +PCRE2_PRERELEASE="pcre2_prerelease" +PCRE2_DATE="pcre2_date" + +if test "$PCRE2_MINOR" = "08" -o "$PCRE2_MINOR" = "09" +then + echo "***" + echo "*** Minor version number $PCRE2_MINOR must not be used. ***" + echo "*** Use only 00 to 07 or 10 onwards, to avoid octal issues. ***" + echo "***" + exit 1 +fi + +AC_SUBST(PCRE2_MAJOR) +AC_SUBST(PCRE2_MINOR) +AC_SUBST(PCRE2_PRERELEASE) +AC_SUBST(PCRE2_DATE) + +# Set a more sensible default value for $(htmldir). 
+if test "x$htmldir" = 'x${docdir}' +then + htmldir='${docdir}/html' +fi + +# Force an error for PCRE1 size options +AC_ARG_ENABLE(pcre8,,,enable_pcre8=no) +AC_ARG_ENABLE(pcre16,,,enable_pcre16=no) +AC_ARG_ENABLE(pcre32,,,enable_pcre32=no) + +if test "$enable_pcre8$enable_pcre16$enable_pcre32" != "nonono" +then + echo "** ERROR: Use --[[en|dis]]able-pcre2-[[8|16|32]], not --[[en|dis]]able-pcre[[8|16|32]]" + exit 1 +fi + +# Handle --disable-pcre2-8 (enabled by default) +AC_ARG_ENABLE(pcre2-8, + AS_HELP_STRING([--disable-pcre2-8], + [disable 8 bit character support]), + , enable_pcre2_8=unset) +AC_SUBST(enable_pcre2_8) + +# Handle --enable-pcre2-16 (disabled by default) +AC_ARG_ENABLE(pcre2-16, + AS_HELP_STRING([--enable-pcre2-16], + [enable 16 bit character support]), + , enable_pcre2_16=unset) +AC_SUBST(enable_pcre2_16) + +# Handle --enable-pcre2-32 (disabled by default) +AC_ARG_ENABLE(pcre2-32, + AS_HELP_STRING([--enable-pcre2-32], + [enable 32 bit character support]), + , enable_pcre2_32=unset) +AC_SUBST(enable_pcre2_32) + +# Handle --enable-debug (disabled by default) +AC_ARG_ENABLE(debug, + AS_HELP_STRING([--enable-debug], + [enable debugging code]), + , enable_debug=no) + +# Handle --enable-jit (disabled by default) +AC_ARG_ENABLE(jit, + AS_HELP_STRING([--enable-jit], + [enable Just-In-Time compiling support]), + , enable_jit=no) + +# This code enables JIT if the hardware supports it. +if test "$enable_jit" = "auto"; then + AC_LANG(C) + SAVE_CPPFLAGS=$CPPFLAGS + CPPFLAGS=-I$srcdir + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + #define SLJIT_CONFIG_AUTO 1 + #include "src/sljit/sljitConfigInternal.h" + #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + #error unsupported + #endif]])], enable_jit=yes, enable_jit=no) + CPPFLAGS=$SAVE_CPPFLAGS + echo checking for JIT support on this hardware... 
$enable_jit +fi + +# Handle --enable-jit-sealloc (disabled by default and only experimental) +case $host_os in + linux* | netbsd*) + AC_ARG_ENABLE(jit-sealloc, + AS_HELP_STRING([--enable-jit-sealloc], + [enable SELinux compatible execmem allocator in JIT (experimental)]), + ,enable_jit_sealloc=no) + ;; + *) + enable_jit_sealloc=unsupported + ;; +esac + +# Handle --disable-pcre2grep-jit (enabled by default) +AC_ARG_ENABLE(pcre2grep-jit, + AS_HELP_STRING([--disable-pcre2grep-jit], + [disable JIT support in pcre2grep]), + , enable_pcre2grep_jit=yes) + +# Handle --disable-pcre2grep-callout (enabled by default) +AC_ARG_ENABLE(pcre2grep-callout, + AS_HELP_STRING([--disable-pcre2grep-callout], + [disable callout script support in pcre2grep]), + , enable_pcre2grep_callout=yes) + +# Handle --disable-pcre2grep-callout-fork (enabled by default) +AC_ARG_ENABLE(pcre2grep-callout-fork, + AS_HELP_STRING([--disable-pcre2grep-callout-fork], + [disable callout script fork support in pcre2grep]), + , enable_pcre2grep_callout_fork=yes) + +# Handle --enable-rebuild-chartables +AC_ARG_ENABLE(rebuild-chartables, + AS_HELP_STRING([--enable-rebuild-chartables], + [rebuild character tables in current locale]), + , enable_rebuild_chartables=no) + +# Handle --disable-unicode (enabled by default) +AC_ARG_ENABLE(unicode, + AS_HELP_STRING([--disable-unicode], + [disable Unicode support]), + , enable_unicode=unset) + +# Handle newline options +ac_pcre2_newline=lf +AC_ARG_ENABLE(newline-is-cr, + AS_HELP_STRING([--enable-newline-is-cr], + [use CR as newline character]), + ac_pcre2_newline=cr) +AC_ARG_ENABLE(newline-is-lf, + AS_HELP_STRING([--enable-newline-is-lf], + [use LF as newline character (default)]), + ac_pcre2_newline=lf) +AC_ARG_ENABLE(newline-is-crlf, + AS_HELP_STRING([--enable-newline-is-crlf], + [use CRLF as newline sequence]), + ac_pcre2_newline=crlf) +AC_ARG_ENABLE(newline-is-anycrlf, + AS_HELP_STRING([--enable-newline-is-anycrlf], + [use CR, LF, or CRLF as newline sequence]), + 
ac_pcre2_newline=anycrlf) +AC_ARG_ENABLE(newline-is-any, + AS_HELP_STRING([--enable-newline-is-any], + [use any valid Unicode newline sequence]), + ac_pcre2_newline=any) +AC_ARG_ENABLE(newline-is-nul, + AS_HELP_STRING([--enable-newline-is-nul], + [use NUL (binary zero) as newline character]), + ac_pcre2_newline=nul) +enable_newline="$ac_pcre2_newline" + +# Handle --enable-bsr-anycrlf +AC_ARG_ENABLE(bsr-anycrlf, + AS_HELP_STRING([--enable-bsr-anycrlf], + [\R matches only CR, LF, CRLF by default]), + , enable_bsr_anycrlf=no) + +# Handle --enable-never-backslash-C +AC_ARG_ENABLE(never-backslash-C, + AS_HELP_STRING([--enable-never-backslash-C], + [use of \C causes an error]), + , enable_never_backslash_C=no) + +# Handle --enable-ebcdic +AC_ARG_ENABLE(ebcdic, + AS_HELP_STRING([--enable-ebcdic], + [assume EBCDIC coding rather than ASCII; incompatible with --enable-utf; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]), + , enable_ebcdic=no) + +# Handle --enable-ebcdic-nl25 +AC_ARG_ENABLE(ebcdic-nl25, + AS_HELP_STRING([--enable-ebcdic-nl25], + [set EBCDIC code for NL to 0x25 instead of 0x15; it implies --enable-ebcdic]), + , enable_ebcdic_nl25=no) + +# Handle --enable-pcre2grep-libz +AC_ARG_ENABLE(pcre2grep-libz, + AS_HELP_STRING([--enable-pcre2grep-libz], + [link pcre2grep with libz to handle .gz files]), + , enable_pcre2grep_libz=no) + +# Handle --enable-pcre2grep-libbz2 +AC_ARG_ENABLE(pcre2grep-libbz2, + AS_HELP_STRING([--enable-pcre2grep-libbz2], + [link pcre2grep with libbz2 to handle .bz2 files]), + , enable_pcre2grep_libbz2=no) + +# Handle --with-pcre2grep-bufsize=N +AC_ARG_WITH(pcre2grep-bufsize, + AS_HELP_STRING([--with-pcre2grep-bufsize=N], + [pcre2grep initial buffer size (default=20480, minimum=8192)]), + , with_pcre2grep_bufsize=20480) + +# Handle --with-pcre2grep-max-bufsize=N +AC_ARG_WITH(pcre2grep-max-bufsize, + AS_HELP_STRING([--with-pcre2grep-max-bufsize=N], + [pcre2grep maximum buffer size (default=1048576, 
minimum=8192)]), + , with_pcre2grep_max_bufsize=1048576) + +# Handle --enable-pcre2test-libedit +AC_ARG_ENABLE(pcre2test-libedit, + AS_HELP_STRING([--enable-pcre2test-libedit], + [link pcre2test with libedit]), + , enable_pcre2test_libedit=no) + +# Handle --enable-pcre2test-libreadline +AC_ARG_ENABLE(pcre2test-libreadline, + AS_HELP_STRING([--enable-pcre2test-libreadline], + [link pcre2test with libreadline]), + , enable_pcre2test_libreadline=no) + +# Handle --with-link-size=N +AC_ARG_WITH(link-size, + AS_HELP_STRING([--with-link-size=N], + [internal link size (2, 3, or 4 allowed; default=2)]), + , with_link_size=2) + +# Handle --with-parens-nest-limit=N +AC_ARG_WITH(parens-nest-limit, + AS_HELP_STRING([--with-parens-nest-limit=N], + [nested parentheses limit (default=250)]), + , with_parens_nest_limit=250) + +# Handle --with-heap-limit +AC_ARG_WITH(heap-limit, + AS_HELP_STRING([--with-heap-limit=N], + [default limit on heap memory (kibibytes, default=20000000)]), + , with_heap_limit=20000000) + +# Handle --with-match-limit=N +AC_ARG_WITH(match-limit, + AS_HELP_STRING([--with-match-limit=N], + [default limit on internal looping (default=10000000)]), + , with_match_limit=10000000) + +# Handle --with-match-limit-depth=N +# Recognize old synonym --with-match-limit-recursion +# +# Note: In config.h, the default is to define MATCH_LIMIT_DEPTH symbolically as +# MATCH_LIMIT, which in turn is defined to be some numeric value (e.g. +# 10000000). MATCH_LIMIT_DEPTH can otherwise be set to some different numeric +# value (or even the same numeric value as MATCH_LIMIT, though no longer +# defined in terms of the latter). 
+# +AC_ARG_WITH(match-limit-depth, + AS_HELP_STRING([--with-match-limit-depth=N], + [default limit on match tree depth (default=MATCH_LIMIT)]), + , with_match_limit_depth=MATCH_LIMIT) + +AC_ARG_WITH(match-limit-recursion,, + , with_match_limit_recursion=UNSET) + +# Handle --enable-valgrind +AC_ARG_ENABLE(valgrind, + AS_HELP_STRING([--enable-valgrind], + [enable valgrind support]), + , enable_valgrind=no) + +# Enable code coverage reports using gcov +AC_ARG_ENABLE(coverage, + AS_HELP_STRING([--enable-coverage], + [enable code coverage reports using gcov]), + , enable_coverage=no) + +# Handle --enable-fuzz-support +AC_ARG_ENABLE(fuzz_support, + AS_HELP_STRING([--enable-fuzz-support], + [enable fuzzer support]), + , enable_fuzz_support=no) + +# Handle --disable-stack-for-recursion +# This option became obsolete at release 10.30. +AC_ARG_ENABLE(stack-for-recursion,, + , enable_stack_for_recursion=yes) + +# Original code +# AC_ARG_ENABLE(stack-for-recursion, +# AS_HELP_STRING([--disable-stack-for-recursion], +# [don't use stack recursion when matching]), +# , enable_stack_for_recursion=yes) + +# Handle --disable-percent_zt (set as "auto" by default) +AC_ARG_ENABLE(percent-zt, + AS_HELP_STRING([--disable-percent-zt], + [disable the use of z and t formatting modifiers]), + , enable_percent_zt=auto) + +# Set the default value for pcre2-8 +if test "x$enable_pcre2_8" = "xunset" +then + enable_pcre2_8=yes +fi + +# Set the default value for pcre2-16 +if test "x$enable_pcre2_16" = "xunset" +then + enable_pcre2_16=no +fi + +# Set the default value for pcre2-32 +if test "x$enable_pcre2_32" = "xunset" +then + enable_pcre2_32=no +fi + +# Make sure at least one library is selected +if test "x$enable_pcre2_8$enable_pcre2_16$enable_pcre2_32" = "xnonono" +then + AC_MSG_ERROR([At least one of the 8, 16 or 32 bit libraries must be enabled]) +fi + +# Unicode is enabled by default. 
+if test "x$enable_unicode" = "xunset" +then + enable_unicode=yes +fi + +# Convert the newline identifier into the appropriate integer value. These must +# agree with the PCRE2_NEWLINE_xxx values in pcre2.h. + +case "$enable_newline" in + cr) ac_pcre2_newline_value=1 ;; + lf) ac_pcre2_newline_value=2 ;; + crlf) ac_pcre2_newline_value=3 ;; + any) ac_pcre2_newline_value=4 ;; + anycrlf) ac_pcre2_newline_value=5 ;; + nul) ac_pcre2_newline_value=6 ;; + *) + AC_MSG_ERROR([invalid argument \"$enable_newline\" to --enable-newline option]) + ;; +esac + +# --enable-ebcdic-nl25 implies --enable-ebcdic +if test "x$enable_ebcdic_nl25" = "xyes"; then + enable_ebcdic=yes +fi + +# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled. +# Also check that UTF support is not requested, because PCRE2 cannot handle +# EBCDIC and UTF in the same build. To do so it would need to use different +# character constants depending on the mode. Also, EBCDIC cannot be used with +# 16-bit and 32-bit libraries. +# +if test "x$enable_ebcdic" = "xyes"; then + enable_rebuild_chartables=yes + if test "x$enable_unicode" = "xyes"; then + AC_MSG_ERROR([support for EBCDIC and Unicode cannot be enabled at the same time]) + fi + if test "x$enable_pcre2_16" = "xyes" -o "x$enable_pcre2_32" = "xyes"; then + AC_MSG_ERROR([EBCDIC support is available only for the 8-bit library]) + fi +fi + +# Check argument to --with-link-size +case "$with_link_size" in + 2|3|4) ;; + *) + AC_MSG_ERROR([invalid argument \"$with_link_size\" to --with-link-size option]) + ;; +esac + +AH_TOP([ +/* PCRE2 is written in Standard C, but there are a few non-standard things it +can cope with, allowing it to run on SunOS4 and other "close to standard" +systems. + +In environments that support the GNU autotools, config.h.in is converted into +config.h by the "configure" script. In environments that use CMake, +config-cmake.in is converted into config.h. 
If you are going to build PCRE2 "by +hand" without using "configure" or CMake, you should copy the distributed +config.h.generic to config.h, and edit the macro definitions to be the way you +need them. You must then add -DHAVE_CONFIG_H to all of your compile commands, +so that config.h is included at the start of every source. + +Alternatively, you can avoid editing by using -D on the compiler command line +to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H, +but if you do, default values will be taken from config.h for non-boolean +macros that are not defined on the command line. + +Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be +defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All +such macros are listed as a commented #undef in config.h.generic. Macros such +as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are +surrounded by #ifndef/#endif lines so that the value can be overridden by -D. + +PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if +HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make +sure both macros are undefined; an emulation function will then be used. */]) + +# Checks for header files. 
+AC_HEADER_STDC +AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h) +AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1]) +AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1]) + +# Conditional compilation +AM_CONDITIONAL(WITH_PCRE2_8, test "x$enable_pcre2_8" = "xyes") +AM_CONDITIONAL(WITH_PCRE2_16, test "x$enable_pcre2_16" = "xyes") +AM_CONDITIONAL(WITH_PCRE2_32, test "x$enable_pcre2_32" = "xyes") +AM_CONDITIONAL(WITH_DEBUG, test "x$enable_debug" = "xyes") +AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes") +AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes") +AM_CONDITIONAL(WITH_UNICODE, test "x$enable_unicode" = "xyes") +AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes") +AM_CONDITIONAL(WITH_FUZZ_SUPPORT, test "x$enable_fuzz_support" = "xyes") + +if test "$enable_fuzz_support" = "yes" -a "$enable_pcre2_8" = "no"; then + echo "** ERROR: Fuzzer support requires the 8-bit library" + exit 1 +fi + +# Checks for typedefs, structures, and compiler characteristics. + +AC_C_CONST +AC_TYPE_SIZE_T + +# Checks for library functions. + +AC_CHECK_FUNCS(bcopy memfd_create memmove mkostemp secure_getenv strerror) + +# Check for the availability of libz (aka zlib) + +AC_CHECK_HEADERS([zlib.h], [HAVE_ZLIB_H=1]) +AC_CHECK_LIB([z], [gzopen], [HAVE_LIBZ=1]) + +# Check for the availability of libbz2. Originally we just used AC_CHECK_LIB, +# as for libz. However, this had the following problem, diagnosed and fixed by +# a user: +# +# - libbz2 uses the Pascal calling convention (WINAPI) for the functions +# under Win32. +# - The standard autoconf AC_CHECK_LIB fails to include "bzlib.h", +# therefore missing the function definition. +# - The compiler thus generates a "C" signature for the test function. +# - The linker fails to find the "C" function. +# - PCRE2 fails to configure if asked to do so against libbz2. +# +# Solution: +# +# - Replace the AC_CHECK_LIB test with a custom test. 
+ +AC_CHECK_HEADERS([bzlib.h], [HAVE_BZLIB_H=1]) +# Original test +# AC_CHECK_LIB([bz2], [BZ2_bzopen], [HAVE_LIBBZ2=1]) +# +# Custom test follows + +AC_MSG_CHECKING([for libbz2]) +OLD_LIBS="$LIBS" +LIBS="$LIBS -lbz2" +AC_LINK_IFELSE([AC_LANG_PROGRAM([[ +#ifdef HAVE_BZLIB_H +#include +#endif]], +[[return (int)BZ2_bzopen("conftest", "rb");]])], +[AC_MSG_RESULT([yes]);HAVE_LIBBZ2=1; break;], +AC_MSG_RESULT([no])) +LIBS="$OLD_LIBS" + +# Check for the availabiity of libreadline + +if test "$enable_pcre2test_libreadline" = "yes"; then + AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_H=1]) + AC_CHECK_HEADERS([readline/history.h], [HAVE_HISTORY_H=1]) + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lreadline"], + [unset ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltinfo"], + [unset ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lcurses"], + [unset ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncurses"], + [unset ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncursesw"], + [unset ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltermcap"], + [LIBREADLINE=""], + [-ltermcap])], + [-lncursesw])], + [-lncurses])], + [-lcurses])], + [-ltinfo])]) + AC_SUBST(LIBREADLINE) + if test -n "$LIBREADLINE"; then + if test "$LIBREADLINE" != "-lreadline"; then + echo "-lreadline needs $LIBREADLINE" + LIBREADLINE="-lreadline $LIBREADLINE" + fi + fi +fi + + +# Check for the availability of libedit. Different distributions put its +# headers in different places. Try to cover the most common ones. 
+ +if test "$enable_pcre2test_libedit" = "yes"; then + AC_CHECK_HEADERS([editline/readline.h], [HAVE_EDITLINE_READLINE_H=1], + [AC_CHECK_HEADERS([edit/readline/readline.h], [HAVE_READLINE_READLINE_H=1], + [AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_READLINE_H=1])])]) + AC_CHECK_LIB([edit], [readline], [LIBEDIT="-ledit"]) +fi + +PCRE2_STATIC_CFLAG="" +if test "x$enable_shared" = "xno" ; then + AC_DEFINE([PCRE2_STATIC], [1], [ + Define to any value if linking statically (TODO: make nice with Libtool)]) + PCRE2_STATIC_CFLAG="-DPCRE2_STATIC" +fi +AC_SUBST(PCRE2_STATIC_CFLAG) + +# Here is where PCRE2-specific defines are handled + +if test "$enable_pcre2_8" = "yes"; then + AC_DEFINE([SUPPORT_PCRE2_8], [], [ + Define to any value to enable the 8 bit PCRE2 library.]) +fi + +if test "$enable_pcre2_16" = "yes"; then + AC_DEFINE([SUPPORT_PCRE2_16], [], [ + Define to any value to enable the 16 bit PCRE2 library.]) +fi + +if test "$enable_pcre2_32" = "yes"; then + AC_DEFINE([SUPPORT_PCRE2_32], [], [ + Define to any value to enable the 32 bit PCRE2 library.]) +fi + +if test "$enable_debug" = "yes"; then + AC_DEFINE([PCRE2_DEBUG], [], [ + Define to any value to include debugging code.]) +fi + +if test "$enable_percent_zt" = "no"; then + AC_DEFINE([DISABLE_PERCENT_ZT], [], [ + Define to any value to disable the use of the z and t modifiers in + formatting settings such as %zu or %td (this is rarely needed).]) +else + enable_percent_zt=auto +fi + +# Unless running under Windows, JIT support requires pthreads. 
+ +if test "$enable_jit" = "yes"; then + if test "$HAVE_WINDOWS_H" != "1"; then + AX_PTHREAD([], [AC_MSG_ERROR([JIT support requires pthreads])]) + CC="$PTHREAD_CC" + CFLAGS="$PTHREAD_CFLAGS $CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + fi + AC_DEFINE([SUPPORT_JIT], [], [ + Define to any value to enable support for Just-In-Time compiling.]) +else + enable_pcre2grep_jit="no" +fi + +if test "$enable_jit_sealloc" = "yes"; then + AC_DEFINE([SLJIT_PROT_EXECUTABLE_ALLOCATOR], [1], [ + Define to any non-zero number to enable support for SELinux + compatible executable memory allocator in JIT. Note that this + will have no effect unless SUPPORT_JIT is also defined.]) +fi + +if test "$enable_pcre2grep_jit" = "yes"; then + AC_DEFINE([SUPPORT_PCRE2GREP_JIT], [], [ + Define to any value to enable JIT support in pcre2grep. Note that this will + have no effect unless SUPPORT_JIT is also defined.]) +fi + +if test "$enable_pcre2grep_callout" = "yes"; then + if test "$enable_pcre2grep_callout_fork" = "yes"; then + if test "$HAVE_WINDOWS_H" != "1"; then + if test "$HAVE_SYS_WAIT_H" != "1"; then + AC_MSG_ERROR([Callout script support needs sys/wait.h.]) + fi + fi + AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT_FORK], [], [ + Define to any value to enable fork support in pcre2grep callout scripts. + This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also + defined.]) + fi + AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [ + Define to any value to enable callout script support in pcre2grep.]) +else + enable_pcre2grep_callout_fork="no" +fi + +if test "$enable_unicode" = "yes"; then + AC_DEFINE([SUPPORT_UNICODE], [], [ + Define to any value to enable support for Unicode and UTF encoding. + This will work even in an EBCDIC environment, but it is incompatible + with the EBCDIC macro. 
That is, PCRE2 can support *either* EBCDIC + code *or* ASCII/Unicode, but not both at once.]) +fi + +if test "$enable_pcre2grep_libz" = "yes"; then + AC_DEFINE([SUPPORT_LIBZ], [], [ + Define to any value to allow pcre2grep to be linked with libz, so that it is + able to handle .gz files.]) +fi + +if test "$enable_pcre2grep_libbz2" = "yes"; then + AC_DEFINE([SUPPORT_LIBBZ2], [], [ + Define to any value to allow pcre2grep to be linked with libbz2, so that it + is able to handle .bz2 files.]) +fi + +if test $with_pcre2grep_bufsize -lt 8192 ; then + AC_MSG_WARN([$with_pcre2grep_bufsize is too small for --with-pcre2grep-bufsize; using 8192]) + with_pcre2grep_bufsize="8192" +else + if test $? -gt 1 ; then + AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize]) + fi +fi + +if test $with_pcre2grep_max_bufsize -lt $with_pcre2grep_bufsize ; then + with_pcre2grep_max_bufsize="$with_pcre2grep_bufsize" +else + if test $? -gt 1 ; then + AC_MSG_ERROR([Bad value for --with-pcre2grep-max-bufsize]) + fi +fi + +AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [ + The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by + pcre2grep to hold parts of the file it is searching. The buffer will be + expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing very + long lines. The actual amount of memory used by pcre2grep is three times this + number, because it allows for the buffering of "before" and "after" lines.]) + +AC_DEFINE_UNQUOTED([PCRE2GREP_MAX_BUFSIZE], [$with_pcre2grep_max_bufsize], [ + The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer + used by pcre2grep to hold parts of the file it is searching. 
The actual + amount of memory used by pcre2grep is three times this number, because it + allows for the buffering of "before" and "after" lines.]) + +if test "$enable_pcre2test_libedit" = "yes"; then + AC_DEFINE([SUPPORT_LIBEDIT], [], [ + Define to any value to allow pcre2test to be linked with libedit.]) + LIBREADLINE="$LIBEDIT" +elif test "$enable_pcre2test_libreadline" = "yes"; then + AC_DEFINE([SUPPORT_LIBREADLINE], [], [ + Define to any value to allow pcre2test to be linked with libreadline.]) +fi + +AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [ + The value of NEWLINE_DEFAULT determines the default newline character + sequence. PCRE2 client programs can override this by selecting other values + at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), + 5 (ANYCRLF), and 6 (NUL).]) + +if test "$enable_bsr_anycrlf" = "yes"; then + AC_DEFINE([BSR_ANYCRLF], [], [ + By default, the \R escape sequence matches any Unicode line ending + character or sequence of characters. If BSR_ANYCRLF is defined (to any + value), this is changed so that backslash-R matches only CR, LF, or CRLF. + The build-time default can be overridden by the user of PCRE2 at runtime.]) +fi + +if test "$enable_never_backslash_C" = "yes"; then + AC_DEFINE([NEVER_BACKSLASH_C], [], [ + Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns.]) +fi + +AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [ + The value of LINK_SIZE determines the number of bytes used to store + links as offsets within the compiled regex. The default is 2, which + allows for compiled patterns up to 65535 code units long. This covers the + vast majority of cases. However, PCRE2 can also be compiled to use 3 or 4 + bytes instead. This allows for longer patterns in extreme cases.]) + +AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [ + The value of PARENS_NEST_LIMIT specifies the maximum depth of nested + parentheses (of any kind) in a pattern. 
This limits the amount of system + stack that is used while compiling a pattern.]) + +AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [ + The value of MATCH_LIMIT determines the default number of times the + pcre2_match() function can record a backtrack position during a single + matching attempt. The value is also used to limit a loop counter in + pcre2_dfa_match(). There is a runtime interface for setting a different + limit. The limit exists in order to catch runaway regular expressions that + take for ever to determine that they do not match. The default is set very + large so that it does not accidentally catch legitimate cases.]) + +# --with-match-limit-recursion is an obsolete synonym for --with-match-limit-depth + +if test "$with_match_limit_recursion" != "UNSET"; then +cat <. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Originally written by Alexandre Oliva . + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: depcomp [--help] [--version] PROGRAM [ARGS] + +Run PROGRAMS ARGS to compile a file, generating dependencies +as side-effects. + +Environment variables: + depmode Dependency tracking mode. + source Source file read by 'PROGRAMS ARGS'. + object Object file output by 'PROGRAMS ARGS'. + DEPDIR directory where to store dependencies. + depfile Dependency file to output. + tmpdepfile Temporary file to use when outputting dependencies. + libtool Whether libtool is used (yes/no). + +Report bugs to . +EOF + exit $? + ;; + -v | --v*) + echo "depcomp $scriptversion" + exit $? + ;; +esac + +# Get the directory component of the given path, and save it in the +# global variables '$dir'. 
Note that this directory component will +# be either empty or ending with a '/' character. This is deliberate. +set_dir_from () +{ + case $1 in + */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;; + *) dir=;; + esac +} + +# Get the suffix-stripped basename of the given path, and save it the +# global variable '$base'. +set_base_from () +{ + base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'` +} + +# If no dependency file was actually created by the compiler invocation, +# we still have to create a dummy depfile, to avoid errors with the +# Makefile "include basename.Plo" scheme. +make_dummy_depfile () +{ + echo "#dummy" > "$depfile" +} + +# Factor out some common post-processing of the generated depfile. +# Requires the auxiliary global variable '$tmpdepfile' to be set. +aix_post_process_depfile () +{ + # If the compiler actually managed to produce a dependency file, + # post-process it. + if test -f "$tmpdepfile"; then + # Each line is of the form 'foo.o: dependency.h'. + # Do two passes, one to just change these to + # $object: dependency.h + # and one to simply output + # dependency.h: + # which is needed to avoid the deleted-header problem. + { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile" + sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile" + } > "$depfile" + rm -f "$tmpdepfile" + else + make_dummy_depfile + fi +} + +# A tabulation character. +tab=' ' +# A newline character. +nl=' +' +# Character ranges might be problematic outside the C locale. +# These definitions help. +upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ +lower=abcdefghijklmnopqrstuvwxyz +digits=0123456789 +alpha=${upper}${lower} + +if test -z "$depmode" || test -z "$source" || test -z "$object"; then + echo "depcomp: Variables source, object and depmode must be set" 1>&2 + exit 1 +fi + +# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. 
+depfile=${depfile-`echo "$object" | + sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} +tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} + +rm -f "$tmpdepfile" + +# Avoid interferences from the environment. +gccflag= dashmflag= + +# Some modes work just like other modes, but use different flags. We +# parameterize here, but still list the modes in the big case below, +# to make depend.m4 easier to write. Note that we *cannot* use a case +# here, because this file can only contain one case statement. +if test "$depmode" = hp; then + # HP compiler uses -M and no extra arg. + gccflag=-M + depmode=gcc +fi + +if test "$depmode" = dashXmstdout; then + # This is just like dashmstdout with a different argument. + dashmflag=-xM + depmode=dashmstdout +fi + +cygpath_u="cygpath -u -f -" +if test "$depmode" = msvcmsys; then + # This is just like msvisualcpp but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvisualcpp +fi + +if test "$depmode" = msvc7msys; then + # This is just like msvc7 but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvc7 +fi + +if test "$depmode" = xlc; then + # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information. + gccflag=-qmakedep=gcc,-MF + depmode=gcc +fi + +case "$depmode" in +gcc3) +## gcc 3 implements dependency tracking that does exactly what +## we want. Yay! Note: for some reason libtool 1.4 doesn't like +## it if -MD -MP comes after the -MF stuff. Hmm. +## Unfortunately, FreeBSD c89 acceptance of flags depends upon +## the command line argument order; so add the flags where they +## appear in depend2.am. Note that the slowdown incurred here +## affects only configure: in makefiles, %FASTDEP% shortcuts this. 
+ for arg + do + case $arg in + -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;; + *) set fnord "$@" "$arg" ;; + esac + shift # fnord + shift # $arg + done + "$@" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + mv "$tmpdepfile" "$depfile" + ;; + +gcc) +## Note that this doesn't just cater to obsolete pre-3.x GCC compilers, +## but also to in-use compilers like IBM xlc/xlC and the HP C compiler. +## (see the conditional assignment to $gccflag above). +## There are various ways to get dependency output from gcc. Here's +## why we pick this rather obscure method: +## - Don't want to use -MD because we'd like the dependencies to end +## up in a subdir. Having to rename by hand is ugly. +## (We might end up doing this anyway to support other compilers.) +## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like +## -MM, not -M (despite what the docs say). Also, it might not be +## supported by the other compilers which use the 'gcc' depmode. +## - Using -M directly means running the compiler twice (even worse +## than renaming). + # No compiler-specific flag was set earlier in the script, so default + # to -MD, handed to the preprocessor through -Wp, below. + if test -z "$gccflag"; then + gccflag=-MD, + fi + "$@" -Wp,"$gccflag$tmpdepfile" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The second -e expression handles DOS-style file names with drive + # letters. + sed -e 's/^[^:]*: / /' \ + -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" +## This next piece of magic avoids the "deleted header file" problem. +## The problem is that when a header file which appears in a .P file +## is deleted, the dependency causes make to die (because there is +## typically no way to rebuild the header). We avoid this by adding +## dummy dependencies for each header file. Too bad gcc doesn't do +## this for us directly. +## Some versions of gcc put a space before the ':'. 
On the theory +## that the space means something, we add a space to the output as +## well. hp depmode also adds that space, but also prefixes the VPATH +## to the object. Take care to not repeat it in the output. +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +sgi) + if test "$libtool" = yes; then + "$@" "-Wp,-MDupdate,$tmpdepfile" + else + "$@" -MDupdate "$tmpdepfile" + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + + if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files + echo "$object : \\" > "$depfile" + # Clip off the initial element (the dependent). Don't try to be + # clever and replace this with sed code, as IRIX sed won't handle + # lines with more than a fixed number of characters (4096 in + # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; + # the IRIX cc adds comments like '#:fec' to the end of the + # dependency line. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \ + | tr "$nl" ' ' >> "$depfile" + echo >> "$depfile" + # The second pass generates a dummy entry for each header file. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ + >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" + ;; + +xlc) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. 
+ exit 1 + ;; + +aix) + # The C for AIX Compiler uses -M and outputs the dependencies + # in a .u file. In older versions, this file always lives in the + # current directory. Also, the AIX compiler puts '$object:' at the + # start of each line; $object doesn't have directory information. + # Version 6 uses the directory in both cases. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.u + tmpdepfile2=$base.u + tmpdepfile3=$dir.libs/$base.u + "$@" -Wc,-M + else + tmpdepfile1=$dir$base.u + tmpdepfile2=$dir$base.u + tmpdepfile3=$dir$base.u + "$@" -M + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + aix_post_process_depfile + ;; + +tcc) + # tcc (Tiny C Compiler) understand '-MD -MF file' since version 0.9.26 + # FIXME: That version still under development at the moment of writing. + # Make that this statement remains true also for stable, released + # versions. + # It will wrap lines (doesn't matter whether long or short) with a + # trailing '\', as in: + # + # foo.o : \ + # foo.c \ + # foo.h \ + # + # It will put a trailing '\' even on the last line, and will use leading + # spaces rather than leading tabs (at least since its commit 0394caf7 + # "Emit spaces for -MD"). + "$@" -MD -MF "$tmpdepfile" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'. + # We have to change lines of the first kind to '$object: \'. + sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile" + # And for each line of the second kind, we have to emit a 'dep.h:' + # dummy dependency, to avoid the deleted-header problem. 
+ sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile" + rm -f "$tmpdepfile" + ;; + +## The order of this option in the case statement is important, since the +## shell code in configure will try each of these formats in the order +## listed in this file. A plain '-MD' option would be understood by many +## compilers, so we must ensure this comes after the gcc and icc options. +pgcc) + # Portland's C compiler understands '-MD'. + # Will always output deps to 'file.d' where file is the root name of the + # source file under compilation, even if file resides in a subdirectory. + # The object file name does not affect the name of the '.d' file. + # pgcc 10.2 will output + # foo.o: sub/foo.c sub/foo.h + # and will wrap long lines using '\' : + # foo.o: sub/foo.c ... \ + # sub/foo.h ... \ + # ... + set_dir_from "$object" + # Use the source, not the object, to determine the base name, since + # that's sadly what pgcc will do too. + set_base_from "$source" + tmpdepfile=$base.d + + # For projects that build the same source file twice into different object + # files, the pgcc approach of using the *source* file root name can cause + # problems in parallel builds. Use a locking strategy to avoid stomping on + # the same $tmpdepfile. + lockdir=$base.d-lock + trap " + echo '$0: caught signal, cleaning up...' >&2 + rmdir '$lockdir' + exit 1 + " 1 2 13 15 + numtries=100 + i=$numtries + while test $i -gt 0; do + # mkdir is a portable test-and-set. + if mkdir "$lockdir" 2>/dev/null; then + # This process acquired the lock. + "$@" -MD + stat=$? + # Release the lock. + rmdir "$lockdir" + break + else + # If the lock is being held by a different process, wait + # until the winning process is done or we timeout. 
+ while test -d "$lockdir" && test $i -gt 0; do + sleep 1 + i=`expr $i - 1` + done + fi + i=`expr $i - 1` + done + trap - 1 2 13 15 + if test $i -le 0; then + echo "$0: failed to acquire lock after $numtries attempts" >&2 + echo "$0: check lockdir '$lockdir'" >&2 + exit 1 + fi + + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each line is of the form `foo.o: dependent.h', + # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp2) + # The "hp" stanza above does not work with aCC (C++) and HP's ia64 + # compilers, which have integrated preprocessors. The correct option + # to use with these is +Maked; it writes dependencies to a file named + # 'foo.d', which lands next to the object file, wherever that + # happens to be. + # Much of this is similar to the tru64 case; see comments there. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir.libs/$base.d + "$@" -Wc,+Maked + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + "$@" +Maked + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" + do + test -f "$tmpdepfile" && break + done + if test -f "$tmpdepfile"; then + sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile" + # Add 'dependent.h:' lines. 
+ sed -ne '2,${ + s/^ *// + s/ \\*$// + s/$/:/ + p + }' "$tmpdepfile" >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" "$tmpdepfile2" + ;; + +tru64) + # The Tru64 compiler uses -MD to generate dependencies as a side + # effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'. + # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put + # dependencies in 'foo.d' instead, so we check for that too. + # Subdirectories are respected. + set_dir_from "$object" + set_base_from "$object" + + if test "$libtool" = yes; then + # Libtool generates 2 separate objects for the 2 libraries. These + # two compilations output dependencies in $dir.libs/$base.o.d and + # in $dir$base.o.d. We have to check for both files, because + # one of the two compilations can be disabled. We should prefer + # $dir$base.o.d over $dir.libs/$base.o.d because the latter is + # automatically cleaned when .libs/ is deleted, while ignoring + # the former would cause a distcleancheck panic. + tmpdepfile1=$dir$base.o.d # libtool 1.5 + tmpdepfile2=$dir.libs/$base.o.d # Likewise. + tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504 + "$@" -Wc,-MD + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + tmpdepfile3=$dir$base.d + "$@" -MD + fi + + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + # Same post-processing that is required for AIX mode. + aix_post_process_depfile + ;; + +msvc7) + if test "$libtool" = yes; then + showIncludes=-Wc,-showIncludes + else + showIncludes=-showIncludes + fi + "$@" $showIncludes > "$tmpdepfile" + stat=$? 
+ grep -v '^Note: including file: ' "$tmpdepfile" + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The first sed program below extracts the file names and escapes + # backslashes for cygpath. The second sed program outputs the file + # name when reading, but also accumulates all include files in the + # hold buffer in order to output them again at the end. This only + # works with sed implementations that can handle large buffers. + sed < "$tmpdepfile" -n ' +/^Note: including file: *\(.*\)/ { + s//\1/ + s/\\/\\\\/g + p +}' | $cygpath_u | sort -u | sed -n ' +s/ /\\ /g +s/\(.*\)/'"$tab"'\1 \\/p +s/.\(.*\) \\/\1:/ +H +$ { + s/.*/'"$tab"'/ + G + p +}' >> "$depfile" + echo >> "$depfile" # make sure the fragment doesn't end with a backslash + rm -f "$tmpdepfile" + ;; + +msvc7msys) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +#nosideeffect) + # This comment above is used by automake to tell side-effect + # dependency tracking mechanisms from slower ones. + +dashmstdout) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + test -z "$dashmflag" && dashmflag=-M + # Require at least two characters before searching for ':' + # in the target name. This is to cope with DOS-style filenames: + # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise. 
+ "$@" $dashmflag | + sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this sed invocation + # correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +dashXmstdout) + # This case only exists to satisfy depend.m4. It is never actually + # run, as this mode is specially recognized in the preamble. + exit 1 + ;; + +makedepend) + "$@" || exit $? + # Remove any Libtool call + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + # X makedepend + shift + cleared=no eat=no + for arg + do + case $cleared in + no) + set ""; shift + cleared=yes ;; + esac + if test $eat = yes; then + eat=no + continue + fi + case "$arg" in + -D*|-I*) + set fnord "$@" "$arg"; shift ;; + # Strip any option that makedepend may not understand. Remove + # the object too, otherwise makedepend will parse it as a source file. + -arch) + eat=yes ;; + -*|$object) + ;; + *) + set fnord "$@" "$arg"; shift ;; + esac + done + obj_suffix=`echo "$object" | sed 's/^.*\././'` + touch "$tmpdepfile" + ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" + rm -f "$depfile" + # makedepend may prepend the VPATH from the source file name to the object. + # No need to regex-escape $object, excess matching of '.' is harmless. + sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process the last invocation + # correctly. Breaking it into two sed invocations is a workaround. 
+ sed '1,2d' "$tmpdepfile" \ + | tr ' ' "$nl" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" "$tmpdepfile".bak + ;; + +cpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + "$@" -E \ + | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + | sed '$ s: \\$::' > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + cat < "$tmpdepfile" >> "$depfile" + sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvisualcpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + IFS=" " + for arg + do + case "$arg" in + -o) + shift + ;; + $object) + shift + ;; + "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") + set fnord "$@" + shift + shift + ;; + *) + set fnord "$@" "$arg" + shift + shift + ;; + esac + done + "$@" -E 2>/dev/null | + sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile" + echo "$tab" >> "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvcmsys) + # This case exists only to let depend.m4 do its work. 
It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +none) + exec "$@" + ;; + +*) + echo "Unknown depmode $depmode" 1>&2 + exit 1 + ;; +esac + +exit 0 + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/pcre2/install-sh b/pcre2/install-sh new file mode 100755 index 000000000..20d8b2eae --- /dev/null +++ b/pcre2/install-sh @@ -0,0 +1,529 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2018-03-11.20; # UTC + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. +# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# 'make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. + +tab=' ' +nl=' +' +IFS=" $tab$nl" + +# Set DOITPROG to "echo" to test this script. + +doit=${DOITPROG-} +doit_exec=${doit:-exec} + +# Put in absolute file names if you don't have them in your path; +# or use environment vars. + +chgrpprog=${CHGRPPROG-chgrp} +chmodprog=${CHMODPROG-chmod} +chownprog=${CHOWNPROG-chown} +cmpprog=${CMPPROG-cmp} +cpprog=${CPPROG-cp} +mkdirprog=${MKDIRPROG-mkdir} +mvprog=${MVPROG-mv} +rmprog=${RMPROG-rm} +stripprog=${STRIPPROG-strip} + +posix_mkdir= + +# Desired mode of installed file. +mode=0755 + +chgrpcmd= +chmodcmd=$chmodprog +chowncmd= +mvcmd=$mvprog +rmcmd="$rmprog -f" +stripcmd= + +src= +dst= +dir_arg= +dst_arg= + +copy_on_change=false +is_target_a_directory=possibly + +usage="\ +Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: + --help display this help and exit. + --version display version info and exit. 
+ + -c (ignored) + -C install only if different (preserve the last data modification time) + -d create directories instead of installing files. + -g GROUP $chgrpprog installed files to GROUP. + -m MODE $chmodprog installed files to MODE. + -o USER $chownprog installed files to USER. + -s $stripprog installed files. + -t DIRECTORY install into DIRECTORY. + -T report an error if DSTFILE is a directory. + +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG + RMPROG STRIPPROG +" + +while test $# -ne 0; do + case $1 in + -c) ;; + + -C) copy_on_change=true;; + + -d) dir_arg=true;; + + -g) chgrpcmd="$chgrpprog $2" + shift;; + + --help) echo "$usage"; exit $?;; + + -m) mode=$2 + case $mode in + *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*) + echo "$0: invalid mode: $mode" >&2 + exit 1;; + esac + shift;; + + -o) chowncmd="$chownprog $2" + shift;; + + -s) stripcmd=$stripprog;; + + -t) + is_target_a_directory=always + dst_arg=$2 + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + shift;; + + -T) is_target_a_directory=never;; + + --version) echo "$0 $scriptversion"; exit $?;; + + --) shift + break;; + + -*) echo "$0: invalid option: $1" >&2 + exit 1;; + + *) break;; + esac + shift +done + +# We allow the use of options -d and -T together, by making -d +# take the precedence; this is for compatibility with GNU install. + +if test -n "$dir_arg"; then + if test -n "$dst_arg"; then + echo "$0: target directory not allowed when installing a directory." >&2 + exit 1 + fi +fi + +if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then + # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + # Otherwise, the last argument is the destination. Remove it from $@. + for arg + do + if test -n "$dst_arg"; then + # $@ is not empty: it contains at least $arg. 
+ set fnord "$@" "$dst_arg" + shift # fnord + fi + shift # arg + dst_arg=$arg + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + done +fi + +if test $# -eq 0; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call 'install-sh -d' without argument. + # This can happen when creating conditional directories. + exit 0 +fi + +if test -z "$dir_arg"; then + if test $# -gt 1 || test "$is_target_a_directory" = always; then + if test ! -d "$dst_arg"; then + echo "$0: $dst_arg: Is not a directory." >&2 + exit 1 + fi + fi +fi + +if test -z "$dir_arg"; then + do_exit='(exit $ret); exit $ret' + trap "ret=129; $do_exit" 1 + trap "ret=130; $do_exit" 2 + trap "ret=141; $do_exit" 13 + trap "ret=143; $do_exit" 15 + + # Set umask so as not to create temps with too-generous modes. + # However, 'strip' requires both read and write access to temps. + case $mode in + # Optimize common cases. + *644) cp_umask=133;; + *755) cp_umask=22;; + + *[0-7]) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw='% 200' + fi + cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; + *) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw=,u+rw + fi + cp_umask=$mode$u_plus_rw;; + esac +fi + +for src +do + # Protect names problematic for 'test' and other utilities. + case $src in + -* | [=\(\)!]) src=./$src;; + esac + + if test -n "$dir_arg"; then + dst=$src + dstdir=$dst + test -d "$dstdir" + dstdir_status=$? + else + + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." >&2 + exit 1 + fi + + if test -z "$dst_arg"; then + echo "$0: no destination specified." 
>&2 + exit 1 + fi + dst=$dst_arg + + # If destination is a directory, append the input filename. + if test -d "$dst"; then + if test "$is_target_a_directory" = never; then + echo "$0: $dst_arg: Is a directory" >&2 + exit 1 + fi + dstdir=$dst + dstbase=`basename "$src"` + case $dst in + */) dst=$dst$dstbase;; + *) dst=$dst/$dstbase;; + esac + dstdir_status=0 + else + dstdir=`dirname "$dst"` + test -d "$dstdir" + dstdir_status=$? + fi + fi + + case $dstdir in + */) dstdirslash=$dstdir;; + *) dstdirslash=$dstdir/;; + esac + + obsolete_mkdir_used=false + + if test $dstdir_status != 0; then + case $posix_mkdir in + '') + # Create intermediate dirs using mode 755 as modified by the umask. + # This is like FreeBSD 'install' as of 1997-10-28. + umask=`umask` + case $stripcmd.$umask in + # Optimize common cases. + *[2367][2367]) mkdir_umask=$umask;; + .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; + + *[0-7]) + mkdir_umask=`expr $umask + 22 \ + - $umask % 100 % 40 + $umask % 20 \ + - $umask % 10 % 4 + $umask % 2 + `;; + *) mkdir_umask=$umask,go-w;; + esac + + # With -d, create the new directory with the user-specified mode. + # Otherwise, rely on $mkdir_umask. + if test -n "$dir_arg"; then + mkdir_mode=-m$mode + else + mkdir_mode= + fi + + posix_mkdir=false + case $umask in + *[123567][0-7][0-7]) + # POSIX mkdir -p sets u+wx bits regardless of umask, which + # is incompatible with FreeBSD 'install' when (umask & 300) != 0. + ;; + *) + # Note that $RANDOM variable is not portable (e.g. dash); Use it + # here however when possible just to lower collision chance. + tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ + + trap 'ret=$?; rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null; exit $ret' 0 + + # Because "mkdir -p" follows existing symlinks and we likely work + # directly in world-writeable /tmp, make sure that the '$tmpdir' + # directory is successfully created first before we actually test + # 'mkdir -p' feature. 
+ if (umask $mkdir_umask && + $mkdirprog $mkdir_mode "$tmpdir" && + exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1 + then + if test -z "$dir_arg" || { + # Check for POSIX incompatibilities with -m. + # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or + # other-writable bit of parent directory when it shouldn't. + # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. + test_tmpdir="$tmpdir/a" + ls_ld_tmpdir=`ls -ld "$test_tmpdir"` + case $ls_ld_tmpdir in + d????-?r-*) different_mode=700;; + d????-?--*) different_mode=755;; + *) false;; + esac && + $mkdirprog -m$different_mode -p -- "$test_tmpdir" && { + ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"` + test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" + } + } + then posix_mkdir=: + fi + rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" + else + # Remove any dirs left behind by ancient mkdir implementations. + rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null + fi + trap '' 0;; + esac;; + esac + + if + $posix_mkdir && ( + umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" + ) + then : + else + + # The umask is ridiculous, or mkdir does not conform to POSIX, + # or it failed possibly due to a race condition. Create the + # directory the slow way, step by step, checking for races as we go. + + case $dstdir in + /*) prefix='/';; + [-=\(\)!]*) prefix='./';; + *) prefix='';; + esac + + oIFS=$IFS + IFS=/ + set -f + set fnord $dstdir + shift + set +f + IFS=$oIFS + + prefixes= + + for d + do + test X"$d" = X && continue + + prefix=$prefix$d + if test -d "$prefix"; then + prefixes= + else + if $posix_mkdir; then + # Retry 'mkdir -p' under the restricted umask. This must invoke + # the umask built-in: a 'umask=VALUE' assignment would only set a + # shell variable and leave the process umask untouched. + (umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break + # Don't fail if two instances are running concurrently. 
+ test -d "$prefix" || exit 1 + else + case $prefix in + *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; + *) qprefix=$prefix;; + esac + prefixes="$prefixes '$qprefix'" + fi + fi + prefix=$prefix/ + done + + if test -n "$prefixes"; then + # Don't fail if two instances are running concurrently. + (umask $mkdir_umask && + eval "\$doit_exec \$mkdirprog $prefixes") || + test -d "$dstdir" || exit 1 + obsolete_mkdir_used=true + fi + fi + fi + + if test -n "$dir_arg"; then + { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && + { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || + test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 + else + + # Make a couple of temp file names in the proper directory. + dsttmp=${dstdirslash}_inst.$$_ + rmtmp=${dstdirslash}_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 + + # Copy the file name to the temp name. + (umask $cp_umask && + { test -z "$stripcmd" || { + # Create $dsttmp read-write so that cp doesn't create it read-only, + # which would cause strip to fail. + if test -z "$doit"; then + : >"$dsttmp" # No need to fork-exec 'touch'. + else + $doit touch "$dsttmp" + fi + } + } && + $doit_exec $cpprog "$src" "$dsttmp") && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. + # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && + { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && + { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && + + # If -C, don't bother to copy if it wouldn't change the file. 
+ if $copy_on_change && + old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && + new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && + set -f && + set X $old && old=:$2:$4:$5:$6 && + set X $new && new=:$2:$4:$5:$6 && + set +f && + test "$old" = "$new" && + $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 + then + rm -f "$dsttmp" + else + # Rename the file to the real destination. + $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || + + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + { + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + test ! -f "$dst" || + $doit $rmcmd -f "$dst" 2>/dev/null || + { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && + { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } + } || + { echo "$0: cannot unlink or rename $dst" >&2 + (exit 1); exit 1 + } + } && + + # Now rename the file to the real destination. 
+ $doit $mvcmd "$dsttmp" "$dst" + } + fi || exit 1 + + trap '' 0 + fi +done + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/pcre2/libpcre2-16.pc.in b/pcre2/libpcre2-16.pc.in index 978040dfe..bacb46651 100644 --- a/pcre2/libpcre2-16.pc.in +++ b/pcre2/libpcre2-16.pc.in @@ -8,6 +8,6 @@ includedir=@includedir@ Name: libpcre2-16 Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 16 bit character support Version: @PACKAGE_VERSION@ -Libs: -L${libdir} -lpcre2-16 +Libs: -L${libdir} -lpcre2-16@LIB_POSTFIX@ Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@ Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@ diff --git a/pcre2/libpcre2-32.pc.in b/pcre2/libpcre2-32.pc.in index d8fb18713..06241f066 100644 --- a/pcre2/libpcre2-32.pc.in +++ b/pcre2/libpcre2-32.pc.in @@ -8,6 +8,6 @@ includedir=@includedir@ Name: libpcre2-32 Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 32 bit character support Version: @PACKAGE_VERSION@ -Libs: -L${libdir} -lpcre2-32 +Libs: -L${libdir} -lpcre2-32@LIB_POSTFIX@ Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@ Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@ diff --git a/pcre2/libpcre2-8.pc.in b/pcre2/libpcre2-8.pc.in index 5c872d0b3..246bb9ea3 100644 --- a/pcre2/libpcre2-8.pc.in +++ b/pcre2/libpcre2-8.pc.in @@ -8,6 +8,6 @@ includedir=@includedir@ Name: libpcre2-8 Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 8 bit character support Version: @PACKAGE_VERSION@ -Libs: -L${libdir} -lpcre2-8 +Libs: -L${libdir} -lpcre2-8@LIB_POSTFIX@ Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@ Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@ diff --git a/pcre2/libpcre2-posix.pc.in b/pcre2/libpcre2-posix.pc.in index 96415558e..758c30688 100644 --- a/pcre2/libpcre2-posix.pc.in +++ b/pcre2/libpcre2-posix.pc.in 
@@ -8,6 +8,6 @@ includedir=@includedir@ Name: libpcre2-posix Description: Posix compatible interface to libpcre2-8 Version: @PACKAGE_VERSION@ -Libs: -L${libdir} -lpcre2-posix +Libs: -L${libdir} -lpcre2-posix@LIB_POSTFIX@ Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@ Requires.private: libpcre2-8 diff --git a/pcre2/missing b/pcre2/missing new file mode 100755 index 000000000..8d0eaad25 --- /dev/null +++ b/pcre2/missing @@ -0,0 +1,215 @@ +#! /bin/sh +# Common wrapper for a few potentially missing GNU programs. + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 1996-2020 Free Software Foundation, Inc. +# Originally written by Fran,cois Pinard , 1996. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +if test $# -eq 0; then + echo 1>&2 "Try '$0 --help' for more information" + exit 1 +fi + +case $1 in + + --is-lightweight) + # Used by our autoconf macros to check whether the available missing + # script is modern enough. + exit 0 + ;; + + --run) + # Back-compat with the calling convention used by older automake. + shift + ;; + + -h|--h|--he|--hel|--help) + echo "\ +$0 [OPTION]... PROGRAM [ARGUMENT]... 
+ +Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due +to PROGRAM being missing or too old. + +Options: + -h, --help display this help and exit + -v, --version output version information and exit + +Supported PROGRAM values: + aclocal autoconf autoheader autom4te automake makeinfo + bison yacc flex lex help2man + +Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and +'g' are ignored when checking the name. + +Send bug reports to <bug-automake@gnu.org>." + exit $? + ;; + + -v|--v|--ve|--ver|--vers|--versi|--versio|--version) + echo "missing $scriptversion (GNU Automake)" + exit $? + ;; + + -*) + echo 1>&2 "$0: unknown '$1' option" + echo 1>&2 "Try '$0 --help' for more information" + exit 1 + ;; + +esac + +# Run the given program, remember its exit status. +"$@"; st=$? + +# If it succeeded, we are done. +test $st -eq 0 && exit 0 + +# Also exit now if it failed (or wasn't found), and '--version' was +# passed; such an option is passed most likely to detect whether the +# program is present and works. +case $2 in --version|--help) exit $st;; esac + +# Exit code 63 means version mismatch. This often happens when the user +# tries to use an ancient version of a tool on a file that requires a +# minimum version. +if test $st -eq 63; then + msg="probably too old" +elif test $st -eq 127; then + # Program was missing. + msg="missing on your system" +else + # Program was found and executed, but failed. Give up. 
+ exit $st +fi + +perl_URL=https://www.perl.org/ +flex_URL=https://github.com/westes/flex +gnu_software_URL=https://www.gnu.org/software + +program_details () +{ + case $1 in + aclocal|automake) + echo "The '$1' program is part of the GNU Automake package:" + echo "<$gnu_software_URL/automake>" + echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:" + echo "<$gnu_software_URL/autoconf>" + echo "<$gnu_software_URL/m4/>" + echo "<$perl_URL>" + ;; + autoconf|autom4te|autoheader) + echo "The '$1' program is part of the GNU Autoconf package:" + echo "<$gnu_software_URL/autoconf/>" + echo "It also requires GNU m4 and Perl in order to run:" + echo "<$gnu_software_URL/m4/>" + echo "<$perl_URL>" + ;; + esac +} + +give_advice () +{ + # Normalize program name to check for. + normalized_program=`echo "$1" | sed ' + s/^gnu-//; t + s/^gnu//; t + s/^g//; t'` + + printf '%s\n' "'$1' is $msg." + + configure_deps="'configure.ac' or m4 files included by 'configure.ac'" + case $normalized_program in + autoconf*) + echo "You should only need it if you modified 'configure.ac'," + echo "or m4 files included by it." + program_details 'autoconf' + ;; + autoheader*) + echo "You should only need it if you modified 'acconfig.h' or" + echo "$configure_deps." + program_details 'autoheader' + ;; + automake*) + echo "You should only need it if you modified 'Makefile.am' or" + echo "$configure_deps." + program_details 'automake' + ;; + aclocal*) + echo "You should only need it if you modified 'acinclude.m4' or" + echo "$configure_deps." + program_details 'aclocal' + ;; + autom4te*) + echo "You might have modified some maintainer files that require" + echo "the 'autom4te' program to be rebuilt." + program_details 'autom4te' + ;; + bison*|yacc*) + echo "You should only need it if you modified a '.y' file." + echo "You may want to install the GNU Bison package:" + echo "<$gnu_software_URL/bison/>" + ;; + lex*|flex*) + echo "You should only need it if you modified a '.l' file." 
+ echo "You may want to install the Fast Lexical Analyzer package:" + echo "<$flex_URL>" + ;; + help2man*) + echo "You should only need it if you modified a dependency" \ + "of a man page." + echo "You may want to install the GNU Help2man package:" + echo "<$gnu_software_URL/help2man/>" + ;; + makeinfo*) + echo "You should only need it if you modified a '.texi' file, or" + echo "any other file indirectly affecting the aspect of the manual." + echo "You might want to install the Texinfo package:" + echo "<$gnu_software_URL/texinfo/>" + echo "The spurious makeinfo call might also be the consequence of" + echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might" + echo "want to install GNU make:" + echo "<$gnu_software_URL/make/>" + ;; + *) + echo "You might have modified some files without having the proper" + echo "tools for further handling them. Check the 'README' file, it" + echo "often tells you about the needed prerequisites for installing" + echo "this package. You may also peek at any GNU archive site, in" + echo "case some other package contains this missing '$1' program." + ;; + esac +} + +give_advice "$1" | sed -e '1s/^/WARNING: /' \ + -e '2,$s/^/ /' >&2 + +# Propagate the correct exit status (expected to be 127 for a program +# not found, 63 for a program that failed due to version mismatch). 
+exit $st + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/pcre2/pcre2-config.in b/pcre2/pcre2-config.in index 74271c088..bacea876e 100644 --- a/pcre2/pcre2-config.in +++ b/pcre2/pcre2-config.in @@ -86,28 +86,28 @@ while test $# -gt 0; do ;; --libs-posix) if test @enable_pcre2_8@ = yes ; then - echo $libS$libR -lpcre2-posix -lpcre2-8 + echo $libS$libR -lpcre2-posix@LIB_POSTFIX@ -lpcre2-8@LIB_POSTFIX@ else echo "${usage}" 1>&2 fi ;; --libs8) if test @enable_pcre2_8@ = yes ; then - echo $libS$libR -lpcre2-8 + echo $libS$libR -lpcre2-8@LIB_POSTFIX@ else echo "${usage}" 1>&2 fi ;; --libs16) if test @enable_pcre2_16@ = yes ; then - echo $libS$libR -lpcre2-16 + echo $libS$libR -lpcre2-16@LIB_POSTFIX@ else echo "${usage}" 1>&2 fi ;; --libs32) if test @enable_pcre2_32@ = yes ; then - echo $libS$libR -lpcre2-32 + echo $libS$libR -lpcre2-32@LIB_POSTFIX@ else echo "${usage}" 1>&2 fi diff --git a/pcre2/src/config.h.generic b/pcre2/src/config.h.generic new file mode 100644 index 000000000..10f410479 --- /dev/null +++ b/pcre2/src/config.h.generic @@ -0,0 +1,381 @@ +/* src/config.h. Generated from config.h.in by configure. */ +/* src/config.h.in. Generated from configure.ac by autoheader. */ + +/* PCRE2 is written in Standard C, but there are a few non-standard things it +can cope with, allowing it to run on SunOS4 and other "close to standard" +systems. + +In environments that support the GNU autotools, config.h.in is converted into +config.h by the "configure" script. In environments that use CMake, +config-cmake.in is converted into config.h. If you are going to build PCRE2 "by +hand" without using "configure" or CMake, you should copy the distributed +config.h.generic to config.h, and edit the macro definitions to be the way you +need them. 
You must then add -DHAVE_CONFIG_H to all of your compile commands, +so that config.h is included at the start of every source. + +Alternatively, you can avoid editing by using -D on the compiler command line +to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H, +but if you do, default values will be taken from config.h for non-boolean +macros that are not defined on the command line. + +Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be +defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All +such macros are listed as a commented #undef in config.h.generic. Macros such +as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are +surrounded by #ifndef/#endif lines so that the value can be overridden by -D. + +PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if +HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make +sure both macros are undefined; an emulation function will then be used. */ + +/* By default, the \R escape sequence matches any Unicode line ending + character or sequence of characters. If BSR_ANYCRLF is defined (to any + value), this is changed so that backslash-R matches only CR, LF, or CRLF. + The build-time default can be overridden by the user of PCRE2 at runtime. + */ +/* #undef BSR_ANYCRLF */ + +/* Define to any value to disable the use of the z and t modifiers in + formatting settings such as %zu or %td (this is rarely needed). */ +/* #undef DISABLE_PERCENT_ZT */ + +/* If you are compiling for a system that uses EBCDIC instead of ASCII + character codes, define this macro to any value. When EBCDIC is set, PCRE2 + assumes that all input strings are in EBCDIC. If you do not define this + macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It + is not possible to build a version of PCRE2 that supports both EBCDIC and + UTF-8/16/32. 
*/ +/* #undef EBCDIC */ + +/* In an EBCDIC environment, define this macro to any value to arrange for the + NL character to be 0x25 instead of the default 0x15. NL plays the role that + LF does in an ASCII/Unicode environment. */ +/* #undef EBCDIC_NL25 */ + +/* Define this if your compiler supports __attribute__((uninitialized)) */ +/* #undef HAVE_ATTRIBUTE_UNINITIALIZED */ + +/* Define to 1 if you have the `bcopy' function. */ +/* #undef HAVE_BCOPY */ + +/* Define to 1 if you have the <bzlib.h> header file. */ +/* #undef HAVE_BZLIB_H */ + +/* Define to 1 if you have the <dirent.h> header file. */ +/* #undef HAVE_DIRENT_H */ + +/* Define to 1 if you have the <dlfcn.h> header file. */ +/* #undef HAVE_DLFCN_H */ + +/* Define to 1 if you have the <editline/readline.h> header file. */ +/* #undef HAVE_EDITLINE_READLINE_H */ + +/* Define to 1 if you have the <edit/readline/readline.h> header file. */ +/* #undef HAVE_EDIT_READLINE_READLINE_H */ + +/* Define to 1 if you have the <inttypes.h> header file. */ +/* #undef HAVE_INTTYPES_H */ + +/* Define to 1 if you have the <limits.h> header file. */ +/* #undef HAVE_LIMITS_H */ + +/* Define to 1 if you have the `memfd_create' function. */ +/* #undef HAVE_MEMFD_CREATE */ + +/* Define to 1 if you have the `memmove' function. */ +/* #undef HAVE_MEMMOVE */ + +/* Define to 1 if you have the <memory.h> header file. */ +/* #undef HAVE_MEMORY_H */ + +/* Define to 1 if you have the `mkostemp' function. */ +/* #undef HAVE_MKOSTEMP */ + +/* Define if you have POSIX threads libraries and header files. */ +/* #undef HAVE_PTHREAD */ + +/* Have PTHREAD_PRIO_INHERIT. */ +/* #undef HAVE_PTHREAD_PRIO_INHERIT */ + +/* Define to 1 if you have the <readline/history.h> header file. */ +/* #undef HAVE_READLINE_HISTORY_H */ + +/* Define to 1 if you have the <readline/readline.h> header file. */ +/* #undef HAVE_READLINE_READLINE_H */ + +/* Define to 1 if you have the `secure_getenv' function. */ +/* #undef HAVE_SECURE_GETENV */ + +/* Define to 1 if you have the <stdint.h> header file. */ +/* #undef HAVE_STDINT_H */ + +/* Define to 1 if you have the <stdlib.h> header file. 
*/ +/* #undef HAVE_STDLIB_H */ + +/* Define to 1 if you have the `strerror' function. */ +/* #undef HAVE_STRERROR */ + +/* Define to 1 if you have the <strings.h> header file. */ +/* #undef HAVE_STRINGS_H */ + +/* Define to 1 if you have the <string.h> header file. */ +/* #undef HAVE_STRING_H */ + +/* Define to 1 if you have the <sys/stat.h> header file. */ +/* #undef HAVE_SYS_STAT_H */ + +/* Define to 1 if you have the <sys/types.h> header file. */ +/* #undef HAVE_SYS_TYPES_H */ + +/* Define to 1 if you have the <sys/wait.h> header file. */ +/* #undef HAVE_SYS_WAIT_H */ + +/* Define to 1 if you have the <unistd.h> header file. */ +/* #undef HAVE_UNISTD_H */ + +/* Define to 1 if the compiler supports simple visibility declarations. */ +/* #undef HAVE_VISIBILITY */ + +/* Define to 1 if you have the <windows.h> header file. */ +/* #undef HAVE_WINDOWS_H */ + +/* Define to 1 if you have the <zlib.h> header file. */ +/* #undef HAVE_ZLIB_H */ + +/* This limits the amount of memory that may be used while matching a pattern. + It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply + to JIT matching. The value is in kibibytes (units of 1024 bytes). */ +#ifndef HEAP_LIMIT +#define HEAP_LIMIT 20000000 +#endif + +/* The value of LINK_SIZE determines the number of bytes used to store links + as offsets within the compiled regex. The default is 2, which allows for + compiled patterns up to 65535 code units long. This covers the vast + majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes + instead. This allows for longer patterns in extreme cases. */ +#ifndef LINK_SIZE +#define LINK_SIZE 2 +#endif + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +/* This is ignored unless you are using libtool. */ +#ifndef LT_OBJDIR +#define LT_OBJDIR ".libs/" +#endif + +/* The value of MATCH_LIMIT determines the default number of times the + pcre2_match() function can record a backtrack position during a single + matching attempt. The value is also used to limit a loop counter in + pcre2_dfa_match(). 
There is a runtime interface for setting a different + limit. The limit exists in order to catch runaway regular expressions that + take for ever to determine that they do not match. The default is set very + large so that it does not accidentally catch legitimate cases. */ +#ifndef MATCH_LIMIT +#define MATCH_LIMIT 10000000 +#endif + +/* The above limit applies to all backtracks, whether or not they are nested. + In some environments it is desirable to limit the nesting of backtracking + (that is, the depth of tree that is searched) more strictly, in order to + restrict the maximum amount of heap memory that is used. The value of + MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it + must be less than the value of MATCH_LIMIT. The default is to use the same + value as MATCH_LIMIT. There is a runtime method for setting a different + limit. In the case of pcre2_dfa_match(), this limit controls the depth of + the internal nested function calls that are used for pattern recursions, + lookarounds, and atomic groups. */ +#ifndef MATCH_LIMIT_DEPTH +#define MATCH_LIMIT_DEPTH MATCH_LIMIT +#endif + +/* This limit is parameterized just in case anybody ever wants to change it. + Care must be taken if it is increased, because it guards against integer + overflow caused by enormously large patterns. */ +#ifndef MAX_NAME_COUNT +#define MAX_NAME_COUNT 10000 +#endif + +/* This limit is parameterized just in case anybody ever wants to change it. + Care must be taken if it is increased, because it guards against integer + overflow caused by enormously large patterns. */ +#ifndef MAX_NAME_SIZE +#define MAX_NAME_SIZE 32 +#endif + +/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */ +/* #undef NEVER_BACKSLASH_C */ + +/* The value of NEWLINE_DEFAULT determines the default newline character + sequence. PCRE2 client programs can override this by selecting other values + at run time. 
The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5 + (ANYCRLF), and 6 (NUL). */ +#ifndef NEWLINE_DEFAULT +#define NEWLINE_DEFAULT 2 +#endif + +/* Name of package */ +#define PACKAGE "pcre2" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "PCRE2" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "PCRE2 10.36" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "pcre2" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "10.36" + +/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested + parentheses (of any kind) in a pattern. This limits the amount of system + stack that is used while compiling a pattern. */ +#ifndef PARENS_NEST_LIMIT +#define PARENS_NEST_LIMIT 250 +#endif + +/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by + pcre2grep to hold parts of the file it is searching. The buffer will be + expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing + very long lines. The actual amount of memory used by pcre2grep is three + times this number, because it allows for the buffering of "before" and + "after" lines. */ +#ifndef PCRE2GREP_BUFSIZE +#define PCRE2GREP_BUFSIZE 20480 +#endif + +/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer + used by pcre2grep to hold parts of the file it is searching. The actual + amount of memory used by pcre2grep is three times this number, because it + allows for the buffering of "before" and "after" lines. */ +#ifndef PCRE2GREP_MAX_BUFSIZE +#define PCRE2GREP_MAX_BUFSIZE 1048576 +#endif + +/* Define to any value to include debugging code. 
*/ +/* #undef PCRE2_DEBUG */ + +/* If you are compiling for a system other than a Unix-like system or + Win32, and it needs some magic to be inserted before the definition + of a function that is exported by the library, define this macro to + contain the relevant magic. If you do not define this macro, a suitable + __declspec value is used for Windows systems; in other environments + "extern" is used for a C compiler and "extern C" for a C++ compiler. + This macro appears at the start of every exported function that is part + of the external API. It does not appear on functions that are "external" + in the C sense, but which are internal to the library. */ +/* #undef PCRE2_EXP_DEFN */ + +/* Define to any value if linking statically (TODO: make nice with Libtool) */ +/* #undef PCRE2_STATIC */ + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +/* #undef PTHREAD_CREATE_JOINABLE */ + +/* Define to any non-zero number to enable support for SELinux compatible + executable memory allocator in JIT. Note that this will have no effect + unless SUPPORT_JIT is also defined. */ +/* #undef SLJIT_PROT_EXECUTABLE_ALLOCATOR */ + +/* Define to 1 if you have the ANSI C header files. */ +/* #undef STDC_HEADERS */ + +/* Define to any value to enable support for Just-In-Time compiling. */ +/* #undef SUPPORT_JIT */ + +/* Define to any value to allow pcre2grep to be linked with libbz2, so that it + is able to handle .bz2 files. */ +/* #undef SUPPORT_LIBBZ2 */ + +/* Define to any value to allow pcre2test to be linked with libedit. */ +/* #undef SUPPORT_LIBEDIT */ + +/* Define to any value to allow pcre2test to be linked with libreadline. */ +/* #undef SUPPORT_LIBREADLINE */ + +/* Define to any value to allow pcre2grep to be linked with libz, so that it + is able to handle .gz files. */ +/* #undef SUPPORT_LIBZ */ + +/* Define to any value to enable callout script support in pcre2grep. 
*/ +/* #undef SUPPORT_PCRE2GREP_CALLOUT */ + +/* Define to any value to enable fork support in pcre2grep callout scripts. + This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined. + */ +/* #undef SUPPORT_PCRE2GREP_CALLOUT_FORK */ + +/* Define to any value to enable JIT support in pcre2grep. Note that this will + have no effect unless SUPPORT_JIT is also defined. */ +/* #undef SUPPORT_PCRE2GREP_JIT */ + +/* Define to any value to enable the 16 bit PCRE2 library. */ +/* #undef SUPPORT_PCRE2_16 */ + +/* Define to any value to enable the 32 bit PCRE2 library. */ +/* #undef SUPPORT_PCRE2_32 */ + +/* Define to any value to enable the 8 bit PCRE2 library. */ +/* #undef SUPPORT_PCRE2_8 */ + +/* Define to any value to enable support for Unicode and UTF encoding. This + will work even in an EBCDIC environment, but it is incompatible with the + EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or* + ASCII/Unicode, but not both at once. */ +/* #undef SUPPORT_UNICODE */ + +/* Define to any value for valgrind support to find invalid memory reads. */ +/* #undef SUPPORT_VALGRIND */ + +/* Enable extensions on AIX 3, Interix. */ +#ifndef _ALL_SOURCE +# define _ALL_SOURCE 1 +#endif +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif +/* Enable threading extensions on Solaris. */ +#ifndef _POSIX_PTHREAD_SEMANTICS +# define _POSIX_PTHREAD_SEMANTICS 1 +#endif +/* Enable extensions on HP NonStop. */ +#ifndef _TANDEM_SOURCE +# define _TANDEM_SOURCE 1 +#endif +/* Enable general extensions on Solaris. */ +#ifndef __EXTENSIONS__ +# define __EXTENSIONS__ 1 +#endif + +/* Version number of package */ +#define VERSION "10.36" + +/* Define to 1 if on MINIX. */ +/* #undef _MINIX */ + +/* Define to 2 if the system does not provide POSIX.1 features except with + this defined. */ +/* #undef _POSIX_1_SOURCE */ + +/* Define to 1 if you need to in order for `stat' and other things to work. 
*/ +/* #undef _POSIX_SOURCE */ + +/* Define to empty if `const' does not conform to ANSI C. */ +/* #undef const */ + +/* Define to the type of a signed integer type of width exactly 64 bits if + such a type exists and the standard includes do not define it. */ +/* #undef int64_t */ + +/* Define to `unsigned int' if <sys/types.h> does not define. */ +/* #undef size_t */ diff --git a/pcre2/src/config.h.in b/pcre2/src/config.h.in new file mode 100644 index 000000000..d42cc0053 --- /dev/null +++ b/pcre2/src/config.h.in @@ -0,0 +1,369 @@ +/* src/config.h.in. Generated from configure.ac by autoheader. */ + + +/* PCRE2 is written in Standard C, but there are a few non-standard things it +can cope with, allowing it to run on SunOS4 and other "close to standard" +systems. + +In environments that support the GNU autotools, config.h.in is converted into +config.h by the "configure" script. In environments that use CMake, +config-cmake.in is converted into config.h. If you are going to build PCRE2 "by +hand" without using "configure" or CMake, you should copy the distributed +config.h.generic to config.h, and edit the macro definitions to be the way you +need them. You must then add -DHAVE_CONFIG_H to all of your compile commands, +so that config.h is included at the start of every source. + +Alternatively, you can avoid editing by using -D on the compiler command line +to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H, +but if you do, default values will be taken from config.h for non-boolean +macros that are not defined on the command line. + +Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be +defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All +such macros are listed as a commented #undef in config.h.generic. Macros such +as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are +surrounded by #ifndef/#endif lines so that the value can be overridden by -D. 
+ +PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if +HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make +sure both macros are undefined; an emulation function will then be used. */ + +/* By default, the \R escape sequence matches any Unicode line ending + character or sequence of characters. If BSR_ANYCRLF is defined (to any + value), this is changed so that backslash-R matches only CR, LF, or CRLF. + The build-time default can be overridden by the user of PCRE2 at runtime. + */ +#undef BSR_ANYCRLF + +/* Define to any value to disable the use of the z and t modifiers in + formatting settings such as %zu or %td (this is rarely needed). */ +#undef DISABLE_PERCENT_ZT + +/* If you are compiling for a system that uses EBCDIC instead of ASCII + character codes, define this macro to any value. When EBCDIC is set, PCRE2 + assumes that all input strings are in EBCDIC. If you do not define this + macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It + is not possible to build a version of PCRE2 that supports both EBCDIC and + UTF-8/16/32. */ +#undef EBCDIC + +/* In an EBCDIC environment, define this macro to any value to arrange for the + NL character to be 0x25 instead of the default 0x15. NL plays the role that + LF does in an ASCII/Unicode environment. */ +#undef EBCDIC_NL25 + +/* Define this if your compiler supports __attribute__((uninitialized)) */ +#undef HAVE_ATTRIBUTE_UNINITIALIZED + +/* Define to 1 if you have the `bcopy' function. */ +#undef HAVE_BCOPY + +/* Define to 1 if you have the <bzlib.h> header file. */ +#undef HAVE_BZLIB_H + +/* Define to 1 if you have the <dirent.h> header file. */ +#undef HAVE_DIRENT_H + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if you have the <editline/readline.h> header file. */ +#undef HAVE_EDITLINE_READLINE_H + +/* Define to 1 if you have the <edit/readline/readline.h> header file. */ +#undef HAVE_EDIT_READLINE_READLINE_H + +/* Define to 1 if you have the <inttypes.h> header file. 
*/ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the <limits.h> header file. */ +#undef HAVE_LIMITS_H + +/* Define to 1 if you have the `memfd_create' function. */ +#undef HAVE_MEMFD_CREATE + +/* Define to 1 if you have the `memmove' function. */ +#undef HAVE_MEMMOVE + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have the `mkostemp' function. */ +#undef HAVE_MKOSTEMP + +/* Define if you have POSIX threads libraries and header files. */ +#undef HAVE_PTHREAD + +/* Have PTHREAD_PRIO_INHERIT. */ +#undef HAVE_PTHREAD_PRIO_INHERIT + +/* Define to 1 if you have the <readline/history.h> header file. */ +#undef HAVE_READLINE_HISTORY_H + +/* Define to 1 if you have the <readline/readline.h> header file. */ +#undef HAVE_READLINE_READLINE_H + +/* Define to 1 if you have the `secure_getenv' function. */ +#undef HAVE_SECURE_GETENV + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the `strerror' function. */ +#undef HAVE_STRERROR + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the <sys/wait.h> header file. */ +#undef HAVE_SYS_WAIT_H + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if the compiler supports simple visibility declarations. */ +#undef HAVE_VISIBILITY + +/* Define to 1 if you have the <windows.h> header file. */ +#undef HAVE_WINDOWS_H + +/* Define to 1 if you have the <zlib.h> header file. */ +#undef HAVE_ZLIB_H + +/* This limits the amount of memory that may be used while matching a pattern. + It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply + to JIT matching. The value is in kibibytes (units of 1024 bytes). 
*/ +#undef HEAP_LIMIT + +/* The value of LINK_SIZE determines the number of bytes used to store links + as offsets within the compiled regex. The default is 2, which allows for + compiled patterns up to 65535 code units long. This covers the vast + majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes + instead. This allows for longer patterns in extreme cases. */ +#undef LINK_SIZE + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#undef LT_OBJDIR + +/* The value of MATCH_LIMIT determines the default number of times the + pcre2_match() function can record a backtrack position during a single + matching attempt. The value is also used to limit a loop counter in + pcre2_dfa_match(). There is a runtime interface for setting a different + limit. The limit exists in order to catch runaway regular expressions that + take for ever to determine that they do not match. The default is set very + large so that it does not accidentally catch legitimate cases. */ +#undef MATCH_LIMIT + +/* The above limit applies to all backtracks, whether or not they are nested. + In some environments it is desirable to limit the nesting of backtracking + (that is, the depth of tree that is searched) more strictly, in order to + restrict the maximum amount of heap memory that is used. The value of + MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it + must be less than the value of MATCH_LIMIT. The default is to use the same + value as MATCH_LIMIT. There is a runtime method for setting a different + limit. In the case of pcre2_dfa_match(), this limit controls the depth of + the internal nested function calls that are used for pattern recursions, + lookarounds, and atomic groups. */ +#undef MATCH_LIMIT_DEPTH + +/* This limit is parameterized just in case anybody ever wants to change it. + Care must be taken if it is increased, because it guards against integer + overflow caused by enormously large patterns. 
*/ +#undef MAX_NAME_COUNT + +/* This limit is parameterized just in case anybody ever wants to change it. + Care must be taken if it is increased, because it guards against integer + overflow caused by enormously large patterns. */ +#undef MAX_NAME_SIZE + +/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */ +#undef NEVER_BACKSLASH_C + +/* The value of NEWLINE_DEFAULT determines the default newline character + sequence. PCRE2 client programs can override this by selecting other values + at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5 + (ANYCRLF), and 6 (NUL). */ +#undef NEWLINE_DEFAULT + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested + parentheses (of any kind) in a pattern. This limits the amount of system + stack that is used while compiling a pattern. */ +#undef PARENS_NEST_LIMIT + +/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by + pcre2grep to hold parts of the file it is searching. The buffer will be + expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing + very long lines. The actual amount of memory used by pcre2grep is three + times this number, because it allows for the buffering of "before" and + "after" lines. */ +#undef PCRE2GREP_BUFSIZE + +/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer + used by pcre2grep to hold parts of the file it is searching. 
The actual + amount of memory used by pcre2grep is three times this number, because it + allows for the buffering of "before" and "after" lines. */ +#undef PCRE2GREP_MAX_BUFSIZE + +/* to make a symbol visible */ +#undef PCRE2POSIX_EXP_DECL + +/* to make a symbol visible */ +#undef PCRE2POSIX_EXP_DEFN + +/* Define to any value to include debugging code. */ +#undef PCRE2_DEBUG + +/* to make a symbol visible */ +#undef PCRE2_EXP_DECL + + +/* If you are compiling for a system other than a Unix-like system or + Win32, and it needs some magic to be inserted before the definition + of a function that is exported by the library, define this macro to + contain the relevant magic. If you do not define this macro, a suitable + __declspec value is used for Windows systems; in other environments + "extern" is used for a C compiler and "extern C" for a C++ compiler. + This macro appears at the start of every exported function that is part + of the external API. It does not appear on functions that are "external" + in the C sense, but which are internal to the library. */ +#undef PCRE2_EXP_DEFN + +/* Define to any value if linking statically (TODO: make nice with Libtool) */ +#undef PCRE2_STATIC + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +#undef PTHREAD_CREATE_JOINABLE + +/* Define to any non-zero number to enable support for SELinux compatible + executable memory allocator in JIT. Note that this will have no effect + unless SUPPORT_JIT is also defined. */ +#undef SLJIT_PROT_EXECUTABLE_ALLOCATOR + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Define to any value to enable support for Just-In-Time compiling. */ +#undef SUPPORT_JIT + +/* Define to any value to allow pcre2grep to be linked with libbz2, so that it + is able to handle .bz2 files. */ +#undef SUPPORT_LIBBZ2 + +/* Define to any value to allow pcre2test to be linked with libedit.
*/ +#undef SUPPORT_LIBEDIT + +/* Define to any value to allow pcre2test to be linked with libreadline. */ +#undef SUPPORT_LIBREADLINE + +/* Define to any value to allow pcre2grep to be linked with libz, so that it + is able to handle .gz files. */ +#undef SUPPORT_LIBZ + +/* Define to any value to enable callout script support in pcre2grep. */ +#undef SUPPORT_PCRE2GREP_CALLOUT + +/* Define to any value to enable fork support in pcre2grep callout scripts. + This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined. + */ +#undef SUPPORT_PCRE2GREP_CALLOUT_FORK + +/* Define to any value to enable JIT support in pcre2grep. Note that this will + have no effect unless SUPPORT_JIT is also defined. */ +#undef SUPPORT_PCRE2GREP_JIT + +/* Define to any value to enable the 16 bit PCRE2 library. */ +#undef SUPPORT_PCRE2_16 + +/* Define to any value to enable the 32 bit PCRE2 library. */ +#undef SUPPORT_PCRE2_32 + +/* Define to any value to enable the 8 bit PCRE2 library. */ +#undef SUPPORT_PCRE2_8 + +/* Define to any value to enable support for Unicode and UTF encoding. This + will work even in an EBCDIC environment, but it is incompatible with the + EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or* + ASCII/Unicode, but not both at once. */ +#undef SUPPORT_UNICODE + +/* Define to any value for valgrind support to find invalid memory reads. */ +#undef SUPPORT_VALGRIND + +/* Enable extensions on AIX 3, Interix. */ +#ifndef _ALL_SOURCE +# undef _ALL_SOURCE +#endif +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# undef _GNU_SOURCE +#endif +/* Enable threading extensions on Solaris. */ +#ifndef _POSIX_PTHREAD_SEMANTICS +# undef _POSIX_PTHREAD_SEMANTICS +#endif +/* Enable extensions on HP NonStop. */ +#ifndef _TANDEM_SOURCE +# undef _TANDEM_SOURCE +#endif +/* Enable general extensions on Solaris. 
*/ +#ifndef __EXTENSIONS__ +# undef __EXTENSIONS__ +#endif + + +/* Version number of package */ +#undef VERSION + +/* Define to 1 if on MINIX. */ +#undef _MINIX + +/* Define to 2 if the system does not provide POSIX.1 features except with + this defined. */ +#undef _POSIX_1_SOURCE + +/* Define to 1 if you need to in order for `stat' and other things to work. */ +#undef _POSIX_SOURCE + +/* Define to empty if `const' does not conform to ANSI C. */ +#undef const + +/* Define to the type of a signed integer type of width exactly 64 bits if + such a type exists and the standard includes do not define it. */ +#undef int64_t + +/* Define to `unsigned int' if <sys/types.h> does not define. */ +#undef size_t diff --git a/pcre2/src/pcre2.h.generic b/pcre2/src/pcre2.h.generic new file mode 100644 index 000000000..f204ec818 --- /dev/null +++ b/pcre2/src/pcre2.h.generic @@ -0,0 +1,991 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* This is the public header file for the PCRE library, second API, to be +#included by applications that call PCRE2 functions. + + Copyright (c) 2016-2020 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission.
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#ifndef PCRE2_H_IDEMPOTENT_GUARD +#define PCRE2_H_IDEMPOTENT_GUARD + +/* The current PCRE version information. */ + +#define PCRE2_MAJOR 10 +#define PCRE2_MINOR 36 +#define PCRE2_PRERELEASE +#define PCRE2_DATE 2020-12-04 + +/* When an application links to a PCRE DLL in Windows, the symbols that are +imported have to be identified as such. When building PCRE2, the appropriate +export setting is defined in pcre2_internal.h, which includes this file. So we +don't change existing definitions of PCRE2_EXP_DECL. */ + +#if defined(_WIN32) && !defined(PCRE2_STATIC) +# ifndef PCRE2_EXP_DECL +# define PCRE2_EXP_DECL extern __declspec(dllimport) +# endif +#endif + +/* By default, we use the standard "extern" declarations. */ + +#ifndef PCRE2_EXP_DECL +# ifdef __cplusplus +# define PCRE2_EXP_DECL extern "C" +# else +# define PCRE2_EXP_DECL extern +# endif +#endif + +/* When compiling with the MSVC compiler, it is sometimes necessary to include +a "calling convention" before exported function names. (This is secondhand +information; I know nothing about MSVC myself). For example, something like + + void __cdecl function(....) 
+ +might be needed. In order to make this easy, all the exported functions have +PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not +set, we ensure here that it has no effect. */ + +#ifndef PCRE2_CALL_CONVENTION +#define PCRE2_CALL_CONVENTION +#endif + +/* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and +uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do +not have stdint.h, which is why we use inttypes.h, which according to the C +standard is a superset of stdint.h. If none of these headers are available, +the relevant values must be provided by some other means. */ + +#include <limits.h> +#include <stdlib.h> +#include <inttypes.h> + +/* Allow for C++ users compiling this directly. */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* The following option bits can be passed to pcre2_compile(), pcre2_match(), +or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it +is passed. Put these bits at the most significant end of the options word so +others can be added next to them */ + +#define PCRE2_ANCHORED 0x80000000u +#define PCRE2_NO_UTF_CHECK 0x40000000u +#define PCRE2_ENDANCHORED 0x20000000u + +/* The following option bits can be passed only to pcre2_compile(). However, +they may affect compilation, JIT compilation, and/or interpretive execution.
+The following tags indicate which: + +C alters what is compiled by pcre2_compile() +J alters what is compiled by pcre2_jit_compile() +M is inspected during pcre2_match() execution +D is inspected during pcre2_dfa_match() execution +*/ + +#define PCRE2_ALLOW_EMPTY_CLASS 0x00000001u /* C */ +#define PCRE2_ALT_BSUX 0x00000002u /* C */ +#define PCRE2_AUTO_CALLOUT 0x00000004u /* C */ +#define PCRE2_CASELESS 0x00000008u /* C */ +#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J M D */ +#define PCRE2_DOTALL 0x00000020u /* C */ +#define PCRE2_DUPNAMES 0x00000040u /* C */ +#define PCRE2_EXTENDED 0x00000080u /* C */ +#define PCRE2_FIRSTLINE 0x00000100u /* J M D */ +#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J M */ +#define PCRE2_MULTILINE 0x00000400u /* C */ +#define PCRE2_NEVER_UCP 0x00000800u /* C */ +#define PCRE2_NEVER_UTF 0x00001000u /* C */ +#define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */ +#define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */ +#define PCRE2_NO_DOTSTAR_ANCHOR 0x00008000u /* C */ +#define PCRE2_NO_START_OPTIMIZE 0x00010000u /* J M D */ +#define PCRE2_UCP 0x00020000u /* C J M D */ +#define PCRE2_UNGREEDY 0x00040000u /* C */ +#define PCRE2_UTF 0x00080000u /* C J M D */ +#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */ +#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */ +#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */ +#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */ +#define PCRE2_EXTENDED_MORE 0x01000000u /* C */ +#define PCRE2_LITERAL 0x02000000u /* C */ +#define PCRE2_MATCH_INVALID_UTF 0x04000000u /* J M D */ + +/* An additional compile options word is available in the compile context. 
*/ + +#define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES 0x00000001u /* C */ +#define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL 0x00000002u /* C */ +#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */ +#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */ +#define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */ +#define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */ + +/* These are for pcre2_jit_compile(). */ + +#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */ +#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u +#define PCRE2_JIT_PARTIAL_HARD 0x00000004u +#define PCRE2_JIT_INVALID_UTF 0x00000100u + +/* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and +pcre2_substitute(). Some are allowed only for one of the functions, and in +these cases it is noted below. Note that PCRE2_ANCHORED, PCRE2_ENDANCHORED and +PCRE2_NO_UTF_CHECK can also be passed to these functions (though +pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */ + +#define PCRE2_NOTBOL 0x00000001u +#define PCRE2_NOTEOL 0x00000002u +#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */ +#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. 
*/ +#define PCRE2_PARTIAL_SOFT 0x00000010u +#define PCRE2_PARTIAL_HARD 0x00000020u +#define PCRE2_DFA_RESTART 0x00000040u /* pcre2_dfa_match() only */ +#define PCRE2_DFA_SHORTEST 0x00000080u /* pcre2_dfa_match() only */ +#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */ +#define PCRE2_NO_JIT 0x00002000u /* Not for pcre2_dfa_match() */ +#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u +#define PCRE2_SUBSTITUTE_LITERAL 0x00008000u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_MATCHED 0x00010000u /* pcre2_substitute() only */ +#define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u /* pcre2_substitute() only */ + +/* Options for pcre2_pattern_convert(). */ + +#define PCRE2_CONVERT_UTF 0x00000001u +#define PCRE2_CONVERT_NO_UTF_CHECK 0x00000002u +#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u +#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u +#define PCRE2_CONVERT_GLOB 0x00000010u +#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u +#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u + +/* Newline and \R settings, for use in compile contexts. The newline values +must be kept in step with values set in config.h and both sets must all be +greater than zero. */ + +#define PCRE2_NEWLINE_CR 1 +#define PCRE2_NEWLINE_LF 2 +#define PCRE2_NEWLINE_CRLF 3 +#define PCRE2_NEWLINE_ANY 4 +#define PCRE2_NEWLINE_ANYCRLF 5 +#define PCRE2_NEWLINE_NUL 6 + +#define PCRE2_BSR_UNICODE 1 +#define PCRE2_BSR_ANYCRLF 2 + +/* Error codes for pcre2_compile(). Some of these are also used by +pcre2_pattern_convert(). 
*/ + +#define PCRE2_ERROR_END_BACKSLASH 101 +#define PCRE2_ERROR_END_BACKSLASH_C 102 +#define PCRE2_ERROR_UNKNOWN_ESCAPE 103 +#define PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER 104 +#define PCRE2_ERROR_QUANTIFIER_TOO_BIG 105 +#define PCRE2_ERROR_MISSING_SQUARE_BRACKET 106 +#define PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS 107 +#define PCRE2_ERROR_CLASS_RANGE_ORDER 108 +#define PCRE2_ERROR_QUANTIFIER_INVALID 109 +#define PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT 110 +#define PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY 111 +#define PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS 112 +#define PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING 113 +#define PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS 114 +#define PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE 115 +#define PCRE2_ERROR_NULL_PATTERN 116 +#define PCRE2_ERROR_BAD_OPTIONS 117 +#define PCRE2_ERROR_MISSING_COMMENT_CLOSING 118 +#define PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP 119 +#define PCRE2_ERROR_PATTERN_TOO_LARGE 120 +#define PCRE2_ERROR_HEAP_FAILED 121 +#define PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS 122 +#define PCRE2_ERROR_INTERNAL_CODE_OVERFLOW 123 +#define PCRE2_ERROR_MISSING_CONDITION_CLOSING 124 +#define PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH 125 +#define PCRE2_ERROR_ZERO_RELATIVE_REFERENCE 126 +#define PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES 127 +#define PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED 128 +#define PCRE2_ERROR_BAD_RELATIVE_REFERENCE 129 +#define PCRE2_ERROR_UNKNOWN_POSIX_CLASS 130 +#define PCRE2_ERROR_INTERNAL_STUDY_ERROR 131 +#define PCRE2_ERROR_UNICODE_NOT_SUPPORTED 132 +#define PCRE2_ERROR_PARENTHESES_STACK_CHECK 133 +#define PCRE2_ERROR_CODE_POINT_TOO_BIG 134 +#define PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED 135 +#define PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C 136 +#define PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE 137 +#define PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG 138 +#define PCRE2_ERROR_MISSING_CALLOUT_CLOSING 139 +#define PCRE2_ERROR_ESCAPE_INVALID_IN_VERB 140 +#define PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P 141 +#define 
PCRE2_ERROR_MISSING_NAME_TERMINATOR 142 +#define PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME 143 +#define PCRE2_ERROR_INVALID_SUBPATTERN_NAME 144 +#define PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE 145 +#define PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY 146 +#define PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY 147 +#define PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG 148 +#define PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS 149 +#define PCRE2_ERROR_CLASS_INVALID_RANGE 150 +#define PCRE2_ERROR_OCTAL_BYTE_TOO_BIG 151 +#define PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE 152 +#define PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN 153 +#define PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES 154 +#define PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE 155 +#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156 +#define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157 +#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158 +/* Error 159 is obsolete and should now never occur */ +#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159 +#define PCRE2_ERROR_VERB_UNKNOWN 160 +#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161 +#define PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED 162 +#define PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW 163 +#define PCRE2_ERROR_INVALID_OCTAL 164 +#define PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH 165 +#define PCRE2_ERROR_MARK_MISSING_ARGUMENT 166 +#define PCRE2_ERROR_INVALID_HEXADECIMAL 167 +#define PCRE2_ERROR_BACKSLASH_C_SYNTAX 168 +#define PCRE2_ERROR_BACKSLASH_K_SYNTAX 169 +#define PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS 170 +#define PCRE2_ERROR_BACKSLASH_N_IN_CLASS 171 +#define PCRE2_ERROR_CALLOUT_STRING_TOO_LONG 172 +#define PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT 173 +#define PCRE2_ERROR_UTF_IS_DISABLED 174 +#define PCRE2_ERROR_UCP_IS_DISABLED 175 +#define PCRE2_ERROR_VERB_NAME_TOO_LONG 176 +#define PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG 177 +#define PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS 178 +#define PCRE2_ERROR_VERSION_CONDITION_SYNTAX 179 +#define PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS 180 +#define 
PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER 181 +#define PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER 182 +#define PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED 183 +#define PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP 184 +#define PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED 185 +#define PCRE2_ERROR_PATTERN_TOO_COMPLICATED 186 +#define PCRE2_ERROR_LOOKBEHIND_TOO_LONG 187 +#define PCRE2_ERROR_PATTERN_STRING_TOO_LONG 188 +#define PCRE2_ERROR_INTERNAL_BAD_CODE 189 +#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190 +#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191 +#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192 +#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193 +#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194 +#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195 +#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196 +#define PCRE2_ERROR_TOO_MANY_CAPTURES 197 +#define PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED 198 + + +/* "Expected" matching error codes: no match and partial match. */ + +#define PCRE2_ERROR_NOMATCH (-1) +#define PCRE2_ERROR_PARTIAL (-2) + +/* Error codes for UTF-8 validity checks */ + +#define PCRE2_ERROR_UTF8_ERR1 (-3) +#define PCRE2_ERROR_UTF8_ERR2 (-4) +#define PCRE2_ERROR_UTF8_ERR3 (-5) +#define PCRE2_ERROR_UTF8_ERR4 (-6) +#define PCRE2_ERROR_UTF8_ERR5 (-7) +#define PCRE2_ERROR_UTF8_ERR6 (-8) +#define PCRE2_ERROR_UTF8_ERR7 (-9) +#define PCRE2_ERROR_UTF8_ERR8 (-10) +#define PCRE2_ERROR_UTF8_ERR9 (-11) +#define PCRE2_ERROR_UTF8_ERR10 (-12) +#define PCRE2_ERROR_UTF8_ERR11 (-13) +#define PCRE2_ERROR_UTF8_ERR12 (-14) +#define PCRE2_ERROR_UTF8_ERR13 (-15) +#define PCRE2_ERROR_UTF8_ERR14 (-16) +#define PCRE2_ERROR_UTF8_ERR15 (-17) +#define PCRE2_ERROR_UTF8_ERR16 (-18) +#define PCRE2_ERROR_UTF8_ERR17 (-19) +#define PCRE2_ERROR_UTF8_ERR18 (-20) +#define PCRE2_ERROR_UTF8_ERR19 (-21) +#define PCRE2_ERROR_UTF8_ERR20 (-22) +#define PCRE2_ERROR_UTF8_ERR21 (-23) + +/* Error codes for UTF-16 validity checks */ + +#define PCRE2_ERROR_UTF16_ERR1 (-24) +#define PCRE2_ERROR_UTF16_ERR2 (-25) +#define 
PCRE2_ERROR_UTF16_ERR3 (-26) + +/* Error codes for UTF-32 validity checks */ + +#define PCRE2_ERROR_UTF32_ERR1 (-27) +#define PCRE2_ERROR_UTF32_ERR2 (-28) + +/* Miscellaneous error codes for pcre2[_dfa]_match(), substring extraction +functions, context functions, and serializing functions. They are in numerical +order. Originally they were in alphabetical order too, but now that PCRE2 is +released, the numbers must not be changed. */ + +#define PCRE2_ERROR_BADDATA (-29) +#define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */ +#define PCRE2_ERROR_BADMAGIC (-31) +#define PCRE2_ERROR_BADMODE (-32) +#define PCRE2_ERROR_BADOFFSET (-33) +#define PCRE2_ERROR_BADOPTION (-34) +#define PCRE2_ERROR_BADREPLACEMENT (-35) +#define PCRE2_ERROR_BADUTFOFFSET (-36) +#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */ +#define PCRE2_ERROR_DFA_BADRESTART (-38) +#define PCRE2_ERROR_DFA_RECURSE (-39) +#define PCRE2_ERROR_DFA_UCOND (-40) +#define PCRE2_ERROR_DFA_UFUNC (-41) +#define PCRE2_ERROR_DFA_UITEM (-42) +#define PCRE2_ERROR_DFA_WSSIZE (-43) +#define PCRE2_ERROR_INTERNAL (-44) +#define PCRE2_ERROR_JIT_BADOPTION (-45) +#define PCRE2_ERROR_JIT_STACKLIMIT (-46) +#define PCRE2_ERROR_MATCHLIMIT (-47) +#define PCRE2_ERROR_NOMEMORY (-48) +#define PCRE2_ERROR_NOSUBSTRING (-49) +#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50) +#define PCRE2_ERROR_NULL (-51) +#define PCRE2_ERROR_RECURSELOOP (-52) +#define PCRE2_ERROR_DEPTHLIMIT (-53) +#define PCRE2_ERROR_RECURSIONLIMIT (-53) /* Obsolete synonym */ +#define PCRE2_ERROR_UNAVAILABLE (-54) +#define PCRE2_ERROR_UNSET (-55) +#define PCRE2_ERROR_BADOFFSETLIMIT (-56) +#define PCRE2_ERROR_BADREPESCAPE (-57) +#define PCRE2_ERROR_REPMISSINGBRACE (-58) +#define PCRE2_ERROR_BADSUBSTITUTION (-59) +#define PCRE2_ERROR_BADSUBSPATTERN (-60) +#define PCRE2_ERROR_TOOMANYREPLACE (-61) +#define PCRE2_ERROR_BADSERIALIZEDDATA (-62) +#define PCRE2_ERROR_HEAPLIMIT (-63) +#define PCRE2_ERROR_CONVERT_SYNTAX (-64) +#define PCRE2_ERROR_INTERNAL_DUPMATCH 
(-65) +#define PCRE2_ERROR_DFA_UINVALID_UTF (-66) + + +/* Request types for pcre2_pattern_info() */ + +#define PCRE2_INFO_ALLOPTIONS 0 +#define PCRE2_INFO_ARGOPTIONS 1 +#define PCRE2_INFO_BACKREFMAX 2 +#define PCRE2_INFO_BSR 3 +#define PCRE2_INFO_CAPTURECOUNT 4 +#define PCRE2_INFO_FIRSTCODEUNIT 5 +#define PCRE2_INFO_FIRSTCODETYPE 6 +#define PCRE2_INFO_FIRSTBITMAP 7 +#define PCRE2_INFO_HASCRORLF 8 +#define PCRE2_INFO_JCHANGED 9 +#define PCRE2_INFO_JITSIZE 10 +#define PCRE2_INFO_LASTCODEUNIT 11 +#define PCRE2_INFO_LASTCODETYPE 12 +#define PCRE2_INFO_MATCHEMPTY 13 +#define PCRE2_INFO_MATCHLIMIT 14 +#define PCRE2_INFO_MAXLOOKBEHIND 15 +#define PCRE2_INFO_MINLENGTH 16 +#define PCRE2_INFO_NAMECOUNT 17 +#define PCRE2_INFO_NAMEENTRYSIZE 18 +#define PCRE2_INFO_NAMETABLE 19 +#define PCRE2_INFO_NEWLINE 20 +#define PCRE2_INFO_DEPTHLIMIT 21 +#define PCRE2_INFO_RECURSIONLIMIT 21 /* Obsolete synonym */ +#define PCRE2_INFO_SIZE 22 +#define PCRE2_INFO_HASBACKSLASHC 23 +#define PCRE2_INFO_FRAMESIZE 24 +#define PCRE2_INFO_HEAPLIMIT 25 +#define PCRE2_INFO_EXTRAOPTIONS 26 + +/* Request types for pcre2_config(). */ + +#define PCRE2_CONFIG_BSR 0 +#define PCRE2_CONFIG_JIT 1 +#define PCRE2_CONFIG_JITTARGET 2 +#define PCRE2_CONFIG_LINKSIZE 3 +#define PCRE2_CONFIG_MATCHLIMIT 4 +#define PCRE2_CONFIG_NEWLINE 5 +#define PCRE2_CONFIG_PARENSLIMIT 6 +#define PCRE2_CONFIG_DEPTHLIMIT 7 +#define PCRE2_CONFIG_RECURSIONLIMIT 7 /* Obsolete synonym */ +#define PCRE2_CONFIG_STACKRECURSE 8 /* Obsolete */ +#define PCRE2_CONFIG_UNICODE 9 +#define PCRE2_CONFIG_UNICODE_VERSION 10 +#define PCRE2_CONFIG_VERSION 11 +#define PCRE2_CONFIG_HEAPLIMIT 12 +#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13 +#define PCRE2_CONFIG_COMPILED_WIDTHS 14 +#define PCRE2_CONFIG_TABLES_LENGTH 15 + + +/* Types for code units in patterns and subject strings. 
*/ + +typedef uint8_t PCRE2_UCHAR8; +typedef uint16_t PCRE2_UCHAR16; +typedef uint32_t PCRE2_UCHAR32; + +typedef const PCRE2_UCHAR8 *PCRE2_SPTR8; +typedef const PCRE2_UCHAR16 *PCRE2_SPTR16; +typedef const PCRE2_UCHAR32 *PCRE2_SPTR32; + +/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2, +including pattern offsets for errors and subject offsets after a match. We +define special values to indicate zero-terminated strings and unset offsets in +the offset vector (ovector). */ + +#define PCRE2_SIZE size_t +#define PCRE2_SIZE_MAX SIZE_MAX +#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0) +#define PCRE2_UNSET (~(PCRE2_SIZE)0) + +/* Generic types for opaque structures and JIT callback functions. These +declarations are defined in a macro that is expanded for each width later. */ + +#define PCRE2_TYPES_LIST \ +struct pcre2_real_general_context; \ +typedef struct pcre2_real_general_context pcre2_general_context; \ +\ +struct pcre2_real_compile_context; \ +typedef struct pcre2_real_compile_context pcre2_compile_context; \ +\ +struct pcre2_real_match_context; \ +typedef struct pcre2_real_match_context pcre2_match_context; \ +\ +struct pcre2_real_convert_context; \ +typedef struct pcre2_real_convert_context pcre2_convert_context; \ +\ +struct pcre2_real_code; \ +typedef struct pcre2_real_code pcre2_code; \ +\ +struct pcre2_real_match_data; \ +typedef struct pcre2_real_match_data pcre2_match_data; \ +\ +struct pcre2_real_jit_stack; \ +typedef struct pcre2_real_jit_stack pcre2_jit_stack; \ +\ +typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *); + + +/* The structures for passing out data via callout functions. We use structures +so that new fields can be added on the end in future versions, without changing +the API of the function, thereby allowing old clients to work without +modification. Define the generic versions in a macro; the width-specific +versions are generated from this macro below. */ + +/* Flags for the callout_flags field. 
These are cleared after a callout. */ + +#define PCRE2_CALLOUT_STARTMATCH 0x00000001u /* Set for each bumpalong */ +#define PCRE2_CALLOUT_BACKTRACK 0x00000002u /* Set after a backtrack */ + +#define PCRE2_STRUCTURE_LIST \ +typedef struct pcre2_callout_block { \ + uint32_t version; /* Identifies version of block */ \ + /* ------------------------ Version 0 ------------------------------- */ \ + uint32_t callout_number; /* Number compiled into pattern */ \ + uint32_t capture_top; /* Max current capture */ \ + uint32_t capture_last; /* Most recently closed capture */ \ + PCRE2_SIZE *offset_vector; /* The offset vector */ \ + PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \ + PCRE2_SPTR subject; /* The subject being matched */ \ + PCRE2_SIZE subject_length; /* The length of the subject */ \ + PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \ + PCRE2_SIZE current_position; /* Where we currently are in the subject */ \ + PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \ + PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \ + /* ------------------- Added for Version 1 -------------------------- */ \ + PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \ + PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \ + PCRE2_SPTR callout_string; /* String compiled into pattern */ \ + /* ------------------- Added for Version 2 -------------------------- */ \ + uint32_t callout_flags; /* See above for list */ \ + /* ------------------------------------------------------------------ */ \ +} pcre2_callout_block; \ +\ +typedef struct pcre2_callout_enumerate_block { \ + uint32_t version; /* Identifies version of block */ \ + /* ------------------------ Version 0 ------------------------------- */ \ + PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \ + PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \ + uint32_t callout_number; 
/* Number compiled into pattern */ \ + PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \ + PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \ + PCRE2_SPTR callout_string; /* String compiled into pattern */ \ + /* ------------------------------------------------------------------ */ \ +} pcre2_callout_enumerate_block; \ +\ +typedef struct pcre2_substitute_callout_block { \ + uint32_t version; /* Identifies version of block */ \ + /* ------------------------ Version 0 ------------------------------- */ \ + PCRE2_SPTR input; /* Pointer to input subject string */ \ + PCRE2_SPTR output; /* Pointer to output buffer */ \ + PCRE2_SIZE output_offsets[2]; /* Changed portion of the output */ \ + PCRE2_SIZE *ovector; /* Pointer to current ovector */ \ + uint32_t oveccount; /* Count of pairs set in ovector */ \ + uint32_t subscount; /* Substitution number */ \ + /* ------------------------------------------------------------------ */ \ +} pcre2_substitute_callout_block; + + +/* List the generic forms of all other functions in macros, which will be +expanded for each width below. Start with functions that give general +information. */ + +#define PCRE2_GENERAL_INFO_FUNCTIONS \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *); + + +/* Functions for manipulating contexts. 
*/ + +#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \ +PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \ + *pcre2_general_context_copy(pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \ + *pcre2_general_context_create(void *(*)(PCRE2_SIZE, void *), \ + void (*)(void *, void *), void *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_general_context_free(pcre2_general_context *); + +#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \ +PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \ + *pcre2_compile_context_copy(pcre2_compile_context *); \ +PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \ + *pcre2_compile_context_create(pcre2_general_context *);\ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_compile_context_free(pcre2_compile_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_bsr(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_character_tables(pcre2_compile_context *, const uint8_t *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_newline(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_compile_recursion_guard(pcre2_compile_context *, \ + int (*)(uint32_t, void *), void *); + +#define PCRE2_MATCH_CONTEXT_FUNCTIONS \ +PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \ + *pcre2_match_context_copy(pcre2_match_context *); \ +PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \ + *pcre2_match_context_create(pcre2_general_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_match_context_free(pcre2_match_context *); 
\ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_callout(pcre2_match_context *, \ + int (*)(pcre2_callout_block *, void *), void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_substitute_callout(pcre2_match_context *, \ + int (*)(pcre2_substitute_callout_block *, void *), void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_match_limit(pcre2_match_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_offset_limit(pcre2_match_context *, PCRE2_SIZE); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_recursion_memory_management(pcre2_match_context *, \ + void *(*)(PCRE2_SIZE, void *), void (*)(void *, void *), void *); + +#define PCRE2_CONVERT_CONTEXT_FUNCTIONS \ +PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \ + *pcre2_convert_context_copy(pcre2_convert_context *); \ +PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \ + *pcre2_convert_context_create(pcre2_general_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_convert_context_free(pcre2_convert_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_glob_separator(pcre2_convert_context *, uint32_t); + + +/* Functions concerned with compiling a pattern to PCRE internal code. 
*/ + +#define PCRE2_COMPILE_FUNCTIONS \ +PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \ + *pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \ + pcre2_compile_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_code_free(pcre2_code *); \ +PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \ + *pcre2_code_copy(const pcre2_code *); \ +PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \ + *pcre2_code_copy_with_tables(const pcre2_code *); + + +/* Functions that give information about a compiled pattern. */ + +#define PCRE2_PATTERN_INFO_FUNCTIONS \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_pattern_info(const pcre2_code *, uint32_t, void *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_callout_enumerate(const pcre2_code *, \ + int (*)(pcre2_callout_enumerate_block *, void *), void *); + + +/* Functions for running a match and inspecting the result. */ + +#define PCRE2_MATCH_FUNCTIONS \ +PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \ + *pcre2_match_data_create(uint32_t, pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \ + *pcre2_match_data_create_from_pattern(const pcre2_code *, \ + pcre2_general_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *, int *, PCRE2_SIZE); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_match_data_free(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \ + pcre2_get_mark(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \ + pcre2_get_match_data_size(pcre2_match_data *); \ +PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \ + pcre2_get_ovector_count(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SIZE 
PCRE2_CALL_CONVENTION \ + *pcre2_get_ovector_pointer(pcre2_match_data *); \ +PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \ + pcre2_get_startchar(pcre2_match_data *); + + +/* Convenience functions for handling matched substrings. */ + +#define PCRE2_SUBSTRING_FUNCTIONS \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_copy_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR *, \ + PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_copy_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR *, \ + PCRE2_SIZE *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_substring_free(PCRE2_UCHAR *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_get_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **, \ + PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_get_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR **, \ + PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_length_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_length_bynumber(pcre2_match_data *, uint32_t, PCRE2_SIZE *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_nametable_scan(const pcre2_code *, PCRE2_SPTR, PCRE2_SPTR *, \ + PCRE2_SPTR *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_substring_list_free(PCRE2_SPTR *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **); + +/* Functions for serializing / deserializing compiled patterns. 
*/ + +#define PCRE2_SERIALIZE_FUNCTIONS \ +PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \ + pcre2_serialize_encode(const pcre2_code **, int32_t, uint8_t **, \ + PCRE2_SIZE *, pcre2_general_context *); \ +PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \ + pcre2_serialize_decode(pcre2_code **, int32_t, const uint8_t *, \ + pcre2_general_context *); \ +PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \ + pcre2_serialize_get_number_of_codes(const uint8_t *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_serialize_free(uint8_t *); + + +/* Convenience function for match + substitute. */ + +#define PCRE2_SUBSTITUTE_FUNCTION \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_substitute(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, \ + PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *); + + +/* Functions for converting pattern source strings. */ + +#define PCRE2_CONVERT_FUNCTIONS \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_pattern_convert(PCRE2_SPTR, PCRE2_SIZE, uint32_t, PCRE2_UCHAR **, \ + PCRE2_SIZE *, pcre2_convert_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_converted_pattern_free(PCRE2_UCHAR *); + + +/* Functions for JIT processing */ + +#define PCRE2_JIT_FUNCTIONS \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_jit_compile(pcre2_code *, uint32_t); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_jit_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \ + uint32_t, pcre2_match_data *, pcre2_match_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_jit_free_unused_memory(pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_jit_stack PCRE2_CALL_CONVENTION \ + *pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, pcre2_general_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + 
pcre2_jit_stack_free(pcre2_jit_stack *); + + +/* Other miscellaneous functions. */ + +#define PCRE2_OTHER_FUNCTIONS \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \ +PCRE2_EXP_DECL const uint8_t PCRE2_CALL_CONVENTION \ + *pcre2_maketables(pcre2_general_context *); \ +PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ + pcre2_maketables_free(pcre2_general_context *, const uint8_t *); + +/* Define macros that generate width-specific names from generic versions. The +three-level macro scheme is necessary to get the macros expanded when we want +them to be. First we get the width from PCRE2_LOCAL_WIDTH, which is used for +generating three versions of everything below. After that, PCRE2_SUFFIX will be +re-defined to use PCRE2_CODE_UNIT_WIDTH, for use when macros such as +pcre2_compile are called by application code. */ + +#define PCRE2_JOIN(a,b) a ## b +#define PCRE2_GLUE(a,b) PCRE2_JOIN(a,b) +#define PCRE2_SUFFIX(a) PCRE2_GLUE(a,PCRE2_LOCAL_WIDTH) + + +/* Data types */ + +#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR) +#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR) + +#define pcre2_code PCRE2_SUFFIX(pcre2_code_) +#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_) +#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_) + +#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_) +#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_) +#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_) +#define pcre2_real_convert_context PCRE2_SUFFIX(pcre2_real_convert_context_) +#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_) +#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_) +#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_) + + +/* Data blocks */ + +#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_) +#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_) +#define 
pcre2_substitute_callout_block PCRE2_SUFFIX(pcre2_substitute_callout_block_) +#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_) +#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_) +#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_) +#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_) +#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_) + + +/* Functions: the complete list in alphabetical order */ + +#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_) +#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_) +#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_) +#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_) +#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_) +#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_) +#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_) +#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_) +#define pcre2_config PCRE2_SUFFIX(pcre2_config_) +#define pcre2_convert_context_copy PCRE2_SUFFIX(pcre2_convert_context_copy_) +#define pcre2_convert_context_create PCRE2_SUFFIX(pcre2_convert_context_create_) +#define pcre2_convert_context_free PCRE2_SUFFIX(pcre2_convert_context_free_) +#define pcre2_converted_pattern_free PCRE2_SUFFIX(pcre2_converted_pattern_free_) +#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_) +#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_) +#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_) +#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_) +#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_) +#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_) +#define pcre2_get_match_data_size PCRE2_SUFFIX(pcre2_get_match_data_size_) +#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_) +#define pcre2_get_ovector_count 
PCRE2_SUFFIX(pcre2_get_ovector_count_) +#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_) +#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_) +#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_) +#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_) +#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_) +#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_) +#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_) +#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_) +#define pcre2_maketables_free PCRE2_SUFFIX(pcre2_maketables_free_) +#define pcre2_match PCRE2_SUFFIX(pcre2_match_) +#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_) +#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_) +#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_) +#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_) +#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_) +#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_) +#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_) +#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_) +#define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_) +#define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_) +#define pcre2_serialize_free PCRE2_SUFFIX(pcre2_serialize_free_) +#define pcre2_serialize_get_number_of_codes PCRE2_SUFFIX(pcre2_serialize_get_number_of_codes_) +#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_) +#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_) +#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_) +#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_) +#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_) +#define pcre2_set_depth_limit 
PCRE2_SUFFIX(pcre2_set_depth_limit_) +#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_) +#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_) +#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_) +#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) +#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_) +#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_) +#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) +#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_) +#define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_) +#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_) +#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_) +#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_) +#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_) +#define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_) +#define pcre2_substring_get_bynumber PCRE2_SUFFIX(pcre2_substring_get_bynumber_) +#define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_) +#define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_) +#define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_) +#define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_) +#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_) +#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_) + +/* Keep this old function name for backwards compatibility */ +#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_) + +/* Keep this obsolete function for backwards compatibility: it is now a noop. 
*/ +#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_) + +/* Now generate all three sets of width-specific structures and function +prototypes. */ + +#define PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS \ +PCRE2_TYPES_LIST \ +PCRE2_STRUCTURE_LIST \ +PCRE2_GENERAL_INFO_FUNCTIONS \ +PCRE2_GENERAL_CONTEXT_FUNCTIONS \ +PCRE2_COMPILE_CONTEXT_FUNCTIONS \ +PCRE2_CONVERT_CONTEXT_FUNCTIONS \ +PCRE2_CONVERT_FUNCTIONS \ +PCRE2_MATCH_CONTEXT_FUNCTIONS \ +PCRE2_COMPILE_FUNCTIONS \ +PCRE2_PATTERN_INFO_FUNCTIONS \ +PCRE2_MATCH_FUNCTIONS \ +PCRE2_SUBSTRING_FUNCTIONS \ +PCRE2_SERIALIZE_FUNCTIONS \ +PCRE2_SUBSTITUTE_FUNCTION \ +PCRE2_JIT_FUNCTIONS \ +PCRE2_OTHER_FUNCTIONS + +#define PCRE2_LOCAL_WIDTH 8 +PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS +#undef PCRE2_LOCAL_WIDTH + +#define PCRE2_LOCAL_WIDTH 16 +PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS +#undef PCRE2_LOCAL_WIDTH + +#define PCRE2_LOCAL_WIDTH 32 +PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS +#undef PCRE2_LOCAL_WIDTH + +/* Undefine the list macros; they are no longer needed. */ + +#undef PCRE2_TYPES_LIST +#undef PCRE2_STRUCTURE_LIST +#undef PCRE2_GENERAL_INFO_FUNCTIONS +#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS +#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS +#undef PCRE2_CONVERT_CONTEXT_FUNCTIONS +#undef PCRE2_MATCH_CONTEXT_FUNCTIONS +#undef PCRE2_COMPILE_FUNCTIONS +#undef PCRE2_PATTERN_INFO_FUNCTIONS +#undef PCRE2_MATCH_FUNCTIONS +#undef PCRE2_SUBSTRING_FUNCTIONS +#undef PCRE2_SERIALIZE_FUNCTIONS +#undef PCRE2_SUBSTITUTE_FUNCTION +#undef PCRE2_JIT_FUNCTIONS +#undef PCRE2_OTHER_FUNCTIONS +#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS + +/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine +PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make +PCRE2_SUFFIX a no-op. Otherwise, generate an error. */ + +#undef PCRE2_SUFFIX +#ifndef PCRE2_CODE_UNIT_WIDTH +#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h. +#error Use 8, 16, or 32; or 0 for a multi-width application. 
+#else /* PCRE2_CODE_UNIT_WIDTH is defined */ +#if PCRE2_CODE_UNIT_WIDTH == 8 || \ + PCRE2_CODE_UNIT_WIDTH == 16 || \ + PCRE2_CODE_UNIT_WIDTH == 32 +#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH) +#elif PCRE2_CODE_UNIT_WIDTH == 0 +#undef PCRE2_JOIN +#undef PCRE2_GLUE +#define PCRE2_SUFFIX(a) a +#else +#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32. +#endif +#endif /* PCRE2_CODE_UNIT_WIDTH is defined */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* PCRE2_H_IDEMPOTENT_GUARD */ + +/* End of pcre2.h */ diff --git a/pcre2/src/pcre2_compile.c b/pcre2/src/pcre2_compile.c index 62393bea7..e811f12f0 100644 --- a/pcre2/src/pcre2_compile.c +++ b/pcre2/src/pcre2_compile.c @@ -2344,7 +2344,7 @@ if (ptr > *nameptr + MAX_NAME_SIZE) *errorcodeptr = ERR48; goto FAILED; } -*namelenptr = ptr - *nameptr; +*namelenptr = (uint32_t)(ptr - *nameptr); /* Subpattern names must not be empty, and their terminator is checked here. (What follows a verb or alpha assertion name is checked separately.) */ @@ -4331,6 +4331,7 @@ while (ptr < ptrend) { if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION; minor = (*ptr++ - CHAR_0) * 10; + if (ptr >= ptrend) goto BAD_VERSION_CONDITION; if (IS_DIGIT(*ptr)) minor += *ptr++ - CHAR_0; if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) goto BAD_VERSION_CONDITION; diff --git a/pcre2/src/pcre2_dftables.c b/pcre2/src/pcre2_dftables.c new file mode 100644 index 000000000..71b90ce83 --- /dev/null +++ b/pcre2/src/pcre2_dftables.c @@ -0,0 +1,303 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2020 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This is a freestanding support program to generate a file containing +character tables for PCRE2. The tables are built using the pcre2_maketables() +function, which is part of the PCRE2 API. 
By default, the system's "C" locale +is used rather than what the building user happens to have set, but the -L +option can be used to select the current locale from the LC_ALL environment +variable. By default, the tables are written in source form, but if -b is +given, they are written in binary. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include + +#define PCRE2_CODE_UNIT_WIDTH 0 /* Must be set, but not relevant here */ +#include "pcre2_internal.h" + +#define PCRE2_DFTABLES /* pcre2_maketables.c notices this */ +#include "pcre2_maketables.c" + + +static const char *classlist[] = + { + "space", "xdigit", "digit", "upper", "lower", + "word", "graph", "print", "punct", "cntrl" + }; + + + +/************************************************* +* Usage * +*************************************************/ + +static void +usage(void) +{ +(void)fprintf(stderr, + "Usage: pcre2_dftables [options] \n" + " -b Write output in binary (default is source code)\n" + " -L Use locale from LC_ALL (default is \"C\" locale)\n" + ); +} + + + +/************************************************* +* Entry point * +*************************************************/ + +int main(int argc, char **argv) +{ +FILE *f; +int i; +int nclass = 0; +BOOL binary = FALSE; +char *env = (char *)"C"; +const unsigned char *tables; +const unsigned char *base_of_tables; + +/* Process options */ + +for (i = 1; i < argc; i++) + { + char *arg = argv[i]; + if (*arg != '-') break; + + if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0) + { + usage(); + return 0; + } + + else if (strcmp(arg, "-L") == 0) + { + if (setlocale(LC_ALL, "") == NULL) + { + (void)fprintf(stderr, "pcre2_dftables: setlocale() failed\n"); + return 1; + } + env = getenv("LC_ALL"); + } + + else if (strcmp(arg, "-b") == 0) + binary = TRUE; + + else + { + (void)fprintf(stderr, "pcre2_dftables: unrecognized option %s\n", arg); + return 1; + } + } + +if (i != argc - 1) + { + 
(void)fprintf(stderr, "pcre2_dftables: one filename argument is required\n"); + return 1; + } + +/* Make the tables */ + +tables = maketables(); +base_of_tables = tables; + +f = fopen(argv[i], "wb"); +if (f == NULL) + { + fprintf(stderr, "pcre2_dftables: failed to open %s for writing\n", argv[1]); + return 1; + } + +/* If -b was specified, we write the tables in binary. */ + +if (binary) + { + int yield = 0; + size_t len = fwrite(tables, 1, TABLES_LENGTH, f); + if (len != TABLES_LENGTH) + { + (void)fprintf(stderr, "pcre2_dftables: fwrite() returned wrong length %d " + "instead of %d\n", (int)len, TABLES_LENGTH); + yield = 1; + } + fclose(f); + free((void *)base_of_tables); + return yield; + } + +/* Write the tables as source code for inclusion in the PCRE2 library. There +are several fprintf() calls here, because gcc in pedantic mode complains about +the very long string otherwise. */ + +(void)fprintf(f, + "/*************************************************\n" + "* Perl-Compatible Regular Expressions *\n" + "*************************************************/\n\n" + "/* This file was automatically written by the pcre2_dftables auxiliary\n" + "program. It contains character tables that are used when no external\n" + "tables are passed to PCRE2 by the application that calls it. The tables\n" + "are used only for characters whose code values are less than 256. */\n\n"); + +(void)fprintf(f, + "/* This set of tables was written in the %s locale. */\n\n", env); + +(void)fprintf(f, + "/* The pcre2_ftables program (which is distributed with PCRE2) can be used\n" + "to build alternative versions of this file. This is necessary if you are\n" + "running in an EBCDIC environment, or if you want to default to a different\n" + "encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates\n" + "these tables in the \"C\" locale by default. This happens automatically if\n" + "PCRE2 is configured with --enable-rebuild-chartables. 
However, you can run\n" + "pcre2_dftables manually with the -L option to build tables using the LC_ALL\n" + "locale. */\n\n"); + +/* Force config.h in z/OS */ + +#if defined NATIVE_ZOS +(void)fprintf(f, + "/* For z/OS, config.h is forced */\n" + "#ifndef HAVE_CONFIG_H\n" + "#define HAVE_CONFIG_H 1\n" + "#endif\n\n"); +#endif + +(void)fprintf(f, + "/* The following #include is present because without it gcc 4.x may remove\n" + "the array definition from the final binary if PCRE2 is built into a static\n" + "library and dead code stripping is activated. This leads to link errors.\n" + "Pulling in the header ensures that the array gets flagged as \"someone\n" + "outside this compilation unit might reference this\" and so it will always\n" + "be supplied to the linker. */\n\n"); + +(void)fprintf(f, + "#ifdef HAVE_CONFIG_H\n" + "#include \"config.h\"\n" + "#endif\n\n" + "#include \"pcre2_internal.h\"\n\n"); + +(void)fprintf(f, + "const uint8_t PRIV(default_tables)[] = {\n\n" + "/* This table is a lower casing table. */\n\n"); + +(void)fprintf(f, " "); +for (i = 0; i < 256; i++) + { + if ((i & 7) == 0 && i != 0) fprintf(f, "\n "); + fprintf(f, "%3d", *tables++); + if (i != 255) fprintf(f, ","); + } +(void)fprintf(f, ",\n\n"); + +(void)fprintf(f, "/* This table is a case flipping table. */\n\n"); + +(void)fprintf(f, " "); +for (i = 0; i < 256; i++) + { + if ((i & 7) == 0 && i != 0) fprintf(f, "\n "); + fprintf(f, "%3d", *tables++); + if (i != 255) fprintf(f, ","); + } +(void)fprintf(f, ",\n\n"); + +(void)fprintf(f, + "/* This table contains bit maps for various character classes. Each map is 32\n" + "bytes long and the bits run from the least significant end of each byte. The\n" + "classes that have their own maps are: space, xdigit, digit, upper, lower, word,\n" + "graph, print, punct, and cntrl. Other classes are built from combinations. 
*/\n\n"); + +(void)fprintf(f, " "); +for (i = 0; i < cbit_length; i++) + { + if ((i & 7) == 0 && i != 0) + { + if ((i & 31) == 0) (void)fprintf(f, "\n"); + if ((i & 24) == 8) (void)fprintf(f, " /* %s */", classlist[nclass++]); + (void)fprintf(f, "\n "); + } + (void)fprintf(f, "0x%02x", *tables++); + if (i != cbit_length - 1) (void)fprintf(f, ","); + } +(void)fprintf(f, ",\n\n"); + +(void)fprintf(f, + "/* This table identifies various classes of character by individual bits:\n" + " 0x%02x white space character\n" + " 0x%02x letter\n" + " 0x%02x lower case letter\n" + " 0x%02x decimal digit\n" + " 0x%02x alphanumeric or '_'\n*/\n\n", + ctype_space, ctype_letter, ctype_lcletter, ctype_digit, ctype_word); + +(void)fprintf(f, " "); +for (i = 0; i < 256; i++) + { + if ((i & 7) == 0 && i != 0) + { + (void)fprintf(f, " /* "); + if (isprint(i-8)) (void)fprintf(f, " %c -", i-8); + else (void)fprintf(f, "%3d-", i-8); + if (isprint(i-1)) (void)fprintf(f, " %c ", i-1); + else (void)fprintf(f, "%3d", i-1); + (void)fprintf(f, " */\n "); + } + (void)fprintf(f, "0x%02x", *tables++); + if (i != 255) (void)fprintf(f, ","); + } + +(void)fprintf(f, "};/* "); +if (isprint(i-8)) (void)fprintf(f, " %c -", i-8); + else (void)fprintf(f, "%3d-", i-8); +if (isprint(i-1)) (void)fprintf(f, " %c ", i-1); + else (void)fprintf(f, "%3d", i-1); +(void)fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n"); + +fclose(f); +free((void *)base_of_tables); +return 0; +} + +/* End of pcre2_dftables.c */ diff --git a/pcre2/src/pcre2_fuzzsupport.c b/pcre2/src/pcre2_fuzzsupport.c new file mode 100644 index 000000000..48781ffc0 --- /dev/null +++ b/pcre2/src/pcre2_fuzzsupport.c @@ -0,0 +1,365 @@ +/*************************************************************************** +Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it +tries to compile and match it, deriving options from the string itself. 
If +STANDALONE is defined, a main program that calls the driver with the contents +of specified files is compiled, and commentary on what is happening is output. +If an argument starts with '=' the rest of it is taken as a literal string +rather than a file name. This allows easy testing of short strings. + +Written by Philip Hazel, October 2016 +***************************************************************************/ + +#include +#include +#include +#include + +#define PCRE2_CODE_UNIT_WIDTH 8 +#include "pcre2.h" + +#define MAX_MATCH_SIZE 1000 + +#define DFA_WORKSPACE_COUNT 100 + +#define ALLOWED_COMPILE_OPTIONS \ + (PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \ + PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \ + PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \ + PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \ + PCRE2_NO_AUTO_CAPTURE| \ + PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \ + PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \ + PCRE2_UTF) + +#define ALLOWED_MATCH_OPTIONS \ + (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \ + PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \ + PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT) + +/* This is the callout function. Its only purpose is to halt matching if there +are more than 100 callouts, as one way of stopping too much time being spent on +fruitless matches. The callout data is a pointer to the counter. */ + +static int callout_function(pcre2_callout_block *cb, void *callout_data) +{ +(void)cb; /* Avoid unused parameter warning */ +*((uint32_t *)callout_data) += 1; +return (*((uint32_t *)callout_data) > 100)? PCRE2_ERROR_CALLOUT : 0; +} + +/* Putting in this apparently unnecessary prototype prevents gcc from giving a +"no previous prototype" warning when compiling at high warning level.
*/ + +int LLVMFuzzerTestOneInput(const unsigned char *, size_t); + +/* Here's the driving function. */ + +int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size) +{ +uint32_t compile_options; +uint32_t match_options; +pcre2_match_data *match_data = NULL; +pcre2_match_context *match_context = NULL; +size_t match_size; +int dfa_workspace[DFA_WORKSPACE_COUNT]; +int r1, r2; +int i; + +if (size < 1) return 0; + +/* Limiting the length of the subject for matching stops fruitless searches +in large trees taking too much time. */ + +match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size; + +/* Figure out some options to use. Initialize the random number to ensure +repeatability. Ensure that we get a 32-bit unsigned random number for testing +options. (RAND_MAX is required to be at least 32767, but is commonly +2147483647, which excludes the top bit.) */ + +srand((unsigned int)(data[size/2])); +r1 = rand(); +r2 = rand(); + +/* Ensure that all undefined option bits are zero (waste of time trying them) +and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the +input is UTF-8. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is no +reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set because +\C in random patterns is highly likely to cause a crash. */ + +compile_options = + ((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_COMPILE_OPTIONS) | + PCRE2_NEVER_BACKSLASH_C; + +match_options = + ((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_MATCH_OPTIONS); + +/* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not +allowed together and just give an immediate error return. */ + +if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0) + match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT); + +/* Do the compile with and without the options, and after a successful compile, +likewise do the match with and without the options. 
*/ + +for (i = 0; i < 2; i++) + { + uint32_t callout_count; + int errorcode; + PCRE2_SIZE erroroffset; + pcre2_code *code; + +#ifdef STANDALONE + printf("Compile options %.8x never_backslash_c", compile_options); + printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + ((compile_options & PCRE2_ALT_BSUX) != 0)? ",alt_bsux" : "", + ((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? ",alt_circumflex" : "", + ((compile_options & PCRE2_ALT_VERBNAMES) != 0)? ",alt_verbnames" : "", + ((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? ",allow_empty_class" : "", + ((compile_options & PCRE2_ANCHORED) != 0)? ",anchored" : "", + ((compile_options & PCRE2_AUTO_CALLOUT) != 0)? ",auto_callout" : "", + ((compile_options & PCRE2_CASELESS) != 0)? ",caseless" : "", + ((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? ",dollar_endonly" : "", + ((compile_options & PCRE2_DOTALL) != 0)? ",dotall" : "", + ((compile_options & PCRE2_DUPNAMES) != 0)? ",dupnames" : "", + ((compile_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "", + ((compile_options & PCRE2_EXTENDED) != 0)? ",extended" : "", + ((compile_options & PCRE2_FIRSTLINE) != 0)? ",firstline" : "", + ((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? ",match_unset_backref" : "", + ((compile_options & PCRE2_MULTILINE) != 0)? ",multiline" : "", + ((compile_options & PCRE2_NEVER_UCP) != 0)? ",never_ucp" : "", + ((compile_options & PCRE2_NEVER_UTF) != 0)? ",never_utf" : "", + ((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? ",no_auto_capture" : "", + ((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? ",no_auto_possess" : "", + ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? ",no_dotstar_anchor" : "", + ((compile_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "", + ((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? ",no_start_optimize" : "", + ((compile_options & PCRE2_UCP) != 0)? ",ucp" : "", + ((compile_options & PCRE2_UNGREEDY) != 0)? ",ungreedy" : "", + ((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? 
",use_offset_limit" : "", + ((compile_options & PCRE2_UTF) != 0)? ",utf" : ""); +#endif + + code = pcre2_compile((PCRE2_SPTR)data, (PCRE2_SIZE)size, compile_options, + &errorcode, &erroroffset, NULL); + + /* Compilation succeeded */ + + if (code != NULL) + { + int j; + uint32_t save_match_options = match_options; + + /* Create match data and context blocks only when we first need them. Set + low match and depth limits to avoid wasting too much searching large + pattern trees. Almost all matches are going to fail. */ + + if (match_data == NULL) + { + match_data = pcre2_match_data_create(32, NULL); + if (match_data == NULL) + { +#ifdef STANDALONE + printf("** Failed to create match data block\n"); +#endif + return 0; + } + } + + if (match_context == NULL) + { + match_context = pcre2_match_context_create(NULL); + if (match_context == NULL) + { +#ifdef STANDALONE + printf("** Failed to create match context block\n"); +#endif + return 0; + } + (void)pcre2_set_match_limit(match_context, 100); + (void)pcre2_set_depth_limit(match_context, 100); + (void)pcre2_set_callout(match_context, callout_function, &callout_count); + } + + /* Match twice, with and without options. */ + + for (j = 0; j < 2; j++) + { +#ifdef STANDALONE + printf("Match options %.8x", match_options); + printf("%s%s%s%s%s%s%s%s%s%s\n", + ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "", + ((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "", + ((match_options & PCRE2_NO_JIT) != 0)? ",no_jit" : "", + ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "", + ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "", + ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "", + ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "", + ((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "", + ((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "", + ((match_options & PCRE2_PARTIAL_SOFT) != 0)? 
",partial_soft" : ""); +#endif + + callout_count = 0; + errorcode = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)match_size, 0, + match_options, match_data, match_context); + +#ifdef STANDALONE + if (errorcode >= 0) printf("Match returned %d\n", errorcode); else + { + unsigned char buffer[256]; + pcre2_get_error_message(errorcode, buffer, 256); + printf("Match failed: error %d: %s\n", errorcode, buffer); + } +#endif + + match_options = 0; /* For second time */ + } + + /* Match with DFA twice, with and without options. */ + + match_options = save_match_options & ~PCRE2_NO_JIT; /* Not valid for DFA */ + + for (j = 0; j < 2; j++) + { +#ifdef STANDALONE + printf("DFA match options %.8x", match_options); + printf("%s%s%s%s%s%s%s%s%s\n", + ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "", + ((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "", + ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "", + ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "", + ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "", + ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "", + ((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "", + ((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "", + ((match_options & PCRE2_PARTIAL_SOFT) != 0)? 
",partial_soft" : ""); +#endif + + callout_count = 0; + errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)data, + (PCRE2_SIZE)match_size, 0, match_options, match_data, match_context, + dfa_workspace, DFA_WORKSPACE_COUNT); + +#ifdef STANDALONE + if (errorcode >= 0) printf("Match returned %d\n", errorcode); else + { + unsigned char buffer[256]; + pcre2_get_error_message(errorcode, buffer, 256); + printf("Match failed: error %d: %s\n", errorcode, buffer); + } +#endif + + match_options = 0; /* For second time */ + } + + match_options = save_match_options; /* Reset for the second compile */ + pcre2_code_free(code); + } + + /* Compilation failed */ + + else + { + unsigned char buffer[256]; + pcre2_get_error_message(errorcode, buffer, 256); +#ifdef STANDALONE + printf("Error %d at offset %lu: %s\n", errorcode, erroroffset, buffer); +#else + if (strstr((const char *)buffer, "internal error") != NULL) abort(); +#endif + } + + compile_options = PCRE2_NEVER_BACKSLASH_C; /* For second time */ + } + +if (match_data != NULL) pcre2_match_data_free(match_data); +if (match_context != NULL) pcre2_match_context_free(match_context); + +return 0; +} + + +/* Optional main program. */ + +#ifdef STANDALONE +int main(int argc, char **argv) +{ +int i; + +if (argc < 2) + { + printf("** No arguments given\n"); + return 0; + } + +for (i = 1; i < argc; i++) + { + size_t filelen; + size_t readsize; + unsigned char *buffer; + FILE *f; + + /* Handle a literal string. Copy to an exact size buffer so that checks for + overrunning work. 
*/ + + if (argv[i][0] == '=') + { + readsize = strlen(argv[i]) - 1; + printf("------ ------\n"); + printf("Length = %lu\n", readsize); + printf("%.*s\n", (int)readsize, argv[i]+1); + buffer = (unsigned char *)malloc(readsize); + if (buffer == NULL) + printf("** Failed to allocate %lu bytes of memory\n", readsize); + else + { + memcpy(buffer, argv[i]+1, readsize); + LLVMFuzzerTestOneInput(buffer, readsize); + free(buffer); + } + continue; + } + + /* Handle a string given in a file */ + + f = fopen(argv[i], "rb"); + if (f == NULL) + { + printf("** Failed to open %s: %s\n", argv[i], strerror(errno)); + continue; + } + + printf("------ %s ------\n", argv[i]); + + fseek(f, 0, SEEK_END); + filelen = ftell(f); + fseek(f, 0, SEEK_SET); + + buffer = (unsigned char *)malloc(filelen); + if (buffer == NULL) + { + printf("** Failed to allocate %lu bytes of memory\n", filelen); + fclose(f); + continue; + } + + readsize = fread(buffer, 1, filelen, f); + fclose(f); + + if (readsize != filelen) + printf("** File size is %lu but fread() returned %lu\n", filelen, readsize); + else + { + printf("Length = %lu\n", filelen); + LLVMFuzzerTestOneInput(buffer, filelen); + } + free(buffer); + } + +return 0; +} +#endif /* STANDALONE */ + +/* End */ diff --git a/pcre2/src/pcre2_jit_compile.c b/pcre2/src/pcre2_jit_compile.c index 33ad7e655..1977d28aa 100644 --- a/pcre2/src/pcre2_jit_compile.c +++ b/pcre2/src/pcre2_jit_compile.c @@ -1466,9 +1466,9 @@ do default: accelerated_start = NULL; fast_forward_allowed = FALSE; - break; + continue; } - continue; + break; case OP_ONCE: case OP_BRA: @@ -1834,57 +1834,57 @@ while (cc < ccend) case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO: - repeat_check = FALSE; size = 1; + repeat_check = FALSE; break; CASE_ITERATOR_PRIVATE_DATA_1 - space = 1; size = -2; + space = 1; break; CASE_ITERATOR_PRIVATE_DATA_2A - space = 2; size = -2; + space = 2; break; CASE_ITERATOR_PRIVATE_DATA_2B - space = 2; size = -(2 + IMM2_SIZE); + space = 2; break; 
CASE_ITERATOR_TYPE_PRIVATE_DATA_1 - space = 1; size = 1; + space = 1; break; CASE_ITERATOR_TYPE_PRIVATE_DATA_2A + size = 1; if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI) space = 2; - size = 1; break; case OP_TYPEUPTO: + size = 1 + IMM2_SIZE; if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI) space = 2; - size = 1 + IMM2_SIZE; break; case OP_TYPEMINUPTO: - space = 2; size = 1 + IMM2_SIZE; + space = 2; break; case OP_CLASS: case OP_NCLASS: - space = get_class_iterator_size(cc + size); size = 1 + 32 / sizeof(PCRE2_UCHAR); + space = get_class_iterator_size(cc + size); break; #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 case OP_XCLASS: - space = get_class_iterator_size(cc + size); size = GET(cc, 1); + space = get_class_iterator_size(cc + size); break; #endif @@ -4578,7 +4578,14 @@ if (common->nltype != NLTYPE_ANY) /* All newlines are ascii, just skip intermediate octets. */ jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); loop = LABEL(); - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS) + sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + else + { + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0); CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); @@ -6161,9 +6168,9 @@ static SLJIT_INLINE void fast_forward_newline(compiler_common *common) { DEFINE_COMPILER; struct sljit_label *loop; -struct sljit_jump *lastchar; +struct sljit_jump *lastchar = NULL; struct sljit_jump *firstchar; -struct sljit_jump *quit; +struct sljit_jump *quit = NULL; struct sljit_jump *foundcr = NULL; struct sljit_jump *notfoundnl; jump_list *newline = NULL; @@ -6176,39 
+6183,71 @@ if (common->match_end_ptr != 0) if (common->nltype == NLTYPE_FIXED && common->newline > 255) { - lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - if (HAS_VIRTUAL_REGISTERS) +#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD + if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE) { - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + } + else + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + } + firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); + + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL); +#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + + fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); } else +#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */ { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); - } - firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); + lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + if 
(HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + } + else + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + } + firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2)); - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2)); + OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL); #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT); #endif - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); - loop = LABEL(); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); - CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop); - CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop); + loop = LABEL(); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop); + CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 
common->newline & 0xff, loop); + + JUMPHERE(quit); + JUMPHERE(lastchar); + } - JUMPHERE(quit); JUMPHERE(firstchar); - JUMPHERE(lastchar); if (common->match_end_ptr != 0) OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); @@ -6225,22 +6264,59 @@ else /* Example: match /^/ to \r\n from offset 1. */ firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); -move_back(common, NULL, FALSE); + +if (common->nltype == NLTYPE_ANY) + move_back(common, NULL, FALSE); +else + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); loop = LABEL(); common->ff_newline_shortcut = loop; -read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE); -lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); -if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) - foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); -check_newlinechar(common, common->nltype, &newline, FALSE); -set_jumps(newline, loop); +#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD +if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF)) + { + if (common->nltype == NLTYPE_ANYCRLF) + { + fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0); + if (common->mode != PCRE2_JIT_COMPLETE) + lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); + } + else + { + fast_forward_char_simd(common, common->newline, common->newline, 0); + + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + if (common->mode != PCRE2_JIT_COMPLETE) + { + OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); + CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); + } + } + } +else +#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */ + { + read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE); + lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + if 
(common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) + foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); + check_newlinechar(common, common->nltype, &newline, FALSE); + set_jumps(newline, loop); + } if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) { - quit = JUMP(SLJIT_JUMP); - JUMPHERE(foundcr); + if (quit == NULL) + { + quit = JUMP(SLJIT_JUMP); + JUMPHERE(foundcr); + } + notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL); @@ -6252,7 +6328,9 @@ if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) JUMPHERE(notfoundnl); JUMPHERE(quit); } -JUMPHERE(lastchar); + +if (lastchar) + JUMPHERE(lastchar); JUMPHERE(firstchar); if (common->match_end_ptr != 0) @@ -6493,9 +6571,11 @@ if (common->invalid_utf) if (common->mode != PCRE2_JIT_COMPLETE) { + OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); move_back(common, NULL, TRUE); check_start_used_ptr(common); + OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0); } } @@ -7594,25 +7674,43 @@ if (needstype || needsscript) } cc = ccbegin; + + if (needstype) + { + /* TMP2 has already been shifted by 2 */ + if (!needschar) + { + OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); + + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); + } + else + { + OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + + OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); + OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); + typereg = RETURN_ADDR; + } + } + else if (needschar) + OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); } - - if (needschar) - OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); - - if (needstype) + else if 
(needstype) { + OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); + if (!needschar) { - OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3); - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); } else { - OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); @@ -7620,6 +7718,8 @@ if (needstype || needsscript) typereg = RETURN_ADDR; } } + else if (needschar) + OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); } #endif /* SUPPORT_UNICODE */ @@ -13581,7 +13681,7 @@ if (common->has_then) set_then_offsets(common, common->start, NULL); } -compiler = sljit_create_compiler(allocator_data); +compiler = sljit_create_compiler(allocator_data, NULL); if (!compiler) { SLJIT_FREE(common->optimized_cbracket, allocator_data); @@ -13983,7 +14083,7 @@ else { /* This case is highly unlikely since we just recently freed a lot of memory. Not impossible though. */ - sljit_free_code(executable_func); + sljit_free_code(executable_func, NULL); PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); return PCRE2_ERROR_NOMEMORY; } @@ -14097,13 +14197,13 @@ if (executable_allocator_is_working == 0) /* Checks whether the executable allocator is working. This check might run multiple times in multi-threaded environments, but the result should not be affected by it. 
*/ - void *ptr = SLJIT_MALLOC_EXEC(32); + void *ptr = SLJIT_MALLOC_EXEC(32, NULL); executable_allocator_is_working = -1; if (ptr != NULL) { - SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr)); + SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL); executable_allocator_is_working = 1; } } diff --git a/pcre2/src/pcre2_jit_misc.c b/pcre2/src/pcre2_jit_misc.c index 36abdbaf9..ec924e0f9 100644 --- a/pcre2/src/pcre2_jit_misc.c +++ b/pcre2/src/pcre2_jit_misc.c @@ -89,7 +89,7 @@ int i; for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++) { if (functions->executable_funcs[i] != NULL) - sljit_free_code(functions->executable_funcs[i]); + sljit_free_code(functions->executable_funcs[i], NULL); PRIV(jit_free_rodata)(functions->read_only_data_heads[i], allocator_data); } diff --git a/pcre2/src/pcre2_jit_neon_inc.h b/pcre2/src/pcre2_jit_neon_inc.h new file mode 100644 index 000000000..150da29eb --- /dev/null +++ b/pcre2/src/pcre2_jit_neon_inc.h @@ -0,0 +1,347 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + This module by Zoltan Herczeg and Sebastian Pop + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2019 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+----------------------------------------------------------------------------- +*/ + +# if defined(FFCS) +# if defined(FF_UTF) +# define FF_FUN ffcs_utf +# else +# define FF_FUN ffcs +# endif + +# elif defined(FFCS_2) +# if defined(FF_UTF) +# define FF_FUN ffcs_2_utf +# else +# define FF_FUN ffcs_2 +# endif + +# elif defined(FFCS_MASK) +# if defined(FF_UTF) +# define FF_FUN ffcs_mask_utf +# else +# define FF_FUN ffcs_mask +# endif + +# elif defined(FFCPS_0) +# if defined (FF_UTF) +# define FF_FUN ffcps_0_utf +# else +# define FF_FUN ffcps_0 +# endif + +# elif defined (FFCPS_1) +# if defined (FF_UTF) +# define FF_FUN ffcps_1_utf +# else +# define FF_FUN ffcps_1 +# endif + +# elif defined (FFCPS_DEFAULT) +# if defined (FF_UTF) +# define FF_FUN ffcps_default_utf +# else +# define FF_FUN ffcps_default +# endif +# endif + +static sljit_u8* SLJIT_FUNC FF_FUN(sljit_u8 *str_end, sljit_u8 *str_ptr, sljit_uw offs1, sljit_uw offs2, sljit_uw chars) +#undef FF_FUN +{ +quad_word qw; +int_char ic; + +SLJIT_UNUSED_ARG(offs1); +SLJIT_UNUSED_ARG(offs2); + +ic.x = chars; + +#if defined(FFCS) +sljit_u8 c1 = ic.c.c1; +vect_t vc1 = VDUPQ(c1); + +#elif defined(FFCS_2) +sljit_u8 c1 = ic.c.c1; +vect_t vc1 = VDUPQ(c1); +sljit_u8 c2 = ic.c.c2; +vect_t vc2 = VDUPQ(c2); + +#elif defined(FFCS_MASK) +sljit_u8 c1 = ic.c.c1; +vect_t vc1 = VDUPQ(c1); +sljit_u8 mask = ic.c.c2; +vect_t vmask = VDUPQ(mask); +#endif + +#if defined(FFCPS) +compare_type compare1_type = compare_match1; +compare_type compare2_type = compare_match1; +vect_t cmp1a, cmp1b, cmp2a, cmp2b; +const sljit_u32 diff = IN_UCHARS(offs1 - offs2); +PCRE2_UCHAR char1a = ic.c.c1; +PCRE2_UCHAR char2a = ic.c.c3; + +# ifdef FFCPS_CHAR1A2A +cmp1a = VDUPQ(char1a); +cmp2a = VDUPQ(char2a); +cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */ +cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */ +# else +PCRE2_UCHAR char1b = ic.c.c2; +PCRE2_UCHAR char2b = ic.c.c4; +if (char1a == 
char1b) + { + cmp1a = VDUPQ(char1a); + cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */ + } +else + { + sljit_u32 bit1 = char1a ^ char1b; + if (is_powerof2(bit1)) + { + compare1_type = compare_match1i; + cmp1a = VDUPQ(char1a | bit1); + cmp1b = VDUPQ(bit1); + } + else + { + compare1_type = compare_match2; + cmp1a = VDUPQ(char1a); + cmp1b = VDUPQ(char1b); + } + } + +if (char2a == char2b) + { + cmp2a = VDUPQ(char2a); + cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */ + } +else + { + sljit_u32 bit2 = char2a ^ char2b; + if (is_powerof2(bit2)) + { + compare2_type = compare_match1i; + cmp2a = VDUPQ(char2a | bit2); + cmp2b = VDUPQ(bit2); + } + else + { + compare2_type = compare_match2; + cmp2a = VDUPQ(char2a); + cmp2b = VDUPQ(char2b); + } + } +# endif + +str_ptr += IN_UCHARS(offs1); +#endif + +#if PCRE2_CODE_UNIT_WIDTH != 8 +vect_t char_mask = VDUPQ(0xff); +#endif + +#if defined(FF_UTF) +restart:; +#endif + +#if defined(FFCPS) +sljit_u8 *p1 = str_ptr - diff; +#endif +sljit_s32 align_offset = ((uint64_t)str_ptr & 0xf); +str_ptr = (sljit_u8 *) ((uint64_t)str_ptr & ~0xf); +vect_t data = VLD1Q(str_ptr); +#if PCRE2_CODE_UNIT_WIDTH != 8 +data = VANDQ(data, char_mask); +#endif + +#if defined(FFCS) +vect_t eq = VCEQQ(data, vc1); + +#elif defined(FFCS_2) +vect_t eq1 = VCEQQ(data, vc1); +vect_t eq2 = VCEQQ(data, vc2); +vect_t eq = VORRQ(eq1, eq2); + +#elif defined(FFCS_MASK) +vect_t eq = VORRQ(data, vmask); +eq = VCEQQ(eq, vc1); + +#elif defined(FFCPS) +# if defined(FFCPS_DIFF1) +vect_t prev_data = data; +# endif + +vect_t data2; +if (p1 < str_ptr) + { + data2 = VLD1Q(str_ptr - diff); +#if PCRE2_CODE_UNIT_WIDTH != 8 + data2 = VANDQ(data2, char_mask); +#endif + } +else + data2 = shift_left_n_lanes(data, offs1 - offs2); + +if (compare1_type == compare_match1) + data = VCEQQ(data, cmp1a); +else + data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b); + +if (compare2_type == compare_match1) + 
data2 = VCEQQ(data2, cmp2a); +else + data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b); + +vect_t eq = VANDQ(data, data2); +#endif + +VST1Q(qw.mem, eq); +/* Ignore matches before the first STR_PTR. */ +if (align_offset < 8) + { + qw.dw[0] >>= align_offset * 8; + if (qw.dw[0]) + { + str_ptr += align_offset + __builtin_ctzll(qw.dw[0]) / 8; + goto match; + } + if (qw.dw[1]) + { + str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8; + goto match; + } + } +else + { + qw.dw[1] >>= (align_offset - 8) * 8; + if (qw.dw[1]) + { + str_ptr += align_offset + __builtin_ctzll(qw.dw[1]) / 8; + goto match; + } + } +str_ptr += 16; + +while (str_ptr < str_end) + { + vect_t orig_data = VLD1Q(str_ptr); +#if PCRE2_CODE_UNIT_WIDTH != 8 + orig_data = VANDQ(orig_data, char_mask); +#endif + data = orig_data; + +#if defined(FFCS) + eq = VCEQQ(data, vc1); + +#elif defined(FFCS_2) + eq1 = VCEQQ(data, vc1); + eq2 = VCEQQ(data, vc2); + eq = VORRQ(eq1, eq2); + +#elif defined(FFCS_MASK) + eq = VORRQ(data, vmask); + eq = VCEQQ(eq, vc1); +#endif + +#if defined(FFCPS) +# if defined (FFCPS_DIFF1) + data2 = VEXTQ(prev_data, data, VECTOR_FACTOR - 1); +# else + data2 = VLD1Q(str_ptr - diff); +# if PCRE2_CODE_UNIT_WIDTH != 8 + data2 = VANDQ(data2, char_mask); +# endif +# endif + +# ifdef FFCPS_CHAR1A2A + data = VCEQQ(data, cmp1a); + data2 = VCEQQ(data2, cmp2a); +# else + if (compare1_type == compare_match1) + data = VCEQQ(data, cmp1a); + else + data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b); + if (compare2_type == compare_match1) + data2 = VCEQQ(data2, cmp2a); + else + data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b); +# endif + + eq = VANDQ(data, data2); +#endif + + VST1Q(qw.mem, eq); + if (qw.dw[0]) + str_ptr += __builtin_ctzll(qw.dw[0]) / 8; + else if (qw.dw[1]) + str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8; + else { + str_ptr += 16; +#if defined (FFCPS_DIFF1) + prev_data = orig_data; +#endif + continue; + } + +match:; + if (str_ptr 
>= str_end) + /* Failed match. */ + return NULL; + +#if defined(FF_UTF) + if (utf_continue(str_ptr + IN_UCHARS(-offs1))) + { + /* Not a match. */ + str_ptr += IN_UCHARS(1); + goto restart; + } +#endif + + /* Match. */ +#if defined (FFCPS) + str_ptr -= IN_UCHARS(offs1); +#endif + return str_ptr; + } + +/* Failed match. */ +return NULL; +} diff --git a/pcre2/src/pcre2_jit_simd_inc.h b/pcre2/src/pcre2_jit_simd_inc.h new file mode 100644 index 000000000..5673d338c --- /dev/null +++ b/pcre2/src/pcre2_jit_simd_inc.h @@ -0,0 +1,1123 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + This module by Zoltan Herczeg + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2019 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg) +{ +#if PCRE2_CODE_UNIT_WIDTH == 8 +OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0); +return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80); +#elif PCRE2_CODE_UNIT_WIDTH == 16 +OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00); +return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00); +#else +#error "Unknown code width" +#endif +} +#endif + +static sljit_s32 character_to_int32(PCRE2_UCHAR chr) +{ +sljit_u32 value = chr; +#if PCRE2_CODE_UNIT_WIDTH == 8 +#define SSE2_COMPARE_TYPE_INDEX 0 +return (sljit_s32)((value << 24) | (value << 16) | (value << 8) | value); +#elif PCRE2_CODE_UNIT_WIDTH == 16 +#define SSE2_COMPARE_TYPE_INDEX 1 +return (sljit_s32)((value << 16) | value); +#elif PCRE2_CODE_UNIT_WIDTH == 32 +#define SSE2_COMPARE_TYPE_INDEX 2 +return (sljit_s32)(value); +#else +#error "Unsupported unit width" +#endif +} + +static void load_from_mem_sse2(struct sljit_compiler *compiler, sljit_s32 
dst_xmm_reg, sljit_s32 src_general_reg, sljit_s8 offset) +{ +sljit_u8 instruction[5]; + +SLJIT_ASSERT(dst_xmm_reg < 8); +SLJIT_ASSERT(src_general_reg < 8); + +/* MOVDQA xmm1, xmm2/m128 */ +instruction[0] = ((sljit_u8)offset & 0xf) == 0 ? 0x66 : 0xf3; +instruction[1] = 0x0f; +instruction[2] = 0x6f; + +if (offset == 0) + { + instruction[3] = (dst_xmm_reg << 3) | src_general_reg; + sljit_emit_op_custom(compiler, instruction, 4); + return; + } + +instruction[3] = 0x40 | (dst_xmm_reg << 3) | src_general_reg; +instruction[4] = (sljit_u8)offset; +sljit_emit_op_custom(compiler, instruction, 5); +} + +typedef enum { + sse2_compare_match1, + sse2_compare_match1i, + sse2_compare_match2, +} sse2_compare_type; + +static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, sse2_compare_type compare_type, + int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind) +{ +sljit_u8 instruction[4]; +instruction[0] = 0x66; +instruction[1] = 0x0f; + +SLJIT_ASSERT(step >= 0 && step <= 3); + +if (compare_type != sse2_compare_match2) + { + if (step == 0) + { + if (compare_type == sse2_compare_match1i) + { + /* POR xmm1, xmm2/m128 */ + /* instruction[0] = 0x66; */ + /* instruction[1] = 0x0f; */ + instruction[2] = 0xeb; + instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind; + sljit_emit_op_custom(compiler, instruction, 4); + } + return; + } + + if (step != 2) + return; + + /* PCMPEQB/W/D xmm1, xmm2/m128 */ + /* instruction[0] = 0x66; */ + /* instruction[1] = 0x0f; */ + instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; + instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; + sljit_emit_op_custom(compiler, instruction, 4); + return; + } + +switch (step) + { + case 0: + /* MOVDQA xmm1, xmm2/m128 */ + /* instruction[0] = 0x66; */ + /* instruction[1] = 0x0f; */ + instruction[2] = 0x6f; + instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind; + sljit_emit_op_custom(compiler, instruction, 4); + return; + + case 1: + /* PCMPEQB/W/D xmm1, xmm2/m128 */ + /* 
instruction[0] = 0x66; */ + /* instruction[1] = 0x0f; */ + instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; + instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; + sljit_emit_op_custom(compiler, instruction, 4); + return; + + case 2: + /* PCMPEQB/W/D xmm1, xmm2/m128 */ + /* instruction[0] = 0x66; */ + /* instruction[1] = 0x0f; */ + instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; + instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind; + sljit_emit_op_custom(compiler, instruction, 4); + return; + + case 3: + /* POR xmm1, xmm2/m128 */ + /* instruction[0] = 0x66; */ + /* instruction[1] = 0x0f; */ + instruction[2] = 0xeb; + instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind; + sljit_emit_op_custom(compiler, instruction, 4); + return; + } +} + +#define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2)) + +static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) +{ +DEFINE_COMPILER; +struct sljit_label *start; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_label *restart; +#endif +struct sljit_jump *quit; +struct sljit_jump *partial_quit[2]; +sse2_compare_type compare_type = sse2_compare_match1; +sljit_u8 instruction[8]; +sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); +sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); +sljit_s32 data_ind = 0; +sljit_s32 tmp_ind = 1; +sljit_s32 cmp1_ind = 2; +sljit_s32 cmp2_ind = 3; +sljit_u32 bit = 0; +int i; + +SLJIT_UNUSED_ARG(offset); + +if (char1 != char2) + { + bit = char1 ^ char2; + compare_type = sse2_compare_match1i; + + if (!is_powerof2(bit)) + { + bit = 0; + compare_type = sse2_compare_match2; + } + } + +partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, &common->failed_match, partial_quit[0]); + +/* First part (unaligned start) */ + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); + 
+SLJIT_ASSERT(tmp1_reg_ind < 8); + +/* MOVD xmm, r/m32 */ +instruction[0] = 0x66; +instruction[1] = 0x0f; +instruction[2] = 0x6e; +instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_reg_ind; +sljit_emit_op_custom(compiler, instruction, 4); + +if (char1 != char2) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2)); + + /* MOVD xmm, r/m32 */ + instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_reg_ind; + sljit_emit_op_custom(compiler, instruction, 4); + } + +OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); + +/* PSHUFD xmm1, xmm2/m128, imm8 */ +/* instruction[0] = 0x66; */ +/* instruction[1] = 0x0f; */ +instruction[2] = 0x70; +instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind; +instruction[4] = 0; +sljit_emit_op_custom(compiler, instruction, 5); + +if (char1 != char2) + { + /* PSHUFD xmm1, xmm2/m128, imm8 */ + instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind; + sljit_emit_op_custom(compiler, instruction, 5); + } + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +restart = LABEL(); +#endif +OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); + +load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); +for (i = 0; i < 4; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + +/* PMOVMSKB reg, xmm */ +/* instruction[0] = 0x66; */ +/* instruction[1] = 0x0f; */ +instruction[2] = 0xd7; +instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind; +sljit_emit_op_custom(compiler, instruction, 4); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); +OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); + +quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +/* Second part (aligned) */ +start = LABEL(); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); + +partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, 
&common->failed_match, partial_quit[1]); + +load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); +for (i = 0; i < 4; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + +/* PMOVMSKB reg, xmm */ +/* instruction[0] = 0x66; */ +/* instruction[1] = 0x0f; */ +instruction[2] = 0xd7; +instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind; +sljit_emit_op_custom(compiler, instruction, 4); + +CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); + +JUMPHERE(quit); + +/* BSF r32, r/m32 */ +instruction[0] = 0x0f; +instruction[1] = 0xbc; +instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind; +sljit_emit_op_custom(compiler, instruction, 3); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + +if (common->mode != PCRE2_JIT_COMPLETE) + { + JUMPHERE(partial_quit[0]); + JUMPHERE(partial_quit[1]); + OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); + CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); + } +else + add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf && offset > 0) + { + SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); + + quit = jump_if_utf_char_start(compiler, TMP1); + + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, restart); + + JUMPHERE(quit); + } +#endif +} + +#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2)) + +static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2) +{ +DEFINE_COMPILER; +struct sljit_label *start; +struct sljit_jump *quit; +jump_list *not_found = NULL; +sse2_compare_type compare_type = sse2_compare_match1; +sljit_u8 
instruction[8]; +sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); +sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); +sljit_s32 data_ind = 0; +sljit_s32 tmp_ind = 1; +sljit_s32 cmp1_ind = 2; +sljit_s32 cmp2_ind = 3; +sljit_u32 bit = 0; +int i; + +if (char1 != char2) + { + bit = char1 ^ char2; + compare_type = sse2_compare_match1i; + + if (!is_powerof2(bit)) + { + bit = 0; + compare_type = sse2_compare_match2; + } + } + +add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); +OP1(SLJIT_MOV, TMP2, 0, TMP1, 0); +OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); + +/* First part (unaligned start) */ + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); + +SLJIT_ASSERT(tmp1_reg_ind < 8); + +/* MOVD xmm, r/m32 */ +instruction[0] = 0x66; +instruction[1] = 0x0f; +instruction[2] = 0x6e; +instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_reg_ind; +sljit_emit_op_custom(compiler, instruction, 4); + +if (char1 != char2) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? 
bit : char2)); + + /* MOVD xmm, r/m32 */ + instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_reg_ind; + sljit_emit_op_custom(compiler, instruction, 4); + } + +OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0); + +/* PSHUFD xmm1, xmm2/m128, imm8 */ +/* instruction[0] = 0x66; */ +/* instruction[1] = 0x0f; */ +instruction[2] = 0x70; +instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind; +instruction[4] = 0; +sljit_emit_op_custom(compiler, instruction, 5); + +if (char1 != char2) + { + /* PSHUFD xmm1, xmm2/m128, imm8 */ + instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind; + sljit_emit_op_custom(compiler, instruction, 5); + } + +OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); + +load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); +for (i = 0; i < 4; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + +/* PMOVMSKB reg, xmm */ +/* instruction[0] = 0x66; */ +/* instruction[1] = 0x0f; */ +instruction[2] = 0xd7; +instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind; +sljit_emit_op_custom(compiler, instruction, 4); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); +OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); + +quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +/* Second part (aligned) */ +start = LABEL(); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); + +add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); +for (i = 0; i < 4; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + +/* PMOVMSKB reg, xmm */ +/* instruction[0] = 0x66; */ +/* instruction[1] = 0x0f; */ +instruction[2] = 0xd7; +instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind; +sljit_emit_op_custom(compiler, instruction, 4); + +CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); + 
+JUMPHERE(quit); + +/* BSF r32, r/m32 */ +instruction[0] = 0x0f; +instruction[1] = 0xbc; +instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind; +sljit_emit_op_custom(compiler, instruction, 3); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0); +add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); + +OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); +return not_found; +} + +#ifndef _WIN64 + +static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void) +{ +#if PCRE2_CODE_UNIT_WIDTH == 8 +return 15; +#elif PCRE2_CODE_UNIT_WIDTH == 16 +return 7; +#elif PCRE2_CODE_UNIT_WIDTH == 32 +return 3; +#else +#error "Unsupported unit width" +#endif +} + +#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2)) + +static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, + PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) +{ +DEFINE_COMPILER; +sse2_compare_type compare1_type = sse2_compare_match1; +sse2_compare_type compare2_type = sse2_compare_match1; +sljit_u32 bit1 = 0; +sljit_u32 bit2 = 0; +sljit_u32 diff = IN_UCHARS(offs1 - offs2); +sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); +sljit_s32 tmp2_reg_ind = sljit_get_register_index(TMP2); +sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); +sljit_s32 data1_ind = 0; +sljit_s32 data2_ind = 1; +sljit_s32 tmp1_ind = 2; +sljit_s32 tmp2_ind = 3; +sljit_s32 cmp1a_ind = 4; +sljit_s32 cmp1b_ind = 5; +sljit_s32 cmp2a_ind = 6; +sljit_s32 cmp2b_ind = 7; +struct sljit_label *start; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_label *restart; +#endif +struct sljit_jump *jump[2]; +sljit_u8 instruction[8]; +int i; + +SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); +SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset())); +SLJIT_ASSERT(tmp1_reg_ind < 8 && tmp2_reg_ind == 1); + +/* Initialize. 
*/ +if (common->match_end_ptr != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); + + OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0); + CMOV(SLJIT_LESS, STR_END, TMP1, 0); + } + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +/* MOVD xmm, r/m32 */ +instruction[0] = 0x66; +instruction[1] = 0x0f; +instruction[2] = 0x6e; + +if (char1a == char1b) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a)); +else + { + bit1 = char1a ^ char1b; + if (is_powerof2(bit1)) + { + compare1_type = sse2_compare_match1i; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1)); + } + else + { + compare1_type = sse2_compare_match2; + bit1 = 0; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b)); + } + } + +instruction[3] = 0xc0 | (cmp1a_ind << 3) | tmp1_reg_ind; +sljit_emit_op_custom(compiler, instruction, 4); + +if (char1a != char1b) + { + instruction[3] = 0xc0 | (cmp1b_ind << 3) | tmp2_reg_ind; + sljit_emit_op_custom(compiler, instruction, 4); + } + +if (char2a == char2b) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a)); +else + { + bit2 = char2a ^ char2b; + if (is_powerof2(bit2)) + { + compare2_type = sse2_compare_match1i; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2)); + } + else + { + compare2_type = sse2_compare_match2; + bit2 = 0; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b)); + } + } + +instruction[3] = 0xc0 | (cmp2a_ind << 3) | tmp1_reg_ind; 
+sljit_emit_op_custom(compiler, instruction, 4); + +if (char2a != char2b) + { + instruction[3] = 0xc0 | (cmp2b_ind << 3) | tmp2_reg_ind; + sljit_emit_op_custom(compiler, instruction, 4); + } + +/* PSHUFD xmm1, xmm2/m128, imm8 */ +/* instruction[0] = 0x66; */ +/* instruction[1] = 0x0f; */ +instruction[2] = 0x70; +instruction[4] = 0; + +instruction[3] = 0xc0 | (cmp1a_ind << 3) | cmp1a_ind; +sljit_emit_op_custom(compiler, instruction, 5); + +if (char1a != char1b) + { + instruction[3] = 0xc0 | (cmp1b_ind << 3) | cmp1b_ind; + sljit_emit_op_custom(compiler, instruction, 5); + } + +instruction[3] = 0xc0 | (cmp2a_ind << 3) | cmp2a_ind; +sljit_emit_op_custom(compiler, instruction, 5); + +if (char2a != char2b) + { + instruction[3] = 0xc0 | (cmp2b_ind << 3) | cmp2b_ind; + sljit_emit_op_custom(compiler, instruction, 5); + } + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +restart = LABEL(); +#endif + +OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff); +OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); +OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); + +load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0); + +jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0); + +load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff); +jump[1] = JUMP(SLJIT_JUMP); + +JUMPHERE(jump[0]); + +/* MOVDQA xmm1, xmm2/m128 */ +/* instruction[0] = 0x66; */ +/* instruction[1] = 0x0f; */ +instruction[2] = 0x6f; +instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind; +sljit_emit_op_custom(compiler, instruction, 4); + +/* PSLLDQ xmm1, imm8 */ +/* instruction[0] = 0x66; */ +/* instruction[1] = 0x0f; */ +instruction[2] = 0x73; +instruction[3] = 0xc0 | (7 << 3) | data2_ind; +instruction[4] = diff; +sljit_emit_op_custom(compiler, instruction, 5); + +JUMPHERE(jump[1]); + +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); + +for (i = 0; i < 4; i++) + { + fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind); + 
fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind); + } + +/* PAND xmm1, xmm2/m128 */ +/* instruction[0] = 0x66; */ +/* instruction[1] = 0x0f; */ +instruction[2] = 0xdb; +instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind; +sljit_emit_op_custom(compiler, instruction, 4); + +/* PMOVMSKB reg, xmm */ +/* instruction[0] = 0x66; */ +/* instruction[1] = 0x0f; */ +instruction[2] = 0xd7; +instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | 0; +sljit_emit_op_custom(compiler, instruction, 4); + +/* Ignore matches before the first STR_PTR. */ +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); +OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); + +jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +/* Main loop. */ +start = LABEL(); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0); +load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff); + +for (i = 0; i < 4; i++) + { + fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind); + fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind); + } + +/* PAND xmm1, xmm2/m128 */ +/* instruction[0] = 0x66; */ +/* instruction[1] = 0x0f; */ +instruction[2] = 0xdb; +instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind; +sljit_emit_op_custom(compiler, instruction, 4); + +/* PMOVMSKB reg, xmm */ +/* instruction[0] = 0x66; */ +/* instruction[1] = 0x0f; */ +instruction[2] = 0xd7; +instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | 0; +sljit_emit_op_custom(compiler, instruction, 4); + +CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); + +JUMPHERE(jump[0]); + +/* BSF r32, r/m32 */ +instruction[0] = 0x0f; +instruction[1] = 0xbc; +instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | 
tmp1_reg_ind; +sljit_emit_op_custom(compiler, instruction, 3); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +if (common->match_end_ptr != 0) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1)); + + jump[0] = jump_if_utf_char_start(compiler, TMP1); + + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart); + + add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP)); + + JUMPHERE(jump[0]); + } +#endif + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); + +if (common->match_end_ptr != 0) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); +} + +#endif /* !_WIN64 */ + +#undef SSE2_COMPARE_TYPE_INDEX + +#endif /* SLJIT_CONFIG_X86 && !SUPPORT_VALGRIND */ + +#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__)) + +#include + +typedef union { + unsigned int x; + struct { unsigned char c1, c2, c3, c4; } c; +} int_char; + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +static SLJIT_INLINE int utf_continue(sljit_u8 *s) +{ +#if PCRE2_CODE_UNIT_WIDTH == 8 +return (*s & 0xc0) == 0x80; +#elif PCRE2_CODE_UNIT_WIDTH == 16 +return (*s & 0xfc00) == 0xdc00; +#else +#error "Unknown code width" +#endif +} +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */ + +#if PCRE2_CODE_UNIT_WIDTH == 8 +# define VECTOR_FACTOR 16 +# define vect_t uint8x16_t +# define VLD1Q(X) vld1q_u8((sljit_u8 *)(X)) +# define VCEQQ vceqq_u8 +# define VORRQ vorrq_u8 +# define VST1Q vst1q_u8 +# define VDUPQ vdupq_n_u8 +# define VEXTQ vextq_u8 +# define VANDQ vandq_u8 +typedef union { + uint8_t mem[16]; + uint64_t dw[2]; +} quad_word; +#elif PCRE2_CODE_UNIT_WIDTH == 16 +# define VECTOR_FACTOR 8 +# 
define vect_t uint16x8_t +# define VLD1Q(X) vld1q_u16((sljit_u16 *)(X)) +# define VCEQQ vceqq_u16 +# define VORRQ vorrq_u16 +# define VST1Q vst1q_u16 +# define VDUPQ vdupq_n_u16 +# define VEXTQ vextq_u16 +# define VANDQ vandq_u16 +typedef union { + uint16_t mem[8]; + uint64_t dw[2]; +} quad_word; +#else +# define VECTOR_FACTOR 4 +# define vect_t uint32x4_t +# define VLD1Q(X) vld1q_u32((sljit_u32 *)(X)) +# define VCEQQ vceqq_u32 +# define VORRQ vorrq_u32 +# define VST1Q vst1q_u32 +# define VDUPQ vdupq_n_u32 +# define VEXTQ vextq_u32 +# define VANDQ vandq_u32 +typedef union { + uint32_t mem[4]; + uint64_t dw[2]; +} quad_word; +#endif + +#define FFCS +#include "pcre2_jit_neon_inc.h" +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +# define FF_UTF +# include "pcre2_jit_neon_inc.h" +# undef FF_UTF +#endif +#undef FFCS + +#define FFCS_2 +#include "pcre2_jit_neon_inc.h" +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +# define FF_UTF +# include "pcre2_jit_neon_inc.h" +# undef FF_UTF +#endif +#undef FFCS_2 + +#define FFCS_MASK +#include "pcre2_jit_neon_inc.h" +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +# define FF_UTF +# include "pcre2_jit_neon_inc.h" +# undef FF_UTF +#endif +#undef FFCS_MASK + +#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1 + +static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) +{ +DEFINE_COMPILER; +int_char ic; +struct sljit_jump *partial_quit; +/* Save temporary registers. 
*/ +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP3, 0); + +/* Prepare function arguments */ +OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0); +OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0); +OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, offset); + +if (char1 == char2) + { + ic.c.c1 = char1; + ic.c.c2 = char2; + OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf && offset > 0) + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), + SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_utf)); + else + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), + SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs)); +#else + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), + SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs)); +#endif + } +else + { + PCRE2_UCHAR mask = char1 ^ char2; + if (is_powerof2(mask)) + { + ic.c.c1 = char1 | mask; + ic.c.c2 = mask; + OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf && offset > 0) + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), + SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask_utf)); + else + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), + SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask)); +#else + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), + SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask)); +#endif + } + else + { + ic.c.c1 = char1; + ic.c.c2 = char2; + OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf && offset 
> 0) + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), + SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2_utf)); + else + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), + SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2)); +#else + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), + SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2)); +#endif + } + } +/* Restore registers. */ +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); +OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); + +/* Check return value. */ +partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, &common->failed_match, partial_quit); + +/* Fast forward STR_PTR to the result of memchr. */ +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); + +if (common->mode != PCRE2_JIT_COMPLETE) + JUMPHERE(partial_quit); +} + +typedef enum { + compare_match1, + compare_match1i, + compare_match2, +} compare_type; + +static inline vect_t fast_forward_char_pair_compare(compare_type ctype, vect_t dst, vect_t cmp1, vect_t cmp2) +{ +if (ctype == compare_match2) + { + vect_t tmp = dst; + dst = VCEQQ(dst, cmp1); + tmp = VCEQQ(tmp, cmp2); + dst = VORRQ(dst, tmp); + return dst; + } + +if (ctype == compare_match1i) + dst = VORRQ(dst, cmp2); +dst = VCEQQ(dst, cmp1); +return dst; +} + +static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void) +{ +#if PCRE2_CODE_UNIT_WIDTH == 8 +return 15; +#elif PCRE2_CODE_UNIT_WIDTH == 16 +return 7; +#elif PCRE2_CODE_UNIT_WIDTH == 32 +return 3; +#else +#error "Unsupported unit width" +#endif +} + +/* ARM doesn't have a shift left across lanes. 
*/ +static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n) +{ +vect_t zero = VDUPQ(0); +SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR); +/* VEXTQ takes an immediate as last argument. */ +#define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X); +switch (n) + { + C(1); C(2); C(3); +#if PCRE2_CODE_UNIT_WIDTH != 32 + C(4); C(5); C(6); C(7); +# if PCRE2_CODE_UNIT_WIDTH != 16 + C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15); +# endif +#endif + default: + /* Based on the ASSERT(0 < n && n < VECTOR_FACTOR) above, this won't + happen. The return is still here for compilers to not warn. */ + return a; + } +} + +#define FFCPS +#define FFCPS_DIFF1 +#define FFCPS_CHAR1A2A + +#define FFCPS_0 +#include "pcre2_jit_neon_inc.h" +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +# define FF_UTF +# include "pcre2_jit_neon_inc.h" +# undef FF_UTF +#endif +#undef FFCPS_0 + +#undef FFCPS_CHAR1A2A + +#define FFCPS_1 +#include "pcre2_jit_neon_inc.h" +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +# define FF_UTF +# include "pcre2_jit_neon_inc.h" +# undef FF_UTF +#endif +#undef FFCPS_1 + +#undef FFCPS_DIFF1 + +#define FFCPS_DEFAULT +#include "pcre2_jit_neon_inc.h" +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +# define FF_UTF +# include "pcre2_jit_neon_inc.h" +# undef FF_UTF +#endif +#undef FFCPS + +#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1 + +static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, + PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) +{ +DEFINE_COMPILER; +sljit_u32 diff = IN_UCHARS(offs1 - offs2); +struct sljit_jump *partial_quit; +int_char ic; +SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); +SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset())); +SLJIT_ASSERT(compiler->scratches == 5); + +/* Save temporary register STR_PTR. 
*/ +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); + +/* Prepare arguments for the function call. */ +if (common->match_end_ptr == 0) + OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0); +else + { + OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + OP2(SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); + + OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, STR_END, 0, SLJIT_R0, 0); + CMOV(SLJIT_LESS, SLJIT_R0, STR_END, 0); + } + +OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0); +OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_IMM, offs1); +OP1(SLJIT_MOV_S32, SLJIT_R3, 0, SLJIT_IMM, offs2); +ic.c.c1 = char1a; +ic.c.c2 = char1b; +ic.c.c3 = char2a; +ic.c.c4 = char2b; +OP1(SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_IMM, ic.x); + +if (diff == 1) { + if (char1a == char1b && char2a == char2b) { +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), + SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_0_utf)); + else +#endif + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), + SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_0)); + } else { +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), + SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_1_utf)); + else +#endif + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), + SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_1)); + } +} else { +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), + SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_default_utf)); + else +#endif + 
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), + SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_default)); +} + +/* Restore STR_PTR register. */ +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + +/* Check return value. */ +partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); +add_jump(compiler, &common->failed_match, partial_quit); + +/* Fast forward STR_PTR to the result of memchr. */ +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); + +JUMPHERE(partial_quit); +} + +#endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */ diff --git a/pcre2/src/pcre2_match.c b/pcre2/src/pcre2_match.c index 11289d575..e3f78c2ca 100644 --- a/pcre2/src/pcre2_match.c +++ b/pcre2/src/pcre2_match.c @@ -6115,8 +6115,8 @@ BOOL has_req_cu = FALSE; BOOL startline; #if PCRE2_CODE_UNIT_WIDTH == 8 -BOOL memchr_not_found_first_cu = FALSE; -BOOL memchr_not_found_first_cu2 = FALSE; +BOOL memchr_not_found_first_cu; +BOOL memchr_not_found_first_cu2; #endif PCRE2_UCHAR first_cu = 0; @@ -6709,6 +6709,11 @@ the loop runs just once. */ start_partial = match_partial = NULL; mb->hitend = FALSE; +#if PCRE2_CODE_UNIT_WIDTH == 8 +memchr_not_found_first_cu = FALSE; +memchr_not_found_first_cu2 = FALSE; +#endif + for(;;) { PCRE2_SPTR new_start_match; @@ -7187,6 +7192,7 @@ if (utf && end_subject != true_end_subject && starting code units in 8-bit and 16-bit modes. 
*/ start_match = end_subject + 1; + #if PCRE2_CODE_UNIT_WIDTH != 32 while (start_match < true_end_subject && NOT_FIRSTCU(*start_match)) start_match++; diff --git a/pcre2/src/pcre2_printint.c b/pcre2/src/pcre2_printint.c new file mode 100644 index 000000000..b9bab025a --- /dev/null +++ b/pcre2/src/pcre2_printint.c @@ -0,0 +1,836 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2019 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains a PCRE private debugging function for printing out the +internal form of a compiled regular expression, along with some supporting +local functions. This source file is #included in pcre2test.c at each supported +code unit width, with PCRE2_SUFFIX set appropriately, just like the functions +that comprise the library. It can also optionally be included in +pcre2_compile.c for detailed debugging in error situations. */ + + +/* Tables of operator names. The same 8-bit table is used for all code unit +widths, so it must be defined only once. The list itself is defined in +pcre2_internal.h, which is #included by pcre2test before this file. */ + +#ifndef OP_LISTS_DEFINED +static const char *OP_names[] = { OP_NAME_LIST }; +#define OP_LISTS_DEFINED +#endif + +/* The functions and tables herein must all have mode-dependent names. */ + +#define OP_lengths PCRE2_SUFFIX(OP_lengths_) +#define get_ucpname PCRE2_SUFFIX(get_ucpname_) +#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_) +#define print_char PCRE2_SUFFIX(print_char_) +#define print_custring PCRE2_SUFFIX(print_custring_) +#define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_) +#define print_prop PCRE2_SUFFIX(print_prop_) + +/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that +the definition is next to the definition of the opcodes in pcre2_internal.h. 
+The contents of the table are, however, mode-dependent. */ + +static const uint8_t OP_lengths[] = { OP_LENGTHS }; + + + +/************************************************* +* Print one character from a string * +*************************************************/ + +/* In UTF mode the character may occupy more than one code unit. + +Arguments: + f file to write to + ptr pointer to first code unit of the character + utf TRUE if string is UTF (will be FALSE if UTF is not supported) + +Returns: number of additional code units used +*/ + +static unsigned int +print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf) +{ +uint32_t c = *ptr; +BOOL one_code_unit = !utf; + +/* If UTF is supported and requested, check for a valid single code unit. */ + +#ifdef SUPPORT_UNICODE +if (utf) + { +#if PCRE2_CODE_UNIT_WIDTH == 8 + one_code_unit = c < 0x80; +#elif PCRE2_CODE_UNIT_WIDTH == 16 + one_code_unit = (c & 0xfc00) != 0xd800; +#else + one_code_unit = (c & 0xfffff800u) != 0xd800u; +#endif /* CODE_UNIT_WIDTH */ + } +#endif /* SUPPORT_UNICODE */ + +/* Handle a valid one-code-unit character at any width. */ + +if (one_code_unit) + { + if (PRINTABLE(c)) fprintf(f, "%c", (char)c); + else if (c < 0x80) fprintf(f, "\\x%02x", c); + else fprintf(f, "\\x{%02x}", c); + return 0; + } + +/* Code for invalid UTF code units and multi-unit UTF characters is different +for each width. If UTF is not supported, control should never get here, but we +need a return statement to keep the compiler happy. */ + +#ifndef SUPPORT_UNICODE +return 0; +#else + +/* Malformed UTF-8 should occur only if the sanity check has been turned off. +Rather than swallow random bytes, just stop if we hit a bad one. Print it with +\X instead of \x as an indication. 
*/ + +#if PCRE2_CODE_UNIT_WIDTH == 8 +if ((c & 0xc0) != 0xc0) + { + fprintf(f, "\\X{%x}", c); /* Invalid starting byte */ + return 0; + } +else + { + int i; + int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */ + int s = 6*a; + c = (c & PRIV(utf8_table3)[a]) << s; + for (i = 1; i <= a; i++) + { + if ((ptr[i] & 0xc0) != 0x80) + { + fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */ + return i - 1; + } + s -= 6; + c |= (ptr[i] & 0x3f) << s; + } + fprintf(f, "\\x{%x}", c); + return a; +} +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + +/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one. +Print it with \X instead of \x as an indication. */ + +#if PCRE2_CODE_UNIT_WIDTH == 16 +if ((ptr[1] & 0xfc00) != 0xdc00) + { + fprintf(f, "\\X{%x}", c); + return 0; + } +c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000; +fprintf(f, "\\x{%x}", c); +return 1; +#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */ + +/* For UTF-32 we get here only for a malformed code unit, which should only +occur if the sanity check has been turned off. Print it with \X instead of \x +as an indication. */ + +#if PCRE2_CODE_UNIT_WIDTH == 32 +fprintf(f, "\\X{%x}", c); +return 0; +#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ +#endif /* SUPPORT_UNICODE */ +} + + + +/************************************************* +* Print string as a list of code units * +*************************************************/ + +/* These take no account of UTF as they always print each individual code unit. +The string is zero-terminated for print_custring(); the length is given for +print_custring_bylen(). 
+ +Arguments: + f file to write to + ptr point to the string + len length for print_custring_bylen() + +Returns: nothing +*/ + +static void +print_custring(FILE *f, PCRE2_SPTR ptr) +{ +while (*ptr != '\0') + { + uint32_t c = *ptr++; + if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c); + } +} + +static void +print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len) +{ +for (; len > 0; len--) + { + uint32_t c = *ptr++; + if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c); + } +} + + + +/************************************************* +* Find Unicode property name * +*************************************************/ + +/* When there is no UTF/UCP support, the table of names does not exist. This +function should not be called in such configurations, because a pattern that +tries to use Unicode properties won't compile. Rather than put lots of #ifdefs +into the main code, however, we just put one into this function. */ + +static const char * +get_ucpname(unsigned int ptype, unsigned int pvalue) +{ +#ifdef SUPPORT_UNICODE +int i; +for (i = PRIV(utt_size) - 1; i >= 0; i--) + { + if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value) break; + } +return (i >= 0)? PRIV(utt_names) + PRIV(utt)[i].name_offset : "??"; +#else /* No UTF support */ +(void)ptype; +(void)pvalue; +return "??"; +#endif /* SUPPORT_UNICODE */ +} + + + +/************************************************* +* Print Unicode property value * +*************************************************/ + +/* "Normal" properties can be printed from tables. The PT_CLIST property is a +pseudo-property that contains a pointer to a list of case-equivalent +characters. 
+ +Arguments: + f file to write to + code pointer in the compiled code + before text to print before + after text to print after + +Returns: nothing +*/ + +static void +print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after) +{ +if (code[1] != PT_CLIST) + { + fprintf(f, "%s%s %s%s", before, OP_names[*code], get_ucpname(code[1], + code[2]), after); + } +else + { + const char *not = (*code == OP_PROP)? "" : "not "; + const uint32_t *p = PRIV(ucd_caseless_sets) + code[2]; + fprintf (f, "%s%sclist", before, not); + while (*p < NOTACHAR) fprintf(f, " %04x", *p++); + fprintf(f, "%s", after); + } +} + + + +/************************************************* +* Print compiled pattern * +*************************************************/ + +/* The print_lengths flag controls whether offsets and lengths of items are +printed. Lenths can be turned off from pcre2test so that automatic tests on +bytecode can be written that do not depend on the value of LINK_SIZE. + +Arguments: + re a compiled pattern + f the file to write to + print_lengths show various lengths + +Returns: nothing +*/ + +static void +pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths) +{ +PCRE2_SPTR codestart, nametable, code; +uint32_t nesize = re->name_entry_size; +BOOL utf = (re->overall_options & PCRE2_UTF) != 0; + +nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)); +code = codestart = nametable + re->name_count * re->name_entry_size; + +for(;;) + { + PCRE2_SPTR ccode; + uint32_t c; + int i; + const char *flag = " "; + unsigned int extra = 0; + + if (print_lengths) + fprintf(f, "%3d ", (int)(code - codestart)); + else + fprintf(f, " "); + + switch(*code) + { +/* ========================================================================== */ + /* These cases are never obeyed. This is a fudge that causes a compile- + time error if the vectors OP_names or OP_lengths, which are indexed + by opcode, are not the correct length. 
It seems to be the only way to do + such a check at compile time, as the sizeof() operator does not work in + the C preprocessor. */ + + case OP_TABLE_LENGTH: + case OP_TABLE_LENGTH + + ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) && + (sizeof(OP_lengths) == OP_TABLE_LENGTH)): + return; +/* ========================================================================== */ + + case OP_END: + fprintf(f, " %s\n", OP_names[*code]); + fprintf(f, "------------------------------------------------------------------\n"); + return; + + case OP_CHAR: + fprintf(f, " "); + do + { + code++; + code += 1 + print_char(f, code, utf); + } + while (*code == OP_CHAR); + fprintf(f, "\n"); + continue; + + case OP_CHARI: + fprintf(f, " /i "); + do + { + code++; + code += 1 + print_char(f, code, utf); + } + while (*code == OP_CHARI); + fprintf(f, "\n"); + continue; + + case OP_CBRA: + case OP_CBRAPOS: + case OP_SCBRA: + case OP_SCBRAPOS: + if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); + else fprintf(f, " "); + fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE)); + break; + + case OP_BRA: + case OP_BRAPOS: + case OP_SBRA: + case OP_SBRAPOS: + case OP_KETRMAX: + case OP_KETRMIN: + case OP_KETRPOS: + case OP_ALT: + case OP_KET: + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + case OP_ONCE: + case OP_SCRIPT_RUN: + case OP_COND: + case OP_SCOND: + case OP_REVERSE: + if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); + else fprintf(f, " "); + fprintf(f, "%s", OP_names[*code]); + break; + + case OP_CLOSE: + fprintf(f, " %s %d", OP_names[*code], GET2(code, 1)); + break; + + case OP_CREF: + fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]); + break; + + case OP_DNCREF: + { + PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE; + fprintf(f, " %s Cond ref <", flag); + print_custring(f, entry); + fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE)); + } + break; + + case 
OP_RREF: + c = GET2(code, 1); + if (c == RREF_ANY) + fprintf(f, " Cond recurse any"); + else + fprintf(f, " Cond recurse %d", c); + break; + + case OP_DNRREF: + { + PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE; + fprintf(f, " %s Cond recurse <", flag); + print_custring(f, entry); + fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE)); + } + break; + + case OP_FALSE: + fprintf(f, " Cond false"); + break; + + case OP_TRUE: + fprintf(f, " Cond true"); + break; + + case OP_STARI: + case OP_MINSTARI: + case OP_POSSTARI: + case OP_PLUSI: + case OP_MINPLUSI: + case OP_POSPLUSI: + case OP_QUERYI: + case OP_MINQUERYI: + case OP_POSQUERYI: + flag = "/i"; + /* Fall through */ + case OP_STAR: + case OP_MINSTAR: + case OP_POSSTAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_POSPLUS: + case OP_QUERY: + case OP_MINQUERY: + case OP_POSQUERY: + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPOSSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEPOSPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEPOSQUERY: + fprintf(f, " %s ", flag); + + if (*code >= OP_TYPESTAR) + { + if (code[1] == OP_PROP || code[1] == OP_NOTPROP) + { + print_prop(f, code + 1, "", " "); + extra = 2; + } + else fprintf(f, "%s", OP_names[code[1]]); + } + else extra = print_char(f, code+1, utf); + fprintf(f, "%s", OP_names[*code]); + break; + + case OP_EXACTI: + case OP_UPTOI: + case OP_MINUPTOI: + case OP_POSUPTOI: + flag = "/i"; + /* Fall through */ + case OP_EXACT: + case OP_UPTO: + case OP_MINUPTO: + case OP_POSUPTO: + fprintf(f, " %s ", flag); + extra = print_char(f, code + 1 + IMM2_SIZE, utf); + fprintf(f, "{"); + if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,"); + fprintf(f, "%d}", GET2(code,1)); + if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?"); + else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+"); + break; + + case OP_TYPEEXACT: + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEPOSUPTO: + if (code[1 + 
IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) + { + print_prop(f, code + IMM2_SIZE + 1, " ", " "); + extra = 2; + } + else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]); + fprintf(f, "{"); + if (*code != OP_TYPEEXACT) fprintf(f, "0,"); + fprintf(f, "%d}", GET2(code,1)); + if (*code == OP_TYPEMINUPTO) fprintf(f, "?"); + else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+"); + break; + + case OP_NOTI: + flag = "/i"; + /* Fall through */ + case OP_NOT: + fprintf(f, " %s [^", flag); + extra = print_char(f, code + 1, utf); + fprintf(f, "]"); + break; + + case OP_NOTSTARI: + case OP_NOTMINSTARI: + case OP_NOTPOSSTARI: + case OP_NOTPLUSI: + case OP_NOTMINPLUSI: + case OP_NOTPOSPLUSI: + case OP_NOTQUERYI: + case OP_NOTMINQUERYI: + case OP_NOTPOSQUERYI: + flag = "/i"; + /* Fall through */ + + case OP_NOTSTAR: + case OP_NOTMINSTAR: + case OP_NOTPOSSTAR: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTPOSPLUS: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + case OP_NOTPOSQUERY: + fprintf(f, " %s [^", flag); + extra = print_char(f, code + 1, utf); + fprintf(f, "]%s", OP_names[*code]); + break; + + case OP_NOTEXACTI: + case OP_NOTUPTOI: + case OP_NOTMINUPTOI: + case OP_NOTPOSUPTOI: + flag = "/i"; + /* Fall through */ + + case OP_NOTEXACT: + case OP_NOTUPTO: + case OP_NOTMINUPTO: + case OP_NOTPOSUPTO: + fprintf(f, " %s [^", flag); + extra = print_char(f, code + 1 + IMM2_SIZE, utf); + fprintf(f, "]{"); + if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,"); + fprintf(f, "%d}", GET2(code,1)); + if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?"); + else + if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+"); + break; + + case OP_RECURSE: + if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); + else fprintf(f, " "); + fprintf(f, "%s", OP_names[*code]); + break; + + case OP_REFI: + flag = "/i"; + /* Fall through */ + case OP_REF: + fprintf(f, " %s \\%d", flag, GET2(code,1)); + ccode = code + OP_lengths[*code]; + 
goto CLASS_REF_REPEAT; + + case OP_DNREFI: + flag = "/i"; + /* Fall through */ + case OP_DNREF: + { + PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE; + fprintf(f, " %s \\k<", flag); + print_custring(f, entry); + fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE)); + } + ccode = code + OP_lengths[*code]; + goto CLASS_REF_REPEAT; + + case OP_CALLOUT: + fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE], + GET(code, 1), GET(code, 1 + LINK_SIZE)); + break; + + case OP_CALLOUT_STR: + c = code[1 + 4*LINK_SIZE]; + fprintf(f, " %s %c", OP_names[*code], c); + extra = GET(code, 1 + 2*LINK_SIZE); + print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE); + for (i = 0; PRIV(callout_start_delims)[i] != 0; i++) + if (c == PRIV(callout_start_delims)[i]) + { + c = PRIV(callout_end_delims)[i]; + break; + } + fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1), + GET(code, 1 + LINK_SIZE)); + break; + + case OP_PROP: + case OP_NOTPROP: + print_prop(f, code, " ", ""); + break; + + /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm + in having this code always here, and it makes it less messy without all + those #ifdefs. */ + + case OP_CLASS: + case OP_NCLASS: + case OP_XCLASS: + { + unsigned int min, max; + BOOL printmap; + BOOL invertmap = FALSE; + uint8_t *map; + uint8_t inverted_map[32]; + + fprintf(f, " ["); + + if (*code == OP_XCLASS) + { + extra = GET(code, 1); + ccode = code + LINK_SIZE + 1; + printmap = (*ccode & XCL_MAP) != 0; + if ((*ccode & XCL_NOT) != 0) + { + invertmap = (*ccode & XCL_HASPROP) == 0; + fprintf(f, "^"); + } + ccode++; + } + else + { + printmap = TRUE; + ccode = code + 1; + } + + /* Print a bit map */ + + if (printmap) + { + map = (uint8_t *)ccode; + if (invertmap) + { + /* Using 255 ^ instead of ~ avoids clang sanitize warning. 
*/ + for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i]; + map = inverted_map; + } + + for (i = 0; i < 256; i++) + { + if ((map[i/8] & (1u << (i&7))) != 0) + { + int j; + for (j = i+1; j < 256; j++) + if ((map[j/8] & (1u << (j&7))) == 0) break; + if (i == '-' || i == ']') fprintf(f, "\\"); + if (PRINTABLE(i)) fprintf(f, "%c", i); + else fprintf(f, "\\x%02x", i); + if (--j > i) + { + if (j != i + 1) fprintf(f, "-"); + if (j == '-' || j == ']') fprintf(f, "\\"); + if (PRINTABLE(j)) fprintf(f, "%c", j); + else fprintf(f, "\\x%02x", j); + } + i = j; + } + } + ccode += 32 / sizeof(PCRE2_UCHAR); + } + + /* For an XCLASS there is always some additional data */ + + if (*code == OP_XCLASS) + { + PCRE2_UCHAR ch; + while ((ch = *ccode++) != XCL_END) + { + BOOL not = FALSE; + const char *notch = ""; + + switch(ch) + { + case XCL_NOTPROP: + not = TRUE; + notch = "^"; + /* Fall through */ + + case XCL_PROP: + { + unsigned int ptype = *ccode++; + unsigned int pvalue = *ccode++; + + switch(ptype) + { + case PT_PXGRAPH: + fprintf(f, "[:%sgraph:]", notch); + break; + + case PT_PXPRINT: + fprintf(f, "[:%sprint:]", notch); + break; + + case PT_PXPUNCT: + fprintf(f, "[:%spunct:]", notch); + break; + + default: + fprintf(f, "\\%c{%s}", (not? 'P':'p'), + get_ucpname(ptype, pvalue)); + break; + } + } + break; + + default: + ccode += 1 + print_char(f, ccode, utf); + if (ch == XCL_RANGE) + { + fprintf(f, "-"); + ccode += 1 + print_char(f, ccode, utf); + } + break; + } + } + } + + /* Indicate a non-UTF class which was created by negation */ + + fprintf(f, "]%s", (*code == OP_NCLASS)? 
" (neg)" : ""); + + /* Handle repeats after a class or a back reference */ + + CLASS_REF_REPEAT: + switch(*ccode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSSTAR: + case OP_CRPOSPLUS: + case OP_CRPOSQUERY: + fprintf(f, "%s", OP_names[*ccode]); + extra += OP_lengths[*ccode]; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + min = GET2(ccode,1); + max = GET2(ccode,1 + IMM2_SIZE); + if (max == 0) fprintf(f, "{%u,}", min); + else fprintf(f, "{%u,%u}", min, max); + if (*ccode == OP_CRMINRANGE) fprintf(f, "?"); + else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+"); + extra += OP_lengths[*ccode]; + break; + + /* Do nothing if it's not a repeat; this code stops picky compilers + warning about the lack of a default code path. */ + + default: + break; + } + } + break; + + case OP_MARK: + case OP_COMMIT_ARG: + case OP_PRUNE_ARG: + case OP_SKIP_ARG: + case OP_THEN_ARG: + fprintf(f, " %s ", OP_names[*code]); + print_custring_bylen(f, code + 2, code[1]); + extra += code[1]; + break; + + case OP_THEN: + fprintf(f, " %s", OP_names[*code]); + break; + + case OP_CIRCM: + case OP_DOLLM: + flag = "/m"; + /* Fall through */ + + /* Anything else is just an item with no data, but possibly a flag. */ + + default: + fprintf(f, " %s %s", flag, OP_names[*code]); + break; + } + + code += OP_lengths[*code] + extra; + fprintf(f, "\n"); + } +} + +/* End of pcre2_printint.c */ diff --git a/pcre2/src/pcre2demo.c b/pcre2/src/pcre2demo.c new file mode 100644 index 000000000..a49f1f8e5 --- /dev/null +++ b/pcre2/src/pcre2demo.c @@ -0,0 +1,494 @@ +/************************************************* +* PCRE2 DEMONSTRATION PROGRAM * +*************************************************/ + +/* This is a demonstration program to illustrate a straightforward way of +using the PCRE2 regular expression library from a C program. 
See the +pcre2sample documentation for a short discussion ("man pcre2sample" if you have +the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is +incompatible with the original PCRE API. + +There are actually three libraries, each supporting a different code unit +width. This demonstration program uses the 8-bit library. The default is to +process each code unit as a separate character, but if the pattern begins with +"(*UTF)", both it and the subject are treated as UTF-8 strings, where +characters may occupy multiple code units. + +In Unix-like environments, if PCRE2 is installed in your standard system +libraries, you should be able to compile this program using this command: + +cc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo + +If PCRE2 is not installed in a standard place, it is likely to be installed +with support for the pkg-config mechanism. If you have pkg-config, you can +compile this program using this command: + +cc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo + +If you do not have pkg-config, you may have to use something like this: + +cc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \ + -R/usr/local/lib -lpcre2-8 -o pcre2demo + +Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and +library files for PCRE2 are installed on your system. Only some operating +systems (Solaris is one) use the -R option. + +Building under Windows: + +If you want to statically link this program against a non-dll .a file, you must +define PCRE2_STATIC before including pcre2.h, so in this environment, uncomment +the following line. */ + +/* #define PCRE2_STATIC */ + +/* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h. +For a program that uses only one code unit width, setting it to 8, 16, or 32 +makes it possible to use generic function names such as pcre2_compile(). 
Note +that just changing 8 to 16 (for example) is not sufficient to convert this +program to process 16-bit characters. Even in a fully 16-bit environment, where +string-handling functions such as strcmp() and printf() work with 16-bit +characters, the code for handling the table of named substrings will still need +to be modified. */ + +#define PCRE2_CODE_UNIT_WIDTH 8 + +#include +#include +#include + + +/************************************************************************** +* Here is the program. The API includes the concept of "contexts" for * +* setting up unusual interface requirements for compiling and matching, * +* such as custom memory managers and non-standard newline definitions. * +* This program does not do any of this, so it makes no use of contexts, * +* always passing NULL where a context could be given. * +**************************************************************************/ + +int main(int argc, char **argv) +{ +pcre2_code *re; +PCRE2_SPTR pattern; /* PCRE2_SPTR is a pointer to unsigned code units of */ +PCRE2_SPTR subject; /* the appropriate width (in this case, 8 bits). */ +PCRE2_SPTR name_table; + +int crlf_is_newline; +int errornumber; +int find_all; +int i; +int rc; +int utf8; + +uint32_t option_bits; +uint32_t namecount; +uint32_t name_entry_size; +uint32_t newline; + +PCRE2_SIZE erroroffset; +PCRE2_SIZE *ovector; +PCRE2_SIZE subject_length; + +pcre2_match_data *match_data; + + +/************************************************************************** +* First, sort out the command line. There is only one possible option at * +* the moment, "-g" to request repeated matching to find all occurrences, * +* like Perl's /g option. We set the variable find_all to a non-zero value * +* if the -g option is present. 
* +**************************************************************************/ + +find_all = 0; +for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-g") == 0) find_all = 1; + else if (argv[i][0] == '-') + { + printf("Unrecognised option %s\n", argv[i]); + return 1; + } + else break; + } + +/* After the options, we require exactly two arguments, which are the pattern, +and the subject string. */ + +if (argc - i != 2) + { + printf("Exactly two arguments required: a regex and a subject string\n"); + return 1; + } + +/* Pattern and subject are char arguments, so they can be straightforwardly +cast to PCRE2_SPTR because we are working in 8-bit code units. The subject +length is cast to PCRE2_SIZE for completeness, though PCRE2_SIZE is in fact +defined to be size_t. */ + +pattern = (PCRE2_SPTR)argv[i]; +subject = (PCRE2_SPTR)argv[i+1]; +subject_length = (PCRE2_SIZE)strlen((char *)subject); + + +/************************************************************************* +* Now we are going to compile the regular expression pattern, and handle * +* any errors that are detected. * +*************************************************************************/ + +re = pcre2_compile( + pattern, /* the pattern */ + PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ + 0, /* default options */ + &errornumber, /* for error number */ + &erroroffset, /* for error offset */ + NULL); /* use default compile context */ + +/* Compilation failed: print the error message and exit. */ + +if (re == NULL) + { + PCRE2_UCHAR buffer[256]; + pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); + printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset, + buffer); + return 1; + } + + +/************************************************************************* +* If the compilation succeeded, we call PCRE2 again, in order to do a * +* pattern match against the subject string. This does just ONE match. If * +* further matching is needed, it will be done below. 
Before running the * +* match we must set up a match_data block for holding the result. Using * +* pcre2_match_data_create_from_pattern() ensures that the block is * +* exactly the right size for the number of capturing parentheses in the * +* pattern. If you need to know the actual size of a match_data block as * +* a number of bytes, you can find it like this: * +* * +* PCRE2_SIZE match_data_size = pcre2_get_match_data_size(match_data); * +*************************************************************************/ + +match_data = pcre2_match_data_create_from_pattern(re, NULL); + +/* Now run the match. */ + +rc = pcre2_match( + re, /* the compiled pattern */ + subject, /* the subject string */ + subject_length, /* the length of the subject */ + 0, /* start at offset 0 in the subject */ + 0, /* default options */ + match_data, /* block for storing the result */ + NULL); /* use default match context */ + +/* Matching failed: handle error cases */ + +if (rc < 0) + { + switch(rc) + { + case PCRE2_ERROR_NOMATCH: printf("No match\n"); break; + /* + Handle other special cases if you like + */ + default: printf("Matching error %d\n", rc); break; + } + pcre2_match_data_free(match_data); /* Release memory used for the match */ + pcre2_code_free(re); /* data and the compiled pattern. */ + return 1; + } + +/* Match succeded. Get a pointer to the output vector, where string offsets are +stored. */ + +ovector = pcre2_get_ovector_pointer(match_data); +printf("Match succeeded at offset %d\n", (int)ovector[0]); + + +/************************************************************************* +* We have found the first match within the subject string. If the output * +* vector wasn't big enough, say so. Then output any substrings that were * +* captured. * +*************************************************************************/ + +/* The output vector wasn't big enough. This should not happen, because we used +pcre2_match_data_create_from_pattern() above. 
*/ + +if (rc == 0) + printf("ovector was not big enough for all the captured substrings\n"); + +/* We must guard against patterns such as /(?=.\K)/ that use \K in an assertion +to set the start of a match later than its end. In this demonstration program, +we just detect this case and give up. */ + +if (ovector[0] > ovector[1]) + { + printf("\\K was used in an assertion to set the match start after its end.\n" + "From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]), + (char *)(subject + ovector[1])); + printf("Run abandoned\n"); + pcre2_match_data_free(match_data); + pcre2_code_free(re); + return 1; + } + +/* Show substrings stored in the output vector by number. Obviously, in a real +application you might want to do things other than print them. */ + +for (i = 0; i < rc; i++) + { + PCRE2_SPTR substring_start = subject + ovector[2*i]; + PCRE2_SIZE substring_length = ovector[2*i+1] - ovector[2*i]; + printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start); + } + + +/************************************************************************** +* That concludes the basic part of this demonstration program. We have * +* compiled a pattern, and performed a single match. The code that follows * +* shows first how to access named substrings, and then how to code for * +* repeated matches on the same subject. * +**************************************************************************/ + +/* See if there are any named substrings, and if so, show them by name. First +we have to extract the count of named parentheses from the pattern. 
*/ + +(void)pcre2_pattern_info( + re, /* the compiled pattern */ + PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */ + &namecount); /* where to put the answer */ + +if (namecount == 0) printf("No named substrings\n"); else + { + PCRE2_SPTR tabptr; + printf("Named substrings\n"); + + /* Before we can access the substrings, we must extract the table for + translating names to numbers, and the size of each entry in the table. */ + + (void)pcre2_pattern_info( + re, /* the compiled pattern */ + PCRE2_INFO_NAMETABLE, /* address of the table */ + &name_table); /* where to put the answer */ + + (void)pcre2_pattern_info( + re, /* the compiled pattern */ + PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */ + &name_entry_size); /* where to put the answer */ + + /* Now we can scan the table and, for each entry, print the number, the name, + and the substring itself. In the 8-bit library the number is held in two + bytes, most significant first. */ + + tabptr = name_table; + for (i = 0; i < namecount; i++) + { + int n = (tabptr[0] << 8) | tabptr[1]; + printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2, + (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]); + tabptr += name_entry_size; + } + } + + +/************************************************************************* +* If the "-g" option was given on the command line, we want to continue * +* to search for additional matches in the subject string, in a similar * +* way to the /g option in Perl. This turns out to be trickier than you * +* might think because of the possibility of matching an empty string. * +* What happens is as follows: * +* * +* If the previous match was NOT for an empty string, we can just start * +* the next match at the end of the previous one. * +* * +* If the previous match WAS for an empty string, we can't do that, as it * +* would lead to an infinite loop. 
Instead, a call of pcre2_match() is * +* made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The * +* first of these tells PCRE2 that an empty string at the start of the * +* subject is not a valid match; other possibilities must be tried. The * +* second flag restricts PCRE2 to one match attempt at the initial string * +* position. If this match succeeds, an alternative to the empty string * +* match has been found, and we can print it and proceed round the loop, * +* advancing by the length of whatever was found. If this match does not * +* succeed, we still stay in the loop, advancing by just one character. * +* In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be * +* more than one byte. * +* * +* However, there is a complication concerned with newlines. When the * +* newline convention is such that CRLF is a valid newline, we must * +* advance by two characters rather than one. The newline convention can * +* be set in the regex by (*CR), etc.; if not, we must find the default. * +*************************************************************************/ + +if (!find_all) /* Check for -g */ + { + pcre2_match_data_free(match_data); /* Release the memory that was used */ + pcre2_code_free(re); /* for the match data and the pattern. */ + return 0; /* Exit the program. */ + } + +/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline +sequence. First, find the options with which the regex was compiled and extract +the UTF state. */ + +(void)pcre2_pattern_info(re, PCRE2_INFO_ALLOPTIONS, &option_bits); +utf8 = (option_bits & PCRE2_UTF) != 0; + +/* Now find the newline convention and see whether CRLF is a valid newline +sequence. 
*/ + +(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline); +crlf_is_newline = newline == PCRE2_NEWLINE_ANY || + newline == PCRE2_NEWLINE_CRLF || + newline == PCRE2_NEWLINE_ANYCRLF; + +/* Loop for second and subsequent matches */ + +for (;;) + { + uint32_t options = 0; /* Normally no options */ + PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */ + + /* If the previous match was for an empty string, we are finished if we are + at the end of the subject. Otherwise, arrange to run another match at the + same point to see if a non-empty match can be found. */ + + if (ovector[0] == ovector[1]) + { + if (ovector[0] == subject_length) break; + options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; + } + + /* If the previous match was not an empty string, there is one tricky case to + consider. If a pattern contains \K within a lookbehind assertion at the + start, the end of the matched string can be at the offset where the match + started. Without special action, this leads to a loop that keeps on matching + the same substring. We must detect this case and arrange to move the start on + by one character. The pcre2_get_startchar() function returns the starting + offset that was passed to pcre2_match(). */ + + else + { + PCRE2_SIZE startchar = pcre2_get_startchar(match_data); + if (start_offset <= startchar) + { + if (startchar >= subject_length) break; /* Reached end of subject. */ + start_offset = startchar + 1; /* Advance by one character. */ + if (utf8) /* If UTF-8, it may be more */ + { /* than one code unit. 
*/ + for (; start_offset < subject_length; start_offset++) + if ((subject[start_offset] & 0xc0) != 0x80) break; + } + } + } + + /* Run the next matching operation */ + + rc = pcre2_match( + re, /* the compiled pattern */ + subject, /* the subject string */ + subject_length, /* the length of the subject */ + start_offset, /* starting offset in the subject */ + options, /* options */ + match_data, /* block for storing the result */ + NULL); /* use default match context */ + + /* This time, a result of NOMATCH isn't an error. If the value in "options" + is zero, it just means we have found all possible matches, so the loop ends. + Otherwise, it means we have failed to find a non-empty-string match at a + point where there was a previous empty-string match. In this case, we do what + Perl does: advance the matching position by one character, and continue. We + do this by setting the "end of previous match" offset, because that is picked + up at the top of the loop as the point at which to start again. + + There are two complications: (a) When CRLF is a valid newline sequence, and + the current position is just before it, advance by an extra byte. (b) + Otherwise we must ensure that we skip an entire UTF character if we are in + UTF mode. */ + + if (rc == PCRE2_ERROR_NOMATCH) + { + if (options == 0) break; /* All matches found */ + ovector[1] = start_offset + 1; /* Advance one code unit */ + if (crlf_is_newline && /* If CRLF is a newline & */ + start_offset < subject_length - 1 && /* we are at CRLF, */ + subject[start_offset] == '\r' && + subject[start_offset + 1] == '\n') + ovector[1] += 1; /* Advance by one more. */ + else if (utf8) /* Otherwise, ensure we */ + { /* advance a whole UTF-8 */ + while (ovector[1] < subject_length) /* character. */ + { + if ((subject[ovector[1]] & 0xc0) != 0x80) break; + ovector[1] += 1; + } + } + continue; /* Go round the loop again */ + } + + /* Other matching errors are not recoverable. 
*/ + + if (rc < 0) + { + printf("Matching error %d\n", rc); + pcre2_match_data_free(match_data); + pcre2_code_free(re); + return 1; + } + + /* Match succeeded */ + + printf("\nMatch succeeded again at offset %d\n", (int)ovector[0]); + + /* The match succeeded, but the output vector wasn't big enough. This + should not happen. */ + + if (rc == 0) + printf("ovector was not big enough for all the captured substrings\n"); + + /* We must guard against patterns such as /(?=.\K)/ that use \K in an + assertion to set the start of a match later than its end. In this + demonstration program, we just detect this case and give up. */ + + if (ovector[0] > ovector[1]) + { + printf("\\K was used in an assertion to set the match start after its end.\n" + "From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]), + (char *)(subject + ovector[1])); + printf("Run abandoned\n"); + pcre2_match_data_free(match_data); + pcre2_code_free(re); + return 1; + } + + /* As before, show substrings stored in the output vector by number, and then + also any named substrings. 
*/ + + for (i = 0; i < rc; i++) + { + PCRE2_SPTR substring_start = subject + ovector[2*i]; + size_t substring_length = ovector[2*i+1] - ovector[2*i]; + printf("%2d: %.*s\n", i, (int)substring_length, (char *)substring_start); + } + + if (namecount == 0) printf("No named substrings\n"); else + { + PCRE2_SPTR tabptr = name_table; + printf("Named substrings\n"); + for (i = 0; i < namecount; i++) + { + int n = (tabptr[0] << 8) | tabptr[1]; + printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2, + (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]); + tabptr += name_entry_size; + } + } + } /* End of loop to find second and subsequent matches */ + +printf("\n"); +pcre2_match_data_free(match_data); +pcre2_code_free(re); +return 0; +} + +/* End of pcre2demo.c */ diff --git a/pcre2/src/pcre2posix.c b/pcre2/src/pcre2posix.c new file mode 100644 index 000000000..b24620a45 --- /dev/null +++ b/pcre2/src/pcre2posix.c @@ -0,0 +1,423 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2019 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module is a wrapper that provides a POSIX API to the underlying PCRE2 +functions. The operative functions are called pcre2_regcomp(), etc., with +wrappers that use the plain POSIX names. In addition, pcre2posix.h defines the +POSIX names as macros for the pcre2_xxx functions, so any program that includes +it and uses the POSIX names will call the base functions directly. This makes +it easier for an application to be sure it gets the PCRE2 versions in the +presence of other POSIX regex libraries. */ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + + +/* Ensure that the PCRE2POSIX_EXP_xxx macros are set appropriately for +compiling these functions. This must come before including pcre2posix.h, where +they are set for an application (using these functions) if they have not +previously been set. 
*/ + +#if defined(_WIN32) && !defined(PCRE2_STATIC) +# define PCRE2POSIX_EXP_DECL extern __declspec(dllexport) +# define PCRE2POSIX_EXP_DEFN __declspec(dllexport) +#endif + +/* Older versions of MSVC lack snprintf(). This define allows for +warning/error-free compilation and testing with MSVC compilers back to at least +MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */ + +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#define snprintf _snprintf +#endif + + +/* Compile-time error numbers start at this value. It should probably never be +changed. This #define is a copy of the one in pcre2_internal.h. */ + +#define COMPILE_ERROR_BASE 100 + + +/* Standard C headers */ + +#include +#include +#include +#include +#include +#include + +/* PCRE2 headers */ + +#include "pcre2.h" +#include "pcre2posix.h" + +/* When compiling with the MSVC compiler, it is sometimes necessary to include +a "calling convention" before exported function names. (This is secondhand +information; I know nothing about MSVC myself). For example, something like + + void __cdecl function(....) + +might be needed. In order to make this easy, all the exported functions have +PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not +set, we ensure here that it has no effect. */ + +#ifndef PCRE2_CALL_CONVENTION +#define PCRE2_CALL_CONVENTION +#endif + +/* Table to translate PCRE2 compile time error codes into POSIX error codes. +Only a few PCRE2 errors with a value greater than 23 turn into special POSIX +codes: most go to REG_BADPAT. The second table lists, in pairs, those that +don't. 
*/ + +static const int eint1[] = { + 0, /* No error */ + REG_EESCAPE, /* \ at end of pattern */ + REG_EESCAPE, /* \c at end of pattern */ + REG_EESCAPE, /* unrecognized character follows \ */ + REG_BADBR, /* numbers out of order in {} quantifier */ + /* 5 */ + REG_BADBR, /* number too big in {} quantifier */ + REG_EBRACK, /* missing terminating ] for character class */ + REG_ECTYPE, /* invalid escape sequence in character class */ + REG_ERANGE, /* range out of order in character class */ + REG_BADRPT, /* nothing to repeat */ + /* 10 */ + REG_ASSERT, /* internal error: unexpected repeat */ + REG_BADPAT, /* unrecognized character after (? or (?- */ + REG_BADPAT, /* POSIX named classes are supported only within a class */ + REG_BADPAT, /* POSIX collating elements are not supported */ + REG_EPAREN, /* missing ) */ + /* 15 */ + REG_ESUBREG, /* reference to non-existent subpattern */ + REG_INVARG, /* pattern passed as NULL */ + REG_INVARG, /* unknown compile-time option bit(s) */ + REG_EPAREN, /* missing ) after (?# comment */ + REG_ESIZE, /* parentheses nested too deeply */ + /* 20 */ + REG_ESIZE, /* regular expression too large */ + REG_ESPACE, /* failed to get memory */ + REG_EPAREN, /* unmatched closing parenthesis */ + REG_ASSERT /* internal error: code overflow */ + }; + +static const int eint2[] = { + 30, REG_ECTYPE, /* unknown POSIX class name */ + 32, REG_INVARG, /* this version of PCRE2 does not have Unicode support */ + 37, REG_EESCAPE, /* PCRE2 does not support \L, \l, \N{name}, \U, or \u */ + 56, REG_INVARG, /* internal error: unknown newline setting */ + 92, REG_INVARG, /* invalid option bits with PCRE2_LITERAL */ +}; + +/* Table of texts corresponding to POSIX error codes */ + +static const char *const pstring[] = { + "", /* Dummy for value 0 */ + "internal error", /* REG_ASSERT */ + "invalid repeat counts in {}", /* BADBR */ + "pattern error", /* BADPAT */ + "? 
* + invalid", /* BADRPT */ + "unbalanced {}", /* EBRACE */ + "unbalanced []", /* EBRACK */ + "collation error - not relevant", /* ECOLLATE */ + "bad class", /* ECTYPE */ + "bad escape sequence", /* EESCAPE */ + "empty expression", /* EMPTY */ + "unbalanced ()", /* EPAREN */ + "bad range inside []", /* ERANGE */ + "expression too big", /* ESIZE */ + "failed to get memory", /* ESPACE */ + "bad back reference", /* ESUBREG */ + "bad argument", /* INVARG */ + "match failed" /* NOMATCH */ +}; + + + +/************************************************* +* Wrappers with traditional POSIX names * +*************************************************/ + +/* Keep defining them to preserve the ABI for applications linked to the pcre2 +POSIX library before these names were changed into macros in pcre2posix.h. +This also ensures that the POSIX names are callable from languages that do not +include pcre2posix.h. It is vital to #undef the macro definitions from +pcre2posix.h! */ + +#undef regerror +PCRE2POSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t); +PCRE2POSIX_EXP_DEFN size_t PCRE2_CALL_CONVENTION +regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) +{ +return pcre2_regerror(errcode, preg, errbuf, errbuf_size); +} + +#undef regfree +PCRE2POSIX_EXP_DECL void regfree(regex_t *); +PCRE2POSIX_EXP_DEFN void PCRE2_CALL_CONVENTION +regfree(regex_t *preg) +{ +pcre2_regfree(preg); +} + +#undef regcomp +PCRE2POSIX_EXP_DECL int regcomp(regex_t *, const char *, int); +PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION +regcomp(regex_t *preg, const char *pattern, int cflags) +{ +return pcre2_regcomp(preg, pattern, cflags); +} + +#undef regexec +PCRE2POSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t, + regmatch_t *, int); +PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION +regexec(const regex_t *preg, const char *string, size_t nmatch, + regmatch_t pmatch[], int eflags) +{ +return pcre2_regexec(preg, string, nmatch, pmatch, eflags); +} + + + 
+/************************************************* +* Translate error code to string * +*************************************************/ + +PCRE2POSIX_EXP_DEFN size_t PCRE2_CALL_CONVENTION +pcre2_regerror(int errcode, const regex_t *preg, char *errbuf, + size_t errbuf_size) +{ +int used; +const char *message; + +message = (errcode <= 0 || errcode >= (int)(sizeof(pstring)/sizeof(char *)))? + "unknown error code" : pstring[errcode]; + +if (preg != NULL && (int)preg->re_erroffset != -1) + { + used = snprintf(errbuf, errbuf_size, "%s at offset %-6d", message, + (int)preg->re_erroffset); + } +else + { + used = snprintf(errbuf, errbuf_size, "%s", message); + } + +return used + 1; +} + + + +/************************************************* +* Free store held by a regex * +*************************************************/ + +PCRE2POSIX_EXP_DEFN void PCRE2_CALL_CONVENTION +pcre2_regfree(regex_t *preg) +{ +pcre2_match_data_free(preg->re_match_data); +pcre2_code_free(preg->re_pcre2_code); +} + + + +/************************************************* +* Compile a regular expression * +*************************************************/ + +/* +Arguments: + preg points to a structure for recording the compiled expression + pattern the pattern to compile + cflags compilation flags + +Returns: 0 on success + various non-zero codes on failure +*/ + +PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_regcomp(regex_t *preg, const char *pattern, int cflags) +{ +PCRE2_SIZE erroffset; +PCRE2_SIZE patlen; +int errorcode; +int options = 0; +int re_nsub = 0; + +patlen = ((cflags & REG_PEND) != 0)? 
(PCRE2_SIZE)(preg->re_endp - pattern) : + PCRE2_ZERO_TERMINATED; + +if ((cflags & REG_ICASE) != 0) options |= PCRE2_CASELESS; +if ((cflags & REG_NEWLINE) != 0) options |= PCRE2_MULTILINE; +if ((cflags & REG_DOTALL) != 0) options |= PCRE2_DOTALL; +if ((cflags & REG_NOSPEC) != 0) options |= PCRE2_LITERAL; +if ((cflags & REG_UTF) != 0) options |= PCRE2_UTF; +if ((cflags & REG_UCP) != 0) options |= PCRE2_UCP; +if ((cflags & REG_UNGREEDY) != 0) options |= PCRE2_UNGREEDY; + +preg->re_cflags = cflags; +preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, patlen, options, + &errorcode, &erroffset, NULL); +preg->re_erroffset = erroffset; + +if (preg->re_pcre2_code == NULL) + { + unsigned int i; + + /* A negative value is a UTF error; otherwise all error codes are greater + than COMPILE_ERROR_BASE, but check, just in case. */ + + if (errorcode < COMPILE_ERROR_BASE) return REG_BADPAT; + errorcode -= COMPILE_ERROR_BASE; + + if (errorcode < (int)(sizeof(eint1)/sizeof(const int))) + return eint1[errorcode]; + for (i = 0; i < sizeof(eint2)/sizeof(const int); i += 2) + if (errorcode == eint2[i]) return eint2[i+1]; + return REG_BADPAT; + } + +(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code, + PCRE2_INFO_CAPTURECOUNT, &re_nsub); +preg->re_nsub = (size_t)re_nsub; +preg->re_match_data = pcre2_match_data_create(re_nsub + 1, NULL); +preg->re_erroffset = (size_t)(-1); /* No meaning after successful compile */ + +if (preg->re_match_data == NULL) + { + pcre2_code_free(preg->re_pcre2_code); + return REG_ESPACE; + } + +return 0; +} + + + +/************************************************* +* Match a regular expression * +*************************************************/ + +/* A suitable match_data block, large enough to hold all possible captures, was +obtained when the pattern was compiled, to save having to allocate and free it +for each match. 
If REG_NOSUB was specified at compile time, the nmatch and +pmatch arguments are ignored, and the only result is yes/no/error. */ + +PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_regexec(const regex_t *preg, const char *string, size_t nmatch, + regmatch_t pmatch[], int eflags) +{ +int rc, so, eo; +int options = 0; +pcre2_match_data *md = (pcre2_match_data *)preg->re_match_data; + +if ((eflags & REG_NOTBOL) != 0) options |= PCRE2_NOTBOL; +if ((eflags & REG_NOTEOL) != 0) options |= PCRE2_NOTEOL; +if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE2_NOTEMPTY; + +/* When REG_NOSUB was specified, or if no vector has been passed in which to +put captured strings, ensure that nmatch is zero. This will stop any attempt to +write to pmatch. */ + +if ((preg->re_cflags & REG_NOSUB) != 0 || pmatch == NULL) nmatch = 0; + +/* REG_STARTEND is a BSD extension, to allow for non-NUL-terminated strings. +The man page from OS X says "REG_STARTEND affects only the location of the +string, not how it is matched". That is why the "so" value is used to bump the +start location rather than being passed as a PCRE2 "starting offset". */ + +if ((eflags & REG_STARTEND) != 0) + { + if (pmatch == NULL) return REG_INVARG; + so = pmatch[0].rm_so; + eo = pmatch[0].rm_eo; + } +else + { + so = 0; + eo = (int)strlen(string); + } + +rc = pcre2_match((const pcre2_code *)preg->re_pcre2_code, + (PCRE2_SPTR)string + so, (eo - so), 0, options, md, NULL); + +/* Successful match */ + +if (rc >= 0) + { + size_t i; + PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md); + if ((size_t)rc > nmatch) rc = (int)nmatch; + for (i = 0; i < (size_t)rc; i++) + { + pmatch[i].rm_so = (ovector[i*2] == PCRE2_UNSET)? -1 : + (int)(ovector[i*2] + so); + pmatch[i].rm_eo = (ovector[i*2+1] == PCRE2_UNSET)? 
-1 : + (int)(ovector[i*2+1] + so); + } + for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1; + return 0; + } + +/* Unsuccessful match */ + +if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21) + return REG_INVARG; + +switch(rc) + { + default: return REG_ASSERT; + case PCRE2_ERROR_BADMODE: return REG_INVARG; + case PCRE2_ERROR_BADMAGIC: return REG_INVARG; + case PCRE2_ERROR_BADOPTION: return REG_INVARG; + case PCRE2_ERROR_BADUTFOFFSET: return REG_INVARG; + case PCRE2_ERROR_MATCHLIMIT: return REG_ESPACE; + case PCRE2_ERROR_NOMATCH: return REG_NOMATCH; + case PCRE2_ERROR_NOMEMORY: return REG_ESPACE; + case PCRE2_ERROR_NULL: return REG_INVARG; + } +} + +/* End of pcre2posix.c */ diff --git a/pcre2/src/pcre2posix.h b/pcre2/src/pcre2posix.h new file mode 100644 index 000000000..3a663b9ff --- /dev/null +++ b/pcre2/src/pcre2posix.h @@ -0,0 +1,170 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE2 is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. This is +the public header file to be #included by applications that call PCRE2 via the +POSIX wrapper interface. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2019 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* Have to include stdlib.h in order to ensure that size_t is defined. */ + +#include <stdlib.h> + +/* Allow for C++ users */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Options, mostly defined by POSIX, but with some extras. 
*/ + +#define REG_ICASE 0x0001 /* Maps to PCRE2_CASELESS */ +#define REG_NEWLINE 0x0002 /* Maps to PCRE2_MULTILINE */ +#define REG_NOTBOL 0x0004 /* Maps to PCRE2_NOTBOL */ +#define REG_NOTEOL 0x0008 /* Maps to PCRE2_NOTEOL */ +#define REG_DOTALL 0x0010 /* NOT defined by POSIX; maps to PCRE2_DOTALL */ +#define REG_NOSUB 0x0020 /* Do not report what was matched */ +#define REG_UTF 0x0040 /* NOT defined by POSIX; maps to PCRE2_UTF */ +#define REG_STARTEND 0x0080 /* BSD feature: pass subject string by so,eo */ +#define REG_NOTEMPTY 0x0100 /* NOT defined by POSIX; maps to PCRE2_NOTEMPTY */ +#define REG_UNGREEDY 0x0200 /* NOT defined by POSIX; maps to PCRE2_UNGREEDY */ +#define REG_UCP 0x0400 /* NOT defined by POSIX; maps to PCRE2_UCP */ +#define REG_PEND 0x0800 /* GNU feature: pass end pattern by re_endp */ +#define REG_NOSPEC 0x1000 /* Maps to PCRE2_LITERAL */ + +/* This is not used by PCRE2, but by defining it we make it easier +to slot PCRE2 into existing programs that make POSIX calls. */ + +#define REG_EXTENDED 0 + +/* Error values. Not all these are relevant or used by the wrapper. */ + +enum { + REG_ASSERT = 1, /* internal error ? */ + REG_BADBR, /* invalid repeat counts in {} */ + REG_BADPAT, /* pattern error */ + REG_BADRPT, /* ? * + invalid */ + REG_EBRACE, /* unbalanced {} */ + REG_EBRACK, /* unbalanced [] */ + REG_ECOLLATE, /* collation error - not relevant */ + REG_ECTYPE, /* bad class */ + REG_EESCAPE, /* bad escape sequence */ + REG_EMPTY, /* empty expression */ + REG_EPAREN, /* unbalanced () */ + REG_ERANGE, /* bad range inside [] */ + REG_ESIZE, /* expression too big */ + REG_ESPACE, /* failed to get memory */ + REG_ESUBREG, /* bad back reference */ + REG_INVARG, /* bad argument */ + REG_NOMATCH /* match failed */ +}; + + +/* The structure representing a compiled regular expression. It is also used +for passing the pattern end pointer when REG_PEND is set. 
*/ + +typedef struct { + void *re_pcre2_code; + void *re_match_data; + const char *re_endp; + size_t re_nsub; + size_t re_erroffset; + int re_cflags; +} regex_t; + +/* The structure in which a captured offset is returned. */ + +typedef int regoff_t; + +typedef struct { + regoff_t rm_so; + regoff_t rm_eo; +} regmatch_t; + +/* When an application links to a PCRE2 DLL in Windows, the symbols that are +imported have to be identified as such. When building PCRE2, the appropriate +export settings are needed, and are set in pcre2posix.c before including this +file. */ + +#if defined(_WIN32) && !defined(PCRE2_STATIC) && !defined(PCRE2POSIX_EXP_DECL) +# define PCRE2POSIX_EXP_DECL extern __declspec(dllimport) +# define PCRE2POSIX_EXP_DEFN __declspec(dllimport) +#endif + +/* By default, we use the standard "extern" declarations. */ + +#ifndef PCRE2POSIX_EXP_DECL +# ifdef __cplusplus +# define PCRE2POSIX_EXP_DECL extern "C" +# define PCRE2POSIX_EXP_DEFN extern "C" +# else +# define PCRE2POSIX_EXP_DECL extern +# define PCRE2POSIX_EXP_DEFN extern +# endif +#endif + +/* The functions. The actual code is in functions with pcre2_xxx names for +uniqueness. POSIX names are provided as macros for API compatibility with POSIX +regex functions. It's done this way to ensure that they are always linked from +the PCRE2 library and not by accident from elsewhere (regex_t differs in size +elsewhere). */ + +PCRE2POSIX_EXP_DECL int pcre2_regcomp(regex_t *, const char *, int); +PCRE2POSIX_EXP_DECL int pcre2_regexec(const regex_t *, const char *, size_t, + regmatch_t *, int); +PCRE2POSIX_EXP_DECL size_t pcre2_regerror(int, const regex_t *, char *, size_t); +PCRE2POSIX_EXP_DECL void pcre2_regfree(regex_t *); + +#define regcomp pcre2_regcomp +#define regexec pcre2_regexec +#define regerror pcre2_regerror +#define regfree pcre2_regfree + +/* Debian had a patch that used different names. These are now here to save +them having to maintain their own patch, but are not documented by PCRE2. 
*/ + +#define PCRE2regcomp pcre2_regcomp +#define PCRE2regexec pcre2_regexec +#define PCRE2regerror pcre2_regerror +#define PCRE2regfree pcre2_regfree + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +/* End of pcre2posix.h */ diff --git a/pcre2/test-driver b/pcre2/test-driver new file mode 100755 index 000000000..89dba1e07 --- /dev/null +++ b/pcre2/test-driver @@ -0,0 +1,148 @@ +#! /bin/sh +# test-driver - basic testsuite driver script. + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 2011-2020 Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to <bug-automake@gnu.org> or send patches to +# <automake-patches@gnu.org>. + +# Make unconditional expansion of undefined variables an error. This +# helps a lot in preventing typo-related bugs. +set -u + +usage_error () +{ + echo "$0: $*" >&2 + print_usage >&2 + exit 2 +} + +print_usage () +{ + cat <$log_file 2>&1 +estatus=$? 
+ +if test $enable_hard_errors = no && test $estatus -eq 99; then + tweaked_estatus=1 +else + tweaked_estatus=$estatus +fi + +case $tweaked_estatus:$expect_failure in + 0:yes) col=$red res=XPASS recheck=yes gcopy=yes;; + 0:*) col=$grn res=PASS recheck=no gcopy=no;; + 77:*) col=$blu res=SKIP recheck=no gcopy=yes;; + 99:*) col=$mgn res=ERROR recheck=yes gcopy=yes;; + *:yes) col=$lgn res=XFAIL recheck=no gcopy=yes;; + *:*) col=$red res=FAIL recheck=yes gcopy=yes;; +esac + +# Report the test outcome and exit status in the logs, so that one can +# know whether the test passed or failed simply by looking at the '.log' +# file, without the need of also peeking into the corresponding '.trs' +# file (automake bug#11814). +echo "$res $test_name (exit status: $estatus)" >>$log_file + +# Report outcome to console. +echo "${col}${res}${std}: $test_name" + +# Register the test result, and other relevant metadata. +echo ":test-result: $res" > $trs_file +echo ":global-test-result: $res" >> $trs_file +echo ":recheck: $recheck" >> $trs_file +echo ":copy-in-global-log: $gcopy" >> $trs_file + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: