diff options
author | Juan J. Martinez <jjm@usebox.net> | 2023-11-05 11:22:55 +0000 |
---|---|---|
committer | Juan J. Martinez <jjm@usebox.net> | 2023-11-05 11:31:28 +0000 |
commit | 2fbdf974338bde8576efdae40a819a76b2391033 (patch) | |
tree | 64d41a37470143f142344f9a439d96de3e7918c2 /tools/apultra_src/src/libdivsufsort/examples | |
download | kitsunes-curse-2fbdf974338bde8576efdae40a819a76b2391033.tar.gz kitsunes-curse-2fbdf974338bde8576efdae40a819a76b2391033.zip |
Initial import of the open source release
Diffstat (limited to 'tools/apultra_src/src/libdivsufsort/examples')
-rw-r--r-- | tools/apultra_src/src/libdivsufsort/examples/CMakeLists.txt | 11 | ||||
-rw-r--r-- | tools/apultra_src/src/libdivsufsort/examples/bwt.c | 220 | ||||
-rw-r--r-- | tools/apultra_src/src/libdivsufsort/examples/mksary.c | 193 | ||||
-rw-r--r-- | tools/apultra_src/src/libdivsufsort/examples/sasearch.c | 165 | ||||
-rw-r--r-- | tools/apultra_src/src/libdivsufsort/examples/suftest.c | 164 | ||||
-rw-r--r-- | tools/apultra_src/src/libdivsufsort/examples/unbwt.c | 207 |
6 files changed, 960 insertions, 0 deletions
diff --git a/tools/apultra_src/src/libdivsufsort/examples/CMakeLists.txt b/tools/apultra_src/src/libdivsufsort/examples/CMakeLists.txt new file mode 100644 index 0000000..e801c81 --- /dev/null +++ b/tools/apultra_src/src/libdivsufsort/examples/CMakeLists.txt @@ -0,0 +1,11 @@ +## Add definitions ## +add_definitions(-D_LARGEFILE_SOURCE -D_LARGE_FILES -D_FILE_OFFSET_BITS=64) + +## Targets ## +include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../include" + "${CMAKE_CURRENT_BINARY_DIR}/../include") +link_directories("${CMAKE_CURRENT_BINARY_DIR}/../lib") +foreach(src suftest mksary sasearch bwt unbwt) + add_executable(${src} ${src}.c) + target_link_libraries(${src} divsufsort) +endforeach(src) diff --git a/tools/apultra_src/src/libdivsufsort/examples/bwt.c b/tools/apultra_src/src/libdivsufsort/examples/bwt.c new file mode 100644 index 0000000..5a362d0 --- /dev/null +++ b/tools/apultra_src/src/libdivsufsort/examples/bwt.c @@ -0,0 +1,220 @@ +/* + * bwt.c for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#if HAVE_CONFIG_H +# include "config.h" +#endif +#include <stdio.h> +#if HAVE_STRING_H +# include <string.h> +#endif +#if HAVE_STDLIB_H +# include <stdlib.h> +#endif +#if HAVE_MEMORY_H +# include <memory.h> +#endif +#if HAVE_STDDEF_H +# include <stddef.h> +#endif +#if HAVE_STRINGS_H +# include <strings.h> +#endif +#if HAVE_SYS_TYPES_H +# include <sys/types.h> +#endif +#if HAVE_IO_H && HAVE_FCNTL_H +# include <io.h> +# include <fcntl.h> +#endif +#include <time.h> +#include <divsufsort.h> +#include "lfs.h" + + +static +size_t +write_int(FILE *fp, saidx_t n) { + unsigned char c[4]; + c[0] = (unsigned char)((n >> 0) & 0xff), c[1] = (unsigned char)((n >> 8) & 0xff), + c[2] = (unsigned char)((n >> 16) & 0xff), c[3] = (unsigned char)((n >> 24) & 0xff); + return fwrite(c, sizeof(unsigned char), 4, fp); +} + +static +void +print_help(const char *progname, int status) { + fprintf(stderr, + "bwt, a burrows-wheeler transform program, version %s.\n", + divsufsort_version()); + fprintf(stderr, "usage: %s [-b num] INFILE OUTFILE\n", progname); + fprintf(stderr, " -b num set block size to num MiB [1..512] (default: 32)\n\n"); + exit(status); +} + +int +main(int argc, const char *argv[]) { + FILE *fp, *ofp; + const char *fname, *ofname; + sauchar_t *T; + saidx_t *SA; + LFS_OFF_T n; + size_t m; + saidx_t pidx; + clock_t start,finish; + saint_t i, blocksize = 32, needclose = 3; + + /* Check arguments. */ + if((argc == 1) || + (strcmp(argv[1], "-h") == 0) || + (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); } + if((argc != 3) && (argc != 5)) { print_help(argv[0], EXIT_FAILURE); } + i = 1; + if(argc == 5) { + if(strcmp(argv[i], "-b") != 0) { print_help(argv[0], EXIT_FAILURE); } + blocksize = atoi(argv[i + 1]); + if(blocksize < 0) { blocksize = 1; } + else if(512 < blocksize) { blocksize = 512; } + i += 2; + } + blocksize <<= 20; + + /* Open a file for reading. */ + if(strcmp(argv[i], "-") != 0) { +#if HAVE_FOPEN_S + if(fopen_s(&fp, fname = argv[i], "rb") != 0) { +#else + if((fp = LFS_FOPEN(fname = argv[i], "rb")) == NULL) { +#endif + fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname); + perror(NULL); + exit(EXIT_FAILURE); + } + } else { +#if HAVE__SETMODE && HAVE__FILENO + if(_setmode(_fileno(stdin), _O_BINARY) == -1) { + fprintf(stderr, "%s: Cannot set mode: ", argv[0]); + perror(NULL); + exit(EXIT_FAILURE); + } +#endif + fp = stdin; + fname = "stdin"; + needclose ^= 1; + } + i += 1; + + /* Open a file for writing. */ + if(strcmp(argv[i], "-") != 0) { +#if HAVE_FOPEN_S + if(fopen_s(&ofp, ofname = argv[i], "wb") != 0) { +#else + if((ofp = LFS_FOPEN(ofname = argv[i], "wb")) == NULL) { +#endif + fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname); + perror(NULL); + exit(EXIT_FAILURE); + } + } else { +#if HAVE__SETMODE && HAVE__FILENO + if(_setmode(_fileno(stdout), _O_BINARY) == -1) { + fprintf(stderr, "%s: Cannot set mode: ", argv[0]); + perror(NULL); + exit(EXIT_FAILURE); + } +#endif + ofp = stdout; + ofname = "stdout"; + needclose ^= 2; + } + + /* Get the file size. */ + if(LFS_FSEEK(fp, 0, SEEK_END) == 0) { + n = LFS_FTELL(fp); + rewind(fp); + if(n < 0) { + fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname); + perror(NULL); + exit(EXIT_FAILURE); + } + if(0x20000000L < n) { n = 0x20000000L; } + if((blocksize == 0) || (n < blocksize)) { blocksize = (saidx_t)n; } + } else if(blocksize == 0) { blocksize = 32 << 20; } + + /* Allocate 5blocksize bytes of memory. */ + T = (sauchar_t *)malloc(blocksize * sizeof(sauchar_t)); + SA = (saidx_t *)malloc(blocksize * sizeof(saidx_t)); + if((T == NULL) || (SA == NULL)) { + fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); + exit(EXIT_FAILURE); + } + + /* Write the blocksize. */ + if(write_int(ofp, blocksize) != 4) { + fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname); + perror(NULL); + exit(EXIT_FAILURE); + } + + fprintf(stderr, " BWT (blocksize %" PRIdSAINT_T ") ... ", blocksize); + start = clock(); + for(n = 0; 0 < (m = fread(T, sizeof(sauchar_t), blocksize, fp)); n += m) { + /* Burrows-Wheeler Transform. */ + pidx = divbwt(T, T, SA, m); + if(pidx < 0) { + fprintf(stderr, "%s (bw_transform): %s.\n", + argv[0], + (pidx == -1) ? "Invalid arguments" : "Cannot allocate memory"); + exit(EXIT_FAILURE); + } + + /* Write the bwted data. */ + if((write_int(ofp, pidx) != 4) || + (fwrite(T, sizeof(sauchar_t), m, ofp) != m)) { + fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname); + perror(NULL); + exit(EXIT_FAILURE); + } + } + if(ferror(fp)) { + fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname); + perror(NULL); + exit(EXIT_FAILURE); + } + finish = clock(); + fprintf(stderr, "%" PRIdOFF_T " bytes: %.4f sec\n", + n, (double)(finish - start) / (double)CLOCKS_PER_SEC); + + /* Close files */ + if(needclose & 1) { fclose(fp); } + if(needclose & 2) { fclose(ofp); } + + /* Deallocate memory. */ + free(SA); + free(T); + + return 0; +} diff --git a/tools/apultra_src/src/libdivsufsort/examples/mksary.c b/tools/apultra_src/src/libdivsufsort/examples/mksary.c new file mode 100644 index 0000000..b48177c --- /dev/null +++ b/tools/apultra_src/src/libdivsufsort/examples/mksary.c @@ -0,0 +1,193 @@ +/* + * mksary.c for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#if HAVE_CONFIG_H +# include "config.h" +#endif +#include <stdio.h> +#if HAVE_STRING_H +# include <string.h> +#endif +#if HAVE_STDLIB_H +# include <stdlib.h> +#endif +#if HAVE_MEMORY_H +# include <memory.h> +#endif +#if HAVE_STDDEF_H +# include <stddef.h> +#endif +#if HAVE_STRINGS_H +# include <strings.h> +#endif +#if HAVE_SYS_TYPES_H +# include <sys/types.h> +#endif +#if HAVE_IO_H && HAVE_FCNTL_H +# include <io.h> +# include <fcntl.h> +#endif +#include <time.h> +#include <divsufsort.h> +#include "lfs.h" + + +static +void +print_help(const char *progname, int status) { + fprintf(stderr, + "mksary, a simple suffix array builder, version %s.\n", + divsufsort_version()); + fprintf(stderr, "usage: %s INFILE OUTFILE\n\n", progname); + exit(status); +} + +int +main(int argc, const char *argv[]) { + FILE *fp, *ofp; + const char *fname, *ofname; + sauchar_t *T; + saidx_t *SA; + LFS_OFF_T n; + clock_t start, finish; + saint_t needclose = 3; + + /* Check arguments. */ + if((argc == 1) || + (strcmp(argv[1], "-h") == 0) || + (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); } + if(argc != 3) { print_help(argv[0], EXIT_FAILURE); } + + /* Open a file for reading. */ + if(strcmp(argv[1], "-") != 0) { +#if HAVE_FOPEN_S + if(fopen_s(&fp, fname = argv[1], "rb") != 0) { +#else + if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) { +#endif + fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname); + perror(NULL); + exit(EXIT_FAILURE); + } + } else { +#if HAVE__SETMODE && HAVE__FILENO + if(_setmode(_fileno(stdin), _O_BINARY) == -1) { + fprintf(stderr, "%s: Cannot set mode: ", argv[0]); + perror(NULL); + exit(EXIT_FAILURE); + } +#endif + fp = stdin; + fname = "stdin"; + needclose ^= 1; + } + + /* Open a file for writing. */ + if(strcmp(argv[2], "-") != 0) { +#if HAVE_FOPEN_S + if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) { +#else + if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) { +#endif + fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname); + perror(NULL); + exit(EXIT_FAILURE); + } + } else { +#if HAVE__SETMODE && HAVE__FILENO + if(_setmode(_fileno(stdout), _O_BINARY) == -1) { + fprintf(stderr, "%s: Cannot set mode: ", argv[0]); + perror(NULL); + exit(EXIT_FAILURE); + } +#endif + ofp = stdout; + ofname = "stdout"; + needclose ^= 2; + } + + /* Get the file size. */ + if(LFS_FSEEK(fp, 0, SEEK_END) == 0) { + n = LFS_FTELL(fp); + rewind(fp); + if(n < 0) { + fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname); + perror(NULL); + exit(EXIT_FAILURE); + } + if(0x7fffffff <= n) { + fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], fname); + exit(EXIT_FAILURE); + } + } else { + fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname); + perror(NULL); + exit(EXIT_FAILURE); + } + + /* Allocate 5blocksize bytes of memory. */ + T = (sauchar_t *)malloc((size_t)n * sizeof(sauchar_t)); + SA = (saidx_t *)malloc((size_t)n * sizeof(saidx_t)); + if((T == NULL) || (SA == NULL)) { + fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); + exit(EXIT_FAILURE); + } + + /* Read n bytes of data. */ + if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) { + fprintf(stderr, "%s: %s `%s': ", + argv[0], + (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", + fname); + perror(NULL); + exit(EXIT_FAILURE); + } + if(needclose & 1) { fclose(fp); } + + /* Construct the suffix array. */ + fprintf(stderr, "%s: %" PRIdOFF_T " bytes ... ", fname, n); + start = clock(); + if(divsufsort(T, SA, (saidx_t)n) != 0) { + fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); + exit(EXIT_FAILURE); + } + finish = clock(); + fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC); + + /* Write the suffix array. */ + if(fwrite(SA, sizeof(saidx_t), (size_t)n, ofp) != (size_t)n) { + fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname); + perror(NULL); + exit(EXIT_FAILURE); + } + if(needclose & 2) { fclose(ofp); } + + /* Deallocate memory. */ + free(SA); + free(T); + + return 0; +} diff --git a/tools/apultra_src/src/libdivsufsort/examples/sasearch.c b/tools/apultra_src/src/libdivsufsort/examples/sasearch.c new file mode 100644 index 0000000..7e5ca4f --- /dev/null +++ b/tools/apultra_src/src/libdivsufsort/examples/sasearch.c @@ -0,0 +1,165 @@ +/* + * sasearch.c for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#if HAVE_CONFIG_H +# include "config.h" +#endif +#include <stdio.h> +#if HAVE_STRING_H +# include <string.h> +#endif +#if HAVE_STDLIB_H +# include <stdlib.h> +#endif +#if HAVE_MEMORY_H +# include <memory.h> +#endif +#if HAVE_STDDEF_H +# include <stddef.h> +#endif +#if HAVE_STRINGS_H +# include <strings.h> +#endif +#if HAVE_SYS_TYPES_H +# include <sys/types.h> +#endif +#if HAVE_IO_H && HAVE_FCNTL_H +# include <io.h> +# include <fcntl.h> +#endif +#include <divsufsort.h> +#include "lfs.h" + + +static +void +print_help(const char *progname, int status) { + fprintf(stderr, + "sasearch, a simple SA-based full-text search tool, version %s\n", + divsufsort_version()); + fprintf(stderr, "usage: %s PATTERN FILE SAFILE\n\n", progname); + exit(status); +} + +int +main(int argc, const char *argv[]) { + FILE *fp; + const char *P; + sauchar_t *T; + saidx_t *SA; + LFS_OFF_T n; + size_t Psize; + saidx_t i, size, left; + + if((argc == 1) || + (strcmp(argv[1], "-h") == 0) || + (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); } + if(argc != 4) { print_help(argv[0], EXIT_FAILURE); } + + P = argv[1]; + Psize = strlen(P); + + /* Open a file for reading. */ +#if HAVE_FOPEN_S + if(fopen_s(&fp, argv[2], "rb") != 0) { +#else + if((fp = LFS_FOPEN(argv[2], "rb")) == NULL) { +#endif + fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[2]); + perror(NULL); + exit(EXIT_FAILURE); + } + + /* Get the file size. */ + if(LFS_FSEEK(fp, 0, SEEK_END) == 0) { + n = LFS_FTELL(fp); + rewind(fp); + if(n < 0) { + fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], argv[2]); + perror(NULL); + exit(EXIT_FAILURE); + } + } else { + fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], argv[2]); + perror(NULL); + exit(EXIT_FAILURE); + } + + /* Allocate 5n bytes of memory. */ + T = (sauchar_t *)malloc((size_t)n * sizeof(sauchar_t)); + SA = (saidx_t *)malloc((size_t)n * sizeof(saidx_t)); + if((T == NULL) || (SA == NULL)) { + fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); + exit(EXIT_FAILURE); + } + + /* Read n bytes of data. */ + if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) { + fprintf(stderr, "%s: %s `%s': ", + argv[0], + (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", + argv[2]); + perror(NULL); + exit(EXIT_FAILURE); + } + fclose(fp); + + /* Open the SA file for reading. */ +#if HAVE_FOPEN_S + if(fopen_s(&fp, argv[3], "rb") != 0) { +#else + if((fp = LFS_FOPEN(argv[3], "rb")) == NULL) { +#endif + fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[3]); + perror(NULL); + exit(EXIT_FAILURE); + } + + /* Read n * sizeof(saidx_t) bytes of data. */ + if(fread(SA, sizeof(saidx_t), (size_t)n, fp) != (size_t)n) { + fprintf(stderr, "%s: %s `%s': ", + argv[0], + (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", + argv[3]); + perror(NULL); + exit(EXIT_FAILURE); + } + fclose(fp); + + /* Search and print */ + size = sa_search(T, (saidx_t)n, + (const sauchar_t *)P, (saidx_t)Psize, + SA, (saidx_t)n, &left); + for(i = 0; i < size; ++i) { + fprintf(stdout, "%" PRIdSAIDX_T "\n", SA[left + i]); + } + + /* Deallocate memory. */ + free(SA); + free(T); + + return 0; +} diff --git a/tools/apultra_src/src/libdivsufsort/examples/suftest.c b/tools/apultra_src/src/libdivsufsort/examples/suftest.c new file mode 100644 index 0000000..71892ac --- /dev/null +++ b/tools/apultra_src/src/libdivsufsort/examples/suftest.c @@ -0,0 +1,164 @@ +/* + * suftest.c for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#if HAVE_CONFIG_H +# include "config.h" +#endif +#include <stdio.h> +#if HAVE_STRING_H +# include <string.h> +#endif +#if HAVE_STDLIB_H +# include <stdlib.h> +#endif +#if HAVE_MEMORY_H +# include <memory.h> +#endif +#if HAVE_STDDEF_H +# include <stddef.h> +#endif +#if HAVE_STRINGS_H +# include <strings.h> +#endif +#if HAVE_SYS_TYPES_H +# include <sys/types.h> +#endif +#if HAVE_IO_H && HAVE_FCNTL_H +# include <io.h> +# include <fcntl.h> +#endif +#include <time.h> +#include <divsufsort.h> +#include "lfs.h" + + +static +void +print_help(const char *progname, int status) { + fprintf(stderr, + "suftest, a suffixsort tester, version %s.\n", + divsufsort_version()); + fprintf(stderr, "usage: %s FILE\n\n", progname); + exit(status); +} + +int +main(int argc, const char *argv[]) { + FILE *fp; + const char *fname; + sauchar_t *T; + saidx_t *SA; + LFS_OFF_T n; + clock_t start, finish; + saint_t needclose = 1; + + /* Check arguments. */ + if((argc == 1) || + (strcmp(argv[1], "-h") == 0) || + (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); } + if(argc != 2) { print_help(argv[0], EXIT_FAILURE); } + + /* Open a file for reading. */ + if(strcmp(argv[1], "-") != 0) { +#if HAVE_FOPEN_S + if(fopen_s(&fp, fname = argv[1], "rb") != 0) { +#else + if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) { +#endif + fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname); + perror(NULL); + exit(EXIT_FAILURE); + } + } else { +#if HAVE__SETMODE && HAVE__FILENO + if(_setmode(_fileno(stdin), _O_BINARY) == -1) { + fprintf(stderr, "%s: Cannot set mode: ", argv[0]); + perror(NULL); + exit(EXIT_FAILURE); + } +#endif + fp = stdin; + fname = "stdin"; + needclose = 0; + } + + /* Get the file size. */ + if(LFS_FSEEK(fp, 0, SEEK_END) == 0) { + n = LFS_FTELL(fp); + rewind(fp); + if(n < 0) { + fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname); + perror(NULL); + exit(EXIT_FAILURE); + } + if(0x7fffffff <= n) { + fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], fname); + exit(EXIT_FAILURE); + } + } else { + fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname); + perror(NULL); + exit(EXIT_FAILURE); + } + + /* Allocate 5n bytes of memory. */ + T = (sauchar_t *)malloc((size_t)n * sizeof(sauchar_t)); + SA = (saidx_t *)malloc((size_t)n * sizeof(saidx_t)); + if((T == NULL) || (SA == NULL)) { + fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); + exit(EXIT_FAILURE); + } + + /* Read n bytes of data. */ + if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) { + fprintf(stderr, "%s: %s `%s': ", + argv[0], + (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", + argv[1]); + perror(NULL); + exit(EXIT_FAILURE); + } + if(needclose & 1) { fclose(fp); } + + /* Construct the suffix array. */ + fprintf(stderr, "%s: %" PRIdOFF_T " bytes ... ", fname, n); + start = clock(); + if(divsufsort(T, SA, (saidx_t)n) != 0) { + fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); + exit(EXIT_FAILURE); + } + finish = clock(); + fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC); + + /* Check the suffix array. */ + if(sufcheck(T, SA, (saidx_t)n, 1) != 0) { exit(EXIT_FAILURE); } + + /* Deallocate memory. */ + free(SA); + free(T); + + return 0; +} diff --git a/tools/apultra_src/src/libdivsufsort/examples/unbwt.c b/tools/apultra_src/src/libdivsufsort/examples/unbwt.c new file mode 100644 index 0000000..c0f19e9 --- /dev/null +++ b/tools/apultra_src/src/libdivsufsort/examples/unbwt.c @@ -0,0 +1,207 @@ +/* + * unbwt.c for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#if HAVE_CONFIG_H +# include "config.h" +#endif +#include <stdio.h> +#if HAVE_STRING_H +# include <string.h> +#endif +#if HAVE_STDLIB_H +# include <stdlib.h> +#endif +#if HAVE_MEMORY_H +# include <memory.h> +#endif +#if HAVE_STDDEF_H +# include <stddef.h> +#endif +#if HAVE_STRINGS_H +# include <strings.h> +#endif +#if HAVE_SYS_TYPES_H +# include <sys/types.h> +#endif +#if HAVE_IO_H && HAVE_FCNTL_H +# include <io.h> +# include <fcntl.h> +#endif +#include <time.h> +#include <divsufsort.h> +#include "lfs.h" + + +static +size_t +read_int(FILE *fp, saidx_t *n) { + unsigned char c[4]; + size_t m = fread(c, sizeof(unsigned char), 4, fp); + if(m == 4) { + *n = (c[0] << 0) | (c[1] << 8) | + (c[2] << 16) | (c[3] << 24); + } + return m; +} + +static +void +print_help(const char *progname, int status) { + fprintf(stderr, + "unbwt, an inverse burrows-wheeler transform program, version %s.\n", + divsufsort_version()); + fprintf(stderr, "usage: %s INFILE OUTFILE\n\n", progname); + exit(status); +} + +int +main(int argc, const char *argv[]) { + FILE *fp, *ofp; + const char *fname, *ofname; + sauchar_t *T; + saidx_t *A; + LFS_OFF_T n; + size_t m; + saidx_t pidx; + clock_t start, finish; + saint_t err, blocksize, needclose = 3; + + /* Check arguments. */ + if((argc == 1) || + (strcmp(argv[1], "-h") == 0) || + (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); } + if(argc != 3) { print_help(argv[0], EXIT_FAILURE); } + + /* Open a file for reading. */ + if(strcmp(argv[1], "-") != 0) { +#if HAVE_FOPEN_S + if(fopen_s(&fp, fname = argv[1], "rb") != 0) { +#else + if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) { +#endif + fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname); + perror(NULL); + exit(EXIT_FAILURE); + } + } else { +#if HAVE__SETMODE && HAVE__FILENO + if(_setmode(_fileno(stdin), _O_BINARY) == -1) { + fprintf(stderr, "%s: Cannot set mode: ", argv[0]); + perror(NULL); + exit(EXIT_FAILURE); + } +#endif + fp = stdin; + fname = "stdin"; + needclose ^= 1; + } + + /* Open a file for writing. */ + if(strcmp(argv[2], "-") != 0) { +#if HAVE_FOPEN_S + if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) { +#else + if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) { +#endif + fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname); + perror(NULL); + exit(EXIT_FAILURE); + } + } else { +#if HAVE__SETMODE && HAVE__FILENO + if(_setmode(_fileno(stdout), _O_BINARY) == -1) { + fprintf(stderr, "%s: Cannot set mode: ", argv[0]); + perror(NULL); + exit(EXIT_FAILURE); + } +#endif + ofp = stdout; + ofname = "stdout"; + needclose ^= 2; + } + + /* Read the blocksize. */ + if(read_int(fp, &blocksize) != 4) { + fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname); + perror(NULL); + exit(EXIT_FAILURE); + } + + /* Allocate 5blocksize bytes of memory. */ + T = (sauchar_t *)malloc(blocksize * sizeof(sauchar_t)); + A = (saidx_t *)malloc(blocksize * sizeof(saidx_t)); + if((T == NULL) || (A == NULL)) { + fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); + exit(EXIT_FAILURE); + } + + fprintf(stderr, "UnBWT (blocksize %" PRIdSAINT_T ") ... ", blocksize); + start = clock(); + for(n = 0; (m = read_int(fp, &pidx)) != 0; n += m) { + /* Read blocksize bytes of data. */ + if((m != 4) || ((m = fread(T, sizeof(sauchar_t), blocksize, fp)) == 0)) { + fprintf(stderr, "%s: %s `%s': ", + argv[0], + (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", + fname); + perror(NULL); + exit(EXIT_FAILURE); + } + + /* Inverse Burrows-Wheeler Transform. */ + if((err = inverse_bw_transform(T, T, A, m, pidx)) != 0) { + fprintf(stderr, "%s (reverseBWT): %s.\n", + argv[0], + (err == -1) ? "Invalid data" : "Cannot allocate memory"); + exit(EXIT_FAILURE); + } + + /* Write m bytes of data. */ + if(fwrite(T, sizeof(sauchar_t), m, ofp) != m) { + fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname); + perror(NULL); + exit(EXIT_FAILURE); + } + } + if(ferror(fp)) { + fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname); + perror(NULL); + exit(EXIT_FAILURE); + } + finish = clock(); + fprintf(stderr, "%" PRIdOFF_T " bytes: %.4f sec\n", + n, (double)(finish - start) / (double)CLOCKS_PER_SEC); + + /* Close files */ + if(needclose & 1) { fclose(fp); } + if(needclose & 2) { fclose(ofp); } + + /* Deallocate memory. */ + free(A); + free(T); + + return 0; +} |