diff options
| author | Joel Klinghed <the_jk@spawned.biz> | 2025-09-10 22:12:22 +0200 |
|---|---|---|
| committer | Joel Klinghed <the_jk@spawned.biz> | 2025-09-10 22:12:22 +0200 |
| commit | 32e14551a90e85000e41b3f0445d34d58a1431e4 (patch) | |
| tree | 912c1e50b93b501446b1b179ee2a3e93586fb854 /data | |
| parent | cf99d0c865474105c14b2348fdbd1c83d87d5a29 (diff) | |
Add unicode general category lookup
Generate the lookup tables from UnicodeData.txt. To do that,
add gen_ugc, which uses the csv, buffers, line, io and other modules
to do the job.
Diffstat (limited to 'data')
| -rwxr-xr-x | data/get_unicode.sh | 44 | ||||
| -rw-r--r-- | data/unicode-10.0.0/UnicodeData.txt.xz | bin | 0 -> 158464 bytes | |||
| -rw-r--r-- | data/unicode-11.0.0/UnicodeData.txt.xz | bin | 0 -> 161832 bytes | |||
| -rw-r--r-- | data/unicode-12.1.0/UnicodeData.txt.xz | bin | 0 -> 164764 bytes | |||
| -rw-r--r-- | data/unicode-13.0.0/UnicodeData.txt.xz | bin | 0 -> 167840 bytes | |||
| -rw-r--r-- | data/unicode-14.0.0/UnicodeData.txt.xz | bin | 0 -> 172072 bytes | |||
| -rw-r--r-- | data/unicode-15.0.0/UnicodeData.txt.xz | bin | 0 -> 173628 bytes | |||
| -rw-r--r-- | data/unicode-15.1.0/UnicodeData.txt.xz | bin | 0 -> 173736 bytes | |||
| -rw-r--r-- | data/unicode-16.0.0/UnicodeData.txt.xz | bin | 0 -> 181704 bytes | |||
| -rw-r--r-- | data/unicode-6.2.0/UnicodeData.txt.xz | bin | 0 -> 129932 bytes | |||
| -rw-r--r-- | data/unicode-8.0.0/UnicodeData.txt.xz | bin | 0 -> 151784 bytes |
11 files changed, 44 insertions, 0 deletions
```diff
diff --git a/data/get_unicode.sh b/data/get_unicode.sh
new file mode 100755
index 0000000..99662b3
--- /dev/null
+++ b/data/get_unicode.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+declare -a versions
+
+# Java 8
+versions+=("6.2.0")
+
+# Java 9
+versions+=("8.0.0")
+
+# Java 11
+versions+=("10.0.0")
+
+# Java 12
+versions+=("11.0.0")
+
+# Java 13
+versions+=("12.1.0")
+
+# Java 15
+versions+=("13.0.0")
+
+# Java 19
+versions+=("14.0.0")
+
+# Java 20
+versions+=("15.0.0")
+
+# Java 22
+versions+=("15.1.0")
+
+# Java 24
+versions+=("16.0.0")
+
+basedir=$(dirname -- "${BASH_SOURCE[0]}")
+
+for version in "${versions[@]}"; do
+  target="$basedir"/unicode-"$version"/UnicodeData.txt
+  if [ ! -e "$target".xz ]; then
+    mkdir -p "$basedir"/unicode-"$version"
+    curl "https://www.unicode.org/Public/${version}/ucd/UnicodeData.txt" -o "$target"
+    xz -9 "$target"
+  fi
+done
diff --git a/data/unicode-10.0.0/UnicodeData.txt.xz b/data/unicode-10.0.0/UnicodeData.txt.xz
Binary files differ
new file mode 100644
index 0000000..25eb906
--- /dev/null
+++ b/data/unicode-10.0.0/UnicodeData.txt.xz
diff --git a/data/unicode-11.0.0/UnicodeData.txt.xz b/data/unicode-11.0.0/UnicodeData.txt.xz
Binary files differ
new file mode 100644
index 0000000..e586bd3
--- /dev/null
+++ b/data/unicode-11.0.0/UnicodeData.txt.xz
diff --git a/data/unicode-12.1.0/UnicodeData.txt.xz b/data/unicode-12.1.0/UnicodeData.txt.xz
Binary files differ
new file mode 100644
index 0000000..15f8880
--- /dev/null
+++ b/data/unicode-12.1.0/UnicodeData.txt.xz
diff --git a/data/unicode-13.0.0/UnicodeData.txt.xz b/data/unicode-13.0.0/UnicodeData.txt.xz
Binary files differ
new file mode 100644
index 0000000..9e723dd
--- /dev/null
+++ b/data/unicode-13.0.0/UnicodeData.txt.xz
diff --git a/data/unicode-14.0.0/UnicodeData.txt.xz b/data/unicode-14.0.0/UnicodeData.txt.xz
Binary files differ
new file mode 100644
index 0000000..8ccc9cb
--- /dev/null
+++ b/data/unicode-14.0.0/UnicodeData.txt.xz
diff --git a/data/unicode-15.0.0/UnicodeData.txt.xz b/data/unicode-15.0.0/UnicodeData.txt.xz
Binary files differ
new file mode 100644
index 0000000..dfb9976
--- /dev/null
+++ b/data/unicode-15.0.0/UnicodeData.txt.xz
diff --git a/data/unicode-15.1.0/UnicodeData.txt.xz b/data/unicode-15.1.0/UnicodeData.txt.xz
Binary files differ
new file mode 100644
index 0000000..aa89857
--- /dev/null
+++ b/data/unicode-15.1.0/UnicodeData.txt.xz
diff --git a/data/unicode-16.0.0/UnicodeData.txt.xz b/data/unicode-16.0.0/UnicodeData.txt.xz
Binary files differ
new file mode 100644
index 0000000..199d7c1
--- /dev/null
+++ b/data/unicode-16.0.0/UnicodeData.txt.xz
diff --git a/data/unicode-6.2.0/UnicodeData.txt.xz b/data/unicode-6.2.0/UnicodeData.txt.xz
Binary files differ
new file mode 100644
index 0000000..3001c3c
--- /dev/null
+++ b/data/unicode-6.2.0/UnicodeData.txt.xz
diff --git a/data/unicode-8.0.0/UnicodeData.txt.xz b/data/unicode-8.0.0/UnicodeData.txt.xz
Binary files differ
new file mode 100644
index 0000000..d68fed1
--- /dev/null
+++ b/data/unicode-8.0.0/UnicodeData.txt.xz
```
