summaryrefslogtreecommitdiff
path: root/data
diff options
context:
space:
mode:
authorJoel Klinghed <the_jk@spawned.biz>2025-09-10 22:12:22 +0200
committerJoel Klinghed <the_jk@spawned.biz>2025-09-10 22:12:22 +0200
commit32e14551a90e85000e41b3f0445d34d58a1431e4 (patch)
tree912c1e50b93b501446b1b179ee2a3e93586fb854 /data
parentcf99d0c865474105c14b2348fdbd1c83d87d5a29 (diff)
Add unicode general category lookup
Generate the lookup tables from UnicodeData.txt. To do that, add gen_ugc, which uses the csv, buffers, line, io and other modules to do the job.
Diffstat (limited to 'data')
-rwxr-xr-xdata/get_unicode.sh44
-rw-r--r--data/unicode-10.0.0/UnicodeData.txt.xzbin0 -> 158464 bytes
-rw-r--r--data/unicode-11.0.0/UnicodeData.txt.xzbin0 -> 161832 bytes
-rw-r--r--data/unicode-12.1.0/UnicodeData.txt.xzbin0 -> 164764 bytes
-rw-r--r--data/unicode-13.0.0/UnicodeData.txt.xzbin0 -> 167840 bytes
-rw-r--r--data/unicode-14.0.0/UnicodeData.txt.xzbin0 -> 172072 bytes
-rw-r--r--data/unicode-15.0.0/UnicodeData.txt.xzbin0 -> 173628 bytes
-rw-r--r--data/unicode-15.1.0/UnicodeData.txt.xzbin0 -> 173736 bytes
-rw-r--r--data/unicode-16.0.0/UnicodeData.txt.xzbin0 -> 181704 bytes
-rw-r--r--data/unicode-6.2.0/UnicodeData.txt.xzbin0 -> 129932 bytes
-rw-r--r--data/unicode-8.0.0/UnicodeData.txt.xzbin0 -> 151784 bytes
11 files changed, 44 insertions, 0 deletions
diff --git a/data/get_unicode.sh b/data/get_unicode.sh
new file mode 100755
index 0000000..99662b3
--- /dev/null
+++ b/data/get_unicode.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+#
+# Download UnicodeData.txt for every Unicode version listed below and store
+# it xz-compressed as unicode-<version>/UnicodeData.txt.xz next to this
+# script. Versions whose .xz file already exists are skipped.
+#
+# Requires: curl, xz.
+
+set -euo pipefail
+
+# Unicode versions to fetch, each tagged with a Java release (presumably the
+# first release using that Unicode version -- confirm before relying on it).
+versions=(
+  "6.2.0"  # Java 8
+  "8.0.0"  # Java 9
+  "10.0.0" # Java 11
+  "11.0.0" # Java 12
+  "12.1.0" # Java 13
+  "13.0.0" # Java 15
+  "14.0.0" # Java 19
+  "15.0.0" # Java 20
+  "15.1.0" # Java 22
+  "16.0.0" # Java 24
+)
+
+# Directory containing this script; all output goes below it.
+basedir=$(dirname -- "${BASH_SOURCE[0]}")
+
+for version in "${versions[@]}"; do
+  target="$basedir/unicode-$version/UnicodeData.txt"
+  url="https://www.unicode.org/Public/${version}/ucd/UnicodeData.txt"
+  if [[ ! -e "$target.xz" ]]; then
+    mkdir -p -- "$basedir/unicode-$version"
+    # --fail: an HTTP error page must not be saved and compressed as data.
+    if ! curl --fail "$url" -o "$target"; then
+      # Drop any partial download so the next run retries this version.
+      rm -f -- "$target"
+      echo "error: failed to download $url" >&2
+      exit 1
+    fi
+    # xz replaces $target with $target.xz.
+    xz -9 -- "$target"
+  fi
+done
diff --git a/data/unicode-10.0.0/UnicodeData.txt.xz b/data/unicode-10.0.0/UnicodeData.txt.xz
new file mode 100644
index 0000000..25eb906
--- /dev/null
+++ b/data/unicode-10.0.0/UnicodeData.txt.xz
Binary files differ
diff --git a/data/unicode-11.0.0/UnicodeData.txt.xz b/data/unicode-11.0.0/UnicodeData.txt.xz
new file mode 100644
index 0000000..e586bd3
--- /dev/null
+++ b/data/unicode-11.0.0/UnicodeData.txt.xz
Binary files differ
diff --git a/data/unicode-12.1.0/UnicodeData.txt.xz b/data/unicode-12.1.0/UnicodeData.txt.xz
new file mode 100644
index 0000000..15f8880
--- /dev/null
+++ b/data/unicode-12.1.0/UnicodeData.txt.xz
Binary files differ
diff --git a/data/unicode-13.0.0/UnicodeData.txt.xz b/data/unicode-13.0.0/UnicodeData.txt.xz
new file mode 100644
index 0000000..9e723dd
--- /dev/null
+++ b/data/unicode-13.0.0/UnicodeData.txt.xz
Binary files differ
diff --git a/data/unicode-14.0.0/UnicodeData.txt.xz b/data/unicode-14.0.0/UnicodeData.txt.xz
new file mode 100644
index 0000000..8ccc9cb
--- /dev/null
+++ b/data/unicode-14.0.0/UnicodeData.txt.xz
Binary files differ
diff --git a/data/unicode-15.0.0/UnicodeData.txt.xz b/data/unicode-15.0.0/UnicodeData.txt.xz
new file mode 100644
index 0000000..dfb9976
--- /dev/null
+++ b/data/unicode-15.0.0/UnicodeData.txt.xz
Binary files differ
diff --git a/data/unicode-15.1.0/UnicodeData.txt.xz b/data/unicode-15.1.0/UnicodeData.txt.xz
new file mode 100644
index 0000000..aa89857
--- /dev/null
+++ b/data/unicode-15.1.0/UnicodeData.txt.xz
Binary files differ
diff --git a/data/unicode-16.0.0/UnicodeData.txt.xz b/data/unicode-16.0.0/UnicodeData.txt.xz
new file mode 100644
index 0000000..199d7c1
--- /dev/null
+++ b/data/unicode-16.0.0/UnicodeData.txt.xz
Binary files differ
diff --git a/data/unicode-6.2.0/UnicodeData.txt.xz b/data/unicode-6.2.0/UnicodeData.txt.xz
new file mode 100644
index 0000000..3001c3c
--- /dev/null
+++ b/data/unicode-6.2.0/UnicodeData.txt.xz
Binary files differ
diff --git a/data/unicode-8.0.0/UnicodeData.txt.xz b/data/unicode-8.0.0/UnicodeData.txt.xz
new file mode 100644
index 0000000..d68fed1
--- /dev/null
+++ b/data/unicode-8.0.0/UnicodeData.txt.xz
Binary files differ