#!/bin/bash
#
# Fetch UnicodeData.txt for a fixed list of Unicode versions and keep each copy
# xz-compressed as <script dir>/unicode-<version>/UnicodeData.txt.xz.
#
# A version whose .xz file already exists is skipped, so the script can be
# re-run to fetch only what is still missing.
#
# Usage:    data/get_unicode.sh
# Requires: curl, xz
# Exit:     0 if every version is present afterwards, 1 if any fetch failed.
#
# Provenance: added as data/get_unicode.sh by commit
# 32e14551a90e85000e41b3f0445d34d58a1431e4 (Joel Klinghed, 2025-09-10),
# "Add unicode general category lookup":
#   Generate the lookup tables from UnicodeData.txt; to do that, add gen_ugc,
#   which uses csv, buffers, line, io and other modules to do the job.

set -uo pipefail

# Unicode versions to download. Each entry keeps the Java release label it was
# annotated with (presumably the Java release tied to that Unicode version --
# confirm before relying on the mapping).
readonly -a versions=(
  "6.2.0"   # Java 8
  "8.0.0"   # Java 9
  "10.0.0"  # Java 11
  "11.0.0"  # Java 12
  "12.1.0"  # Java 13
  "13.0.0"  # Java 15
  "14.0.0"  # Java 19
  "15.0.0"  # Java 20
  "15.1.0"  # Java 22
  "16.0.0"  # Java 24
)

# Directory containing this script; all output is written below it.
basedir=$(dirname -- "${BASH_SOURCE[0]}")

#######################################
# Download and compress UnicodeData.txt for a single Unicode version.
# Globals:   basedir (read)
# Arguments: $1 - Unicode version, e.g. "16.0.0"
# Outputs:   curl progress and error messages on stderr
# Returns:   0 if the .xz file exists afterwards, non-zero otherwise
#######################################
fetch_version() {
  local version=$1
  local dir="$basedir/unicode-$version"
  local target="$dir/UnicodeData.txt"
  local url="https://www.unicode.org/Public/${version}/ucd/UnicodeData.txt"

  # Already downloaded and compressed on an earlier run.
  if [[ -e "$target.xz" ]]; then
    return 0
  fi

  mkdir -p -- "$dir" || return

  # --fail: without it an HTTP error page would be saved as UnicodeData.txt,
  # compressed, and then skipped forever by the existence check above.
  # --location: follow redirects instead of storing the redirect response.
  if ! curl --fail --location -o "$target" "$url"; then
    # Do not leave a partial or bogus download behind.
    rm -f -- "$target"
    printf 'error: failed to download %s\n' "$url" >&2
    return 1
  fi

  # xz replaces $target with $target.xz on success.
  xz -9 -- "$target"
}

# Try every version even if one fails, but report the failure in the exit code.
status=0
for version in "${versions[@]}"; do
  fetch_version "$version" || status=1
done
exit "$status"