From 019b40adebac79536dcb20443f5ba7e5d44fa4af Mon Sep 17 00:00:00 2001 From: importer system account Date: Fri, 27 Sep 2024 09:51:33 -0400 Subject: [PATCH] Scripts, property file, docs for Clickhouse import process --- scripts/clickhouse_import_support/README.md | 27 ++ .../clone_mysql_database.sh | 234 ++++++++++++++++++ .../drop_tables_in_mysql_database.sh | 147 +++++++++++ ...anage_cbioportal_databases_tool.properties | 27 ++ .../mysql_command_line_functions.sh | 186 ++++++++++++++ .../parse_property_file_functions.sh | 179 ++++++++++++++ 6 files changed, 800 insertions(+) create mode 100644 scripts/clickhouse_import_support/README.md create mode 100755 scripts/clickhouse_import_support/clone_mysql_database.sh create mode 100755 scripts/clickhouse_import_support/drop_tables_in_mysql_database.sh create mode 100644 scripts/clickhouse_import_support/manage_cbioportal_databases_tool.properties create mode 100644 scripts/clickhouse_import_support/mysql_command_line_functions.sh create mode 100644 scripts/clickhouse_import_support/parse_property_file_functions.sh diff --git a/scripts/clickhouse_import_support/README.md b/scripts/clickhouse_import_support/README.md new file mode 100644 index 0000000..acd554e --- /dev/null +++ b/scripts/clickhouse_import_support/README.md @@ -0,0 +1,27 @@ +# cBioPortal Import Process Database Management Tools +These tools support a blue-green deployment approach to cBioPortal database updates. +This strategy was introduced to support the introduction of a coupled ClickHouse database +which will be used in conjunction with the prior MySQL database in order to improve the +runtime performance of the cBioPortal study view page. + +Import of cancer studies is now directed into a not-in-production copy of the production +MySQL database using the existing import codebase. The newly populated MySQL database is +used as a datasource for populating a not-in-production ClickHouse database. 
Using this +approach, the production databases remain consistent because no changes occur to either +database during import operations. Once the ClickHouse database has been fully populated +and related derived tables and persistent views have been created in ClickHouse, the +cBioPortal web server backend can be switched over quickly to use the newly populated +database and make the newly imported cancer studies available in production. + +## clone\_mysql\_database.sh +This bash script uses the *mysql* command line interface tool to make a complete copy +of the current production database into a separate database on the same MySQL server. +This will occur to initialize the not-in-production database and prepare it for cancer +study import. + +## drop\_tables\_in\_mysql\_database.sh +This bash script uses the *mysql* command line interface tool to drop all tables which +exist in a MySQL database. This will occur at the end of an import process in order to +clear the data from the prior production database (or the backup copy database) in order +to make the database empty and available for reuse during the next cycle of cancer study +import. 
diff --git a/scripts/clickhouse_import_support/clone_mysql_database.sh b/scripts/clickhouse_import_support/clone_mysql_database.sh new file mode 100755 index 0000000..03c17e8 --- /dev/null +++ b/scripts/clickhouse_import_support/clone_mysql_database.sh @@ -0,0 +1,234 @@
#!/usr/bin/env bash
#
# clone_mysql_database.sh
#
# Copies every table (schema first, then rows) of one configured database into another
# configured database, which must already exist and be empty. After each table is copied
# the record counts of the source and destination tables are compared.
#
# usage: clone_mysql_database.sh properties_filepath database_to_clone_tables_from database_to_clone_tables_to
#        databases (from/to) must be in {blue, green}

# bash declaration dependencies
source parse_property_file_functions.sh
source mysql_command_line_functions.sh

# non-local environment variables in use
unset my_properties
unset database_table_list
unset source_database_name
unset destination_database_name
unset source_database_create_table_statement_list
declare -A my_properties
declare -a database_table_list
declare -a source_database_create_table_statement_list
# scratch files which receive the output of the individual mysql invocations
database_table_list_filepath="$(pwd)/cmd_database_table_list.txt"
create_table_statement_filepath="$(pwd)/cmd_create_table_statement.txt"
create_table_result_filepath="$(pwd)/cmd_create_database_result.txt"
insert_table_data_result_filepath="$(pwd)/cmd_copy_table_data.txt"
record_count_comparison_filepath="$(pwd)/cmd_table_record_count.txt"

# Print the command line synopsis to stderr.
function usage() {
    echo "usage: clone_mysql_database.sh properties_filepath database_to_clone_tables_from database_to_clone_tables_to" >&2
    echo "         databases (from/to) must be in {blue, green}" >&2
}

# Load the properties file, set up the mysql helper functions and resolve the two color
# arguments into source_database_name / destination_database_name.
# Arguments: $1 - properties file path, $2 - color to clone from, $3 - color to clone to
# Returns:   0 on success, 1 on any configuration or argument problem
function initialize_main() {
    local properties_filepath=$1
    local database_to_clone_tables_from=$2
    local database_to_clone_tables_to=$3
    if ! parse_property_file "$properties_filepath" my_properties ; then
        usage
        return 1
    fi
    if ! initialize_mysql_command_line_functions ; then # this also purges the mysql credentials from the environment for security
        usage
        return 1
    fi
    case "$database_to_clone_tables_from" in
        blue)
            source_database_name="${my_properties['blue_database_name']}"
            ;;
        green)
            source_database_name="${my_properties['green_database_name']}"
            ;;
        *)
            echo "Error : database_to_clone_tables_from must be one of {blue, green}" >&2
            usage
            return 1
            ;;
    esac
    case "$database_to_clone_tables_to" in
        blue)
            destination_database_name="${my_properties['blue_database_name']}"
            ;;
        green)
            destination_database_name="${my_properties['green_database_name']}"
            ;;
        *)
            echo "Error : database_to_clone_tables_to must be one of {blue, green}" >&2
            usage
            return 1
            ;;
    esac
    if [ "$database_to_clone_tables_to" == "$database_to_clone_tables_from" ] ; then
        echo "Error : database_to_clone_tables_to cannot be the same as database_to_clone_tables_from" >&2
        return 1
    fi
    # an unset property would otherwise surface later as a malformed SQL statement
    if [ -z "$source_database_name" ] || [ -z "$destination_database_name" ] ; then
        echo "Error : properties blue_database_name and green_database_name must both be set in $properties_filepath" >&2
        return 1
    fi
    return 0
}

# Remove the scratch files written by this script.
function delete_output_stream_files() {
    rm -f "$database_table_list_filepath"
    rm -f "$create_table_statement_filepath"
    rm -f "$create_table_result_filepath"
    rm -f "$insert_table_data_result_filepath"
    rm -f "$record_count_comparison_filepath"
}

# Shut down the mysql helper functions, remove scratch files and drop this script's globals.
function shutdown_main_and_clean_up() {
    shutdown_mysql_command_line_functions
    delete_output_stream_files
    unset my_properties
    unset database_table_list
    unset source_database_name
    unset destination_database_name
    unset source_database_create_table_statement_list
    unset database_table_list_filepath
    unset create_table_statement_filepath
    unset create_table_result_filepath
    unset insert_table_data_result_filepath
    unset record_count_comparison_filepath
}

# Returns: 0 if the destination database exists and has no tables, 1 if it does not exist,
#          2 if it is not empty.
function destination_database_exists_and_is_empty() {
    if ! database_exists "$destination_database_name" ; then
        echo "Error : could not proceed with database cloning because destination database does not exist: $destination_database_name" >&2
        return 1
    fi
    if ! database_is_empty "$destination_database_name" ; then
        echo "Error : could not proceed with database cloning because destination database is not empty: $destination_database_name" >&2
        return 2
    fi
    return 0
}

# Fill database_table_list with the table names of the source database.
# Returns: 0 on success, 1 if the query or the parsing of its output failed
function set_database_table_list() {
    # the database name is backquoted so that names needing identifier quoting still work
    local statement="SHOW TABLES IN \`$source_database_name\`"
    rm -f "$database_table_list_filepath"
    if ! execute_sql_statement_via_mysql "$statement" "$database_table_list_filepath" ; then
        echo "Warning : failed to execute mysql statement : $statement" >&2
        return 1
    fi
    unset sql_data_array
    if ! set_sql_data_array_from_file "$database_table_list_filepath" 0 ; then
        return 1
    fi
    # quoted expansion : one array element per table name, no word splitting or globbing
    database_table_list=("${sql_data_array[@]}")
    return 0
}

# Print one table name per line (debugging aid).
function print_database_table_list() {
    local table_name
    for table_name in "${database_table_list[@]}" ; do
        echo "$table_name"
    done
}

# Fill source_database_create_table_statement_list with the CREATE TABLE statement of each
# table in database_table_list (same order, same indexes).
# Returns: 0 on success, 1 if any statement could not be retrieved
function set_source_database_create_table_statement_list() {
    source_database_create_table_statement_list=()
    local pos=0
    local num_tables=${#database_table_list[@]}
    while [ "$pos" -lt "$num_tables" ] ; do
        local table_name="\`$source_database_name\`.\`${database_table_list[$pos]}\`"
        local statement="SHOW CREATE TABLE $table_name"
        rm -f "$create_table_statement_filepath"
        if ! execute_sql_statement_via_mysql "$statement" "$create_table_statement_filepath" ; then
            echo "Warning : failed to execute mysql statement : $statement" >&2
            return 1
        fi
        # column 1 of the result holds the statement text (column 0 is the table name)
        if ! set_sql_data_array_from_file "$create_table_statement_filepath" 1 ; then
            return 1
        fi
        source_database_create_table_statement_list+=("${sql_data_array[0]}")
        pos=$((pos+1))
    done
    return 0
}

# Print each table name with its CREATE TABLE statement (debugging aid).
function print_source_database_create_table_statement_list() {
    local pos=0
    local num_tables=${#source_database_create_table_statement_list[@]}
    while [ "$pos" -lt "$num_tables" ] ; do
        echo "for table ${database_table_list[$pos]} : ${source_database_create_table_statement_list[$pos]}"
        pos=$((pos+1))
    done
}

# Create, in the destination database, the (empty) table whose CREATE TABLE statement is
# stored at the given index.
# Arguments: $1 - index into source_database_create_table_statement_list
function create_destination_database_table_schema_only() {
    local pos=$1
    local create_destination_table_statement="SET FOREIGN_KEY_CHECKS=0; USE \`$destination_database_name\`; ${source_database_create_table_statement_list[$pos]};"
    execute_sql_statement_via_mysql "$create_destination_table_statement" "$create_table_result_filepath"
}

# Copy all rows of one source table into the destination table of the same name.
# Arguments: $1 - table name
function copy_source_database_table_data_to_destination() {
    local table_name=$1
    local source_table_full_name="\`$source_database_name\`.\`$table_name\`"
    local destination_table_full_name="\`$destination_database_name\`.\`$table_name\`"
    local copy_data_from_source_to_destination_table_statement="SET FOREIGN_KEY_CHECKS=0; INSERT INTO $destination_table_full_name TABLE $source_table_full_name;"
    execute_sql_statement_via_mysql "$copy_data_from_source_to_destination_table_statement" "$insert_table_data_result_filepath"
}

# Compare the record counts of a source table and its destination copy.
# Arguments: $1 - table name
# Returns:   0 if the counts are equal, 1 if they differ or could not be determined
function destination_table_matches_source_table() {
    local table_name=$1
    local source_table_full_name="\`$source_database_name\`.\`$table_name\`"
    local destination_table_full_name="\`$destination_database_name\`.\`$table_name\`"
    local get_record_counts_statement="SELECT count(*) AS record_count from $destination_table_full_name UNION DISTINCT SELECT count(*) as record_count from $source_table_full_name;"
    if ! execute_sql_statement_via_mysql "$get_record_counts_statement" "$record_count_comparison_filepath" ; then
        echo "Error : could not retrieve record counts for table $table_name" >&2
        return 1
    fi
    # if record counts match, only one distinct value will be returned. If they differ, there will be 2 values
    if ! set_sql_data_array_from_file "$record_count_comparison_filepath" 0 ; then
        return 1
    fi
    if [[ "${#sql_data_array[@]}" -ne 1 ]] ; then
        local record_count_0="${sql_data_array[0]}"
        local record_count_1="${sql_data_array[1]}"
        echo "Error : when cloning data from table $table_name, source database and destination database tables contain different record counts ($record_count_0 v. $record_count_1)" >&2
        return 1
    fi
    return 0
}

# Create, fill and verify every table, stopping at the first failure.
# Returns: 0 if all tables were cloned and verified, 1 otherwise
function clone_all_source_database_tables_to_destination_database() {
    local pos=0
    local num_tables=${#source_database_create_table_statement_list[@]}
    while [ "$pos" -lt "$num_tables" ] ; do
        local table_name="${database_table_list[$pos]}"
        echo "cloning $table_name"
        if ! create_destination_database_table_schema_only "$pos" ; then
            echo "Error : could not create database table schema for $table_name in destination database" >&2
            return 1
        fi
        if ! copy_source_database_table_data_to_destination "$table_name" ; then
            echo "Error : could not copy data from table $table_name into destination database" >&2
            return 1
        fi
        if ! destination_table_matches_source_table "$table_name" ; then
            echo "Cloning operation canceled" >&2
            return 1
        fi
        pos=$((pos+1))
    done
    return 0
}

# Run the cloning steps in order; clean up is performed whether or not a step failed.
function main() {
    local properties_filepath=$1
    local database_to_clone_tables_from=$2
    local database_to_clone_tables_to=$3
    local exit_status=0
    if ! initialize_main "$properties_filepath" "$database_to_clone_tables_from" "$database_to_clone_tables_to" ||
            ! destination_database_exists_and_is_empty ||
            ! set_database_table_list ||
            ! set_source_database_create_table_statement_list ||
            ! clone_all_source_database_tables_to_destination_database ; then
        exit_status=1
    fi
    shutdown_main_and_clean_up
    return $exit_status
}

main "$1" "$2" "$3"
diff --git a/scripts/clickhouse_import_support/drop_tables_in_mysql_database.sh b/scripts/clickhouse_import_support/drop_tables_in_mysql_database.sh new file mode 100755 index 0000000..3e8be22 --- /dev/null +++ b/scripts/clickhouse_import_support/drop_tables_in_mysql_database.sh @@ -0,0 +1,147 @@
#!/usr/bin/env bash
#
# drop_tables_in_mysql_database.sh
#
# Drops every table of one configured database and then verifies that the database is empty.
#
# usage: drop_tables_in_mysql_database.sh properties_filepath database
#        database must be in {blue, green, shelved}

# bash declaration dependencies
source parse_property_file_functions.sh
source mysql_command_line_functions.sh

# non-local environment variables in use
unset my_properties
unset database_table_list
unset database_name
declare -A my_properties
declare -a database_table_list
database_name=""
# scratch files which receive the output of the individual mysql invocations
database_table_list_filepath="$(pwd)/dtmd_database_table_list.txt"
drop_table_result_filepath="$(pwd)/dtmd_drop_table_result.txt"

# Print the command line synopsis to stderr.
function usage() {
    echo "usage: drop_tables_in_mysql_database.sh properties_filepath database" >&2
    echo "         database must be in {blue, green, shelved}" >&2
}

# Load the properties file, set up the mysql helper functions and resolve the color
# argument into database_name.
# Arguments: $1 - properties file path, $2 - color of the database to drop tables from
# Returns:   0 on success, 1 on any configuration or argument problem
function initialize_main() {
    local properties_filepath=$1
    local database_to_drop_tables_from=$2
    if ! parse_property_file "$properties_filepath" my_properties ; then
        usage
        return 1
    fi
    if ! initialize_mysql_command_line_functions ; then # this also purges the mysql credentials from the environment for security
        usage
        return 1
    fi
    case "$database_to_drop_tables_from" in
        blue)
            database_name="${my_properties['blue_database_name']}"
            ;;
        green)
            database_name="${my_properties['green_database_name']}"
            ;;
        shelved)
            database_name="${my_properties['shelved_database_name']}"
            ;;
        *)
            echo "Error : database must be one of {blue, green, shelved}" >&2
            usage
            return 1
            ;;
    esac
    # an unset property would otherwise surface later as a malformed SQL statement
    if [ -z "$database_name" ] ; then
        echo "Error : property ${database_to_drop_tables_from}_database_name must be set in $properties_filepath" >&2
        return 1
    fi
    return 0
}

# Remove the scratch files written by this script.
function delete_output_stream_files() {
    rm -f "$database_table_list_filepath"
    rm -f "$drop_table_result_filepath"
}

# Shut down the mysql helper functions, remove scratch files and drop this script's globals.
function shutdown_main_and_clean_up() {
    shutdown_mysql_command_line_functions
    delete_output_stream_files
    unset my_properties
    unset database_table_list
    unset database_name
    unset database_table_list_filepath
    unset drop_table_result_filepath
}

# Returns: 0 if the selected database exists, 1 otherwise.
function selected_database_exists() {
    if ! database_exists "$database_name" ; then
        echo "Error : could not proceed with database table dropping because database does not exist: $database_name" >&2
        return 1
    fi
    return 0
}

# Fill database_table_list with the table names of the selected database.
# Returns: 0 on success, 1 if the query or the parsing of its output failed
function set_database_table_list() {
    # the database name is backquoted so that names needing identifier quoting still work
    local statement="SHOW TABLES IN \`$database_name\`"
    rm -f "$database_table_list_filepath"
    if ! execute_sql_statement_via_mysql "$statement" "$database_table_list_filepath" ; then
        echo "Warning : failed to execute mysql statement : $statement" >&2
        return 1
    fi
    unset sql_data_array
    if ! set_sql_data_array_from_file "$database_table_list_filepath" 0 ; then
        return 1
    fi
    # quoted expansion : one array element per table name, no word splitting or globbing
    database_table_list=("${sql_data_array[@]}")
    return 0
}

# Print one table name per line (debugging aid).
function print_database_table_list() {
    local table_name
    for table_name in "${database_table_list[@]}" ; do
        echo "$table_name"
    done
}

# Drop the table stored at the given index of database_table_list.
# Arguments: $1 - index into database_table_list
function drop_database_table() {
    local pos=$1
    local drop_table_statement="SET FOREIGN_KEY_CHECKS=0; USE \`$database_name\`; DROP TABLE \`${database_table_list[$pos]}\`;"
    execute_sql_statement_via_mysql "$drop_table_statement" "$drop_table_result_filepath"
}

# Drop every table in database_table_list, stopping at the first failure.
# Returns: 0 if every drop statement succeeded, 1 otherwise
function drop_all_database_tables() {
    local pos=0
    local num_tables=${#database_table_list[@]}
    while [ "$pos" -lt "$num_tables" ] ; do
        local table_name="${database_table_list[$pos]}"
        echo "dropping $table_name"
        if ! drop_database_table "$pos" ; then
            echo "Error : could not drop database table $table_name" >&2
            return 1
        fi
        pos=$((pos+1))
    done
    # main() verifies afterwards (selected_database_is_empty) that no tables remain
    return 0
}

# Returns: 0 if the selected database has no tables, 1 otherwise.
function selected_database_is_empty() {
    if ! database_is_empty "$database_name" ; then
        echo "Error : table dropping failed to drop all tables in database : $database_name" >&2
        return 1
    fi
    return 0
}

# Run the dropping steps in order; clean up is performed whether or not a step failed.
function main() {
    local properties_filepath=$1
    local database_to_drop_tables_from=$2
    local exit_status=0
    if ! initialize_main "$properties_filepath" "$database_to_drop_tables_from" ||
            ! selected_database_exists ||
            ! set_database_table_list ||
            ! drop_all_database_tables ||
            ! selected_database_is_empty ; then
        exit_status=1
    fi
    shutdown_main_and_clean_up
    return $exit_status
}

main "$1" "$2"
diff --git a/scripts/clickhouse_import_support/manage_cbioportal_databases_tool.properties b/scripts/clickhouse_import_support/manage_cbioportal_databases_tool.properties new file mode 100644 index 0000000..c05e2a4 --- /dev/null +++ b/scripts/clickhouse_import_support/manage_cbioportal_databases_tool.properties @@ -0,0 +1,27 @@ +# clickhouse import support tools properties file +# mostly follows format as described here : https://en.wikipedia.org/wiki/.properties +# - line continuation (through end of line backslash) is not supported +# - interpreted escape sequences limited to the following: +# - \u0009 will be interpreted as a tab character (ASCII TAB) +# - \u000A will be interpreted as a newline/linefeed character (ASCII LF) +# - \u000D will be interpreted as a carriage return character (ASCII CR) +# - \u0000 will be interpreted as a NULL character (ASCII 0), but this character is prohibited. Parsing will then fail. 
+# any other sequence will be uninterpreted by the parser: +# - \t will be interpreted as these two literal characters: "\" "t" +# - \\ will be interpreted as these two literal characters: "\" "\" +# - \u0041 will be parsed as 6 literal characters (not "A") +# +# the specified mysql user must have authorities to alter/drop/create/insert-into all (*) tables in each named database +# any additional args needed for use with the mysql command line tool can be specified using mysql_server_additional_args, +# for instance, "--port 9906" could be added to connect to a server on (non-standard) port 9906 + +mysql_server_username= +mysql_server_password= +mysql_server_host_name= +mysql_server_additional_args= +disk_capacity_mysql_data_filesystem_megabytes= +disk_usage_invisible_to_mysql_user_megabytes= +disk_consumption_anticipated_during_import_megabytes= +blue_database_name= +green_database_name= +shelved_database_name= diff --git a/scripts/clickhouse_import_support/mysql_command_line_functions.sh b/scripts/clickhouse_import_support/mysql_command_line_functions.sh new file mode 100644 index 0000000..40e395f --- /dev/null +++ b/scripts/clickhouse_import_support/mysql_command_line_functions.sh @@ -0,0 +1,186 @@
#!/usr/bin/env bash
#
# Functions for running SQL statements through the mysql command line client and for
# reading single columns out of the tab separated (--batch) output it produces.
#
# The sourcing script must provide the associative array my_properties holding
# mysql_server_username, mysql_server_password, mysql_server_host_name and (optionally)
# mysql_server_additional_args before initialize_mysql_command_line_functions is called.
# Results are handed back through the globals sql_data_field_value and sql_data_array.

unset configured_mysql_defaults_config_file_path
unset sql_data_field_value
unset sql_data_array
configured_mysql_defaults_config_file_path=""
sql_data_field_value=""
declare -a sql_data_array
database_exists_filepath="$(pwd)/mclf_database_exists.txt"
database_table_list_filepath="$(pwd)/mclf_database_table_list.txt"

# Remove the connection credentials from my_properties.
function purge_mysql_credentials_from_environment_variables() {
    # the arguments are quoted so that the [...] subscripts cannot be treated as glob patterns
    unset 'my_properties[mysql_server_username]'
    unset 'my_properties[mysql_server_password]'
    unset 'my_properties[mysql_server_host_name]'
}

# Write user / password / host from my_properties into a mysql defaults file and record its
# path in configured_mysql_defaults_config_file_path. The credentials are purged from
# my_properties afterwards, whether or not the write succeeded.
# Returns: 0 if the file was written with the expected 4 lines, 1 otherwise
function write_mysql_defaults_config_file() {
    configured_mysql_defaults_config_file_path="$(pwd)/mclf_mysql_defaults_$(date "+%Y-%m-%d-%H-%M-%S").cnf"
    rm -f "$configured_mysql_defaults_config_file_path"
    # the file contains the database password : the subshell restricts the permissions of the
    # newly created file to the current user without altering the umask of the calling shell
    (
        umask 077
        {
            printf '%s\n' "[client]"
            printf '%s\n' "user=\"${my_properties['mysql_server_username']}\""
            printf '%s\n' "password=\"${my_properties['mysql_server_password']}\""
            printf '%s\n' "host=\"${my_properties['mysql_server_host_name']}\""
        } > "$configured_mysql_defaults_config_file_path"
    )
    # once written to the configuration file, drop the credential from the environment for security
    purge_mysql_credentials_from_environment_variables
    local line_count=""
    if [ -r "$configured_mysql_defaults_config_file_path" ] ; then
        line_count="$(wc -l < "$configured_mysql_defaults_config_file_path")"
        # strip any padding which wc may add around the number
        line_count="${line_count//[[:space:]]/}"
    fi
    if ! [ "$line_count" == "4" ] ; then
        echo "Error : could not successfully write default mysql properties to file $configured_mysql_defaults_config_file_path" >&2
        return 1
    fi
    return 0
}

# Prepare this function library for use. Returns the status of write_mysql_defaults_config_file.
function initialize_mysql_command_line_functions() {
    write_mysql_defaults_config_file
}

# Remove the defaults file and scratch files, and drop the globals of this library.
function shutdown_mysql_command_line_functions() {
    rm -f "$configured_mysql_defaults_config_file_path"
    rm -f "$database_exists_filepath"
    rm -f "$database_table_list_filepath"
    unset configured_mysql_defaults_config_file_path
    unset sql_data_field_value
    unset sql_data_array
    unset database_exists_filepath
    unset database_table_list_filepath
}

# Run one or more SQL statements through the mysql client in batch mode.
# Arguments: $1 - statement text (passed on stdin), $2 - file which receives the client's stdout
# Returns:   the exit status of the mysql client (or of the failed output redirection)
function execute_sql_statement_via_mysql() {
    local statement=$1
    local output_filepath=$2
    if [ -e "$output_filepath" ] && ! rm -f "$output_filepath" ; then
        # not fatal by itself : the redirection below still reports failure if the file cannot be written
        echo "Error : could not overwrite existing output file $output_filepath when executing mysql statment $statement" >&2
    fi
    local extra_args="${my_properties['mysql_server_additional_args']}"
    # $extra_args is deliberately unquoted : it may hold several options (e.g. "--port 9906")
    mysql --defaults-extra-file="$configured_mysql_defaults_config_file_path" --batch $extra_args <<< "$statement" > "$output_filepath"
}

# Extract and decode one column from a single record of mysql --batch output.
# Arguments: $1 - the record (one output line), $2 - zero based column number
# Outputs:   sets the global sql_data_field_value (left unset on failure)
# Returns:   0 on success, 1 if the column is missing or an unknown escape sequence is met
function set_sql_data_field_value_from_record() {
    local record_string=$1
    local column_number=$2
    unset sql_data_field_value
    local record_string_length=${#record_string}
    local LF=$'\n'
    local TAB=$'\t'
    local BACKSLASH=$'\\'
    local NULL_MARKER='NULL_CHARACTER_CANNOT_BE_REPRESENTED'
    local ENCODED_LF='\n'
    local ENCODED_TAB='\t'
    local ENCODED_BACKSLASH='\\'
    local ENCODED_NULL='\0'
    local pos=0
    local field_index=0
    local parsed_value=""
    while [ "$pos" -lt "$record_string_length" ] ; do
        local character_at_position="${record_string:$pos:1}"
        # a newline should occur at the end of the read line, and only there. Embedded newlines are encoded with '\n'
        if [ "$character_at_position" == "$LF" ] ; then
            field_index=$((field_index+1))
            if [ "$field_index" -gt "$column_number" ] ; then
                # field has been completely parsed
                sql_data_field_value="$parsed_value"
                return 0
            fi
            echo "Error : unable to locate column $column_number while parsing returned database record : $record_string" >&2
            return 1
        fi
        # a tab character delimits the beginning of a new field, and is not part of the field. Embedded tabs are encoded with '\t'
        if [ "$character_at_position" == "$TAB" ] ; then
            field_index=$((field_index+1))
            if [ "$field_index" -gt "$column_number" ] ; then
                # field has been completely parsed
                sql_data_field_value="$parsed_value"
                return 0
            fi
            pos=$((pos+1))
            continue
        fi
        # a backslash must begin one of 4 possible escape sequences, all of which are made up of 2 characters : {'\n', '\t', '\\', '\0'}. No "plain" backslashes should be encountered.
        if [ "$character_at_position" == "$BACKSLASH" ] ; then
            local decoded_character=""
            case "${record_string:$pos:2}" in
                "$ENCODED_LF")
                    decoded_character="$LF"
                    ;;
                "$ENCODED_TAB")
                    decoded_character="$TAB"
                    ;;
                "$ENCODED_BACKSLASH")
                    decoded_character="$BACKSLASH"
                    ;;
                "$ENCODED_NULL")
                    decoded_character="$NULL_MARKER"
                    ;;
            esac
            # pass over the escape sequence
            pos=$((pos+2))
            # escape sequences are only decoded (and validated) inside the requested column
            if [ "$field_index" -eq "$column_number" ] ; then
                if [ "$decoded_character" == "$NULL_MARKER" ] ; then
                    echo "Warning : discarding encoded NULL character (\\0) encountered at position $pos while parsing returned database record : $record_string" >&2
                    continue
                fi
                if [ -z "$decoded_character" ] ; then
                    echo "Error : unrecoginzed backslash escape sequence encountered at position $pos while parsing returned database record : $record_string" >&2
                    return 1
                fi
                parsed_value+="$decoded_character"
            fi
            continue
        fi
        # pass over the current (plain) character
        pos=$((pos+1))
        if [ "$field_index" -eq "$column_number" ] ; then
            parsed_value+="$character_at_position"
        fi
    done
    sql_data_field_value="$parsed_value"
}

# Read a mysql --batch output file and collect one column of every record.
# Arguments: $1 - output file path, $2 - zero based column number
# Outputs:   sets the global sql_data_array (one element per record; the header line is skipped)
# Returns:   0 on success, 1 if the file is unreadable or a record could not be parsed
function set_sql_data_array_from_file() {
    local filepath=$1
    local column_number=$2
    unset sql_data_array
    if ! [ -r "$filepath" ] ; then
        echo "Error : could not read output mysql query results from file : $filepath" >&2
        return 1
    fi
    local headers_have_been_parsed=0
    local line=""
    sql_data_array=()
    while IFS='' read -r line ; do
        if [ "$headers_have_been_parsed" -eq 0 ] ; then
            headers_have_been_parsed=1
            continue
        fi
        # a record which cannot be parsed must not be silently recorded as an empty value
        if ! set_sql_data_field_value_from_record "$line" "$column_number" ; then
            return 1
        fi
        sql_data_array+=("$sql_data_field_value")
    done < "$filepath"
    return 0
}

# Arguments: $1 - database name
# Returns:   0 if exactly one database is listed for the name, 1 if the server could not be
#            queried, 2 if no database (or more than one) is listed
function database_exists() {
    local database_name=$1
    local statement="SHOW DATABASES LIKE '$database_name'"
    if ! execute_sql_statement_via_mysql "$statement" "$database_exists_filepath" ; then
        echo "Warning : unable to determine if database $database_name exists using : $statement" >&2
        return 1
    fi
    if ! set_sql_data_array_from_file "$database_exists_filepath" 0 ; then
        return 1
    fi
    if [[ "${#sql_data_array[@]}" -ne 1 ]] ; then
        echo "Warning : database $database_name not present on database server, or there are multiple listings for that name" >&2
        return 2
    fi
    return 0
}

# Arguments: $1 - database name
# Returns:   0 if the database has no tables, 1 if the table list could not be retrieved,
#            2 if the database has tables
function database_is_empty() {
    local database_name=$1
    local statement="SHOW TABLES IN \`$database_name\`"
    if ! execute_sql_statement_via_mysql "$statement" "$database_table_list_filepath" ; then
        echo "Warning : unable to retrieve table list from database $database_name using : $statement" >&2
        return 1
    fi
    if ! set_sql_data_array_from_file "$database_table_list_filepath" 0 ; then
        return 1
    fi
    if [[ "${#sql_data_array[@]}" -ne 0 ]] ; then
        echo "Warning : database $database_name has tables (is not empty as required)" >&2
        return 2
    fi
    return 0
}
diff --git a/scripts/clickhouse_import_support/parse_property_file_functions.sh b/scripts/clickhouse_import_support/parse_property_file_functions.sh new file mode 100644 index 0000000..37f5a73 --- /dev/null +++ b/scripts/clickhouse_import_support/parse_property_file_functions.sh @@ -0,0 +1,179 @@ +#!/usr/bin/env bash +# +# This file defines function parse_property_file() in the current processing shell. 
+# +# Usage: parse_property_file property_file_path associative_array_name +# +# The file at the indicated path will be parsed for property settings and results will be returned in the associative array +# with the indicated name. The associative array must be declared by the caller prior to call. +# +# Parsing will ignore lines which are only whitespace, or which have "#" or "!" as the first non-whitespace character +# Other lines must begin (after ignored whitespace) with a key name, and can contain a delimiter followed by a value string. +# If no delimiter is present, the entire line is considered a key name, which will be set in the array with an empty string value. +# A delimiter is the first encountered "=" character or ":" character. (a tab character is not recognized as a delimiter) +# key names may contain interior spaces, but are prohibited from containing the apostrophe character "'". +# The value assigned to a key name may contain any characters, but leading and trailing whitespace will be removed from all +# key names and values. Multi line values are not constructed using the conventional "end line with a backslash" semantics. +# Instead the multiline values can be constructed on a single line in the file by using string "\u000A" as an encoded linefeed +# and/or string "\u000D" as an encoded carriage return. "\u0009" can also be used for an encoded tab character within a value. +# Other escape sequences such as '\t', '\r', '\n', or other unicode character encodings are not interpreted (remain literal). 
# Report whether a variable name refers to a declared associative array.
# Arguments: $1 - name of the variable to inspect
# Returns:   0 if "declare -p" describes the variable as "declare -A...", 1 otherwise
function variable_name_refers_to_an_associative_array() {
    local variable_name=$1
    local declare_command_output
    declare_command_output="$(declare -p "$variable_name" 2>/dev/null)"
    [[ "$declare_command_output" == "declare -A"* ]]
}

unset trimmed_whitespace_string
trimmed_whitespace_string=""

# Remove leading and trailing whitespace from a string; interior whitespace is kept as is.
# Arguments: $1 - string to trim
# Outputs:   sets the global trimmed_whitespace_string
function set_trimmed_whitespace_string() {
    local string=$1
    # remove the longest prefix, then the longest suffix, made up only of whitespace.
    # Pure parameter expansion : no word splitting, no glob expansion, no subprocess.
    string="${string#"${string%%[![:space:]]*}"}"
    string="${string%"${string##*[![:space:]]}"}"
    trimmed_whitespace_string="$string"
}

# Arguments: $1 - one line of a property file
# Returns:   0 if the line is empty / only whitespace, or its first non-whitespace character
#            is "#" or "!"; 1 otherwise
function property_line_is_commented() {
    local line=$1
    set_trimmed_whitespace_string "$line"
    local trimmed_line="$trimmed_whitespace_string"
    if [ ${#trimmed_line} -eq 0 ] ; then
        return 0 # empty lines are commented
    fi
    case "${trimmed_line:0:1}" in
        '#'|'!')
            return 0 # start with comment character
            ;;
    esac
    return 1
}

unset index_of_property_line_delimiter
index_of_property_line_delimiter=-1

# Locate the first "=" or ":" character in a line.
# Arguments: $1 - one line of a property file
# Outputs:   sets the global index_of_property_line_delimiter (zero based; -1 when not found)
function find_and_set_index_of_property_line_delimiter() {
    local line=$1
    # everything before the first delimiter; equal to the whole line when there is none
    local text_before_delimiter="${line%%[=:]*}"
    if [ "$text_before_delimiter" == "$line" ] ; then
        index_of_property_line_delimiter=-1 # default / not found
    else
        index_of_property_line_delimiter=${#text_before_delimiter}
    fi
}

unset escaped_string_for_eval
escaped_string_for_eval=""

# Convert a property value into text which can be placed between two apostrophes in a
# command passed to eval. Apostrophes are escaped, and the sequences \u0009, \u000A and
# \u000D are replaced with shell text which evaluates to TAB, LF and CR respectively.
# Arguments: $1 - the (already trimmed) property value
# Outputs:   sets the global escaped_string_for_eval
function set_escaped_string_for_eval() {
    local string=$1
    local string_length=${#string}
    local pos=0
    local character_at_position=""
    # each replacement closes the surrounding single quoted string, adds an ANSI-C quoted
    # character ( $'\t' etc. ), then reopens the single quoted string
    local eval_text_for_tab="'\$'\\t''"
    local eval_text_for_lf="'\$'\\n''"
    local eval_text_for_cr="'\$'\\r''"
    escaped_string_for_eval=""
    while [ "$pos" -lt "$string_length" ] ; do
        character_at_position="${string:$pos:1}"
        if [ "$character_at_position" == "'" ] ; then
            escaped_string_for_eval+="'\"'\"'"
            pos=$((pos+1))
            continue
        fi
        if [ "$character_at_position" == "\\" ] ; then
            case "${string:$pos:6}" in
                '\u0009')
                    escaped_string_for_eval+="$eval_text_for_tab"
                    pos=$((pos+6))
                    continue
                    ;;
                '\u000A')
                    escaped_string_for_eval+="$eval_text_for_lf"
                    pos=$((pos+6))
                    continue
                    ;;
                '\u000D')
                    escaped_string_for_eval+="$eval_text_for_cr"
                    pos=$((pos+6))
                    continue
                    ;;
            esac
        fi
        # any other character (including an uninterpreted backslash) is copied literally
        escaped_string_for_eval+="$character_at_position"
        pos=$((pos+1))
    done
}

# Returns: 0 if the string ($1) contains an apostrophe, 1 otherwise
function string_contains_apostrophe() {
    local string=$1
    [[ "$string" == *"'"* ]]
}

# Parse one property file line and store the key / value in the named associative array.
# Arguments: $1 - the line, $2 - name of the (already declared) associative array
# Returns:   0 if the line was a comment / blank line or was stored, 1 if it was ignored
#            because of a missing key name or an apostrophe in the key name
function parse_property_line() {
    local line=$1
    local associative_array_name=$2
    local key_name=""
    local value=""
    if property_line_is_commented "$line" ; then
        # nothing to store. ("continue" must not be used here : this function body is not a
        # loop, and bash 4.4 and later do not let it affect the loop of the caller)
        return 0
    fi
    find_and_set_index_of_property_line_delimiter "$line"
    if [ "$index_of_property_line_delimiter" -eq 0 ] ; then
        echo "warning: ignoring property file ($property_file_path) line beginning with delimiter: $line" >&2
        return 1
    fi
    if [ "$index_of_property_line_delimiter" -eq -1 ] ; then
        # no delimiter .. so key_name is entire line (trimming whitespace), with no value
        set_trimmed_whitespace_string "$line"
        key_name="$trimmed_whitespace_string"
    else
        set_trimmed_whitespace_string "${line:0:$index_of_property_line_delimiter}"
        key_name="$trimmed_whitespace_string"
        set_trimmed_whitespace_string "${line:$((index_of_property_line_delimiter+1))}"
        set_escaped_string_for_eval "$trimmed_whitespace_string"
        value="$escaped_string_for_eval"
    fi
    if [ -z "$key_name" ] ; then
        # an empty subscript is not a valid associative array key
        echo "warning: ignoring property file ($property_file_path) line with an empty key name: $line" >&2
        return 1
    fi
    if string_contains_apostrophe "$key_name" ; then
        echo "warning: ignoring property file ($property_file_path) key name which contains the apostrophe character: $key_name" >&2
        return 1
    fi
    local assignment_command="${associative_array_name}['$key_name']='$value'"
    # quoted, so that the command text reaches eval without word splitting or glob expansion
    eval "$assignment_command"
}

# Parse a property file into an associative array.
# Arguments: $1 - path of the property file
#            $2 - name of an associative array which the caller has already declared
# Returns:   0 when the file was read (lines which had to be ignored only produce warnings),
#            1 if the file is not readable or the name is not an associative array
function parse_property_file() {
    local property_file_path=$1
    local associative_array_name=$2 # array names must be proper identifiers (no spaces)
    local line=""
    if ! [ -r "$property_file_path" ] ; then
        echo "error: filepath $property_file_path was passed to function parse_property_file() but did not refer to a readable file" >&2
        return 1
    fi
    if ! variable_name_refers_to_an_associative_array "$associative_array_name" ; then
        echo "error: variable name '$associative_array_name' was passed to function parse_property_file() but was not available in the environment, or did not refer to a created associative array." >&2
        return 1
    fi
    # -r keeps backslashes (needed for the \uXXXX sequences); the second test processes a
    # final line which is not terminated by a newline
    while IFS= read -r line || [ -n "$line" ] ; do
        parse_property_line "$line" "$associative_array_name"
    done < "$property_file_path"
    return 0
}