Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add proxy configuration support #31

Merged
merged 11 commits into from
Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions .github/workflows/Linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ jobs:
GEN: Ninja
VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake
AZURE_STORAGE_CONNECTION_STRING: 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;'
HTTP_PROXY_RUNNING: '1'

steps:
- name: Install required ubuntu packages
Expand Down Expand Up @@ -56,7 +57,7 @@ jobs:
- name: install Azure test service
if: ${{ matrix.arch == 'linux_amd64_gcc4' }}
run: |
yum install -y nodejs npm
yum install -y nodejs npm squid
npm install -g azurite
echo -e "[azure-cli]\nname=Azure CLI\nbaseurl=https://packages.microsoft.com/yumrepos/azure-cli\nenabled=1\ngpgcheck=1\ngpgkey=https://packages.microsoft.com/keys/microsoft.asc" | tee /etc/yum.repos.d/azure-cli.repo
yum install -y azure-cli
Expand All @@ -65,16 +66,19 @@ jobs:
if: ${{ matrix.arch == 'linux_amd64' }}
run: |
curl -fsSL https://deb.nodesource.com/setup_16.x | bash
apt-get install -y -qq nodejs
apt-get install -y -qq nodejs squid
node -v
npm -v
squid --version
npm install -g azurite
curl -sL https://aka.ms/InstallAzureCLIDeb | bash

- name: Launch & populate Azure test service
if: ${{ matrix.arch == 'linux_amd64' || matrix.arch == 'linux_amd64_gcc4' }}
run: |
azurite > azurite_log.txt 2>&1 &
./scripts/run_squid.sh --port 3128 --log_dir squid_logs &
./scripts/run_squid.sh --port 3129 --log_dir squid_auth_logs --auth &
sleep 10
./scripts/upload_test_files_to_azurite.sh

Expand Down Expand Up @@ -121,4 +125,11 @@ jobs:
if: always() && matrix.arch == 'linux_amd64_gcc4'
shell: bash
run: |
cat azurite_log.txt
echo "## azurite"
cat azurite_log.txt

echo "## squid"
cat squid_logs/*

echo "## squid auth"
cat squid_auth_logs/*
13 changes: 12 additions & 1 deletion .github/workflows/MacOS.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ jobs:
GEN: Ninja
VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake
AZURE_STORAGE_CONNECTION_STRING: 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;'
HTTP_PROXY_RUNNING: '1'

steps:
- uses: actions/checkout@v3
Expand Down Expand Up @@ -59,7 +60,10 @@ jobs:
- name: Launch & populate Azure test service
if: ${{ matrix.osx_build_arch == 'x86_64'}}
run: |
brew install squid
npm install -g azurite
./scripts/run_squid.sh --port 3128 --log_dir squid_logs &
./scripts/run_squid.sh --port 3129 --log_dir squid_auth_logs --auth &
azurite > azurite_log.txt 2>&1 &
sleep 10
./scripts/upload_test_files_to_azurite.sh
Expand All @@ -74,4 +78,11 @@ jobs:
if: always() && matrix.osx_build_arch == 'x86_64'
shell: bash
run: |
cat azurite_log.txt
echo "## azurite"
cat azurite_log.txt

echo "## squid"
cat squid_logs/*

echo "## squid with auth"
cat squid_auth_logs/*
14 changes: 14 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,8 +1,22 @@
# Build output, IDE folders and local test artifacts
build
.idea
.vscode
cmake-build-debug
duckdb_unittest_tempdir/
.DS_Store
testext
test/python/__pycache__/
.Rhistory

# Azurite
__blobstorage__/
__queuestorage__/
__azurite_db_*__.json

# squid (files generated by scripts/run_squid.sh)
# Log directories: the script default, plus the one the CI workflows pass for the --auth instance
squid_logs
squid_auth_logs
squid_users
# pid files: the script configures "${service_name}.pid" and, with --auth, "${service_name}_auth.pid"
# (presumably resolving to squid.pid / squid_auth.pid with squid's default service name)
squid.pid
squid_auth.pid
squid.conf
squid_auth.conf
84 changes: 84 additions & 0 deletions scripts/run_squid.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/bin/bash
#
# Start a local squid HTTP proxy in the foreground for the test-suite.
#
# A minimal squid configuration file is generated in the current directory
# (squid.conf, or squid_auth.conf with --auth) and squid is exec'ed on it.
# With --auth, a squid_users password file is also written and basic
# authentication is required for every request.

# Print the command line usage and exit successfully.
help() {
  cat <<EOF
Usage: ${0} [-p|--port PORT] [--log_dir DIR] [--auth] [-h|--help]
  -p, --port PORT  Port number for squid to listen on (default: 3128)
  --log_dir DIR    Directory where squid writes its logs (default: squid_logs)
  --auth           Require HTTP basic authentication (otherwise no authentication is required)
  -h, --help       Show this help and exit
EOF
  exit 0
}

# Abort when option ${1} was given without a value.
# ${2} is the number of arguments left on the command line, option included.
require_value() {
  if [[ "${2}" -lt 2 ]]; then
    echo "Missing value for option ${1}" >&2
    exit 1
  fi
}

port='3128'
auth='false'
log_dir="squid_logs"
conf_file="squid.conf"
# Single quotes are intentional: ${service_name} must reach squid.conf verbatim,
# it is a squid.conf macro expanded by squid itself, not a shell variable.
pid_file='${service_name}.pid'

while [[ $# -gt 0 ]]; do
  case "${1}" in
    -h|--help)
      help
      ;;
    -p|--port)
      require_value "${1}" "$#"
      port="${2}"
      shift 2 # past argument and value
      ;;
    --auth)
      auth='true'
      # Use distinct file names so both instances can run from the same directory
      conf_file="squid_auth.conf"
      pid_file='${service_name}_auth.pid'
      shift # past argument
      ;;
    --log_dir)
      require_value "${1}" "$#"
      log_dir="${2}"
      shift 2 # past argument and value
      ;;
    *)
      echo "Unknown option ${1}" >&2
      exit 1
      ;;
  esac
done

# -p: do not fail when the directory is left over from a previous run
mkdir -p "${log_dir}"
touch "${log_dir}/daemon.log"
# World-writable logs; presumably needed because squid may not run as the invoking user
chmod -R 777 "${log_dir}"

# Base configuration: listen on the loopback interface only, keep every log in ${log_dir}
{
  echo "http_port 127.0.0.1:${port}"
  echo "pid_filename ${pid_file}"

  echo 'logfile_rotate 0'
  echo "logfile_daemon ${log_dir}/daemon.log"
  echo "access_log ${log_dir}/access.log"
  echo "cache_log ${log_dir}/cache.log"
  echo "cache_store_log ${log_dir}/cache_store.log"
} >"${conf_file}"

if [[ "${auth}" == "true" ]]; then
  # User 'john' with password 'doe'
  # (single quotes keep the '$' of the apr1 hash away from the shell)
  echo 'john:$apr1$dalj9e7s$AhqY28Hvl3EcNblNJMiXa0' >squid_users

  # Locate the basic_ncsa_auth helper, whose location depends on the platform
  if [[ "$(uname)" == "Darwin" ]]; then
    # Ask Homebrew where squid is installed so that non-default prefixes work too
    brew_squid_prefix=''
    if command -v brew >/dev/null 2>&1; then
      brew_squid_prefix="$(brew --prefix squid 2>/dev/null)"
    fi

    if [[ -n "${brew_squid_prefix}" && -x "${brew_squid_prefix}/libexec/basic_ncsa_auth" ]]; then
      auth_basic_program="${brew_squid_prefix}/libexec/basic_ncsa_auth"
    else
      # Fall back to the versioned Cellar directory under /usr/local
      squid_version="$(squid -v | head -n1 | grep -o 'Version [^ ]*' | cut -d ' ' -f 2)"
      auth_basic_program="/usr/local/Cellar/squid/${squid_version}/libexec/basic_ncsa_auth"
    fi
  elif [[ -e '/usr/lib64/squid/basic_ncsa_auth' ]]; then
    auth_basic_program="/usr/lib64/squid/basic_ncsa_auth"
  else
    auth_basic_program="/usr/lib/squid/basic_ncsa_auth"
  fi

  # Only authenticated users are allowed through the proxy
  {
    echo '# Add authentification options'
    echo "auth_param basic program ${auth_basic_program} squid_users"
    echo 'auth_param basic children 3'
    echo 'auth_param basic realm Squid BA'
    echo 'acl auth_users proxy_auth REQUIRED'
    echo 'http_access allow auth_users'
    echo 'http_access deny all'
  } >>"${conf_file}"
else
  echo 'http_access allow localhost' >>"${conf_file}"
fi

# -N keeps squid in the foreground; exec replaces this shell so the caller controls the squid process
exec squid -N -f "${conf_file}"
96 changes: 86 additions & 10 deletions src/azure_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <azure/storage/blobs.hpp>
#include <duckdb/parser/parsed_data/create_scalar_function_info.hpp>
#include <iostream>
#include <cstdlib>

namespace duckdb {

Expand Down Expand Up @@ -56,19 +57,22 @@ static void Log(Logger::Level level, std::string const &message) {
}

static Azure::Identity::ChainedTokenCredential::Sources
CreateCredentialChainFromSetting(const string &credential_chain) {
CreateCredentialChainFromSetting(const string &credential_chain,
const Azure::Core::Http::Policies::TransportOptions &transport_options) {
auto chain_list = StringUtil::Split(credential_chain, ';');
Azure::Identity::ChainedTokenCredential::Sources result;

Azure::Core::Credentials::TokenCredentialOptions options;
options.Transport = transport_options;
for (const auto &item : chain_list) {
if (item == "cli") {
result.push_back(std::make_shared<Azure::Identity::AzureCliCredential>());
result.push_back(std::make_shared<Azure::Identity::AzureCliCredential>(options));
} else if (item == "managed_identity") {
result.push_back(std::make_shared<Azure::Identity::ManagedIdentityCredential>());
result.push_back(std::make_shared<Azure::Identity::ManagedIdentityCredential>(options));
} else if (item == "env") {
result.push_back(std::make_shared<Azure::Identity::EnvironmentCredential>());
result.push_back(std::make_shared<Azure::Identity::EnvironmentCredential>(options));
} else if (item == "default") {
result.push_back(std::make_shared<Azure::Identity::DefaultAzureCredential>());
result.push_back(std::make_shared<Azure::Identity::DefaultAzureCredential>(options));
} else if (item != "none") {
throw InvalidInputException("Unknown credential provider found: " + item);
}
Expand Down Expand Up @@ -114,6 +118,22 @@ static AzureAuthentication ParseAzureAuthSettings(FileOpener *opener, const stri
}
}

// Load proxy options
Value http_proxy;
if (FileOpener::TryGetCurrentSetting(opener, "azure_http_proxy", http_proxy)) {
auth.proxy_options.http_proxy = http_proxy.ToString();
}

Value http_proxy_user_name;
if (FileOpener::TryGetCurrentSetting(opener, "azure_proxy_user_name", http_proxy_user_name)) {
auth.proxy_options.user_name = http_proxy_user_name.ToString();
}

Value http_proxy_password;
if (FileOpener::TryGetCurrentSetting(opener, "azure_proxy_password", http_proxy_password)) {
auth.proxy_options.password = http_proxy_password.ToString();
}

return auth;
}

Expand All @@ -138,20 +158,65 @@ static AzureReadOptions ParseAzureReadOptions(FileOpener *opener) {
return options;
}

// Builds the Azure SDK transport options (HTTP proxy URL and proxy credentials) for `auth`.
//
// With a secret attached to `auth`, the values come from the secret keys "http_proxy",
// "proxy_user_name" and "proxy_password"; when the secret has no "http_proxy" value the
// HTTP_PROXY environment variable is used instead. Without a secret, the values come from
// `auth.proxy_options`. Anything that is not provided is left untouched in the returned options.
static Azure::Core::Http::Policies::TransportOptions GetTransportOptions(AzureAuthentication &auth) {
	Azure::Core::Http::Policies::TransportOptions transport;

	if (!auth.secret) {
		// No secret: rely on the proxy options stored on the authentication object
		const auto &configured = auth.proxy_options;
		if (!configured.http_proxy.empty()) {
			transport.HttpProxy = configured.http_proxy;
		}
		if (!configured.user_name.empty()) {
			transport.ProxyUserName = configured.user_name;
		}
		if (!configured.password.empty()) {
			transport.ProxyPassword = configured.password;
		}
		return transport;
	}

	// Proxy URL: the secret wins, the environment is only a fallback
	auto secret_proxy = auth.secret->TryGetValue("http_proxy");
	if (secret_proxy.IsNull()) {
		// Keep honoring the env variable if present
		auto *env_proxy = std::getenv("HTTP_PROXY");
		if (env_proxy != nullptr) {
			transport.HttpProxy = env_proxy;
		}
	} else {
		transport.HttpProxy = secret_proxy.ToString();
	}

	// Proxy credentials are only read from the secret
	auto secret_user_name = auth.secret->TryGetValue("proxy_user_name");
	if (!secret_user_name.IsNull()) {
		transport.ProxyUserName = secret_user_name.ToString();
	}

	auto secret_password = auth.secret->TryGetValue("proxy_password");
	if (!secret_password.IsNull()) {
		transport.ProxyPassword = secret_password.ToString();
	}

	return transport;
}

static Azure::Storage::Blobs::BlobContainerClient GetContainerClient(AzureAuthentication &auth, AzureParsedUrl &url) {
string connection_string;
bool use_secret = false;
string chain;
string account_name;
string endpoint;

auto transport_options = GetTransportOptions(auth);
Azure::Storage::Blobs::BlobClientOptions options;
options.Transport = transport_options;

// Firstly, try to use the auth from the secret
if (auth.secret) {
// If a connection string is provided, we're done here
auto connection_string_value = auth.secret->TryGetValue("connection_string");
if (!connection_string_value.IsNull()) {
return Azure::Storage::Blobs::BlobContainerClient::CreateFromConnectionString(
connection_string_value.ToString(), url.container);
connection_string_value.ToString(), url.container, options);
}

// Account_name can be used both for unauthenticated
Expand Down Expand Up @@ -181,7 +246,7 @@ static Azure::Storage::Blobs::BlobContainerClient GetContainerClient(AzureAuthen

if (!auth.connection_string.empty()) {
return Azure::Storage::Blobs::BlobContainerClient::CreateFromConnectionString(auth.connection_string,
url.container);
url.container, options);
}
}

Expand All @@ -192,17 +257,17 @@ static Azure::Storage::Blobs::BlobContainerClient GetContainerClient(AzureAuthen
// Build credential chain, from last to first
Azure::Identity::ChainedTokenCredential::Sources credential_chain;
if (!chain.empty()) {
credential_chain = CreateCredentialChainFromSetting(chain);
credential_chain = CreateCredentialChainFromSetting(chain, transport_options);
}

auto accountURL = "https://" + account_name + "." + endpoint;
if (!credential_chain.empty()) {
// A set of credentials providers was passed
auto chainedTokenCredential = std::make_shared<Azure::Identity::ChainedTokenCredential>(credential_chain);
Azure::Storage::Blobs::BlobServiceClient blob_service_client(accountURL, chainedTokenCredential);
Azure::Storage::Blobs::BlobServiceClient blob_service_client(accountURL, chainedTokenCredential, options);
return blob_service_client.GetBlobContainerClient(url.container);
} else if (!account_name.empty()) {
return Azure::Storage::Blobs::BlobContainerClient(accountURL + "/" + url.container);
return Azure::Storage::Blobs::BlobContainerClient(accountURL + "/" + url.container, options);
} else {
throw InvalidInputException("No valid Azure credentials found!");
}
Expand Down Expand Up @@ -361,6 +426,17 @@ static void LoadInternal(DatabaseInstance &instance) {
"Size of the read buffer. It is recommended that this is evenly divisible by "
"azure_read_transfer_chunk_size.",
LogicalType::UBIGINT, Value::UBIGINT(default_read_options.buffer_size));

auto *http_proxy = std::getenv("HTTP_PROXY");
Value default_http_value = http_proxy ? Value(http_proxy) : Value(nullptr);
config.AddExtensionOption("azure_http_proxy",
"Proxy to use when login & performing request to azure. "
"By default it will use the HTTP_PROXY environment variable if set.",
LogicalType::VARCHAR, default_http_value);
config.AddExtensionOption("azure_proxy_user_name", "Http proxy user name if needed.", LogicalType::VARCHAR,
Value(nullptr));
config.AddExtensionOption("azure_proxy_password", "Http proxy password if needed.", LogicalType::VARCHAR,
Value(nullptr));
}

int64_t AzureStorageFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes) {
Expand Down
Loading
Loading