Skip to content

Commit 567b0eb

Browse files
committed
Add SC-54468 min/max regression test case
1 parent 7fd6328 commit 567b0eb

File tree

2 files changed

+249
-0
lines changed

2 files changed

+249
-0
lines changed

test/regression/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ if (TILEDB_CPP_API)
5555
list(APPEND SOURCES targets/sc-53334.cc)
5656
list(APPEND SOURCES targets/sc-53791.cc)
5757
list(APPEND SOURCES targets/sc-53970.cc)
58+
list(APPEND SOURCES targets/sc-54468.cc)
5859
endif()
5960

6061
add_executable(tiledb_regression

test/regression/targets/sc-54468.cc

+248
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
/**
2+
 * @file sc-54468.cc
3+
*
4+
* @section LICENSE
5+
*
6+
* The MIT License
7+
*
8+
* @copyright Copyright (c) 2024 TileDB, Inc.
9+
*
10+
* Permission is hereby granted, free of charge, to any person obtaining a copy
11+
* of this software and associated documentation files (the "Software"), to deal
12+
* in the Software without restriction, including without limitation the rights
13+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14+
* copies of the Software, and to permit persons to whom the Software is
15+
* furnished to do so, subject to the following conditions:
16+
*
17+
* The above copyright notice and this permission notice shall be included in
18+
* all copies or substantial portions of the Software.
19+
*
20+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26+
* THE SOFTWARE.
27+
*/
28+
29+
#include <stdio.h>
30+
#include <stdlib.h>
31+
#include <tiledb/tiledb.h>
32+
#include <iostream>
33+
#include <optional>
34+
#include <tiledb/tiledb>
35+
#include <tiledb/tiledb_experimental>
36+
#include <vector>
37+
38+
#include <test/support/tdb_catch.h>
39+
40+
using namespace tiledb;
41+
42+
/**
 * Creates an empty sparse array at `array_name`, first removing any object
 * that is already present at that location.
 *
 * The schema has one uint64 dimension "x" (domain [1, 100], tile extent 10)
 * and one uint64 attribute "a".
 *
 * @param ctx TileDB context used for every operation.
 * @param array_name URI of the array to (re)create.
 * @param is_nullable Whether attribute "a" is declared nullable.
 */
static void create_or_replace_array(
    Context& ctx, const char* array_name, bool is_nullable) {
  // Drop whatever currently lives at the URI so the array starts out empty.
  const auto existing = Object::object(ctx, array_name);
  if (existing.type() != Object::Type::Invalid) {
    Object::remove(ctx, array_name);
  }

  // Domain: a single uint64 dimension "x".
  Domain domain(ctx);
  domain.add_dimension(Dimension::create<uint64_t>(ctx, "x", {{1, 100}}, 10));

  // The only attribute: uint64 "a", nullable on request.
  Attribute attr(ctx, "a", TILEDB_UINT64);
  attr.set_nullable(is_nullable);

  // Sparse schema, row-major cell and tile order.
  ArraySchema schema(ctx, TILEDB_SPARSE);
  schema.set_domain(domain)
      .set_cell_order(TILEDB_ROW_MAJOR)
      .set_tile_order(TILEDB_ROW_MAJOR)
      .add_attribute(attr);

  Array::create(array_name, schema);
}
68+
69+
/**
 * Writes eight cells to the array at `array_name`: coordinates 1..8 on
 * dimension "x" with attribute "a" values 10, 20, ..., 80.
 *
 * @param ctx TileDB context used to open the array and build the query.
 * @param array_name URI of the array to write to.
 * @param is_nullable Whether a validity buffer (all cells valid) is attached
 *     for attribute "a".
 */
static void write_array(
    Context& ctx, const char* array_name, bool is_nullable) {
  // Cell data: coordinates, attribute values and per-cell validity bytes.
  std::vector<uint64_t> coords = {1, 2, 3, 4, 5, 6, 7, 8};
  std::vector<uint64_t> values = {10, 20, 30, 40, 50, 60, 70, 80};
  std::vector<uint8_t> validity = {1, 1, 1, 1, 1, 1, 1, 1};

  Array array(ctx, array_name, TILEDB_WRITE);

  Query query(ctx, array);
  query.set_data_buffer("x", coords);
  query.set_data_buffer("a", values);
  if (is_nullable) {
    // The validity buffer is only attached when "a" is nullable.
    query.set_validity_buffer("a", validity);
  }

  query.submit();
  array.close();
}
89+
90+
static std::pair<std::optional<uint64_t>, std::optional<uint64_t>>
91+
query_min_max(
92+
Context& ctx,
93+
const char* array_name,
94+
bool is_nullable,
95+
std::optional<std::pair<uint64_t, uint64_t>> subarray) {
96+
// note, use C API because the CPP API doesn't seem to have Min yet
97+
Array array(ctx, array_name, TILEDB_READ);
98+
99+
Query query(ctx, array);
100+
query.set_layout(TILEDB_UNORDERED);
101+
if (subarray) {
102+
Subarray s(ctx, array);
103+
s.add_range(0, subarray->first, subarray->second);
104+
query.set_subarray(s);
105+
}
106+
107+
QueryChannel default_channel = QueryExperimental::get_default_channel(query);
108+
109+
ChannelOperation op_min =
110+
QueryExperimental::create_unary_aggregate<MinOperator>(query, "a");
111+
default_channel.apply_aggregate("Min", op_min);
112+
113+
ChannelOperation op_max =
114+
QueryExperimental::create_unary_aggregate<MaxOperator>(query, "a");
115+
default_channel.apply_aggregate("Max", op_max);
116+
117+
std::vector<uint64_t> min(1);
118+
std::vector<uint8_t> min_validity(1);
119+
query.set_data_buffer("Min", min);
120+
if (is_nullable) {
121+
query.set_validity_buffer("Min", min_validity);
122+
}
123+
124+
std::vector<uint64_t> max(1);
125+
std::vector<uint8_t> max_validity(1);
126+
query.set_data_buffer("Max", max);
127+
if (is_nullable) {
128+
query.set_validity_buffer("Max", max_validity);
129+
}
130+
131+
query.submit();
132+
query.finalize();
133+
134+
std::optional<uint64_t> maybe_min, maybe_max;
135+
if (!is_nullable || min_validity[0]) {
136+
maybe_min = min[0];
137+
}
138+
if (!is_nullable || max_validity[0]) {
139+
maybe_max = max[0];
140+
}
141+
return std::make_pair(maybe_min, maybe_max);
142+
}
143+
144+
// Forward declaration only: this file neither defines nor calls array_exists.
// NOTE(review): presumably provided by another source of the regression
// target -- confirm the declaration is still needed here.
bool array_exists(Context& ctx, const char* uri);
145+
146+
// Min/Max over a freshly created (never written) array whose attribute is
// nullable: both aggregates are expected to come back without a value.
TEST_CASE(
    "SC-54468 min/max aggregate on empty nullable attribute",
    "[regression][bug][sc-54468]") {
  Context ctx;
  const std::string uri{"sc-54468-empty-min-max-nullable"};
  constexpr bool is_attribute_nullable = true;

  create_or_replace_array(ctx, uri.c_str(), is_attribute_nullable);

  // No subarray: aggregate over the whole (empty) array.
  const auto [min, max] =
      query_min_max(ctx, uri.c_str(), is_attribute_nullable, std::nullopt);

  CHECK(!min.has_value());
  CHECK(!max.has_value());
}
164+
165+
// Min/Max over a freshly created (never written) array whose attribute is
// NOT nullable. Tagged [!shouldfail]: the checks describe the desired
// behaviour, not the current one.
TEST_CASE(
    "SC-54468 min/max aggregate on empty non-nullable attribute",
    "[regression][bug][sc-54468][!shouldfail]") {
  Context ctx;
  const std::string uri{"sc-54468-empty-min-max-not-nullable"};
  constexpr bool is_attribute_nullable = false;

  create_or_replace_array(ctx, uri.c_str(), is_attribute_nullable);

  // No subarray: aggregate over the whole (empty) array.
  const auto [min, max] =
      query_min_max(ctx, uri.c_str(), is_attribute_nullable, std::nullopt);

  // EXPECTATION:
  //   SQL's min/max yield NULL when the input holds no non-NULL values.
  //   The array is empty here, so there are no non-NULL values, and
  //   matching SQL (what most novice users would expect) means returning
  //   NULL.
  //
  // REALITY:
  //   That is not what happens; 0 is returned instead.
  //   Setting validity buffers on the Min/Max outputs is an error because
  //   the underlying attribute "a" is not nullable.
  CHECK(!min.has_value());
  CHECK(!max.has_value());
}
195+
196+
// Min/Max over a populated array with a nullable attribute, queried with a
// subarray that selects none of the written cells: both aggregates are
// expected to come back without a value.
//
// The test name and the tag string are separate TEST_CASE arguments. Without
// the comma between them the adjacent literals concatenate, leaving the test
// untagged with the tag text embedded in its name.
TEST_CASE(
    "SC-54468 min/max aggregate on nullable attribute, no results pass filters",
    "[regression][bug][sc-54468]") {
  Context ctx;
  std::string uri("sc-54468-filtered-min-max-nullable");

  const bool is_attribute_nullable = true;

  create_or_replace_array(ctx, uri.c_str(), is_attribute_nullable);
  write_array(ctx, uri.c_str(), is_attribute_nullable);

  // The written coordinates are 1..8, so the range [10, 20] selects no cells.
  const auto extrema = query_min_max(
      ctx, uri.c_str(), is_attribute_nullable, std::make_pair(10, 20));
  const auto min = extrema.first;
  const auto max = extrema.second;

  CHECK(!min.has_value());
  CHECK(!max.has_value());
}
216+
217+
// Min/Max over a populated array with a NON-nullable attribute, queried with
// a subarray that selects none of the written cells. Tagged [!shouldfail]:
// the checks describe the desired behaviour, not the current one.
//
// The test name and the tag string are separate TEST_CASE arguments. Without
// the comma between them the adjacent literals concatenate, so the
// [!shouldfail] tag is never applied and the tag text ends up in the name.
TEST_CASE(
    "SC-54468 min/max aggregate on non-nullable attribute, no results pass "
    "filters",
    "[regression][bug][sc-54468][!shouldfail]") {
  Context ctx;
  std::string uri("sc-54468-filtered-min-max-not-nullable");

  const bool is_attribute_nullable = false;

  create_or_replace_array(ctx, uri.c_str(), is_attribute_nullable);
  write_array(ctx, uri.c_str(), is_attribute_nullable);

  // The written coordinates are 1..8, so the range [10, 20] selects no cells.
  const auto extrema = query_min_max(
      ctx, uri.c_str(), is_attribute_nullable, std::make_pair(10, 20));
  const auto min = extrema.first;
  const auto max = extrema.second;

  // EXPECTATION:
  //   In SQL the min/max functions return NULL if there are no
  //   non-NULL values in the input.
  //   In this example the subarray filters out all cells, so there are no
  //   non-NULL values, so to be compliant with SQL (which is what
  //   most novice users would expect) we must return NULL.
  //
  // REALITY:
  //   We don't do that and return 0.
  //   It is an error to set validity buffers on the Min/Max
  //   operation output because the underlying attribute "a"
  //   is not nullable.
  CHECK(!min.has_value());
  CHECK(!max.has_value());
}

0 commit comments

Comments
 (0)