Skip to content

Commit a3b3562

Browse files
committed
Bugfix
1 parent 7d274d5 commit a3b3562

File tree

1 file changed

+14
-5
lines changed

1 file changed

+14
-5
lines changed

py_css/models/base.py

+14-5
Original file line number | Diff line number | Diff line change
@@ -206,36 +206,45 @@ def gen_context_docs(context: Context) -> Generator[Document, None, None]:
206206
for _, docs in reversed(context):
207207
if docs is not None:
208208
for doc in docs:
209-
if doc.docno != EMPTY_PLACEHOLDER_DOC.docno:
209+
if (
210+
doc.docno != EMPTY_PLACEHOLDER_DOC.docno
211+
and doc.content != ""
212+
):
210213
yield doc
214+
i: int = 1
215+
while True:
216+
yield Document(f"{i}", "")
217+
i += 1
211218

212219
# if in df there are multiple rows that have the same qid and docno, keep the one with the highest score. For the ones removed, add a row each with the EMPTY_PLACEHOLDER_DOC
213220
rank_size_per_qid: int = df.groupby("qid").size().max()
214-
print(f"Rank size per qid: {rank_size_per_qid}")
221+
logging.info(f"Rank size per qid: {rank_size_per_qid}")
215222
df = df.sort_values(["qid", "docno", "score"], ascending=[True, True, False])
216223
total_size = df.shape[0]
217224
df = df.drop_duplicates(subset=["qid", "docno"], keep="first")
218225
dropped_any: bool = total_size != df.shape[0]
219-
print(f"Dropped any: {dropped_any}")
226+
logging.info(f"Dropped any: {dropped_any}")
220227
df = df.reset_index(drop=True)
221228
df = self.pad_empty_documents(
222229
df, df["qid"].unique(), rank_size_per_qid, df[["qid", "query"]]
223230
)
224-
print(f"Number of max rank size per qid: {df.groupby('qid').size().max()}")
231+
logging.info(
232+
f"Number of max rank size per qid: {df.groupby('qid').size().max()}"
233+
)
225234
df = df.reset_index(drop=True)
226235
df = df.sort_values(["qid", "rank"], ascending=[True, True])
227236

228237
for query, context in context_list:
229238
# check if there is a row in the df with "qid" == query.query_id, where "docno" == EMPTY_PLACEHOLDER_DOC.docno
230239
# if yes, replace it with the top document from the context
240+
doc_gen = gen_context_docs(context)
231241
while True:
232242
if not df[
233243
(df["qid"] == query.query_id)
234244
& (df["docno"] == EMPTY_PLACEHOLDER_DOC.docno)
235245
].empty:
236246
# Check if gen_docs has next element
237247
doc: Document
238-
doc_gen = gen_context_docs(context)
239248
try:
240249
doc = next(doc_gen)
241250
while (

0 commit comments

Comments
 (0)