Skip to content

Commit 2b4a0c4

Browse files
committed
Removing empty documents from index
1 parent b7dcf16 commit 2b4a0c4

1 file changed

Lines changed: 4 additions & 1 deletion

File tree

py_css/indexer/index.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,12 @@ def document_collection_dataframe() -> pd.DataFrame:
7777
pd.DataFrame
7878
A dataframe of the document collection.
7979
"""
80-
return pd.read_table(
80+
df = pd.read_table(
8181
DATA_PATH, names=["docno", "text"], header=0, dtype={"docno": str, "text": str}
8282
)
83+
# Remove the rows where text is empty or contains only whitespace
84+
df = df[df["text"].str.strip().astype(bool)]
85+
return df
8386

8487

8588
def add_query_description_to_documents(df: pd.DataFrame) -> pd.DataFrame:

0 commit comments

Comments
 (0)