This commit is contained in:
Jidong Xiao
2025-04-03 10:56:58 -04:00
committed by JamesFlare1212
parent 9fd32f72f6
commit 604ad8b5c2
41 changed files with 70227 additions and 1 deletions

View File

@@ -0,0 +1,29 @@
import random
# Function to read a file and sample a given number of lines
def sample_lines_from_file(file_path, num_samples):
with open(file_path, 'r', encoding='utf-8') as f:
lines = f.readlines() # Read all lines into a list
return random.sample(lines, min(num_samples, len(lines))) # Randomly pick lines, ensuring we don't pick more than available lines
# Paths to input text files
file1 = "file1.txt"
file2 = "file2.txt"
# Load and sample 3000 lines from each file
sampled_lines_file1 = sample_lines_from_file(file1, 3000)
sampled_lines_file2 = sample_lines_from_file(file2, 3000)
# Combine sampled lines from both files
combined_sampled_lines = sampled_lines_file1 + sampled_lines_file2
# Shuffle the combined lines to mix lines from both files
random.shuffle(combined_sampled_lines)
# Write the sampled lines to a new text file
output_file = "sampled_lines.txt"
with open(output_file, 'w', encoding='utf-8') as f:
f.writelines(combined_sampled_lines)
print(f"Sampled lines saved to {output_file}")