📝笔记:读写文本常用操作
source link: https://vincentqin.tech/posts/read-write-files/
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.
近期本人工作中频繁用到跨语言文本读写,本文结合实例介绍使用Matlab/Python/C++Matlab/Python/C++读写文件的常规操作,以便后续查阅。
语言:Matlab
importdata
data=importdata(data_path);
这种方式适合结构化数据的读取,如euroc_ground_truth.txt
文件,其内容如下(节选):
# timestamp,tx,ty,tz,qw,qx,qy,qz
1403636580838555648,4.688319,-1.786938,0.783338,0.534108,-0.153029,-0.827383,-0.082152
1403636580843555328,4.688177,-1.786770,0.787350,0.534640,-0.152990,-0.826976,-0.082863
1403636580848555520,4.688028,-1.786598,0.791382,0.535178,-0.152945,-0.826562,-0.083605
1403636580853555456,4.687878,-1.786421,0.795429,0.535715,-0.152884,-0.826146,-0.084391
1403636580858555648,4.687727,-1.786240,0.799484,0.536244,-0.152821,-0.825731,-0.085213
1403636580863555328,4.687579,-1.786059,0.803540,0.536768,-0.152768,-0.825314,-0.086049
1403636580868555520,4.687435,-1.785881,0.807594,0.537289,-0.152725,-0.824896,-0.086890
1403636580873555456,4.687295,-1.785709,0.811642,0.537804,-0.152680,-0.824481,-0.087725
1403636580878555648,4.687158,-1.785544,0.815682,0.538317,-0.152627,-0.824067,-0.088553
1403636580883555328,4.687025,-1.785390,0.819712,0.538828,-0.152566,-0.823657,-0.089371
1403636580888555520,4.686893,-1.785247,0.823734,0.539337,-0.152496,-0.823250,-0.090170
1403636580893555456,4.686763,-1.785116,0.827749,0.539846,-0.152427,-0.822845,-0.090943
可以通过下述方式导入上述数据:
filename = 'euroc_ground_truth.txt';
delimiterIn = ' ';
headerlinesIn = 1;
A = importdata(filename,delimiterIn,headerlinesIn);
txtread
data_pattern='%f,%f,%f,%f,%f,%f,%f,%f';
[timestamp, tx, ty, tz , qw , qx , qy , qz] = ...
textread(data_path,data_pattern,'delimiter', '\n','headerlines', 1);
textscan
上面几种方式读取到时间戳被自动转换成double类型,这不符合我们的预期,此时可以进一步指定格式读取数据,因此提到textscan函数。
fid = fopen(data_path);
tline = fgets(fid); %读取第一行无用数据
data_pattern = '%u64,%f,%f,%f,%f,%f,%f,%f';
data_cell = textscan(fid,data_pattern) ;
fclose(fid);
File操作
SFM得到的模型文件中images.txt
的格式如下所示(节选):
# Image list with two lines of data per image:
# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME
# POINTS2D[] as (X, Y, POINT3D_ID)
# Number of images: 31, mean observations per image: 780.80645161290317
1 0.884099 -0.141937 0.435855 0.0908508 0.456173 1.10575 -0.638635 12 IMG_4774.jpg
1664.41 2219.53 -1 3398.7 902.05 -1 2712.06 961.02 -1 1699.48 1238.63 -1 2879.4 2405.39 -1 2660.97 381.236 -1 3644.9 1452.33 -1 1140.93 1660.45 -1 3591.46 2353.79 -1 79.85 2666.42 -1 1140.46 2684.59 -1 2557.4 1692.47 -1 3016.67 1926.3 -1 454.761 2708.41 -1 1485.06 2722.92 -1 1492.38 1858.58 -1 1433.93 1420.58 -1 1669.55 1613.83 -1 2520.72 2673.94 -1 1046 455.571 -1 3259.22 1748.17 7126 2594.15 2494.27 -1 3093.89 876.393 -1 1660.63 1339.07 -1 832.575 1383.51 -1 149.518 2744.39 -1 3315.03 2473.41 7124 2088.65 620.031 -1 2078.78 966.732 -1 1401 1096.82 8134 3865.21 1484.07 -1 860.423 516.002 -1 3769.77 1725.51 -1 2808.02 1954.06 -1 551.916 2168.01 -1 213.281 2411.6 -1 2802.37 1706.15 -1 3677.91 2856.51 -1 874.193 2740.2 -1 858.867 2068.12 -1 3637.86 1157.71 -1 2475.61 2157.57 -1 2966.61 587.535 6881 3887.21 1767.42 -1 3182.04 1737.79 -1 841.659 1499.39 -1 843.027 1267.73 -1 1213.94 956.537 -1 3728.5 796.223 -1 3283.44 2532.15 -1 3835.46 1967.03 2538 328.52 2892.58 -1 734.794 2687.09 -1 714.173 2556.91 -1 1217.6 2491.75 -1 2723.55 2299.31 -1 873.913 2299 -1 3801.61 2217 -1 1276.33 2783.46 -1 3012.51 2168.26 -1 2890.92 2129.36 5648 3829.79 2093.15 -1 3061.59 1762.5 -1 3531.56 2084.99 -1 1246.53 1960.89 -1 3535.97 1792.58 6883 2109.03 934.487 -1 1288.39 2370.23 -1 892.76 2397.14 -1 763.122 2439.22 -1 958.999 2545.59 -1 870.587 2670.47 -1 337.499 2776.81 -1 2822.44 2829.3 -1 1445.31 2906.54 -1 3812.19 2458.6 5695 1454.09 2816.63 -1 1868.71 984.07 -1 3066.34 1223.46 -1 3864.86 1257.88 -1 3947.42 1345.05 -1 1520.25 1409.18 -1 1481.34 1526.1 -1 735.2 2137.66 -1 3127.57 2157.58 6504 687.177 2245.03 -1 559.242 2298.25 7777 1696.43 1351.53 -1 2880.04 1439.3 -1 38.7915 1549.01 -1 2890.66 1548.36 -1 47.8064 1656.89 -1 3620.52 1682.84 1716 3687.85 1720.03 -1 1722.31 1764.86 -1 254.117 2823.77 -1 181.262 2706.08 -1 160.79 2629.34 -1 324.581 2614.24 -1 2903.75 1780.66 -1 664.931 2602.83 -1 2437.42 2599.08 6885 3511.25 2305.26 -1 868.83 2488.58 -1 374.989 2360.49 -1 374.989 2360.49 -1 450.599 2331.43 -1 2801 2217.58 5656 3507.39 2202.53 7127 3204.93 2177.21 -1 2480.14 2100.03 -1 3218.11 2014.01 -1 3031.36 2002.24 5673 841.39 2006.93 -1 2657.5 1917.97 7549 2699.19 1831.71 -1 3401.06 2131.06 7396 1270.71 2606.61 -1 3595.47 1957.88 7474 3837.27 2881.06 -1 1115.83 2528.9 7779 2630.58 1986.87 6888 1955.06 1036.5 -1 2739.36 1913.07 -1 1778.16 1753.93 -1 1507.33 1745.1 -1 54.5862 1603.19 -1 3902.87 1535.15 -1 1612.35 1659.53 -1 2933.7 1483.71 -1 1604.73 1470.23 -1 3521.92 2931.27 6520 3400.6 2903.59 -1 3225.46 2900.27 6889 2857.93 2025.9 -1 2761.73 1404.53 -1 3757.05 2636.56 -1 941.592 2625.63 -1 3273.06 2553.15 -1 1316.81 2531.96 -1 2448.55 2008.76 -1 247.038 2505 7149 1603.3 1359.87 -1 528.52 2494.4 -1 87.2797 2497.3 -1 310.672 2465.84 6890 371.206 2431.08 -1
此时文本格式并不规则,此时我们采用fopen+fgets+strsplit
对数据进行读取,代码如下:
images = read_images(images.txt);
% 定义读取SFM图像文本函数
function images = read_images(path)
images = containers.Map('KeyType', 'int64', 'ValueType', 'any');
fid = fopen(path);
tline = fgets(fid); %读取一行数据
while ischar(tline) %判断是否是文件结尾
elems = strsplit(tline,' '); %分割以' '为间隔的字符,返回元胞数组
if numel(elems) < 4 || strcmp(elems(1), '#') %略过非数据行
tline = fgets(fid);
continue
end
if mod(images.Count, 10) == 0 %打印读取信息
fprintf('Reading image %d\n', images.length);
end
image = struct;
image.image_id = str2num(elems{1});
qw = str2double(elems{2});
qx = str2double(elems{3});
qy = str2double(elems{4});
qz = str2double(elems{5});
image.R = quat2rotmat([qw, qx, qy, qz]);
tx = str2double(elems{6});
ty = str2double(elems{7});
tz = str2double(elems{8});
image.t = [tx; ty; tz];
image.camera_id = str2num(elems{9});
image.name = elems{10};
tline = fgets(fid);
elems = sscanf(tline, '%f'); % 读取图像特点
elems = reshape(elems, [3, numel(elems) / 3]);
image.xys = elems(1:2,:)';
image.point3D_ids = elems(3,:)';
images(image.image_id) = image;
tline = fgets(fid);
end
fclose(fid);
end
%四元数转旋转矩阵
function rotmat = quat2rotmat(qvec)
rotmat = [1 - 2 * qvec(3).^2 - 2 * qvec(4).^2, ...
2 * qvec(2) * qvec(3) - 2 * qvec(1) * qvec(4), ...
2 * qvec(4) * qvec(2) + 2 * qvec(1) * qvec(3); ...
2 * qvec(2) * qvec(3) + 2 * qvec(1) * qvec(4), ...
1 - 2 * qvec(2).^2 - 2 * qvec(4).^2, ...
2 * qvec(3) * qvec(4) - 2 * qvec(1) * qvec(2); ...
2 * qvec(4) * qvec(2) - 2 * qvec(1) * qvec(3), ...
2 * qvec(3) * qvec(4) + 2 * qvec(1) * qvec(2), ...
1 - 2 * qvec(2).^2 - 2 * qvec(3).^2];
end
File操作
下面的write_ply.m
是写点云数据的方法,主要使用fprintf
。
function write_ply(path, xyz, normals, rgb)
% Write point cloud to PLY text file.
file = fopen(path, 'W');
fprintf(file,'ply\n');
fprintf(file,'format ascii 1.0\n');
fprintf(file,'element vertex %d\n',size(xyz,1));
fprintf(file,'property float x\n');
fprintf(file,'property float y\n');
fprintf(file,'property float z\n');
fprintf(file,'property float nx\n');
fprintf(file,'property float ny\n');
fprintf(file,'property float nz\n');
fprintf(file,'property uchar diffuse_red\n');
fprintf(file,'property uchar diffuse_green\n');
fprintf(file,'property uchar diffuse_blue\n');
fprintf(file,'end_header\n');
for i = 1:size(xyz, 1)
fprintf(file, '%f %f %f %f %f %f %d %d %d\n', ...
xyz(i,1), xyz(i,2), xyz(i,3), ...
normals(i,1), normals(i,2), normals(i,3), ...
uint8(rgb(i,1)), uint8(rgb(i,2)), uint8(rgb(i,3)));
end
fclose(file);
end
语言:c++
读的难点在于读取一行数据并实现字符的切分,可采用stringstream
或者getline
对字符切割。
下图是文件读写用到的函数继承关系以及对应的头文件,具体操作可参考这篇文章。
在fstream
类中,成员函数open(file_path,flag)
实现打开文件的操作,从而将数据流和文件进行关联,作为传入参数的文件打开模式标记(flag)可参看这个文章。
// 使用stringstream分割字符,注意引入头文件#include <sstream>
void read_images_text(const string& path)
{
std::ifstream file(path);//文件到流缓冲,即内存
std::string line;
std::string item;
long double qw, qx, qy, qz, tx, ty, tz;
unsigned long camera_id;
string image_name;
while (std::getline(file, line))
{
if (line.empty() || line[0] == '#') {
continue;
}
std::stringstream line_stream1(line);
line_stream1 >> item; qw = std::stold(item);
line_stream1 >> item; qx = std::stold(item);
line_stream1 >> item; qy = std::stold(item);
line_stream1 >> item; qz = std::stold(item);
line_stream1 >> item; tx = std::stold(item);
line_stream1 >> item; ty = std::stold(item);
line_stream1 >> item; tz = std::stold(item);
line_stream1 >> item; qw = std::stold(item);
line_stream1 >> item; camera_id = std::stoul(item);
line_stream1 >> item; image_name = item;
// POINTS2D
if (!std::getline(file, line)) {
break;
}
std::cout <<"processing : " << image_name << std::endl;
std::stringstream line_stream2(line);
long double x,y;
long long point3D_id;
if (!line.empty())
{
while (!line_stream2.eof())
{
line_stream2 >> item; x = std::stold(item);
line_stream2 >> item; y = std::stold(item);
line_stream2 >> item; point3D_id = std::stoll(item);
// cout <<x <<" " << y << " " << point3D_id<<" ";
}
}
}
file.close();
}
// 使用getline分割字符,需指定间隔字符
void read_images_text(const string& path)
{
std::ifstream file(path);
std::string line;
std::string item;
while (std::getline(file, line)) {
StringTrim(&line);
if (line.empty() || line[0] == '#') continue;
std::stringstream line_stream1(line);
// ID
std::getline(line_stream1, item, ' ');
// QVEC (qw, qx, qy, qz)
std::getline(line_stream1, item, ' ');long double qw = std::stold(item);
std::getline(line_stream1, item, ' ');long double qx = std::stold(item);
std::getline(line_stream1, item, ' ');long double qy = std::stold(item);
std::getline(line_stream1, item, ' ');long double qz = std::stold(item);
// TVEC
std::getline(line_stream1, item, ' ');long double tx = std::stold(item);
std::getline(line_stream1, item, ' ');long double ty = std::stold(item);
std::getline(line_stream1, item, ' ');long double tz = std::stold(item);
// CAMERA_ID
std::getline(line_stream1, item, ' ');unsigned long camera_id = std::stoul(item);
// NAME
std::getline(line_stream1, item, ' ');std::string image_name = item;
// POINTS2D
if (!std::getline(file, line)) break;
StringTrim(&line);
std::stringstream line_stream2(line);
if (!line.empty())
{
while (!line_stream2.eof())
{
std::getline(line_stream2, item, ' ');double x = std::stold(item);
std::getline(line_stream2, item, ' ');double y = std::stold(item);
std::getline(line_stream2, item, ' ');
long long point3D_id;
if (item == "-1") {
std::cout <<"2D point has no 3D points" <<std::endl;
point3D_id = -1;
}
else {
std::cout <<"2D point ok!!!" <<std::endl;
point3D_id = std::stoll(item);
}
}
}
}
file.close();
}
写文件比读文件容易得多,此处可用ofstream
进行写入文件。
// 此处给出colmap中WriteImagesText函数
void Reconstruction::WriteImagesText(const std::string& path) const {
std::ofstream file(path, std::ios::trunc);
CHECK(file.is_open()) << path;
// Ensure that we don't loose any precision by storing in text.
file.precision(17);
file << "# Image list with two lines of data per image:" << std::endl;
file << "# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, "
"NAME"
<< std::endl;
file << "# POINTS2D[] as (X, Y, POINT3D_ID)" << std::endl;
file << "# Number of images: " << reg_image_ids_.size()
<< ", mean observations per image: "
<< ComputeMeanObservationsPerRegImage() << std::endl;
for (const auto& image : images_) {
if (!image.second.IsRegistered()) {
continue;
}
std::ostringstream line; //接收一行数据,通过'<<'接收
std::string line_string;
line << image.first << " ";
// QVEC (qw, qx, qy, qz)
const Eigen::Vector4d normalized_qvec =
NormalizeQuaternion(image.second.Qvec());
line << normalized_qvec(0) << " ";
line << normalized_qvec(1) << " ";
line << normalized_qvec(2) << " ";
line << normalized_qvec(3) << " ";
// TVEC
line << image.second.Tvec(0) << " ";
line << image.second.Tvec(1) << " ";
line << image.second.Tvec(2) << " ";
line << image.second.CameraId() << " ";
line << image.second.Name();
file << line.str() << std::endl;
line.str("");
line.clear();
for (const Point2D& point2D : image.second.Points2D()) {
line << point2D.X() << " ";
line << point2D.Y() << " ";
if (point2D.HasPoint3D()) {
line << point2D.Point3DId() << " ";
} else {
line << -1 << " ";
}
}
line_string = line.str();
line_string = line_string.substr(0, line_string.size() - 1);
file << line_string << std::endl;
}
}
补充:读写数据时可能会涉及字符串与数字的转换,如下给出二者转换的方法。
//string to number
template <typename Type>
Type tonumber(string str)
{
std::stringstream ss;
ss << str;
Type ret = 0;
ss >> ret;
return ret;
}
//number to str
template <typename Type>
string tostring(Type number)
{
std::stringstream ss;
ss << number;
string ret;
ss >> ret;
return ret;
}
// 调用时:
string str = "1403636580838555648";
uint64_t number = 1403636580838555648;
num_convert = tonumber<uint64_t >(str);
str_convert = tostring<uint64_t >(number);
语言:Python
def read_images_text(path):
images = {}
with open(path, "r") as fid:
while True:
line = fid.readline()#读取一行数据
if not line:
break
line = line.strip() #移除字符串头尾指定的字符(默认为空格或换行符)或字符序列
if len(line) > 0 and line[0] != "#":
elems = line.split() #分割字符
image_id = int(elems[0])
qvec = np.array(tuple(map(float, elems[1:5])))
tvec = np.array(tuple(map(float, elems[5:8])))
camera_id = int(elems[8])
image_name = elems[9]
elems = fid.readline().split()
xys = np.column_stack([tuple(map(float, elems[0::3])),
tuple(map(float, elems[1::3]))])
point3D_ids = np.array(tuple(map(int, elems[2::3])))
images[image_id] = Image(
id=image_id, qvec=qvec, tvec=tvec,
camera_id=camera_id, name=image_name,
xys=xys, point3D_ids=point3D_ids)
return images
使用File(文件) open(file, mode='r')
方法,如下面写SFM matchs-pair.txt的方法:
pairs = []
for query, indices in zip(query_names, topk):
for i in indices:
pair = (query, db_names[i])
pairs.append(pair)
logging.info(f'Found {len(pairs)} pairs.')
with open(output, 'w') as f:
f.write('\n'.join(' '.join([i, j]) for i, j in pairs))
以及写SFM模型images.txt
的方法都可值得参考。
# 写
def write_images_text(images, path):
if len(images) == 0:
mean_observations = 0
else:
mean_observations = sum((len(img.point3D_ids) \
for _, img in images.items()))/len(images)
HEADER = "# Image list with two lines of data per image:\n"
"# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n"
"# POINTS2D[] as (X, Y, POINT3D_ID)\n"
"# Number of images: {}, mean observations per image: {}\n"\
.format(len(images),mean_observations)
with open(path, "w") as fid:
fid.write(HEADER)
for _, img in images.items():
#写图像ID/位姿/相机ID/图像名
image_header = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name]
first_line = " ".join(map(str, image_header))
fid.write(first_line + "\n")
#写2D点
points_strings = []
for xy, point3D_id in zip(img.xys, img.point3D_ids):
points_strings.append(" ".join(map(str, [*xy, point3D_id])))
fid.write(" ".join(points_strings) + "\n")
Recommend
About Joyk
Aggregate valuable and interesting links.
Joyk means Joy of geeK