voc_utils.py 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. import os
  18. import os.path as osp
  19. import re
  20. import random
  21. __all__ = ['create_list']
  22. def create_list(year_dirs, output_dir):
  23. """
  24. create following list:
  25. 1. trainval.txt
  26. 2. test.txt
  27. """
  28. trainval_list = []
  29. test_list = []
  30. for year_dir in year_dirs:
  31. trainval, test = _walk_voc_dir(year_dir, output_dir)
  32. trainval_list.extend(trainval)
  33. test_list.extend(test)
  34. random.shuffle(trainval_list)
  35. with open(osp.join(output_dir, 'trainval.txt'), 'w') as ftrainval:
  36. for item in trainval_list:
  37. ftrainval.write(item[0] + ' ' + item[1] + '\n')
  38. with open(osp.join(output_dir, 'test.txt'), 'w') as fval:
  39. ct = 0
  40. for item in test_list:
  41. ct += 1
  42. fval.write(item[0] + ' ' + item[1] + '\n')
  43. def _walk_voc_dir(year_dir, output_dir):
  44. filelist_dir = osp.join(year_dir, 'ImageSets/Main')
  45. annotation_dir = osp.join(year_dir, 'Annotations')
  46. img_dir = osp.join(year_dir, 'JPEGImages')
  47. trainval_list = []
  48. test_list = []
  49. added = set()
  50. img_dict = {}
  51. for img_file in os.listdir(img_dir):
  52. img_dict[img_file.split('.')[0]] = img_file
  53. for _, _, files in os.walk(filelist_dir):
  54. for fname in files:
  55. img_ann_list = []
  56. if re.match('trainval\.txt', fname):
  57. img_ann_list = trainval_list
  58. elif re.match('test\.txt', fname):
  59. img_ann_list = test_list
  60. else:
  61. continue
  62. fpath = osp.join(filelist_dir, fname)
  63. for line in open(fpath):
  64. name_prefix = line.strip().split()[0]
  65. if name_prefix in added:
  66. continue
  67. added.add(name_prefix)
  68. ann_path = osp.join(
  69. osp.relpath(annotation_dir, output_dir),
  70. name_prefix + '.xml')
  71. img_path = osp.join(
  72. osp.relpath(img_dir, output_dir), img_dict[name_prefix])
  73. img_ann_list.append((img_path, ann_path))
  74. return trainval_list, test_list