voc_utils.py 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. import os
  18. import os.path as osp
  19. import re
  20. import random
  21. __all__ = ['create_list']
  22. def create_list(devkit_dir, years, output_dir):
  23. """
  24. create following list:
  25. 1. trainval.txt
  26. 2. test.txt
  27. """
  28. trainval_list = []
  29. test_list = []
  30. for year in years:
  31. trainval, test = _walk_voc_dir(devkit_dir, year, output_dir)
  32. trainval_list.extend(trainval)
  33. test_list.extend(test)
  34. random.shuffle(trainval_list)
  35. with open(osp.join(output_dir, 'trainval.txt'), 'w') as ftrainval:
  36. for item in trainval_list:
  37. ftrainval.write(item[0] + ' ' + item[1] + '\n')
  38. with open(osp.join(output_dir, 'test.txt'), 'w') as fval:
  39. ct = 0
  40. for item in test_list:
  41. ct += 1
  42. fval.write(item[0] + ' ' + item[1] + '\n')
  43. def _get_voc_dir(devkit_dir, year, type):
  44. return osp.join(devkit_dir, 'VOC' + year, type)
  45. def _walk_voc_dir(devkit_dir, year, output_dir):
  46. filelist_dir = _get_voc_dir(devkit_dir, year, 'ImageSets/Main')
  47. annotation_dir = _get_voc_dir(devkit_dir, year, 'Annotations')
  48. img_dir = _get_voc_dir(devkit_dir, year, 'JPEGImages')
  49. trainval_list = []
  50. test_list = []
  51. added = set()
  52. for _, _, files in os.walk(filelist_dir):
  53. for fname in files:
  54. img_ann_list = []
  55. if re.match(r'[a-z]+_trainval\.txt', fname):
  56. img_ann_list = trainval_list
  57. elif re.match(r'[a-z]+_test\.txt', fname):
  58. img_ann_list = test_list
  59. else:
  60. continue
  61. fpath = osp.join(filelist_dir, fname)
  62. for line in open(fpath):
  63. name_prefix = line.strip().split()[0]
  64. if name_prefix in added:
  65. continue
  66. added.add(name_prefix)
  67. ann_path = osp.join(
  68. osp.relpath(annotation_dir, output_dir),
  69. name_prefix + '.xml')
  70. img_path = osp.join(
  71. osp.relpath(img_dir, output_dir), name_prefix + '.jpg')
  72. img_ann_list.append((img_path, ann_path))
  73. return trainval_list, test_list